bit-string/src/lib.zig

//! A library to check and extract values from integers based on a "bit string". Primarily intended for (my) emulator instruction decoding, but maybe someone else can find a use for it?
//!
//! ## Example
//! ```zig
//! const std = @import("std");
//! test "doc test" {
//!     const value: u8 = 0b10001011;
//!
//!     try std.testing.expectEqual(true, match("10001011", value));
//!     try std.testing.expectEqual(false, match("11111011", value));
//!     try std.testing.expectEqual(true, match("1---1011", value));
//!
//!     {
//!         const ret = extract("1000aaaa", value);
//!         try std.testing.expectEqual(@as(u4, 0b1011), ret.a);
//!     }
//!     {
//!         const ret = extract("1aaa1aaa", value);
//!         try std.testing.expectEqual(@as(u6, 0b000011), ret.a);
//!     }
//!     {
//!         const ret = extract("1---abcd", value);
//!         try std.testing.expectEqual(@as(u1, 0b1), ret.a);
//!         try std.testing.expectEqual(@as(u1, 0b0), ret.b);
//!         try std.testing.expectEqual(@as(u1, 0b1), ret.c);
//!         try std.testing.expectEqual(@as(u1, 0b1), ret.d);
//!     }
//! }
//! ```
//! ## Syntax
//! |  Token  |  Meaning  | Description
//! | ------- | --------- | -----------
//! | `0`     | Unset bit | In the equivalent position, the value's bit must be unset.
//! | `1`     | Set bit   | In the equivalent position, the value's bit must be set.
//! | `a..=z` | Variable  | Given the 4-bit bit string, `"1aa0"`, the value `0b1010` would produce the variable `a` with the value `0b01`
//! | `-`     | Ignored   | In the equivalent position, the value's bit does not matter.
//!
//! ## Notes
//! - This library does the majority of its work at `comptime`. Due to this, you cannot create strings to match against at runtime.
//! - Variables do not have to be "sequential". This means the 5-bit bit string `"1aa0a"` with the value `0b10101` will produce the variable `a` with the value `0b011`.

const std = @import("std");
const Log2Int = std.math.Log2Int;

/// Reports whether `value` agrees with every fixed bit of `bit_string`.
///
/// The string is read most-significant bit first. A `1` requires the bit at
/// that position to be set, a `0` requires it to be clear, and variables
/// (`a`-`z`) as well as `-` accept either state.
///
/// ### Example
/// ```zig
/// match("1100", @as(u4, 0b1100)) // true
/// match("1100", @as(u4, 0b1110)) // false
/// match("1--0", @as(u4, 0b1010)) // true
/// match("1ab0", @as(u4, 0b1010)) // true
/// ```
pub fn match(comptime bit_string: []const u8, value: anytype) bool {
    const T = @TypeOf(value);
    comptime verify(T, bit_string);

    const Masks = struct { ones: T, zeros: T };

    // FIXME: the bit string is scanned linearly several times across this library.
    // Consider a single scan that trades memory and the stateless API for it
    // (imagine a "regex compile"-like API).
    const masks: Masks = comptime blk: {
        const bit_count = @typeInfo(T).Int.bits;
        var acc: Masks = .{ .ones = 0, .zeros = 0 };

        for (bit_string, 0..) |char, idx| {
            switch (char) {
                // the first character describes the most significant bit
                '1' => acc.ones |= @as(T, 1) << @intCast(bit_count - 1 - idx),
                '0' => acc.zeros |= @as(T, 1) << @intCast(bit_count - 1 - idx),
                '-', 'a'...'z' => {},
                else => @compileError("'" ++ [_]u8{char} ++ "' was unexpected when parsing bitstring"),
            }
        }

        break :blk acc;
    };

    // every `1` position must be set and no `0` position may be set
    return (value & masks.ones) == masks.ones and (value & masks.zeros) == 0;
}

test "match" {
    const expectEqual = std.testing.expectEqual;

    // the examples from the doc comment of `match`
    try expectEqual(true, match("1100", @as(u4, 0b1100)));
    try expectEqual(false, match("1100", @as(u4, 0b1110)));
    try expectEqual(true, match("1--0", @as(u4, 0b1010)));
    try expectEqual(true, match("1ab0", @as(u4, 0b1010)));

    // fully fixed patterns
    try expectEqual(true, match("11111111", @as(u8, 0b11111111)));
    try expectEqual(true, match("10110011", @as(u8, 0b10110011)));

    // patterns mixing fixed bits, variables and ignored bits
    try expectEqual(true, match("101aaabb", @as(u8, 0b10110001)));
    try expectEqual(true, match("abcdefgh", @as(u8, 0b10110101)));
    try expectEqual(true, match("aaa---11", @as(u8, 0b01011111)));
    try expectEqual(true, match("1a0b1c0d", @as(u8, 0b10011101)));
    try expectEqual(false, match("aaa---11", @as(u8, 0b01011110)));
}

/// Extracts the variables (defined in the bit string) from a value.
///
/// Every letter `a`-`z` in `bit_string` becomes a field of the returned struct.
/// The field is an unsigned integer as wide as the number of occurrences of that
/// letter, holding the bits of `value` found at those positions with their
/// relative order preserved.
///
/// ### Examples
/// ```zig
/// const ret = extract("aaaa", @as(u4, 0b1001)); // ret.a == 0b1001
/// const ret = extract("abcd", @as(u4, 0b1001)); // ret.a == 0b1, ret.b == 0b0, ret.c == 0b0, ret.d == 0b1
/// const ret = extract("a0ab", @as(u4, 0b1001)); // ret.a == 0b10, ret.b == 0b1
/// const ret = extract("-a-a", @as(u4, 0b1001)); // ret.a == 0b01
/// ```
///
/// Note: In Debug and ReleaseSafe builds, there's a runtime assert that
/// ensures that the value matches against the bit string.
pub fn extract(comptime bit_string: []const u8, value: anytype) Bitfield(bit_string) {
    const builtin = @import("builtin");

    // One comptime scan of the bit string is done per variable below; raise the
    // quota so strings with many variables do not run into the default limit.
    @setEvalBranchQuota(2_000);

    const ValT = @TypeOf(value);
    const ReturnT = Bitfield(bit_string);
    comptime verify(ValT, bit_string);

    // use the `PEXT` instruction only when the build target advertises BMI2
    const bmi2 = switch (builtin.target.cpu.arch) {
        .x86_64 => std.Target.x86.featureSetHas(builtin.cpu.features, .bmi2),
        else => false,
    };

    std.debug.assert(match(bit_string, value)); // prevents branchless impl in ReleaseSafe

    var ret: ReturnT = undefined;

    inline for (@typeInfo(ReturnT).Struct.fields) |field| {
        // The mask only depends on the bit string and the field name, so build it
        // at compile time instead of re-scanning the string on every call.
        const mask: ValT = comptime blk: {
            const bit_count = @typeInfo(ValT).Int.bits;
            var bits: ValT = 0;

            for (bit_string, 0..) |char, i| {
                // the first character describes the most significant bit
                if (char == field.name[0]) bits |= @as(ValT, 1) << @intCast(bit_count - 1 - i);
            }

            break :blk bits;
        };

        // TODO: decide at compile time if we're calling the 32-bit or 64-bit version of `PEXT`
        const gathered = if (bmi2) pext.hardware(u32, value, mask) else pext.software(u32, value, mask);

        // invariant: the field's bit count equals the number of bits set in `mask`.
        // `Bitfield` counts the occurrences of the letter in the same bit string that
        // the mask is built from. If you're reading this, double check that this is
        // still the case.
        @field(ret, field.name) = @truncate(gathered);
    }

    return ret;
}

test "extract" {
    const expectEqual = std.testing.expectEqual;

    // the examples from the doc comment of `extract`
    {
        const fields = extract("aaaa", @as(u4, 0b1001));
        try expectEqual(@as(u4, 0b1001), fields.a);
    }
    {
        const fields = extract("abcd", @as(u4, 0b1001));
        try expectEqual(@as(u1, 0b1), fields.a);
        try expectEqual(@as(u1, 0b0), fields.b);
        try expectEqual(@as(u1, 0b0), fields.c);
        try expectEqual(@as(u1, 0b1), fields.d);
    }
    {
        const fields = extract("a0ab", @as(u4, 0b1001));
        try expectEqual(@as(u2, 0b10), fields.a);
        try expectEqual(@as(u1, 0b01), fields.b);
    }
    {
        const fields = extract("-a-a", @as(u4, 0b1001));
        try expectEqual(@as(u2, 0b01), fields.a);
    }

    // wider values and interleaved variables
    {
        const fields = extract("10aaabbc", @as(u8, 0b10110011));
        try expectEqual(@as(u3, 0b110), fields.a);
        try expectEqual(@as(u2, 0b01), fields.b);
        try expectEqual(@as(u1, 0b1), fields.c);
    }
    {
        const fields = extract("1111abababab1010", @as(u16, 0b1111_1110_1101_1010));
        try expectEqual(@as(u4, 0b1110), fields.a);
        try expectEqual(@as(u4, 0b1011), fields.b);
    }
}

/// Parses a bit string and reifies a struct that will contain fields that correspond to the variables present in the bit string.
///
/// Each distinct letter `a`-`z` becomes one field named after that letter. The
/// field type is an unsigned integer whose bit width is the number of times the
/// letter occurs in `bit_string`. The letters used do not have to start at `a`
/// or be contiguous (`"1xx0y--z"` is fine).
///
/// Any character other than `a`-`z`, `0`, `1` or `-` is a compile error.
///
/// Note: If it weren't for the return type of `extract()`, this type would be a private implementation detail
///
/// TODO: I will probably rename this type
pub fn Bitfield(comptime bit_string: []const u8) type {
    const StructField = std.builtin.Type.StructField;

    // bit N is set when the letter `'a' + N` occurs in the bit string
    const alphabet_set: u26 = tmp: {
        var bit_set: u26 = 0;

        for (bit_string) |char| {
            switch (char) {
                'a'...'z' => |c| bit_set |= @as(u26, 1) << @intCast(c - 'a'),
                else => continue,
            }
        }

        break :tmp bit_set;
    };

    // one struct field per distinct letter
    const field_len = @popCount(alphabet_set);

    const fields = blk: {
        var tmp: [field_len]StructField = undefined;

        const Tmp = struct { bits: u8 = 0, char: ?u8 = null };
        var things: [field_len]Tmp = [_]Tmp{.{}} ** field_len;

        for (bit_string) |char| {
            switch (char) {
                'a'...'z' => |c| {
                    const bit_in_set = @as(u26, 1) << @intCast(c - 'a');

                    // The slot of a letter is its rank among the letters that are
                    // present, i.e. how many present letters sort before it. Using
                    // the raw alphabet offset instead would index out of bounds
                    // whenever the letters are not exactly `a`, `b`, `c`, ...
                    const pos = @popCount(alphabet_set & (bit_in_set - 1));

                    things[pos].bits += 1;
                    things[pos].char = c;
                },
                '1', '0', '-' => continue,
                else => @compileError("error when parsing bitset string"),
            }
        }

        for (things, &tmp) |th, *field| {
            const FieldInt = @Type(.{ .Int = .{ .signedness = .unsigned, .bits = th.bits } });

            field.* = .{
                .name = &.{th.char.?},
                .type = FieldInt,
                .default_value = null,
                .is_comptime = false,
                .alignment = @alignOf(FieldInt),
            };
        }

        break :blk tmp;
    };

    return @Type(.{ .Struct = .{
        .layout = .Auto,
        .fields = &fields,
        .decls = &.{},
        .is_tuple = false,
    } });
}

/// Compile-time validation of the value type `T` against `bit_string`.
///
/// Emits a compile error unless `T` is an unsigned integer of at most 32 bits
/// and `bit_string` has exactly one character per bit of `T`.
fn verify(comptime T: type, comptime bit_string: []const u8) void {
    const info = @typeInfo(T);

    // This has to be checked before touching `info.Int`; it also rejects `comptime_int`.
    if (info != .Int) {
        @compileError("expected an unsigned fixed-width integer, found `" ++ @typeName(T) ++ "`");
    }

    if (info.Int.signedness != .unsigned) {
        @compileError("expected an unsigned integer, found `" ++ @typeName(T) ++ "`");
    }

    // FIXME: remove the need for this
    // (x86 `PEXT` takes u32 and u64 operands only, so 64 bits is the eventual ceiling)
    if (info.Int.bits > 32) @compileError("TODO: 64-bit `PEXT` software implementation");

    // TODO: Support Underscores?
    if (bit_string.len != info.Int.bits) {
        @compileError("bit string \"" ++ bit_string ++ "\" must have exactly one character per bit of `" ++ @typeName(T) ++ "`");
    }
}

/// Parallel bit extract (`PEXT`): gathers the bits of `value` selected by `mask`
/// into the low bits of the result, preserving their relative order. The
/// remaining high bits of the result are zero.
const pext = struct {
    /// Issues the x86 `PEXT` instruction through inline assembly.
    ///
    /// `T` must be `u32` (`pextl`) or `u64` (`pextq`). Callers in this file only
    /// use it after checking that the target is x86_64 with the BMI2 feature.
    fn hardware(comptime T: type, value: T, mask: T) T {
        return switch (T) {
            u32 => asm ("pextl %[mask], %[value], %[ret]"
                : [ret] "=r" (-> T),
                : [value] "r" (value),
                  [mask] "r" (mask),
            ),
            u64 => asm ("pextq %[mask], %[value], %[ret]"
                : [ret] "=r" (-> T),
                : [value] "r" (value),
                  [mask] "r" (mask),
            ),
            else => @compileError("pext is unsupported for " ++ @typeName(T) ++ "."),
        };
    }

    /// Branchless software fallback for `PEXT`, for `u32` and `u64`.
    ///
    /// why we need this: https://github.com/ziglang/zig/issues/14995 (ideally compiler-rt implements this for us)
    fn software(comptime T: type, value: T, mask: T) T {
        switch (T) {
            u32, u64 => {},
            else => @compileError("pext is unsupported for " ++ @typeName(T) ++ "."),
        }

        // code source: https://stackoverflow.com/questions/41720249/detecting-matching-bits-in-c

        // log2 of the bit width: 5 rounds for u32, 6 rounds for u64
        const rounds = @typeInfo(std.math.Log2Int(T)).Int.bits;

        var remaining_mask: T = mask;
        var result: T = value & remaining_mask;
        var mk: T = ~remaining_mask << 1; // positions with a cleared mask bit directly to their right

        inline for (0..rounds) |i| {
            // parallel suffix: XOR-fold `mk` over shifts of 1, 2, 4, ... up to half the width
            var mp: T = mk;
            inline for (0..rounds) |j| mp ^= mp << (1 << j);

            const mv = mp & remaining_mask; // bits to move in this round
            remaining_mask = (remaining_mask ^ mv) | (mv >> (1 << i)); // compress the mask

            const moved = result & mv;
            result = (result ^ moved) | (moved >> (1 << i)); // compress the value

            mk &= ~mp;
        }

        return result;
    }

    test "pext" {
        const builtin = @import("builtin");

        switch (builtin.cpu.arch) {
            .x86_64 => if (std.Target.x86.featureSetHas(builtin.cpu.features, .bmi2)) {
                try std.testing.expectEqual(@as(u32, 0x0001_2567), pext.hardware(u32, 0x12345678, 0xFF00FFF0));
                try std.testing.expectEqual(@as(u64, 0x0001_2567), pext.hardware(u64, 0x12345678, 0xFF00FFF0));

                // random tests: the software fallback must agree with the instruction
                var rand_impl = std.rand.DefaultPrng.init(0xBAADF00D_DEADCAFE);
                for (0..100) |_| {
                    const value = rand_impl.random().int(u32);
                    const mask = rand_impl.random().int(u32);

                    try std.testing.expectEqual(pext.hardware(u32, value, mask), pext.software(u32, value, mask));
                }
                for (0..100) |_| {
                    const value = rand_impl.random().int(u64);
                    const mask = rand_impl.random().int(u64);

                    try std.testing.expectEqual(pext.hardware(u64, value, mask), pext.software(u64, value, mask));
                }
            },
            else => {},
        }

        // example values from: https://en.wikipedia.org/w/index.php?title=X86_Bit_manipulation_instruction_set&oldid=1170426748
        try std.testing.expectEqual(@as(u32, 0x0001_2567), pext.software(u32, 0x12345678, 0xFF00FFF0));
        try std.testing.expectEqual(@as(u64, 0x0001_2567), pext.software(u64, 0x12345678, 0xFF00FFF0));

        // selected bits that sit in the upper half of a 64-bit value
        try std.testing.expectEqual(@as(u64, 0xF0), pext.software(u64, 0xFFFF_0000_0000_0000, 0xF000_0000_0000_000F));
    }
};

// Executable copy of the example in the module documentation.
test "doc test" {
    const value: u8 = 0b10001011;

    try std.testing.expectEqual(true, match("10001011", value));
    try std.testing.expectEqual(false, match("11111011", value));
    try std.testing.expectEqual(true, match("1---1011", value));

    {
        const ret = extract("1000aaaa", value);
        try std.testing.expectEqual(@as(u4, 0b1011), ret.a);
    }
    {
        const ret = extract("1aaa1aaa", value);
        try std.testing.expectEqual(@as(u6, 0b000011), ret.a);
    }
    {
        // each variable occurs exactly once, so every field is a single bit wide
        const ret = extract("1---abcd", value);
        try std.testing.expectEqual(@as(u1, 0b1), ret.a);
        try std.testing.expectEqual(@as(u1, 0b0), ret.b);
        try std.testing.expectEqual(@as(u1, 0b1), ret.c);
        try std.testing.expectEqual(@as(u1, 0b1), ret.d);
    }
}