419 lines
15 KiB
Zig
419 lines
15 KiB
Zig
//! A library to check and extract values from integers based on a "bit string". Primarily intended for (my) emulator instruction decoding, but maybe someone else can find a use for it?
|
|
//!
|
|
//! ## Example
|
|
//! ```zig
|
|
//! const std = @import("std");
|
|
//!
|
|
//! test "doc test" {
|
|
//! const value: u8 = 0b10001011;
|
|
//!
|
|
//! try std.testing.expectEqual(true, match("1000_1011", value));
|
|
//! try std.testing.expectEqual(false, match("11111011", value));
|
|
//! try std.testing.expectEqual(true, match("1---1011", value));
|
|
//!
|
|
//! {
|
|
//! const ret = extract("1000aaaa", value);
|
|
//! try std.testing.expectEqual(@as(u4, 0b1011), ret.a);
|
|
//! }
|
|
//! {
|
|
//! const ret = extract("1aaa1aaa", value);
|
|
//! try std.testing.expectEqual(@as(u6, 0b000011), ret.a);
|
|
//! }
|
|
//! {
|
|
//! const ret = extract("1---abcd", value);
|
|
//! try std.testing.expectEqual(@as(u3, 0b1), ret.a);
|
|
//! try std.testing.expectEqual(@as(u3, 0b0), ret.b);
|
|
//! try std.testing.expectEqual(@as(u3, 0b1), ret.c);
|
|
//! try std.testing.expectEqual(@as(u3, 0b1), ret.d);
|
|
//! }
|
|
//! }
|
|
//! ```
|
|
//! ## Syntax
|
|
//! | Token | Meaning | Description
|
|
//! | ------- | --------- | -----------
|
|
//! | `0` | Clear bit | In the equivalent position, the value's bit must be cleared.
|
|
//! | `1` | Set bit | In the equivalent position, the value's bit must be set.
|
|
//! | `a..=z` | Variable | Given the 4-bit bit string, `"1aa0"`, the value `0b1010` would produce the variable `a` with the value `0b01`
|
|
//! | `-` | Ignored | In the equivalent position, the value's bit does not matter.
|
|
//! | `_` | Ignored* | Underscores are completely ignored during parsing, use to make bit strings easier to read e.g. `1111_1111`
|
|
//!
|
|
//! ## Notes
|
|
//! - This library does the majority of it's work at `comptime`. Due to this, you cannot create strings to match against at runtime.
|
|
//! - Variables do not have to be "sequential". This means the 5-bit bit string `"1aa0a"` with the value `0b10101` will produce the variable `a` with the value `0b011`.
|
|
|
|
const std = @import("std");
|
|
const Log2Int = std.math.Log2Int;
|
|
|
|
/// Test to see if a value matches the provided bit-string
|
|
pub fn match(comptime bit_string: []const u8, value: anytype) bool {
|
|
@setEvalBranchQuota(std.math.maxInt(u32)); // FIXME: bad practice
|
|
|
|
const ValT = @TypeOf(value);
|
|
comptime verify(ValT, bit_string);
|
|
|
|
const masks: struct { ValT, ValT } = comptime blk: {
|
|
const bit_count = @typeInfo(ValT).int.bits;
|
|
|
|
var set: ValT = 0;
|
|
var clr: ValT = 0;
|
|
var offset = 0;
|
|
|
|
// FIXME: I linear search like this 5 times across the entire lib. Consider structuring this like a regex lib (compiling a match)
|
|
for (bit_string, 0..) |char, i| {
|
|
switch (char) {
|
|
'0' => clr |= @as(ValT, 1) << @intCast((bit_count - 1 - (i - offset))),
|
|
'1' => set |= @as(ValT, 1) << @intCast((bit_count - 1 - (i - offset))),
|
|
'_' => offset += 1,
|
|
'a'...'z', '-' => continue,
|
|
else => @compileError("'" ++ [_]u8{char} ++ "' was unexpected when parsing bitstring"),
|
|
}
|
|
}
|
|
|
|
break :blk .{ set, clr };
|
|
};
|
|
|
|
const set_mask = masks[0];
|
|
const clr_mask = masks[1];
|
|
|
|
return (value & set_mask) == set_mask and (~value & clr_mask) == clr_mask;
|
|
}
|
|
|
|
test match {
|
|
// doc tests
|
|
try std.testing.expectEqual(true, match("1100", @as(u4, 0b1100)));
|
|
try std.testing.expectEqual(false, match("1100", @as(u4, 0b1110)));
|
|
|
|
try std.testing.expectEqual(true, match("1--0", @as(u4, 0b1010)));
|
|
try std.testing.expectEqual(true, match("1ab0", @as(u4, 0b1010)));
|
|
try std.testing.expectEqual(true, match("11_00", @as(u4, 0b1100)));
|
|
|
|
// other tests
|
|
try std.testing.expectEqual(true, match("11111111", @as(u8, 0b11111111)));
|
|
try std.testing.expectEqual(true, match("10110011", @as(u8, 0b10110011)));
|
|
try std.testing.expectEqual(true, match("101aaabb", @as(u8, 0b10110001)));
|
|
try std.testing.expectEqual(true, match("abcdefgh", @as(u8, 0b10110101)));
|
|
try std.testing.expectEqual(true, match("aaa---11", @as(u8, 0b01011111)));
|
|
try std.testing.expectEqual(true, match("1a0b1c0d", @as(u8, 0b10011101)));
|
|
try std.testing.expectEqual(false, match("aaa---11", @as(u8, 0b01011110)));
|
|
|
|
try std.testing.expectEqual(true, match("1111_1111", @as(u8, 0b11111111)));
|
|
try std.testing.expectEqual(true, match("________11111111", @as(u8, 0b11111111)));
|
|
try std.testing.expectEqual(true, match("11111111________", @as(u8, 0b11111111)));
|
|
|
|
try std.testing.expectEqual(true, match(
|
|
"11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111",
|
|
@as(u64, 0xFFFF_FFFF_FFFF_FFFF),
|
|
));
|
|
}
|
|
|
|
/// Extracts the variables (defined in the bit string) from a value.
|
|
///
|
|
/// Note: In Debug and ReleaseSafe builds, there's a runtime assert that
|
|
/// ensures that the value matches against the bit string.
|
|
pub fn extract(comptime bit_string: []const u8, value: anytype) Bitfield(bit_string) {
|
|
const builtin = @import("builtin");
|
|
|
|
const ValT = @TypeOf(value);
|
|
const ReturnT = Bitfield(bit_string);
|
|
const bmi2 = switch (builtin.target.cpu.arch) {
|
|
.x86_64 => std.Target.x86.featureSetHas(builtin.cpu.features, .bmi2),
|
|
else => false,
|
|
};
|
|
comptime verify(ValT, bit_string);
|
|
|
|
var ret: ReturnT = undefined;
|
|
|
|
inline for (@typeInfo(ReturnT).@"struct".fields) |field| {
|
|
@field(ret, field.name) = blk: {
|
|
var masked_val: ValT = 0;
|
|
var offset: usize = 0; // FIXME(URGENT): this whole block should be happening at comptime...
|
|
|
|
for (bit_string, 0..) |char, i| {
|
|
const rev = @typeInfo(ValT).int.bits - 1 - (i - offset);
|
|
|
|
switch (char) {
|
|
'_' => offset += 1,
|
|
else => if (char == field.name[0]) {
|
|
masked_val |= @as(ValT, 1) << @intCast(rev); // no penalty
|
|
},
|
|
}
|
|
}
|
|
|
|
const PextT = if (@typeInfo(ValT).int.bits > 32) u64 else u32;
|
|
const use_hw = bmi2 and !@inComptime();
|
|
|
|
break :blk @truncate(if (use_hw) pext.hw(PextT, value, masked_val) else pext.sw(PextT, value, masked_val));
|
|
};
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
test extract {
|
|
// doc tests
|
|
{
|
|
const ret = extract("aaaa", @as(u4, 0b1001));
|
|
try std.testing.expectEqual(@as(u4, 0b1001), ret.a);
|
|
}
|
|
{
|
|
const ret = extract("abcd", @as(u4, 0b1001));
|
|
try std.testing.expectEqual(@as(u1, 0b1), ret.a);
|
|
try std.testing.expectEqual(@as(u1, 0b0), ret.b);
|
|
try std.testing.expectEqual(@as(u1, 0b0), ret.c);
|
|
try std.testing.expectEqual(@as(u1, 0b1), ret.d);
|
|
}
|
|
{
|
|
const ret = extract("a0ab", @as(u4, 0b1001));
|
|
try std.testing.expectEqual(@as(u2, 0b10), ret.a);
|
|
try std.testing.expectEqual(@as(u1, 0b01), ret.b);
|
|
}
|
|
{
|
|
const ret = extract("-a-a", @as(u4, 0b1001));
|
|
try std.testing.expectEqual(@as(u2, 0b01), ret.a);
|
|
}
|
|
{
|
|
const ret = extract("aa_aa", @as(u4, 0b1001));
|
|
try std.testing.expectEqual(@as(u4, 0b1001), ret.a);
|
|
}
|
|
|
|
// other tests
|
|
{
|
|
const ret = extract("10aaabbc", @as(u8, 0b10110011));
|
|
try std.testing.expectEqual(@as(u3, 0b110), ret.a);
|
|
try std.testing.expectEqual(@as(u2, 0b01), ret.b);
|
|
try std.testing.expectEqual(@as(u1, 0b1), ret.c);
|
|
}
|
|
{
|
|
const ret = extract("1111abababab1010", @as(u16, 0b1111_1110_1101_1010));
|
|
try std.testing.expectEqual(@as(u4, 0b1110), ret.a);
|
|
try std.testing.expectEqual(@as(u4, 0b1011), ret.b);
|
|
}
|
|
{
|
|
const ret = extract("--------ddddrrrr", @as(u16, 0b0000_0010_1011_0110));
|
|
try std.testing.expectEqual(@as(u4, 0b1011), ret.d);
|
|
try std.testing.expectEqual(@as(u4, 0b0110), ret.r);
|
|
}
|
|
{
|
|
const ret = extract("--------", @as(u8, 0b00000000));
|
|
const T = @TypeOf(ret);
|
|
|
|
try std.testing.expectEqual(@as(usize, 0), @typeInfo(T).@"struct".fields.len);
|
|
}
|
|
{
|
|
const ret = extract("00000000", @as(u8, 0b00000000));
|
|
const T = @TypeOf(ret);
|
|
|
|
try std.testing.expectEqual(@as(usize, 0), @typeInfo(T).@"struct".fields.len);
|
|
}
|
|
{
|
|
const ret = extract("0-0-0-0-", @as(u8, 0b01010101));
|
|
const T = @TypeOf(ret);
|
|
|
|
try std.testing.expectEqual(@as(usize, 0), @typeInfo(T).@"struct".fields.len);
|
|
}
|
|
|
|
{
|
|
const ret = extract(
|
|
"11111111_ssssssss_11111111_dddddddd_11111111_vvvvvvvv_11111111_xxxxxxxx",
|
|
@as(u64, 0xFF55_FF77_FF33_FF00),
|
|
);
|
|
|
|
try std.testing.expectEqual(@as(u8, 0x55), ret.s);
|
|
try std.testing.expectEqual(@as(u8, 0x77), ret.d);
|
|
try std.testing.expectEqual(@as(u8, 0x33), ret.v);
|
|
try std.testing.expectEqual(@as(u8, 0x00), ret.x);
|
|
}
|
|
}
|
|
|
|
pub fn matchExtract(comptime bit_string: []const u8, value: anytype) ?Bitfield(bit_string) {
|
|
if (!match(bit_string, value)) return null;
|
|
return extract(bit_string, value);
|
|
}
|
|
|
|
/// Parses a bit string and reifies a struct that will contain fields that correspond to the variables present in the bit string.
|
|
///
|
|
/// TODO: I will probably rename this type
|
|
pub fn Bitfield(comptime bit_string: []const u8) type {
|
|
const StructField = std.builtin.Type.StructField;
|
|
|
|
const alphabet_set: u26 = tmp: {
|
|
var bit_set: u26 = 0;
|
|
|
|
for (bit_string) |char| {
|
|
switch (char) {
|
|
'a'...'z' => |c| bit_set |= @as(u26, 1) << @intCast(c - 'a'),
|
|
else => continue,
|
|
}
|
|
}
|
|
|
|
break :tmp bit_set;
|
|
};
|
|
|
|
const field_len = @popCount(alphabet_set);
|
|
|
|
const fields = blk: {
|
|
var tmp: [field_len]StructField = undefined;
|
|
|
|
const Tmp = struct { bits: u8 = 0, char: ?u8 = null };
|
|
var things: [field_len]Tmp = [_]Tmp{.{}} ** field_len; // TODO: rename this lol
|
|
|
|
for (bit_string) |char| {
|
|
switch (char) {
|
|
'a'...'z' => |c| {
|
|
const bit_in_set = @as(u26, 1) << @intCast(c - 'a');
|
|
const pos = @popCount(alphabet_set & ~(bit_in_set - 1)) - 1; // TODO: investigate this off-by-one
|
|
|
|
things[pos].bits += 1;
|
|
things[pos].char = c;
|
|
},
|
|
'1', '0', '-', '_' => continue,
|
|
else => @compileError("unexpected char '" ++ [_]u8{char} ++ "' when parsing bit string"),
|
|
}
|
|
}
|
|
|
|
for (things, &tmp) |th, *field| {
|
|
const FieldInt = @Type(.{ .int = .{ .signedness = .unsigned, .bits = th.bits } });
|
|
|
|
field.* = .{
|
|
.name = &.{th.char.?},
|
|
.type = FieldInt,
|
|
.default_value_ptr = null,
|
|
.is_comptime = false,
|
|
.alignment = @alignOf(FieldInt),
|
|
};
|
|
}
|
|
|
|
break :blk tmp;
|
|
};
|
|
|
|
return @Type(.{ .@"struct" = .{
|
|
.layout = .auto,
|
|
.fields = &fields,
|
|
.decls = &.{},
|
|
.is_tuple = false,
|
|
} });
|
|
}
|
|
|
|
fn verify(comptime T: type, comptime bit_string: []const u8) void {
|
|
const info = @typeInfo(T);
|
|
|
|
std.debug.assert(info != .comptime_int);
|
|
std.debug.assert(info.int.signedness == .unsigned);
|
|
std.debug.assert(info.int.bits <= 64); // x86 PEXT u32 and u64 operands only
|
|
|
|
var underscore_count = 0;
|
|
for (bit_string) |c| {
|
|
if (c == '_') underscore_count += 1;
|
|
}
|
|
|
|
std.debug.assert((bit_string.len - underscore_count) == info.int.bits);
|
|
}
|
|
|
|
const pext = struct {
|
|
fn hw(comptime T: type, value: T, mask: T) T {
|
|
return switch (T) {
|
|
u32 => asm ("pextl %[mask], %[value], %[ret]"
|
|
: [ret] "=r" (-> T),
|
|
: [value] "r" (value),
|
|
[mask] "r" (mask),
|
|
),
|
|
u64 => asm ("pextq %[mask], %[value], %[ret]"
|
|
: [ret] "=r" (-> T),
|
|
: [value] "r" (value),
|
|
[mask] "r" (mask),
|
|
),
|
|
else => @compileError("pext is sunsupported for " ++ @typeName(T) ++ "."),
|
|
};
|
|
}
|
|
|
|
inline fn sw(comptime T: type, value: T, mask: T) T {
|
|
// FIXME: will be replaced in the future by https://github.com/ziglang/zig/issues/14995 (hopefully?)
|
|
|
|
return switch (T) {
|
|
u32, u64 => {
|
|
// code source: https://stackoverflow.com/questions/41720249/detecting-matching-bits-in-c
|
|
// TODO: rewrite more in generic/idiomatic zig
|
|
const log2_bits = @typeInfo(Log2Int(T)).int.bits;
|
|
|
|
var val: T = value & mask; // immediately clear irrelevant bits
|
|
var msk: T = mask;
|
|
|
|
var mk: T = ~msk << 1; // count 0s to the right
|
|
|
|
inline for (0..log2_bits) |i| {
|
|
var mp: T = mk ^ (mk << 1);
|
|
inline for (1..log2_bits) |j| {
|
|
mp = mp ^ (mp << (1 << j)); // parallel suffix
|
|
}
|
|
|
|
const mv = (mp & msk); // bits to move
|
|
msk = ((msk ^ mv) | (mv >> (1 << i))); // compress mask
|
|
|
|
const t = (val & mv);
|
|
val = ((val ^ t) | (t >> (1 << i))); // compress val
|
|
|
|
mk &= ~mp;
|
|
}
|
|
|
|
return val;
|
|
},
|
|
else => @compileError("pext is sunsupported for " ++ @typeName(T) ++ "."),
|
|
};
|
|
}
|
|
|
|
test pext {
|
|
const builtin = @import("builtin");
|
|
|
|
try std.testing.expectEqual(@as(u32, 0x0001_2567), pext.sw(u32, 0x12345678, 0xFF00FFF0));
|
|
try std.testing.expectEqual(@as(u64, 0x0001_2567), pext.sw(u64, 0x12345678, 0xFF00FFF0));
|
|
|
|
switch (builtin.cpu.arch) {
|
|
.x86_64 => if (std.Target.x86.featureSetHas(builtin.cpu.features, .bmi2)) {
|
|
var rand_impl = std.Random.DefaultPrng.init(0xBAADF00D_DEADCAFE);
|
|
|
|
for (0..100) |_| {
|
|
const value = rand_impl.random().int(u32);
|
|
const mask = rand_impl.random().int(u32);
|
|
|
|
try std.testing.expectEqual(pext.hw(u32, value, mask), pext.sw(u32, value, mask));
|
|
}
|
|
|
|
for (0..100) |_| {
|
|
const value = rand_impl.random().int(u64);
|
|
const mask = rand_impl.random().int(u64);
|
|
|
|
try std.testing.expectEqual(pext.hw(u64, value, mask), pext.sw(u64, value, mask));
|
|
}
|
|
},
|
|
else => return error.SkipZigTest,
|
|
}
|
|
|
|
// example values from: https://en.wikipedia.org/w/index.php?title=X86_Bit_manipulation_instruction_set&oldid=1170426748
|
|
try std.testing.expectEqual(@as(u32, 0x0001_2567), pext.sw(u32, 0x12345678, 0xFF00FFF0));
|
|
}
|
|
};
|
|
|
|
test "doc test" {
|
|
const value: u8 = 0b10001011;
|
|
|
|
try std.testing.expectEqual(true, match("1000_1011", value));
|
|
try std.testing.expectEqual(false, match("11111011", value));
|
|
try std.testing.expectEqual(true, match("1---1011", value));
|
|
|
|
{
|
|
const ret = extract("1000aaaa", value);
|
|
try std.testing.expectEqual(@as(u4, 0b1011), ret.a);
|
|
}
|
|
{
|
|
const ret = extract("1aaa1aaa", value);
|
|
try std.testing.expectEqual(@as(u6, 0b000011), ret.a);
|
|
}
|
|
{
|
|
const ret = extract("1---abcd", value);
|
|
try std.testing.expectEqual(@as(u3, 0b1), ret.a);
|
|
try std.testing.expectEqual(@as(u3, 0b0), ret.b);
|
|
try std.testing.expectEqual(@as(u3, 0b1), ret.c);
|
|
try std.testing.expectEqual(@as(u3, 0b1), ret.d);
|
|
}
|
|
}
|