feat: initial commit

This commit is contained in:
Rekai Nyangadzayi Musuka 2023-08-16 22:13:28 -05:00
commit 6328c370de
6 changed files with 492 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
zig-out/
zig-cache/

21
LICENSE Normal file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) [2023] [Rekai Nyangadzayi Musuka]
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

44
README.md Normal file
View File

@ -0,0 +1,44 @@
# Bit String
A library to check and extract values from integers based on a "bit string". Primarily intended for (my) emulator instruction decoding, but maybe someone else can find a use for it?
## Example
```zig
const std = @import("std");
test "doc test" {
const value: u8 = 0b10001011;
try std.testing.expectEqual(true, match("10001011", value));
try std.testing.expectEqual(false, match("11111011", value));
try std.testing.expectEqual(true, match("1---1011", value));
{
const ret = extract("1000aaaa", value);
try std.testing.expectEqual(@as(u4, 0b1011), ret.a);
}
{
const ret = extract("1aaa1aaa", value);
try std.testing.expectEqual(@as(u6, 0b000011), ret.a);
}
{
const ret = extract("1---abcd", value);
try std.testing.expectEqual(@as(u3, 0b1), ret.a);
try std.testing.expectEqual(@as(u3, 0b0), ret.b);
try std.testing.expectEqual(@as(u3, 0b1), ret.c);
try std.testing.expectEqual(@as(u3, 0b1), ret.d);
}
}
```
## Syntax
| Token | Meaning | Description
| ------- | --------- | -----------
| `0` | Unset bit | In the equivalent position, the value's bit must be set.
| `1` | Set bit | In the equivalent position, the value's bit must be set.
| `a..=z` | Variable | Given the 4-bit bit string, `"1aa0"`, the value `0b1010` would produce the variable `a` with the value `0b01`
| `-` | Ignored | In the equivalent position, the value's bit does not matter.
## Notes
- This library does the majority of it's work at `comptime`. Due to this, you cannot create strings to match against at runtime.
- Variables do not have to be "sequential". This means the 5-bit bit string `"1aa0a"` with the value `0b10101` will produce the variable `a` with the value `0b011`.

49
build.zig Normal file
View File

@ -0,0 +1,49 @@
const std = @import("std");
// Although this function looks imperative, note that its job is to
// declaratively construct a build graph that will be executed by an external
// runner.
pub fn build(b: *std.Build) void {
// Standard target options allows the person running `zig build` to choose
// what target to build for. Here we do not override the defaults, which
// means any target is allowed, and the default is native. Other options
// for restricting supported target set are available.
const target = b.standardTargetOptions(.{});
// Standard optimization options allow the person running `zig build` to select
// between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall. Here we do not
// set a preferred release mode, allowing the user to decide how to optimize.
const optimize = b.standardOptimizeOption(.{});
_ = b.addModule("bit-string", .{ .source_file = .{ .path = "src/lib.zig" } });
const lib = b.addStaticLibrary(.{
.name = "bit-string",
// In this case the main source file is merely a path, however, in more
// complicated build scripts, this could be a generated file.
.root_source_file = .{ .path = "src/lib.zig" },
.target = target,
.optimize = optimize,
});
// This declares intent for the library to be installed into the standard
// location when the user invokes the "install" step (the default step when
// running `zig build`).
b.installArtifact(lib);
// Creates a step for unit testing. This only builds the test executable
// but does not run it.
const lib_tests = b.addTest(.{
.root_source_file = .{ .path = "src/test.zig" },
.target = target,
.optimize = optimize,
});
const run_main_tests = b.addRunArtifact(lib_tests);
// This creates a build step. It will be visible in the `zig build --help` menu,
// and can be selected like this: `zig build test`
// This will evaluate the `test` step rather than the default, which is "install".
const test_step = b.step("test", "Run library tests");
test_step.dependOn(&run_main_tests.step);
}

367
src/lib.zig Normal file
View File

@ -0,0 +1,367 @@
//! A library to check and extract values from integers based on a "bit string". Primarily intended for (my) emulator instruction decoding, but maybe someone else can find a use for it?
//!
//! ## Example
//! ```zig
//! const std = @import("std");
//! test "doc test" {
//! const value: u8 = 0b10001011;
//!
//! try std.testing.expectEqual(true, match("10001011", value));
//! try std.testing.expectEqual(false, match("11111011", value));
//! try std.testing.expectEqual(true, match("1---1011", value));
//!
//! {
//! const ret = extract("1000aaaa", value);
//! try std.testing.expectEqual(@as(u4, 0b1011), ret.a);
//! }
//! {
//! const ret = extract("1aaa1aaa", value);
//! try std.testing.expectEqual(@as(u6, 0b000011), ret.a);
//! }
//! {
//! const ret = extract("1---abcd", value);
//! try std.testing.expectEqual(@as(u3, 0b1), ret.a);
//! try std.testing.expectEqual(@as(u3, 0b0), ret.b);
//! try std.testing.expectEqual(@as(u3, 0b1), ret.c);
//! try std.testing.expectEqual(@as(u3, 0b1), ret.d);
//! }
//! }
//! ```
//! ## Syntax
//! | Token | Meaning | Description
//! | ------- | --------- | -----------
//! | `0` | Unset bit | In the equivalent position, the value's bit must be set.
//! | `1` | Set bit | In the equivalent position, the value's bit must be set.
//! | `a..=z` | Variable | Given the 4-bit bit string, `"1aa0"`, the value `0b1010` would produce the variable `a` with the value `0b01`
//! | `-` | Ignored | In the equivalent position, the value's bit does not matter.
//!
//! ## Notes
//! - This library does the majority of it's work at `comptime`. Due to this, you cannot create strings to match against at runtime.
//! - Variables do not have to be "sequential". This means the 5-bit bit string `"1aa0a"` with the value `0b10101` will produce the variable `a` with the value `0b011`.
const std = @import("std");
const Log2Int = std.math.Log2Int;
/// Test to see if a value matches the provided bit-string
///
/// ### Example
/// ```zig
/// match("1100", @as(u4, 0b1100)) // true
/// match("1100", @as(u4, 0b1110)) // false
/// match("1--0", @as(u4, 0b1010)) // true
/// match("1ab0", @as(u4, 0b1010)) // true
/// ```
pub fn match(comptime bit_string: []const u8, value: anytype) bool {
const ValT = @TypeOf(value);
comptime verify(ValT, bit_string);
const masks: struct { ValT, ValT } = comptime blk: {
const bit_count = @typeInfo(ValT).Int.bits;
var set: ValT = 0;
var clr: ValT = 0;
// FIXME: I linear search bit_string 4 separate times. Consider doing a single search and compromizing on memory + stateless API? (imagine a "regex compile"-like API)
for (bit_string, 0..) |char, i| {
switch (char) {
'0' => clr |= @as(ValT, 1) << @intCast(bit_count - 1 - i),
'1' => set |= @as(ValT, 1) << @intCast(bit_count - 1 - i),
'a'...'z', '-' => continue,
else => @compileError("'" ++ [_]u8{char} ++ "' was unexpected when parsing bitstring"),
}
}
break :blk .{ set, clr };
};
const set_mask = masks[0];
const clr_mask = masks[1];
return (value & set_mask) == set_mask and (~value & clr_mask) == clr_mask;
}
test "match" {
// doc tests
try std.testing.expectEqual(true, match("1100", @as(u4, 0b1100))); // true
try std.testing.expectEqual(false, match("1100", @as(u4, 0b1110))); // false
try std.testing.expectEqual(true, match("1--0", @as(u4, 0b1010))); // true
try std.testing.expectEqual(true, match("1ab0", @as(u4, 0b1010))); // true
// other tests
try std.testing.expectEqual(true, match("11111111", @as(u8, 0b11111111)));
try std.testing.expectEqual(true, match("10110011", @as(u8, 0b10110011)));
try std.testing.expectEqual(true, match("101aaabb", @as(u8, 0b10110001)));
try std.testing.expectEqual(true, match("abcdefgh", @as(u8, 0b10110101)));
try std.testing.expectEqual(true, match("aaa---11", @as(u8, 0b01011111)));
try std.testing.expectEqual(true, match("1a0b1c0d", @as(u8, 0b10011101)));
try std.testing.expectEqual(false, match("aaa---11", @as(u8, 0b01011110)));
}
/// Extracts the variables (defined in the bit string) from a value.
///
/// ### Examples
/// ```
/// const ret = extract("aaaa", @as(u4, 0b1001)); // ret.a == 0b1001
/// const ret = extract("abcd", @as(u4, 0b1001)); // ret.a == 0b1, ret.b == 0b0, ret.c == 0b0, ret.d == 0b1
/// const ret = extract("a0ab", @as(u4, 0b1001)); // ret.a == 0b10, ret.b == 0b1
/// const ret = extract("-a-a", @as(u4, 0b1001)); // ret.a == 0b01
/// ```
///
/// Note: In Debug and ReleaseSafe builds, there's a runtime assert that
/// ensures that the value matches against the bit string.
pub fn extract(comptime bit_string: []const u8, value: anytype) Bitfield(bit_string) {
const builtin = @import("builtin");
const ValT = @TypeOf(value);
const ReturnT = Bitfield(bit_string);
const bmi2 = switch (builtin.target.cpu.arch) {
.x86_64 => std.Target.x86.featureSetHas(builtin.cpu.features, .bmi2),
else => false,
};
comptime verify(ValT, bit_string);
std.debug.assert(match(bit_string, value)); // prevents branchless impl in ReleaseSafe
var ret: ReturnT = undefined;
inline for (@typeInfo(ReturnT).Struct.fields) |field| {
@field(ret, field.name) = blk: {
var masked_val: ValT = 0;
for (bit_string, 0..) |char, i| {
const rev = @typeInfo(ValT).Int.bits - 1 - i;
if (char == field.name[0]) masked_val |= @as(ValT, 1) << @intCast(rev); // no penalty
}
// TODO: decide at compile time if we're calling the 32-bit or 64-bit version of `PEXT`
// invariant: the bit count in the field we're writing to and the
// # of bits we happened to find in this linear search are identical
//
// we're confident in this because it's guaranteed to be the same bit_string,
// and it's the same linear search. If you're reading this double check that this is still the case lol
break :blk @truncate(if (bmi2) pext.hardware(u32, value, masked_val) else pext.software(u32, value, masked_val));
};
}
return ret;
}
test "extract" {
// doc tests
{
const ret = extract("aaaa", @as(u4, 0b1001));
try std.testing.expectEqual(@as(u4, 0b1001), ret.a);
}
{
const ret = extract("abcd", @as(u4, 0b1001));
try std.testing.expectEqual(@as(u1, 0b1), ret.a);
try std.testing.expectEqual(@as(u1, 0b0), ret.b);
try std.testing.expectEqual(@as(u1, 0b0), ret.c);
try std.testing.expectEqual(@as(u1, 0b1), ret.d);
}
{
const ret = extract("a0ab", @as(u4, 0b1001));
try std.testing.expectEqual(@as(u2, 0b10), ret.a);
try std.testing.expectEqual(@as(u1, 0b01), ret.b);
}
{
const ret = extract("-a-a", @as(u4, 0b1001));
try std.testing.expectEqual(@as(u2, 0b01), ret.a);
}
// other tests
{
const ret = extract("10aaabbc", @as(u8, 0b10110011));
try std.testing.expectEqual(@as(u3, 0b110), ret.a);
try std.testing.expectEqual(@as(u2, 0b01), ret.b);
try std.testing.expectEqual(@as(u1, 0b1), ret.c);
}
{
const ret = extract("1111abababab1010", @as(u16, 0b1111_1110_1101_1010));
try std.testing.expectEqual(@as(u4, 0b1110), ret.a);
try std.testing.expectEqual(@as(u4, 0b1011), ret.b);
}
}
/// Parses a bit string and reifies a struct that will contain fields that correspond to the variables present in the bit string.
///
///
/// Note: If it weren't for the return type of `extract()`, this type would be a private implementation detail
///
/// TODO: I will probably rename this type
pub fn Bitfield(comptime bit_string: []const u8) type {
const StructField = std.builtin.Type.StructField;
const alphabet_set: u26 = tmp: {
var bit_set: u26 = 0;
for (bit_string) |char| {
switch (char) {
'a'...'z' => |c| bit_set |= @as(u26, 1) << @intCast(c - 'a'),
else => continue,
}
}
break :tmp bit_set;
};
const field_len = @popCount(alphabet_set);
const fields = blk: {
var tmp: [field_len]StructField = undefined;
const Tmp = struct { bits: u8 = 0, char: ?u8 = null };
var things: [field_len]Tmp = [_]Tmp{.{}} ** field_len;
for (bit_string) |char| {
switch (char) {
'a'...'z' => |c| {
const bit_in_set = @as(u26, 1) << @intCast(c - 'a');
const pos = @ctz(alphabet_set & ~(bit_in_set - 1));
things[pos].bits += 1;
things[pos].char = c;
},
'1', '0', '-' => continue,
else => @compileError("error when parsing bitset string"),
}
}
for (things, &tmp) |th, *field| {
const FieldInt = @Type(.{ .Int = .{ .signedness = .unsigned, .bits = th.bits } });
field.* = .{
.name = &.{th.char.?},
.type = FieldInt,
.default_value = null,
.is_comptime = false,
.alignment = @alignOf(FieldInt),
};
}
break :blk tmp;
};
return @Type(.{ .Struct = .{
.layout = .Auto,
.fields = &fields,
.decls = &.{},
.is_tuple = false,
} });
}
fn verify(comptime T: type, comptime bit_string: []const u8) void {
const info = @typeInfo(T);
// FIXME: remove the need for this
if (info.Int.bits > 32) @compileError("TODO: 64-bit `PEXT` software implementation");
std.debug.assert(info != .ComptimeInt);
std.debug.assert(info.Int.signedness == .unsigned);
std.debug.assert(info.Int.bits <= 64); // x86 PEXT u32 and u64 operands only
std.debug.assert(bit_string.len == info.Int.bits); // TODO: Support Underscores?
}
const pext = struct {
fn hardware(comptime T: type, value: T, mask: T) T {
return switch (T) {
u32 => asm ("pextl %[mask], %[value], %[ret]"
: [ret] "=r" (-> T),
: [value] "r" (value),
[mask] "r" (mask),
),
u64 => asm ("pextq %[mask], %[value], %[ret]"
: [ret] "=r" (-> T),
: [value] "r" (value),
[mask] "r" (mask),
),
else => @compileError("pext is sunsupported for " ++ @typeName(T) ++ "."),
};
}
// why we need this: https://github.com/ziglang/zig/issues/14995 (ideally compiler-rt implements this for us)
fn software(comptime T: type, value: T, mask: T) T {
return switch (T) {
u32 => {
// TODO: Looks (and is) like C code :pensive:
// code source: https://stackoverflow.com/questions/41720249/detecting-matching-bits-in-c
var _value: T = value;
var _mask: T = mask;
_value &= _mask;
var mk: T = ~_mask << 1;
var mp: T = undefined;
var mv: T = undefined;
var t: T = undefined;
inline for (0..@typeInfo(u5).Int.bits) |i| {
mp = mk ^ (mk << 1); // parallel suffix
mp = mp ^ (mp << 2);
mp = mp ^ (mp << 4);
mp = mp ^ (mp << 8);
mp = mp ^ (mp << 16);
mv = (mp & _mask); // bits to move
_mask = ((_mask ^ mv) | (mv >> (1 << i))); // compress _mask
t = (_value & mv);
_value = ((_value ^ t) | (t >> (1 << i))); // compress _value
mk &= ~mp;
}
return _value;
},
u64 => @compileError("TODO: find/write branchless software impl of `PEXT` for 64-bit values"),
else => @compileError("pext is sunsupported for " ++ @typeName(T) ++ "."),
};
}
test "pext" {
const builtin = @import("builtin");
switch (builtin.cpu.arch) {
.x86_64 => if (std.Target.x86.featureSetHas(builtin.cpu.features, .bmi2)) {
try std.testing.expectEqual(@as(u32, 0x0001_2567), pext.hardware(u32, 0x12345678, 0xFF00FFF0));
try std.testing.expectEqual(@as(u64, 0x0001_2567), pext.hardware(u64, 0x12345678, 0xFF00FFF0));
// random tests
// TODO: when implemented, test 64-bit fallback `PEXT` as well
var rand_impl = std.rand.DefaultPrng.init(0xBAADF00D_DEADCAFE);
for (0..100) |_| {
const value = rand_impl.random().int(u32);
const mask = rand_impl.random().int(u32);
try std.testing.expectEqual(pext.hardware(u32, value, mask), pext.software(u32, value, mask));
}
},
else => {},
}
// example values from: https://en.wikipedia.org/w/index.php?title=X86_Bit_manipulation_instruction_set&oldid=1170426748
try std.testing.expectEqual(@as(u32, 0x0001_2567), pext.software(u32, 0x12345678, 0xFF00FFF0));
}
};
test "doc test" {
const value: u8 = 0b10001011;
try std.testing.expectEqual(true, match("10001011", value));
try std.testing.expectEqual(false, match("11111011", value));
try std.testing.expectEqual(true, match("1---1011", value));
{
const ret = extract("1000aaaa", value);
try std.testing.expectEqual(@as(u4, 0b1011), ret.a);
}
{
const ret = extract("1aaa1aaa", value);
try std.testing.expectEqual(@as(u6, 0b000011), ret.a);
}
{
const ret = extract("1---abcd", value);
try std.testing.expectEqual(@as(u3, 0b1), ret.a);
try std.testing.expectEqual(@as(u3, 0b0), ret.b);
try std.testing.expectEqual(@as(u3, 0b1), ret.c);
try std.testing.expectEqual(@as(u3, 0b1), ret.d);
}
}

9
src/test.zig Normal file
View File

@ -0,0 +1,9 @@
const std = @import("std");
comptime {
_ = @import("lib.zig");
}
test {
std.testing.refAllDecls(@This());
}