hamt/src/trie.zig
Rekai Musuka 3ee7dd0370 fix: ensure that rhs of shifts are generic
mistakenly left an @intCast that implied a u5 (which only works when
Digest == u32)
2023-04-20 23:34:06 -05:00

355 lines
13 KiB
Zig

const std = @import("std");
const Allocator = std.mem.Allocator;
const Log2Int = std.math.Log2Int;
/// Hash Array Mapped Trie
/// https://idea.popcount.org/2012-07-25-introduction-to-hamt/
///
/// Returns a trie type mapping `K` to `V`. `Context` supplies:
///   * `Digest`: an unsigned integer type with a power-of-two bit width (>= 2),
///   * `hash(K) Digest`,
///   * `eql(K, K) bool`.
///
/// The trie stores no allocator; every allocating or freeing call takes one and
/// the caller must pass the same allocator each time. Keys and values are stored
/// by value, so anything they point to (e.g. the bytes behind a slice key) is not
/// copied.
pub fn HashArrayMappedTrie(comptime K: type, comptime V: type, comptime Context: type) type {
    comptime {
        verify(K, Context);
        // Both the root array and every bitmap have exactly one slot per digest
        // bit, while a hash chunk can take any value in [0, 2^chunk_bits). Those
        // two ranges only coincide for power-of-two widths.
        const digest_bits = @typeInfo(Context.Digest).Int.bits;
        if (digest_bits < 2 or (digest_bits & (digest_bits - 1)) != 0) {
            @compileError("Context.Digest must have a power-of-two bit width of at least 2, however it was " ++ @typeName(Context.Digest));
        }
    }

    return struct {
        const Self = @This();

        const Digest = Context.Digest; // as in Hash Code or Hash Digest
        /// Number of slots in the root array and number of bits in every bitmap.
        const table_size = @typeInfo(Digest).Int.bits;
        /// Number of digest bits consumed per trie level.
        const chunk_bits: u16 = @typeInfo(Log2Int(Digest)).Int.bits;

        /// One optional heap-allocated node per possible value of the first hash chunk.
        root: []?*Node,

        const Node = union(enum) { kv: Pair, table: Table };
        /// `map` has one bit per possible chunk value; `base` points at
        /// `@popCount(map)` nodes stored in ascending bit order.
        const Table = struct { map: Digest = 0, base: [*]Node };
        pub const Pair = struct { key: K, value: V };

        /// Allocates the root array with every slot empty.
        /// Release it with `deinit`, passing the same allocator.
        pub fn init(allocator: Allocator) !Self {
            // TODO: Add ability to have a larger root node (for quicker lookup times)
            const root = try allocator.alloc(?*Node, table_size);
            for (root) |*slot| slot.* = null;
            return Self{ .root = root };
        }

        /// Frees every sub-table, every root node and the root array itself.
        /// The trie must not be used afterwards.
        pub fn deinit(self: *Self, allocator: Allocator) void {
            for (self.root) |maybe_node| {
                const node = maybe_node orelse continue;
                _deinit(allocator, node);
                allocator.destroy(node);
            }
            allocator.free(self.root);
            self.* = undefined;
        }

        /// Frees the sub-table arrays reachable from `node`. `node` itself is not
        /// freed: it lives either in its parent's array or in a root allocation,
        /// both of which are released by the caller.
        fn _deinit(allocator: Allocator, node: *Node) void {
            switch (node.*) {
                .kv => {}, // will be deallocated by caller
                .table => |table| {
                    const entries = table.base[0..@popCount(table.map)]; // Array Mapped Table
                    for (entries) |*child| _deinit(allocator, child);
                    allocator.free(entries);
                },
            }
        }

        /// Returns the `chunk_bits`-wide chunk of `hash` that ends `offset` bits
        /// below the most significant bit. `offset` must not exceed `table_size`.
        fn tableIdx(hash: Digest, offset: u16) Log2Int(Digest) {
            const shift_amt = @intCast(Log2Int(Digest), table_size - offset);
            return @truncate(Log2Int(Digest), hash >> shift_amt);
        }

        /// Offset of the chunk used one level deeper. It saturates at `table_size`,
        /// so the last level may re-read a few bits of the previous chunk when
        /// `table_size` is not a multiple of `chunk_bits`; this lets every digest
        /// bit take part in the lookup.
        fn nextOffset(offset: u16) u16 {
            const next = offset + chunk_bits;
            return if (next > table_size) table_size else next;
        }

        /// Looks `key` up and returns a copy of its pair, or null when absent.
        pub fn search(self: *Self, key: K) ?Pair {
            const hash = Context.hash(key);

            // most significant chunk_bits bits from hash
            var hash_offset: u16 = chunk_bits;
            var current: *Node = self.root[tableIdx(hash, hash_offset)] orelse return null;

            while (true) {
                switch (current.*) {
                    .table => |table| {
                        const mask = @as(Digest, 1) << tableIdx(hash, hash_offset);
                        if (table.map & mask == 0) return null; // hash table entry is empty

                        // Entries are packed: the slot position is the number of
                        // occupied bits below ours.
                        current = &table.base[@popCount(table.map & (mask - 1))];
                        hash_offset = nextOffset(hash_offset);
                    },
                    .kv => |pair| return if (Context.eql(pair.key, key)) pair else null,
                }
            }
        }

        /// Inserts `key` with `value`. If `key` is already present (per
        /// `Context.eql`) its value is overwritten in place.
        ///
        /// Errors: allocator failure, or `error.HashCollision` when a different
        /// key with an identical digest is already stored (such keys cannot be
        /// told apart by the trie).
        pub fn insert(self: *Self, allocator: Allocator, key: K, value: V) !void {
            const hash = Context.hash(key);

            // most significant chunk_bits bits from hash
            var hash_offset: u16 = chunk_bits;
            const root_idx = tableIdx(hash, hash_offset);

            var current: *Node = self.root[root_idx] orelse {
                // node in root table is empty, place the KV here
                const node = try allocator.create(Node);
                node.* = .{ .kv = .{ .key = key, .value = value } };
                self.root[root_idx] = node;
                return;
            };

            // NOTE: hash_offset is not advanced after the root lookup, so the
            // first table below a root slot is indexed by the same chunk as the
            // root itself. `search` mirrors this.
            while (true) {
                const mask = @as(Digest, 1) << tableIdx(hash, hash_offset);

                switch (current.*) {
                    .table => |*table| {
                        const pos: usize = @popCount(table.map & (mask - 1));

                        if (table.map & mask != 0) {
                            // Slot is occupied, go one layer deeper.
                            current = &table.base[pos];
                            hash_offset = nextOffset(hash_offset);
                            continue;
                        }

                        // Slot is empty: grow the packed array by one and splice
                        // the new pair in at its sorted position.
                        const old = table.base[0..@popCount(table.map)];
                        const grown = try allocator.alloc(Node, old.len + 1);
                        for (grown[0..pos], old[0..pos]) |*dst, src| dst.* = src;
                        grown[pos] = .{ .kv = .{ .key = key, .value = value } };
                        for (grown[pos + 1 ..], old[pos..]) |*dst, src| dst.* = src;

                        allocator.free(old);
                        table.base = grown.ptr;
                        table.map |= mask;
                        return; // inserted an element into the Trie
                    },
                    .kv => |*pair| {
                        if (Context.eql(pair.key, key)) {
                            // Same key: update instead of descending forever.
                            pair.value = value;
                            return;
                        }

                        // Copy out before `current.*` (which `pair` points into) is overwritten.
                        const prev = pair.*;
                        const prev_mask = @as(Digest, 1) << tableIdx(Context.hash(prev.key), hash_offset);

                        if (prev_mask != mask) {
                            // there are no collisions between the two hash subsets.
                            const pairs = try allocator.alloc(Node, 2);
                            const map = mask | prev_mask;
                            pairs[@popCount(map & (prev_mask - 1))] = .{ .kv = prev };
                            pairs[@popCount(map & (mask - 1))] = .{ .kv = .{ .key = key, .value = value } };
                            current.* = .{ .table = .{ .map = map, .base = pairs.ptr } };
                            return;
                        }

                        // Every chunk, including the last one, matched: the
                        // digests are identical and no deeper level exists.
                        if (hash_offset >= table_size) return error.HashCollision;

                        // Chunks collide at this level: push the existing pair one
                        // level down; the next iteration descends into the new table.
                        const single = try allocator.alloc(Node, 1);
                        single[0] = .{ .kv = prev };
                        current.* = .{ .table = .{ .map = mask, .base = single.ptr } };
                    },
                }
            }
        }

        /// Dumps the whole trie to stdout through a buffered writer, one root
        /// slot per line followed by its nested entries.
        pub fn print(self: *Self) !void {
            const stdout = std.io.getStdOut().writer();
            var buffered = std.io.bufferedWriter(stdout);
            const w = buffered.writer();

            for (self.root, 0..) |maybe_node, i| {
                try w.print("{:0>2}: ", .{i});
                if (maybe_node) |node| {
                    try _print(w, node, 1);
                } else {
                    try w.print("null\n", .{});
                }
            }

            try buffered.flush();
        }

        /// Writes `node` to `w`; entries of a table are written on their own
        /// lines, each prefixed by `depth` spaces. Keys are formatted with `{s}`.
        fn _print(w: anytype, node: *Node, depth: u16) !void {
            switch (node.*) {
                .kv => |pair| {
                    try w.print(".{{ .key = \"{s}\", .value = {} }}\n", .{ pair.key, pair.value });
                },
                .table => |table| {
                    try w.print(".{{ .map = 0x{X:0>8}, .ptr = {*} }}\n", .{ table.map, table.base });
                    for (0..@popCount(table.map)) |i| {
                        for (0..depth) |_| try w.print(" ", .{});
                        try w.print("{:0>2}: ", .{i});
                        try _print(w, &table.base[i], depth + 1);
                    }
                },
            }
        }
    };
}
/// Validates at compile time that `Context` can drive a trie keyed by `K`.
///
/// `Context` must be a struct, union or enum that declares:
///   * `Digest`: an unsigned integer type,
///   * `hash`: a function taking one `K` and returning `Digest`,
///   * `eql`: a function taking two `K` and returning `bool`.
///
/// Any violation, including a missing declaration, is reported through
/// `@compileError`; on success the function does nothing.
pub fn verify(comptime K: type, comptime Context: type) void {
    // FIXME: Context should be able to be a pointer to a type
    switch (@typeInfo(Context)) {
        .Struct, .Union, .Enum => {},
        .Pointer => @compileError("Pointer trie contexts have yet to be implemented"),
        else => @compileError("Trie context must be a type with Digest, hash(" ++ @typeName(K) ++ ") Digest, and eql(" ++ @typeName(K) ++ ", " ++ @typeName(K) ++ ") bool"),
    }

    // Report absent declarations up front; otherwise the failure surfaces later
    // as a generic "no member named ..." error far from the cause.
    const missing = "Trie context " ++ @typeName(Context) ++ " is missing the declaration ";
    if (!@hasDecl(Context, "Digest")) @compileError(missing ++ "Digest");
    if (!@hasDecl(Context, "hash")) @compileError(missing ++ "hash(" ++ @typeName(K) ++ ") Digest");
    if (!@hasDecl(Context, "eql")) @compileError(missing ++ "eql(" ++ @typeName(K) ++ ", " ++ @typeName(K) ++ ") bool");

    {
        const Digest = Context.Digest;
        const info = @typeInfo(Digest);

        if (info != .Int) @compileError("Context.Digest must be an integer, however it was actually " ++ @typeName(Digest));
        if (info.Int.signedness != .unsigned) @compileError("Context.Digest must be an unsigned integer, however it was actually an " ++ @typeName(Digest));
    }

    {
        const HashFn = @TypeOf(Context.hash);
        const info = @typeInfo(HashFn);

        if (info != .Fn) @compileError("Context.hash must be a function, however it was actually " ++ @typeName(HashFn));

        const func = info.Fn;
        if (func.params.len != 1) @compileError("Invalid Context.hash signature. Expected hash(" ++ @typeName(K) ++ "), but was actually " ++ @typeName(HashFn));

        // A null parameter/return type means it is generic (e.g. anytype);
        // short-circuiting keeps the `.?` unwrap from ever seeing null.
        if (func.params[0].type == null or func.params[0].type.? != K) {
            const type_str = if (func.params[0].type) |Param| @typeName(Param) else "null";
            @compileError("Invalid Context.hash signature. Parameter must be " ++ @typeName(K) ++ ", however it was " ++ type_str);
        }

        if (func.return_type == null or func.return_type.? != Context.Digest) {
            const type_str = if (func.return_type) |Return| @typeName(Return) else "null";
            @compileError("Invalid Context.hash signature. Return type must be " ++ @typeName(Context.Digest) ++ ", however it was " ++ type_str);
        }
    }

    {
        const EqlFn = @TypeOf(Context.eql);
        const info = @typeInfo(EqlFn);

        if (info != .Fn) @compileError("Context.eql must be a function, however it was actually " ++ @typeName(EqlFn));

        const func = info.Fn;
        if (func.params.len != 2) @compileError("Invalid Context.eql signature. Expected eql(" ++ @typeName(K) ++ ", " ++ @typeName(K) ++ "), but was actually " ++ @typeName(EqlFn));

        // short-circuiting guarantees the `.?` unwraps below never see null
        if (func.params[0].type == null or func.params[0].type.? != K) {
            const type_str = if (func.params[0].type) |Param| @typeName(Param) else "null";
            @compileError("Invalid Context.eql signature. First parameter must be " ++ @typeName(K) ++ ", however it was " ++ type_str);
        }

        if (func.params[1].type == null or func.params[1].type.? != K) {
            const type_str = if (func.params[1].type) |Param| @typeName(Param) else "null";
            @compileError("Invalid Context.eql signature. Second parameter must be " ++ @typeName(K) ++ ", however it was " ++ type_str);
        }

        if (func.return_type == null or func.return_type.? != bool) {
            const type_str = if (func.return_type) |Return| @typeName(Return) else "null";
            @compileError("Invalid Context.eql signature, Return type must be " ++ @typeName(bool) ++ ", however it was " ++ type_str);
        }
    }
}
/// Trie context for byte-string keys: digests are 64-bit Wyhash values
/// (seed 0) and equality is a byte-wise comparison.
const StringContext = struct {
    pub const Digest = u64;

    const Hasher = std.hash.Wyhash;
    const seed: u64 = 0;

    /// Hashes the bytes of `key`.
    pub inline fn hash(key: []const u8) Digest {
        const digest: Digest = Hasher.hash(seed, key);
        return digest;
    }

    /// True when both slices hold the same bytes.
    pub inline fn eql(left: []const u8, right: []const u8) bool {
        const same_bytes = std.mem.eql(u8, left, right);
        return same_bytes;
    }
};
// Trie instantiation exercised by the tests in this file: byte-string keys,
// `void` values, hashed and compared through StringContext.
const StringTrie = HashArrayMappedTrie([]const u8, void, StringContext);
test "trie init" {
    // Construction must succeed; the deferred teardown lets the leak-checking
    // test allocator confirm nothing is left behind.
    var trie = try StringTrie.init(std.testing.allocator);
    defer trie.deinit(std.testing.allocator);
}
test "init and deinit" {
    // An empty trie is created and then released again with the same allocator.
    const gpa = std.testing.allocator;

    var empty_trie = try StringTrie.init(gpa);
    defer empty_trie.deinit(gpa);
}
test "trie insert" {
    const allocator = std.testing.allocator;

    var trie = try StringTrie.init(allocator);
    defer trie.deinit(allocator);

    try trie.insert(allocator, "hello", {});
    try trie.insert(allocator, "world", {});

    // Insertion must be observable: both keys are retrievable afterwards and a
    // key that was never inserted is still reported as absent.
    try std.testing.expect(trie.search("hello") != null);
    try std.testing.expect(trie.search("world") != null);
    try std.testing.expect(trie.search("absent") == null);
}
test "trie search" {
    const testing = std.testing;
    const Pair = StringTrie.Pair;
    const gpa = testing.allocator;

    var trie = try StringTrie.init(gpa);
    defer trie.deinit(gpa);

    const absent: ?Pair = null;

    // A key is absent before it is inserted and found afterwards.
    try testing.expectEqual(absent, trie.search("sdvx"));
    try trie.insert(gpa, "sdvx", {});
    try testing.expectEqual(@as(?Pair, .{ .key = "sdvx", .value = {} }), trie.search("sdvx"));

    // The empty string behaves like any other key.
    try testing.expectEqual(absent, trie.search(""));
    try trie.insert(gpa, "", {});
    try testing.expectEqual(@as(?Pair, .{ .key = "", .value = {} }), trie.search(""));
}
test "README.md example" {
    const testing = std.testing;
    const Pair = StringTrie.Pair;
    const gpa = testing.allocator;

    var trie = try StringTrie.init(gpa);
    defer trie.deinit(gpa);

    try trie.insert(gpa, "hello", {});

    // Only the inserted key is found; any other key yields null.
    const hello = trie.search("hello");
    const world = trie.search("world");
    try testing.expectEqual(@as(?Pair, .{ .key = "hello", .value = {} }), hello);
    try testing.expectEqual(@as(?Pair, null), world);
}