From c3ae727ed1f2f03b4a8425ef1315e4348d720afd Mon Sep 17 00:00:00 2001 From: Rekai Musuka Date: Fri, 21 Oct 2022 05:12:28 -0300 Subject: [PATCH] fix: improve perf of instructions w/ rotr --- src/cpu/arm/half_signed_data_transfer.zig | 5 +++-- src/cpu/arm/psr_transfer.zig | 4 +++- src/cpu/arm/single_data_swap.zig | 4 +++- src/cpu/arm/single_data_transfer.zig | 4 +++- src/cpu/barrel_shifter.zig | 4 +++- src/cpu/thumb/data_transfer.zig | 14 ++++++++------ src/util.zig | 11 ++++++++++- 7 files changed, 33 insertions(+), 13 deletions(-) diff --git a/src/cpu/arm/half_signed_data_transfer.zig b/src/cpu/arm/half_signed_data_transfer.zig index fd3ee58..ce630ff 100644 --- a/src/cpu/arm/half_signed_data_transfer.zig +++ b/src/cpu/arm/half_signed_data_transfer.zig @@ -5,6 +5,7 @@ const Arm7tdmi = @import("../../cpu.zig").Arm7tdmi; const InstrFn = @import("../../cpu.zig").ArmInstrFn; const sext = @import("../../util.zig").sext; +const rotr = @import("../../util.zig").rotr; pub fn halfAndSignedDataTransfer(comptime P: bool, comptime U: bool, comptime I: bool, comptime W: bool, comptime L: bool) InstrFn { return struct { @@ -38,7 +39,7 @@ pub fn halfAndSignedDataTransfer(comptime P: bool, comptime U: bool, comptime I: 0b01 => { // LDRH const value = bus.read16(address & 0xFFFF_FFFE); - result = std.math.rotr(u32, value, 8 * (address & 1)); + result = rotr(u32, value, 8 * (address & 1)); }, 0b10 => { // LDRSB @@ -52,7 +53,7 @@ pub fn halfAndSignedDataTransfer(comptime P: bool, comptime U: bool, comptime I: break :blk sext(16, bus.read16(address)); }; - result = std.math.rotr(u32, value, 8 * (address & 1)); + result = rotr(u32, value, 8 * (address & 1)); }, 0b00 => unreachable, // SWP } diff --git a/src/cpu/arm/psr_transfer.zig b/src/cpu/arm/psr_transfer.zig index 8eb3e94..f9e3d17 100644 --- a/src/cpu/arm/psr_transfer.zig +++ b/src/cpu/arm/psr_transfer.zig @@ -7,6 +7,8 @@ const PSR = @import("../../cpu.zig").PSR; const log = std.log.scoped(.PsrTransfer); +const rotr = @import("../../util.zig").rotr; + pub fn psrTransfer(comptime I: bool, comptime R: bool, comptime kind: u2) InstrFn { return struct { fn inner(cpu: *Arm7tdmi, _: *Bus, opcode: u32) void { @@ -22,7 +24,7 @@ pub fn psrTransfer(comptime I: bool, comptime R: bool, comptime kind: u2) InstrF // MSR const field_mask = @truncate(u4, opcode >> 16 & 0xF); const rm_idx = opcode & 0xF; - const right = if (I) std.math.rotr(u32, opcode & 0xFF, (opcode >> 8 & 0xF) << 1) else cpu.r[rm_idx]; + const right = if (I) rotr(u32, opcode & 0xFF, (opcode >> 8 & 0xF) << 1) else cpu.r[rm_idx]; if (R and !cpu.hasSPSR()) log.warn("Tried to write to SPSR in User/System Mode", .{}); diff --git a/src/cpu/arm/single_data_swap.zig b/src/cpu/arm/single_data_swap.zig index f8e76cc..9a1e544 100644 --- a/src/cpu/arm/single_data_swap.zig +++ b/src/cpu/arm/single_data_swap.zig @@ -4,6 +4,8 @@ const Bus = @import("../../Bus.zig"); const Arm7tdmi = @import("../../cpu.zig").Arm7tdmi; const InstrFn = @import("../../cpu.zig").ArmInstrFn; +const rotr = @import("../../util.zig").rotr; + pub fn singleDataSwap(comptime B: bool) InstrFn { return struct { fn inner(cpu: *Arm7tdmi, bus: *Bus, opcode: u32) void { @@ -20,7 +22,7 @@ pub fn singleDataSwap(comptime B: bool) InstrFn { cpu.r[rd] = value; } else { // SWP - const value = std.math.rotr(u32, bus.read32(address & 0xFFFF_FFFC), 8 * (address & 0x3)); + const value = rotr(u32, bus.read32(address & 0xFFFF_FFFC), 8 * (address & 0x3)); bus.write32(address & 0xFFFF_FFFC, cpu.r[rm]); cpu.r[rd] = value; } diff --git a/src/cpu/arm/single_data_transfer.zig b/src/cpu/arm/single_data_transfer.zig index 103bde0..bfbb6fb 100644 --- a/src/cpu/arm/single_data_transfer.zig +++ b/src/cpu/arm/single_data_transfer.zig @@ -6,6 +6,8 @@ const Bus = @import("../../Bus.zig"); const Arm7tdmi = @import("../../cpu.zig").Arm7tdmi; const InstrFn = @import("../../cpu.zig").ArmInstrFn; +const rotr = @import("../../util.zig").rotr; + pub fn singleDataTransfer(comptime I: bool, comptime P: bool, comptime U: bool, comptime B: bool, comptime W: bool, comptime L: bool) InstrFn { return struct { fn inner(cpu: *Arm7tdmi, bus: *Bus, opcode: u32) void { @@ -33,7 +35,7 @@ pub fn singleDataTransfer(comptime I: bool, comptime P: bool, comptime U: bool, } else { // LDR const value = bus.read32(address & 0xFFFF_FFFC); - result = std.math.rotr(u32, value, 8 * (address & 0x3)); + result = rotr(u32, value, 8 * (address & 0x3)); } } else { if (B) { diff --git a/src/cpu/barrel_shifter.zig b/src/cpu/barrel_shifter.zig index 89d77bc..7b2c1b8 100644 --- a/src/cpu/barrel_shifter.zig +++ b/src/cpu/barrel_shifter.zig @@ -3,6 +3,8 @@ const std = @import("std"); const Arm7tdmi = @import("../cpu.zig").Arm7tdmi; const CPSR = @import("../cpu.zig").PSR; +const rotr = @import("../util.zig").rotr; + pub fn execute(comptime S: bool, cpu: *Arm7tdmi, opcode: u32) u32 { var result: u32 = undefined; if (opcode >> 4 & 1 == 1) { @@ -141,7 +143,7 @@ pub fn arithmeticRight(comptime S: bool, cpsr: *CPSR, rm: u32, total_amount: u8) } pub fn rotateRight(comptime S: bool, cpsr: *CPSR, rm: u32, total_amount: u8) u32 { - const result = std.math.rotr(u32, rm, total_amount); + const result = rotr(u32, rm, total_amount); if (S and total_amount != 0) { cpsr.c.write(result >> 31 & 1 == 1); diff --git a/src/cpu/thumb/data_transfer.zig b/src/cpu/thumb/data_transfer.zig index 9807df7..9cb58bc 100644 --- a/src/cpu/thumb/data_transfer.zig +++ b/src/cpu/thumb/data_transfer.zig @@ -4,6 +4,8 @@ const Bus = @import("../../Bus.zig"); const Arm7tdmi = @import("../../cpu.zig").Arm7tdmi; const InstrFn = @import("../../cpu.zig").ThumbInstrFn; +const rotr = @import("../../util.zig").rotr; + pub fn format6(comptime rd: u3) InstrFn { return struct { fn inner(cpu: *Arm7tdmi, bus: *Bus, opcode: u16) void { @@ -39,7 +41,7 @@ pub fn format78(comptime op: u2, comptime T: bool) InstrFn { 0b10 => { // LDRH const value = bus.read16(address & 0xFFFF_FFFE); - cpu.r[rd] = std.math.rotr(u32, value, 8 * (address & 1)); + cpu.r[rd] = rotr(u32, value, 8 * (address & 1)); }, 0b11 => { // LDRSH @@ -49,7 +51,7 @@ pub fn format78(comptime op: u2, comptime T: bool) InstrFn { break :blk sext(16, bus.read16(address)); }; - cpu.r[rd] = std.math.rotr(u32, value, 8 * (address & 1)); + cpu.r[rd] = rotr(u32, value, 8 * (address & 1)); }, } } else { @@ -66,7 +68,7 @@ pub fn format78(comptime op: u2, comptime T: bool) InstrFn { 0b10 => { // LDR const value = bus.read32(address & 0xFFFF_FFFC); - cpu.r[rd] = std.math.rotr(u32, value, 8 * (address & 0x3)); + cpu.r[rd] = rotr(u32, value, 8 * (address & 0x3)); }, 0b11 => { // LDRB @@ -93,7 +95,7 @@ pub fn format9(comptime B: bool, comptime L: bool, comptime offset: u5) InstrFn // LDR const address = cpu.r[rb] + (@as(u32, offset) << 2); const value = bus.read32(address & 0xFFFF_FFFC); - cpu.r[rd] = std.math.rotr(u32, value, 8 * (address & 0x3)); + cpu.r[rd] = rotr(u32, value, 8 * (address & 0x3)); } } else { if (B) { @@ -121,7 +123,7 @@ pub fn format10(comptime L: bool, comptime offset: u5) InstrFn { if (L) { // LDRH const value = bus.read16(address & 0xFFFF_FFFE); - cpu.r[rd] = std.math.rotr(u32, value, 8 * (address & 1)); + cpu.r[rd] = rotr(u32, value, 8 * (address & 1)); } else { // STRH bus.write16(address & 0xFFFF_FFFE, @truncate(u16, cpu.r[rd])); @@ -139,7 +141,7 @@ pub fn format11(comptime L: bool, comptime rd: u3) InstrFn { if (L) { // LDR const value = bus.read32(address & 0xFFFF_FFFC); - cpu.r[rd] = std.math.rotr(u32, value, 8 * (address & 0x3)); + cpu.r[rd] = rotr(u32, value, 8 * (address & 0x3)); } else { // STR bus.write32(address & 0xFFFF_FFFC, cpu.r[rd]); diff --git a/src/util.zig b/src/util.zig index 6e3db87..95d2744 100644 --- a/src/util.zig +++ b/src/util.zig @@ -1,12 +1,21 @@ const std = @import("std"); +const Log2Int = std.math.Log2Int; -pub fn sext(comptime bits: comptime_int, value: u32) u32 { +pub inline fn sext(comptime bits: comptime_int, value: u32) u32 { comptime std.debug.assert(bits <= 32); const amount = 32 - bits; return @bitCast(u32, @bitCast(i32, value << amount) >> amount); } +/// See https://godbolt.org/z/W3en9Eche +pub inline fn rotr(comptime T: type, value: T, r: anytype) T { + comptime std.debug.assert(@typeInfo(T).Int.signedness == .unsigned); + const ar = @truncate(Log2Int(T), r); + + return value >> ar | value << @truncate(Log2Int(T), @typeInfo(T).Int.bits - @as(T, ar)); +} + pub const FpsAverage = struct { const Self = @This();