From 253cbbcdffa597cc2e06e48259387aaeb8e69ed3 Mon Sep 17 00:00:00 2001 From: Rekai Musuka Date: Fri, 8 Sep 2023 23:18:40 -0500 Subject: [PATCH] feat(v5te): impl BLX, QDADD/QDSUB, SMLAL, SMLAW, SMULW, SMUL --- src/arm/cpu/arm/psr_transfer.zig | 125 ++++++++++++++++++++++++++----- 1 file changed, 107 insertions(+), 18 deletions(-) diff --git a/src/arm/cpu/arm/psr_transfer.zig b/src/arm/cpu/arm/psr_transfer.zig index 75ce507..c97af6b 100644 --- a/src/arm/cpu/arm/psr_transfer.zig +++ b/src/arm/cpu/arm/psr_transfer.zig @@ -11,7 +11,7 @@ pub fn control(comptime InstrFn: type, comptime I: bool, comptime op: u6) InstrF return struct { fn inner(cpu: *Arm32, opcode: u32) void { if (I) { - // MSR (register) + // MSR Immediate const R = op >> 5 & 1 == 1; return msr(R, I, cpu, opcode); } @@ -38,8 +38,18 @@ pub fn control(comptime InstrFn: type, comptime I: bool, comptime op: u6) InstrF cpu.r[rd] = @clz(cpu.r[rm]); }, - 0b01_0011 => cpu.panic("TODO: implement v5TE BLX", .{}), - 0b00_0101, 0b01_0101 => { + 0b01_0011 => { // BLX + const rm = opcode & 0xF; + + const target = cpu.r[rm]; // latch before r14 is written: rm may be r14 + cpu.r[14] = cpu.r[15] - 4; // TODO: Why - 4?
+ + cpu.r[15] = target & ~@as(u32, 1); + cpu.cpsr.t.write(target & 1 == 1); + + cpu.pipe.reload(cpu); + }, + 0b00_0101, 0b01_0101 => { // QADD / QSUB const U = op >> 4 & 1 == 1; const rm = opcode & 0xF; @@ -53,8 +63,21 @@ pub fn control(comptime InstrFn: type, comptime I: bool, comptime op: u6) InstrF if (cpu.r[rd] == if (U) 0x8000_0000 else 0x7FFF_FFFF) cpu.cpsr.q.set(); }, - 0b10_0101 => cpu.panic("TODO: implement QDADD", .{}), - 0b11_0101 => cpu.panic("TODO: implement QDSUB", .{}), + 0b10_0101, 0b11_0101 => { // QDADD / QDSUB + const U = op >> 4 & 1 == 1; + + const rm = opcode & 0xF; + const rd = opcode >> 12 & 0xF; // Rd is bits 15:12 (bits 11:8 are SBZ) + const rn = opcode >> 16 & 0xF; + + const product = @as(i32, @bitCast(cpu.r[rn])) *| 2; + if (@mulWithOverflow(@as(i32, @bitCast(cpu.r[rn])), 2)[1] == 0b1) cpu.cpsr.q.set(); // doubling saturated, either sign + + const left: i32 = @bitCast(cpu.r[rm]); + + cpu.r[rd] = @bitCast(if (U) left -| product else left +| product); + if (cpu.r[rd] == if (U) 0x8000_0000 else 0x7FFF_FFFF) cpu.cpsr.q.set(); + }, 0b01_0111 => cpu.panic("TODO: handle BKPT", .{}), 0b00_1000, 0b00_1010, 0b00_1100, 0b00_1110 => { // SMLA const X = op >> 1 & 1; @@ -65,17 +88,87 @@ pub fn control(comptime InstrFn: type, comptime I: bool, comptime op: u6) InstrF const rn = opcode >> 12 & 0xF; const rd = opcode >> 16 & 0xF; - const left: i16 = @intCast((cpu.r[rm] >> 16 * X) & 0xFFFF); - const right: i16 = @intCast((cpu.r[rs] >> 16 * Y) & 0xFFFF); + const left: i32 = @as(i16, @bitCast(@as(u16, @truncate(cpu.r[rm] >> 16 * X)))); + const right: i32 = @as(i16, @bitCast(@as(u16, @truncate(cpu.r[rs] >> 16 * Y)))); const accumulate: i32 = @bitCast(cpu.r[rn]); - const result = @addWithOverflow(@as(i32, left) * @as(i32, right), accumulate); + const result = @addWithOverflow(left * right, accumulate); cpu.r[rd] = @bitCast(result[0]); if (result[1] == 0b1) cpu.cpsr.q.set(); }, - else => cpu.panic("unhandled instruction: 0x{X:0>8}", .{opcode}), + 0b10_1000, 0b10_1010, 0b10_1100, 0b10_1110 => { // SMLAL + const X = op >> 1 & 1; + const Y = op >> 2 & 1; + + const rm = opcode & 0xF; 
+ const rs = opcode >> 8 & 0xF; + const rd_lo = opcode >> 12 & 0xF; + const rd_hi = opcode >> 16 & 0xF; + + const left: i32 = @as(i16, @bitCast(@as(u16, @truncate(cpu.r[rm] >> 16 * X)))); + const right: i32 = @as(i16, @bitCast(@as(u16, @truncate(cpu.r[rs] >> 16 * Y)))); + + // SMLAL accumulates into the 64-bit value RdHi:RdLo; the add wraps and Q is left untouched + const product: i64 = left * right; + + const hi: u64 = cpu.r[rd_hi]; + const lo: u64 = cpu.r[rd_lo]; + + // Two's complement wrapping add, so the signed product can be added as raw bits + const sum = (hi << 32 | lo) +% @as(u64, @bitCast(product)); + + // Both halves come from one 64-bit sum, so the carry out of RdLo reaches RdHi + cpu.r[rd_lo] = @truncate(sum); + cpu.r[rd_hi] = @truncate(sum >> 32); + }, + 0b01_1000, 0b01_1100 => { // SMLAW + const Y = op >> 2 & 1; + + // TODO: deduplicate all this + const rm = opcode & 0xF; + const rs = opcode >> 8 & 0xF; + const rn = opcode >> 12 & 0xF; + const rd = opcode >> 16 & 0xF; + + const right: i16 = @as(i16, @bitCast(@as(u16, @truncate(cpu.r[rs] >> 16 * Y)))); + const left: i48 = @as(i32, @bitCast(cpu.r[rm])); + const accumulate: i32 = @bitCast(cpu.r[rn]); + + const ret = @addWithOverflow(@as(i32, @truncate((left * right) >> 16)), accumulate); + + cpu.r[rd] = @bitCast(ret[0]); + if (ret[1] == 0b1) cpu.cpsr.q.set(); + }, + + 0b01_1010, 0b01_1110 => { // SMULW + const Y = op >> 2 & 1; + + const rm = opcode & 0xF; + const rs = opcode >> 8 & 0xF; + const rd = opcode >> 16 & 0xF; + + const right: i64 = @as(i16, @bitCast(@as(u16, @truncate(cpu.r[rs] >> 16 * Y)))); + const left: i64 = @as(i32, @bitCast(cpu.r[rm])); + + const product: i32 = @truncate((left * right) >> 16); + cpu.r[rd] = @bitCast(product); + }, + 0b11_1000, 0b11_1010, 0b11_1100, 0b11_1110 => { // SMUL + const X = op >> 1 & 1; + const Y = op >> 2 & 1; + + const rm = opcode & 0xF; + const rs = opcode >> 8 & 0xF; + const rd = opcode >> 16 & 0xF; + + const left: i32 = @as(i16, @bitCast(@as(u16, @truncate(cpu.r[rm] >> 16 * X)))); + 
const right: i32 = @as(i16, @bitCast(@as(u16, @truncate(cpu.r[rs] >> 16 * Y)))); + + cpu.r[rd] = @bitCast(left *% right); // wrapping multiply of the two sign-extended halfwords + }, + else => cpu.panic("0x{X:0>8} was improperly handled by the control instruction extension space", .{opcode}), } } @@ -99,16 +192,12 @@ pub fn control(comptime InstrFn: type, comptime I: bool, comptime op: u6) InstrF } fn fieldMask(psr: *const PSR, field_mask: u4, right: u32) u32 { - // This bitwise ORs bits 3 and 0 of the field mask into a u2 - // We do this because we only care about bits 7:0 and 31:28 of the CPSR - const bits: u2 = @truncate((field_mask >> 2 & 0x2) | (field_mask & 1)); + var mask: u32 = 0; // PSR bits that will be taken from right; all others keep psr.raw - const mask: u32 = switch (bits) { - 0b00 => 0x0000_0000, - 0b01 => 0x0000_00FF, - 0b10 => 0xF000_0000, - 0b11 => 0xF000_00FF, - }; + inline for (0..4) |i| { // unrolled: bit i of field_mask selects PSR byte i + if (field_mask & @as(u4, 1) << i != 0) + mask |= @as(u32, 0xFF) << 8 * i; // byte i covers bits 8*i+7 .. 8*i + } return (psr.raw & ~mask) | (right & mask); }