- impl behaviour of running v5te instrs on v4t cpu - impl undefined instruction exception handler - panic on what I think are still unimplemented v5te opcodes
208 lines
8.6 KiB
Zig
208 lines
8.6 KiB
Zig
const std = @import("std");
|
|
|
|
const PSR = @import("../../../arm.zig").PSR;
|
|
const rotr = @import("zba-util").rotr;
|
|
|
|
const log = std.log.scoped(.ctrl_ext_space);
|
|
|
|
/// Decoder/generator for the ARM "control instruction extension space":
/// MRS/MSR, BX/BLX (register), CLZ, BKPT, and the v5TE saturating-arithmetic
/// (QADD/QSUB/QDADD/QDSUB) and signed-halfword-multiply (SMLA/SMLAL/SMLAW/
/// SMULW/SMUL) instructions.
///
/// `InstrFn` is the handler function-pointer type (expected shape:
/// `*const fn (*Arm32, u32) void`), `I` selects the immediate form (only MSR
/// has one in this space), and `op` is the 6-bit comptime index that selects
/// which instruction the returned handler implements. Returns the concrete
/// handler for this (I, op) pair.
pub fn control(comptime InstrFn: type, comptime I: bool, comptime op: u6) InstrFn {
    // Recover the CPU struct type from the handler signature by peeling
    // pointer -> fn -> first-param pointer out of InstrFn's type info.
    const Arm32 = @typeInfo(@typeInfo(@typeInfo(InstrFn).Pointer.child).Fn.params[0].type.?).Pointer.child;

    return struct {
        fn inner(cpu: *Arm32, opcode: u32) void {
            if (I) {
                // MSR Immediate — the only immediate-form instruction here.
                // Bit 5 of `op` selects SPSR (R = true) vs CPSR (R = false).
                const R = op >> 5 & 1 == 1;
                return msr(R, I, cpu, opcode);
            }

            switch (op) {
                0b00_0000, 0b10_0000 => { // MRS (read CPSR/SPSR into rd)
                    const R = op >> 5 & 1 == 1; // true => read SPSR
                    const rd = opcode >> 12 & 0xF;

                    // Reading SPSR without one (User/System mode) is
                    // UNPREDICTABLE on hardware; log and fall through to
                    // whatever cpu.spsr currently holds.
                    if (R and !cpu.hasSPSR()) log.err("Tried to read SPSR from User/System Mode", .{});
                    cpu.r[rd] = if (R) cpu.spsr.raw else cpu.cpsr.raw;
                },
                0b01_0000, 0b11_0000 => { // MSR (register operand)
                    const R = op >> 5 & 1 == 1; // true => write SPSR
                    msr(R, false, cpu, opcode);
                },
                // NOTE(review): plain (v4t) BX is presumably decoded elsewhere;
                // this arm only covers whatever v5TE-specific case lands here — confirm.
                0b01_0001 => cpu.panic("TODO: implement v5TE BX", .{}),
                0b11_0001 => { // CLZ (count leading zeros, v5TE+)
                    // v5TE-only encoding: on a v4t core take the undefined
                    // instruction exception instead.
                    if (Arm32.arch == .v4t) return cpu.undefinedInstructionTrap();
                    const rd = opcode >> 12 & 0xF;
                    const rm = opcode & 0xF;

                    // r15 as source or destination is UNPREDICTABLE per the ARM ARM.
                    if (rd == 0xF) cpu.panic("CLZ: UNPREDICTABLE behaviour when rd == 15", .{});
                    if (rm == 0xF) cpu.panic("CLZ: UNPREDICTABLE behaviour when rm == 15", .{});

                    cpu.r[rd] = @clz(cpu.r[rm]);
                },
                0b01_0011 => { // BLX (register)
                    // NOTE(review): no v4t guard here, unlike CLZ/QADD above —
                    // confirm whether BLX should also trap on a v4t core.
                    const rm = opcode & 0xF;

                    // Bit 0 of the target selects the Thumb state after the branch.
                    const thumb = cpu.r[rm] & 1 == 1;
                    // TODO: Why - 4? Presumably r15 reads ahead of the executing
                    // instruction here, so -4 yields the return address — confirm
                    // against this emulator's pipeline model.
                    cpu.r[14] = cpu.r[15] - 4;

                    // Branch to the target with bit 0 cleared, then flush the pipeline.
                    cpu.r[15] = cpu.r[rm] & ~@as(u32, 1);
                    cpu.cpsr.t.write(thumb);

                    cpu.pipe.reload(cpu);
                },
                // Saturating arithmetic (v5TE+). In listing order these are
                // QADD (00), QSUB (01), QDADD (10), QDSUB (11):
                // bit 4 of `op` = subtract, bit 5 = double the second operand.
                0b00_0101, 0b01_0101, 0b10_0101, 0b11_0101 => {
                    if (Arm32.arch == .v4t) return cpu.undefinedInstructionTrap();
                    const U = op >> 4 & 1 == 1; // subtract instead of add
                    const D = op >> 5 & 1 == 1; // double rn (QD* variants)

                    const rm = opcode & 0xF;
                    const rd = opcode >> 12 & 0xF;
                    const rn = opcode >> 16 & 0xF;

                    const left: i32 = @bitCast(cpu.r[rm]);
                    const right: i32 = blk: {
                        if (!D) break :blk @bitCast(cpu.r[rn]);

                        // QD* variants: rn * 2, saturated, with Q set on overflow.
                        const ret = @mulWithOverflow(@as(i32, @bitCast(cpu.r[rn])), 2);
                        var product: i32 = ret[0];

                        if (ret[1] == 0b1) {
                            // On overflow the wrapped result has the opposite
                            // sign of the true result, so a negative wrap means
                            // positive overflow -> saturate to maxInt (and vice versa).
                            product = if (product < 0) std.math.maxInt(i32) else std.math.minInt(i32);
                            cpu.cpsr.q.set();
                        }

                        break :blk product;
                    };

                    const ret = if (U) @subWithOverflow(left, right) else @addWithOverflow(left, right);
                    var result: i32 = ret[0];

                    if (ret[1] == 0b1) {
                        // Same wrapped-sign saturation trick as the doubling step above.
                        result = if (result < 0) std.math.maxInt(i32) else std.math.minInt(i32);
                        cpu.cpsr.q.set();
                    }

                    cpu.r[rd] = @bitCast(result);
                },
                0b01_0111 => cpu.panic("TODO: handle BKPT", .{}),
                0b00_1000, 0b00_1010, 0b00_1100, 0b00_1110 => { // SMLA<x><y>
                    // NOTE(review): treated as a no-op on v4t, while CLZ/QADD trap
                    // and the SMLAL/SMLAW/SMULW/SMUL arms below have no guard at
                    // all — confirm which behaviour matches the real ARM7TDMI.
                    if (Arm32.arch == .v4t) return; // no-op
                    const X = op >> 1 & 1; // top (1) or bottom (0) halfword of rm
                    const Y = op >> 2 & 1; // top (1) or bottom (0) halfword of rs

                    const rm = opcode & 0xF;
                    const rs = opcode >> 8 & 0xF;
                    const rn = opcode >> 12 & 0xF;
                    const rd = opcode >> 16 & 0xF;

                    // Select and sign-extend the chosen 16-bit halves of rm and rs.
                    const left: i32 = @as(i16, @bitCast(@as(u16, @truncate(cpu.r[rm] >> 16 * X))));
                    const right: i32 = @as(i16, @bitCast(@as(u16, @truncate(cpu.r[rs] >> 16 * Y))));
                    const accumulate: i32 = @bitCast(cpu.r[rn]);

                    // 16x16 product always fits in i32; only the accumulate
                    // can overflow, which sets the Q (sticky saturation) flag.
                    const result = @addWithOverflow(left * right, accumulate);
                    cpu.r[rd] = @bitCast(result[0]);

                    if (result[1] == 0b1) cpu.cpsr.q.set();
                },

                0b10_1000, 0b10_1010, 0b10_1100, 0b10_1110 => { // SMLAL<x><y>
                    const X = op >> 1 & 1; // halfword select for rm
                    const Y = op >> 2 & 1; // halfword select for rs

                    const rm = opcode & 0xF;
                    const rs = opcode >> 8 & 0xF;
                    const rdlo = opcode >> 12 & 0xF;
                    const rdhi = opcode >> 16 & 0xF;

                    const left: i64 = @as(i16, @bitCast(@as(u16, @truncate(cpu.r[rm] >> 16 * X))));
                    const right: i64 = @as(i16, @bitCast(@as(u16, @truncate(cpu.r[rs] >> 16 * Y))));
                    const product = left * right;

                    // 64-bit accumulator lives in the rdhi:rdlo register pair.
                    const rdhi_val: i32 = @bitCast(cpu.r[rdhi]);
                    const rdlo_val: i32 = @bitCast(cpu.r[rdlo]);

                    const accumulate = @as(i64, rdhi_val) << 32 | rdlo_val;
                    // SMLAL<x><y> wraps on 64-bit overflow and does NOT set Q.
                    const sum = product +% accumulate;

                    cpu.r[rdhi] = @bitCast(@as(i32, @truncate(sum >> 32)));
                    cpu.r[rdlo] = @bitCast(@as(i32, @truncate(sum)));
                },
                0b01_1000, 0b01_1100 => { // SMLAW<y>
                    const Y = op >> 2 & 1; // halfword select for rs

                    // TODO: deduplicate all this
                    const rm = opcode & 0xF;
                    const rs = opcode >> 8 & 0xF;
                    const rn = opcode >> 12 & 0xF;
                    const rd = opcode >> 16 & 0xF;

                    const right: i16 = @as(i16, @bitCast(@as(u16, @truncate(cpu.r[rs] >> 16 * Y))));
                    // i48 is wide enough for any i32 * i16 product (< 2^47).
                    const left: i48 = @as(i32, @bitCast(cpu.r[rm]));

                    const accumulate: i32 = @bitCast(cpu.r[rn]);

                    // Take the top 32 bits of the 48-bit product, then accumulate;
                    // only the accumulate step can overflow and set Q.
                    const ret = @addWithOverflow(@as(i32, @truncate((left * right) >> 16)), accumulate);

                    cpu.r[rd] = @bitCast(ret[0]);
                    if (ret[1] == 0b1) cpu.cpsr.q.set();
                },

                0b01_1010, 0b01_1110 => { // SMULW<y>
                    const Y = op >> 2 & 1; // halfword select for rs

                    const rm = opcode & 0xF;
                    const rs = opcode >> 8 & 0xF;
                    const rd = opcode >> 16 & 0xF;

                    const right: i64 = @as(i16, @bitCast(@as(u16, @truncate(cpu.r[rs] >> 16 * Y))));
                    const left: i64 = @as(i32, @bitCast(cpu.r[rm]));

                    // rd := top 32 bits of the 48-bit signed product (no Q flag).
                    const product: i32 = @truncate((left * right) >> 16);
                    cpu.r[rd] = @bitCast(product);
                },
                0b11_1000, 0b11_1010, 0b11_1100, 0b11_1110 => { // SMUL<x><y>
                    const X = op >> 1 & 1; // halfword select for rm
                    const Y = op >> 2 & 1; // halfword select for rs

                    const rm = opcode & 0xF;
                    const rs = opcode >> 8 & 0xF;
                    const rd = opcode >> 16 & 0xF;

                    const left: i32 = @as(i16, @bitCast(@as(u16, @truncate(cpu.r[rm] >> 16 * X))));
                    const right: i32 = @as(i16, @bitCast(@as(u16, @truncate(cpu.r[rs] >> 16 * Y))));

                    // 16x16 product fits in i32; wrapping multiply keeps the
                    // compiler from inserting an (unreachable) overflow check.
                    cpu.r[rd] = @bitCast(left *% right);
                },
                else => cpu.panic("0x{X:0>8} was improperly handled by the control instruction extension space", .{opcode}),
            }
        }

        /// Shared MSR implementation for both the immediate (`imm`) and
        /// register forms. `R` selects SPSR (true) vs CPSR (false); the
        /// 4-bit field mask in opcode[19:16] gates which PSR bytes are written.
        inline fn msr(comptime R: bool, comptime imm: bool, cpu: *Arm32, opcode: u32) void {
            const field_mask: u4 = @truncate(opcode >> 16 & 0xF);
            const rm_idx = opcode & 0xF;
            // Immediate form: 8-bit value rotated right by twice the 4-bit
            // rotate field — the standard ARM immediate encoding.
            const right = if (imm) rotr(u32, opcode & 0xFF, (opcode >> 8 & 0xF) * 2) else cpu.r[rm_idx];

            if (R and !cpu.hasSPSR()) log.err("Tried to write to SPSR in User/System Mode", .{});

            if (R) {
                // arm.gba seems to expect the SPSR to do something in SYS mode,
                // so we just assume that despite writing to the SPSR in USR or SYS mode
                // being UNPREDICTABLE, it just magically has a working SPSR somehow
                cpu.spsr.raw = fieldMask(&cpu.spsr, field_mask, right);
            } else {
                // CPSR writes are silently ignored when unprivileged (User mode).
                if (cpu.isPrivileged()) cpu.setCpsr(fieldMask(&cpu.cpsr, field_mask, right));
            }
        }
    }.inner;
}
|
|
|
|
/// Merges `right` into `psr.raw` one byte lane at a time, as selected by the
/// MSR field mask. Bit i of `field_mask` (the c/x/s/f fields) enables byte i:
/// enabled lanes take their value from `right`, disabled lanes keep the
/// current PSR contents. Returns the merged 32-bit value without mutating `psr`.
fn fieldMask(psr: *const PSR, field_mask: u4, right: u32) u32 {
    var write_mask: u32 = 0;

    // Unrolled at comptime so every shift amount is a comptime-known constant.
    inline for (0..4) |byte_idx| {
        const lane_enabled = field_mask >> byte_idx & 1 == 1;
        if (lane_enabled) write_mask |= @as(u32, 0xFF) << 8 * byte_idx;
    }

    return (right & write_mask) | (psr.raw & ~write_mask);
}
|