diff options
Diffstat (limited to 'src/core/arm/dyncom/arm_dyncom_interpreter.cpp')
-rw-r--r-- | src/core/arm/dyncom/arm_dyncom_interpreter.cpp | 374 |
1 files changed, 300 insertions, 74 deletions
diff --git a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp index a37e6c94e..9b291862c 100644 --- a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp +++ b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp @@ -63,16 +63,21 @@ extern void switch_mode(arm_core_t *core, uint32_t mode); typedef arm_core_t arm_processor; typedef unsigned int (*shtop_fp_t)(arm_processor *cpu, unsigned int sht_oper); +// Defines a reservation granule of 2 words, which protects the first 2 words starting at the tag. +// This is the smallest granule allowed by the v7 spec, and is coincidentally just large enough to +// support LDR/STREXD. +static const ARMword RESERVATION_GRANULE_MASK = 0xFFFFFFF8; + // Exclusive memory access static int exclusive_detect(ARMul_State* state, ARMword addr){ - if(state->exclusive_tag == addr) + if(state->exclusive_tag == (addr & RESERVATION_GRANULE_MASK)) return 0; else return -1; } static void add_exclusive_addr(ARMul_State* state, ARMword addr){ - state->exclusive_tag = addr; + state->exclusive_tag = addr & RESERVATION_GRANULE_MASK; return; } @@ -80,7 +85,6 @@ static void remove_exclusive(ARMul_State* state, ARMword addr){ state->exclusive_tag = 0xFFFFFFFF; } - unsigned int DPO(Immediate)(arm_processor *cpu, unsigned int sht_oper) { unsigned int immed_8 = BITS(sht_oper, 0, 7); unsigned int rotate_imm = BITS(sht_oper, 8, 11); @@ -871,6 +875,8 @@ typedef struct _mvn_inst { typedef struct _rev_inst { unsigned int Rd; unsigned int Rm; + unsigned int op1; + unsigned int op2; } rev_inst; typedef struct _rsb_inst { @@ -972,6 +978,16 @@ typedef struct _smlal_inst { unsigned int RdLo; } smlal_inst; +typedef struct smlald_inst { + unsigned int RdLo; + unsigned int RdHi; + unsigned int Rm; + unsigned int Rn; + unsigned int swap; + unsigned int op1; + unsigned int op2; +} smlald_inst; + typedef struct _mla_inst { unsigned int S; unsigned int Rn; @@ -1067,7 +1083,7 @@ typedef struct _cdp_inst { unsigned int cp_num; unsigned int opcode_2; unsigned int CRm; - uint32 inst; + unsigned int inst; }cdp_inst; typedef struct _uxtb_inst { @@ -1407,15 +1423,19 @@ ARM_INST_PTR INTERPRETER_TRANSLATE(bx)(unsigned int inst, int index) arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(bx_inst)); bx_inst *inst_cream = (bx_inst *)inst_base->component; - inst_base->cond = BITS(inst, 28, 31); - inst_base->idx = index; - inst_base->br = INDIRECT_BRANCH; + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = INDIRECT_BRANCH; - inst_cream->Rm = BITS(inst, 0, 3); + inst_cream->Rm = BITS(inst, 0, 3); return inst_base; } -ARM_INST_PTR INTERPRETER_TRANSLATE(bxj)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("BXJ"); } +ARM_INST_PTR INTERPRETER_TRANSLATE(bxj)(unsigned int inst, int index) +{ + return INTERPRETER_TRANSLATE(bx)(inst, index); +} + ARM_INST_PTR INTERPRETER_TRANSLATE(cdp)(unsigned int inst, int index){ arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(cdp_inst)); cdp_inst *inst_cream = (cdp_inst *)inst_base->component; @@ -2041,7 +2061,37 @@ ARM_INST_PTR INTERPRETER_TRANSLATE(pld)(unsigned int inst, int index) return inst_base; } -ARM_INST_PTR INTERPRETER_TRANSLATE(qadd)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("QADD"); } + +ARM_INST_PTR INTERPRETER_TRANSLATE(qadd)(unsigned int inst, int index) +{ + arm_inst* const inst_base = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(generic_arm_inst)); + generic_arm_inst* const inst_cream = (generic_arm_inst*)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->op1 = BITS(inst, 21, 22); + inst_cream->Rm = BITS(inst, 0, 3); + inst_cream->Rn = BITS(inst, 16, 19); + inst_cream->Rd = BITS(inst, 12, 15); + + return inst_base; +} +ARM_INST_PTR INTERPRETER_TRANSLATE(qdadd)(unsigned int inst, int index) +{ + return INTERPRETER_TRANSLATE(qadd)(inst, index); +} +ARM_INST_PTR INTERPRETER_TRANSLATE(qdsub)(unsigned int inst, int index) +{ + return INTERPRETER_TRANSLATE(qadd)(inst, index); +} +ARM_INST_PTR INTERPRETER_TRANSLATE(qsub)(unsigned int inst, int index) +{ + return INTERPRETER_TRANSLATE(qadd)(inst, index); +} + ARM_INST_PTR INTERPRETER_TRANSLATE(qadd8)(unsigned int inst, int index) { arm_inst* const inst_base = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(generic_arm_inst)); @@ -2068,9 +2118,6 @@ ARM_INST_PTR INTERPRETER_TRANSLATE(qaddsubx)(unsigned int inst, int index) { return INTERPRETER_TRANSLATE(qadd8)(inst, index); } -ARM_INST_PTR INTERPRETER_TRANSLATE(qdadd)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("QDADD"); } -ARM_INST_PTR INTERPRETER_TRANSLATE(qdsub)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("QDSUB"); } -ARM_INST_PTR INTERPRETER_TRANSLATE(qsub)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("QSUB"); } ARM_INST_PTR INTERPRETER_TRANSLATE(qsub8)(unsigned int inst, int index) { return INTERPRETER_TRANSLATE(qadd8)(inst, index); @@ -2083,36 +2130,33 @@ ARM_INST_PTR INTERPRETER_TRANSLATE(qsubaddx)(unsigned int inst, int index) { return INTERPRETER_TRANSLATE(qadd8)(inst, index); } + ARM_INST_PTR INTERPRETER_TRANSLATE(rev)(unsigned int inst, int index) { - arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(rev_inst)); - rev_inst *inst_cream = (rev_inst *)inst_base->component; + arm_inst* const inst_base = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(rev_inst)); + rev_inst* const inst_cream = (rev_inst*)inst_base->component; - inst_base->cond = BITS(inst, 28, 31); - inst_base->idx = index; - inst_base->br = NON_BRANCH; + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; inst_base->load_r15 = 0; - inst_cream->Rm = BITS(inst, 0, 3); - inst_cream->Rd = BITS(inst, 12, 15); + inst_cream->Rm = BITS(inst, 0, 3); + inst_cream->Rd = BITS(inst, 12, 15); + inst_cream->op1 = BITS(inst, 20, 22); + inst_cream->op2 = BITS(inst, 5, 7); return inst_base; } -ARM_INST_PTR INTERPRETER_TRANSLATE(rev16)(unsigned int inst, int index){ - arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(rev_inst)); - rev_inst *inst_cream = (rev_inst *)inst_base->component; - - inst_base->cond = BITS(inst, 28, 31); - inst_base->idx = index; - inst_base->br = NON_BRANCH; - inst_base->load_r15 = 0; - - inst_cream->Rm = BITS(inst, 0, 3); - inst_cream->Rd = BITS(inst, 12, 15); - - return inst_base; +ARM_INST_PTR INTERPRETER_TRANSLATE(rev16)(unsigned int inst, int index) +{ + return INTERPRETER_TRANSLATE(rev)(inst, index); +} +ARM_INST_PTR INTERPRETER_TRANSLATE(revsh)(unsigned int inst, int index) +{ + return INTERPRETER_TRANSLATE(rev)(inst, index); } -ARM_INST_PTR INTERPRETER_TRANSLATE(revsh)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("REVSH"); } + ARM_INST_PTR INTERPRETER_TRANSLATE(rfe)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("RFE"); } ARM_INST_PTR INTERPRETER_TRANSLATE(rsb)(unsigned int inst, int index) { @@ -2360,9 +2404,50 @@ ARM_INST_PTR INTERPRETER_TRANSLATE(smlal)(unsigned int inst, int index) } ARM_INST_PTR INTERPRETER_TRANSLATE(smlalxy)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SMLALXY"); } -ARM_INST_PTR INTERPRETER_TRANSLATE(smlald)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SMLALD"); } -ARM_INST_PTR INTERPRETER_TRANSLATE(smlaw)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SMLAW"); } -ARM_INST_PTR INTERPRETER_TRANSLATE(smlsld)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SMLSLD"); } + +ARM_INST_PTR INTERPRETER_TRANSLATE(smlaw)(unsigned int inst, int index) +{ + arm_inst* const inst_base = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(smlad_inst)); + smlad_inst* const inst_cream = (smlad_inst*)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->Ra = BITS(inst, 12, 15); + inst_cream->Rm = BITS(inst, 8, 11); + inst_cream->Rn = BITS(inst, 0, 3); + inst_cream->Rd = BITS(inst, 16, 19); + inst_cream->m = BIT(inst, 6); + + return inst_base; +} + +ARM_INST_PTR INTERPRETER_TRANSLATE(smlald)(unsigned int inst, int index) +{ + arm_inst* const inst_base = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(smlald_inst)); + smlald_inst* const inst_cream = (smlald_inst*)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->Rm = BITS(inst, 8, 11); + inst_cream->Rn = BITS(inst, 0, 3); + inst_cream->RdLo = BITS(inst, 12, 15); + inst_cream->RdHi = BITS(inst, 16, 19); + inst_cream->swap = BIT(inst, 5); + inst_cream->op1 = BITS(inst, 20, 22); + inst_cream->op2 = BITS(inst, 5, 7); + + return inst_base; +} +ARM_INST_PTR INTERPRETER_TRANSLATE(smlsld)(unsigned int inst, int index) +{ + return INTERPRETER_TRANSLATE(smlald)(inst, index); +} ARM_INST_PTR INTERPRETER_TRANSLATE(smmla)(unsigned int inst, int index) { @@ -3408,7 +3493,7 @@ static tdstate decode_thumb_instr(arm_processor *cpu, uint32_t inst, addr_t addr tdstate ret = thumb_translate (addr, inst, arm_inst, inst_size); if(ret == t_branch){ // TODO: FIXME, endian should be judged - uint32 tinstr; + u32 tinstr; if((addr & 0x3) != 0) tinstr = inst >> 16; else @@ -3421,7 +3506,7 @@ static tdstate decode_thumb_instr(arm_processor *cpu, uint32_t inst, addr_t addr case 26: case 27: if (((tinstr & 0x0F00) != 0x0E00) && ((tinstr & 0x0F00) != 0x0F00)){ - uint32 cond = (tinstr & 0x0F00) >> 8; + u32 cond = (tinstr & 0x0F00) >> 8; inst_index = table_length - 4; *ptr_inst_base = arm_instruction_trans[inst_index](tinstr, inst_index); } else { @@ -4052,22 +4137,35 @@ unsigned InterpreterMainLoop(ARMul_State* state) { INC_PC(sizeof(blx_inst)); goto DISPATCH; } + BX_INST: + BXJ_INST: { - bx_inst *inst_cream = (bx_inst *)inst_base->component; - if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { + // Note that only the 'fail' case of BXJ is emulated. This is because + // the facilities for Jazelle emulation are not implemented. + // + // According to the ARM documentation on BXJ, if setting the J bit in the APSR + // fails, then BXJ functions identically like a regular BX instruction. + // + // This is sufficient for citra, as the CPU for the 3DS does not implement Jazelle. + + if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) { + bx_inst* const inst_cream = (bx_inst*)inst_base->component; + if (inst_cream->Rm == 15) LOG_WARNING(Core_ARM11, "BX at pc %x: use of Rm = R15 is discouraged", cpu->Reg[15]); + cpu->TFlag = cpu->Reg[inst_cream->Rm] & 0x1; cpu->Reg[15] = cpu->Reg[inst_cream->Rm] & 0xfffffffe; INC_PC(sizeof(bx_inst)); goto DISPATCH; } + cpu->Reg[15] += GET_INST_SIZE(cpu); INC_PC(sizeof(bx_inst)); goto DISPATCH; } - BXJ_INST: + CDP_INST: { cdp_inst *inst_cream = (cdp_inst *)inst_base->component; @@ -4519,7 +4617,6 @@ unsigned InterpreterMainLoop(ARMul_State* state) { add_exclusive_addr(cpu, read_addr); cpu->exclusive_state = 1; - // TODO(bunnei): Do we need to also make [read_addr + 4] exclusive? RD = Memory::Read32(read_addr); RD2 = Memory::Read32(read_addr + 4); @@ -4992,6 +5089,78 @@ unsigned InterpreterMainLoop(ARMul_State* state) { } QADD_INST: + QDADD_INST: + QDSUB_INST: + QSUB_INST: + { + if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) { + generic_arm_inst* const inst_cream = (generic_arm_inst*)inst_base->component; + const u8 op1 = inst_cream->op1; + const u32 rm_val = RM; + const u32 rn_val = RN; + + u32 result = 0; + + // QADD + if (op1 == 0x00) { + result = rm_val + rn_val; + + if (AddOverflow(rm_val, rn_val, result)) { + result = POS(result) ? 0x80000000 : 0x7FFFFFFF; + cpu->Cpsr |= (1 << 27); + } + } + // QSUB + else if (op1 == 0x01) { + result = rm_val - rn_val; + + if (SubOverflow(rm_val, rn_val, result)) { + result = POS(result) ? 0x80000000 : 0x7FFFFFFF; + cpu->Cpsr |= (1 << 27); + } + } + // QDADD + else if (op1 == 0x02) { + u32 mul = (rn_val * 2); + + if (AddOverflow(rn_val, rn_val, rn_val * 2)) { + mul = POS(mul) ? 0x80000000 : 0x7FFFFFFF; + cpu->Cpsr |= (1 << 27); + } + + result = mul + rm_val; + + if (AddOverflow(rm_val, mul, result)) { + result = POS(result) ? 0x80000000 : 0x7FFFFFFF; + cpu->Cpsr |= (1 << 27); + } + } + // QDSUB + else if (op1 == 0x03) { + u32 mul = (rn_val * 2); + + if (AddOverflow(rn_val, rn_val, mul)) { + mul = POS(mul) ? 0x80000000 : 0x7FFFFFFF; + cpu->Cpsr |= (1 << 27); + } + + result = rm_val - mul; + + if (SubOverflow(rm_val, mul, result)) { + result = POS(result) ? 0x80000000 : 0x7FFFFFFF; + cpu->Cpsr |= (1 << 27); + } + } + + RD = result; + } + + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(generic_arm_inst)); + FETCH_INST; + GOTO_NEXT_INST; + } + QADD8_INST: QADD16_INST: QADDSUBX_INST: @@ -5054,42 +5223,39 @@ unsigned InterpreterMainLoop(ARMul_State* state) { GOTO_NEXT_INST; } - QDADD_INST: - QDSUB_INST: - QSUB_INST: REV_INST: - { - rev_inst *inst_cream = (rev_inst *)inst_base->component; - if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { - RD = ((RM & 0xff) << 24) | - (((RM >> 8) & 0xff) << 16) | - (((RM >> 16) & 0xff) << 8) | - ((RM >> 24) & 0xff); - if (inst_cream->Rm == 15) { - LOG_ERROR(Core_ARM11, "invalid operand for REV"); - CITRA_IGNORE_EXIT(-1); - } - } - cpu->Reg[15] += GET_INST_SIZE(cpu); - INC_PC(sizeof(rev_inst)); - FETCH_INST; - GOTO_NEXT_INST; - } REV16_INST: + REVSH_INST: { - rev_inst *inst_cream = (rev_inst *)inst_base->component; - if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { - RD = (BITS(RM, 0, 7) << 8) | - BITS(RM, 8, 15) | - (BITS(RM, 16, 23) << 24) | - (BITS(RM, 24, 31) << 16); + + if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) { + rev_inst* const inst_cream = (rev_inst*)inst_base->component; + + const u8 op1 = inst_cream->op1; + const u8 op2 = inst_cream->op2; + + // REV + if (op1 == 0x03 && op2 == 0x01) { + RD = ((RM & 0xFF) << 24) | (((RM >> 8) & 0xFF) << 16) | (((RM >> 16) & 0xFF) << 8) | ((RM >> 24) & 0xFF); + } + // REV16 + else if (op1 == 0x03 && op2 == 0x05) { + RD = ((RM & 0xFF) << 8) | ((RM & 0xFF00) >> 8) | ((RM & 0xFF0000) << 8) | ((RM & 0xFF000000) >> 8); + } + // REVSH + else if (op1 == 0x07 && op2 == 0x05) { + RD = ((RM & 0xFF) << 8) | ((RM & 0xFF00) >> 8); + if (RD & 0x8000) + RD |= 0xffff0000; + } } + cpu->Reg[15] += GET_INST_SIZE(cpu); INC_PC(sizeof(rev_inst)); FETCH_INST; GOTO_NEXT_INST; } - REVSH_INST: + RFE_INST: RSB_INST: { @@ -5425,7 +5591,8 @@ unsigned InterpreterMainLoop(ARMul_State* state) { operand2 = (BIT(RS, 31)) ? (BITS(RS, 16, 31) | 0xffff0000) : BITS(RS, 16, 31); RD = operand1 * operand2 + RN; - // TODO: FIXME: UPDATE Q FLAGS + if (AddOverflow(operand1 * operand2, RN, RD)) + cpu->Cpsr |= (1 << 27); } cpu->Reg[15] += GET_INST_SIZE(cpu); INC_PC(sizeof(smla_inst)); @@ -5519,9 +5686,69 @@ unsigned InterpreterMainLoop(ARMul_State* state) { } SMLALXY_INST: - SMLALD_INST: + SMLAW_INST: + { + if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) { + smlad_inst* const inst_cream = (smlad_inst*)inst_base->component; + + const u32 rm_val = RM; + const u32 rn_val = RN; + const u32 ra_val = cpu->Reg[inst_cream->Ra]; + const bool high = (inst_cream->m == 1); + + const s16 operand2 = (high) ? ((rm_val >> 16) & 0xFFFF) : (rm_val & 0xFFFF); + const s64 result = (s64)(s32)rn_val * (s64)(s32)operand2 + ((s64)(s32)ra_val << 16); + + RD = (result & (0xFFFFFFFFFFFFFFFFLL >> 15)) >> 16; + + if ((result >> 16) != (s32)RD) + cpu->Cpsr |= (1 << 27); + } + + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(smlad_inst)); + FETCH_INST; + GOTO_NEXT_INST; + } + + SMLALD_INST: SMLSLD_INST: + { + if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) { + smlald_inst* const inst_cream = (smlald_inst*)inst_base->component; + + const bool do_swap = (inst_cream->swap == 1); + const u32 rdlo_val = RDLO; + const u32 rdhi_val = RDHI; + const u32 rn_val = RN; + u32 rm_val = RM; + + if (do_swap) + rm_val = (((rm_val & 0xFFFF) << 16) | (rm_val >> 16)); + + const s32 product1 = (s16)(rn_val & 0xFFFF) * (s16)(rm_val & 0xFFFF); + const s32 product2 = (s16)((rn_val >> 16) & 0xFFFF) * (s16)((rm_val >> 16) & 0xFFFF); + s64 result; + + // SMLALD + if (BIT(inst_cream->op2, 1) == 0) { + result = (product1 + product2) + (s64)(rdlo_val | ((s64)rdhi_val << 32)); + } + // SMLSLD + else { + result = (product1 - product2) + (s64)(rdlo_val | ((s64)rdhi_val << 32)); + } + + RDLO = (result & 0xFFFFFFFF); + RDHI = ((result >> 32) & 0xFFFFFFFF); + } + + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(smlald_inst)); + FETCH_INST; + GOTO_NEXT_INST; + } SMMLA_INST: SMMLS_INST: @@ -5909,7 +6136,6 @@ unsigned InterpreterMainLoop(ARMul_State* state) { if ((exclusive_detect(cpu, write_addr) == 0) && (cpu->exclusive_state == 1)) { remove_exclusive(cpu, write_addr); cpu->exclusive_state = 0; - // TODO(bunnei): Remove exclusive from [write_addr + 4] if we implement this in LDREXD Memory::Write32(write_addr, cpu->Reg[inst_cream->Rm]); Memory::Write32(write_addr + 4, cpu->Reg[inst_cream->Rm + 1]); @@ -6514,7 +6740,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) { BLX_1_THUMB: { // BLX 1 for armv5t and above - uint32 tmp = cpu->Reg[15]; + u32 tmp = cpu->Reg[15]; blx_1_thumb *inst_cream = (blx_1_thumb *)inst_base->component; cpu->Reg[15] = (cpu->Reg[14] + inst_cream->imm) & 0xFFFFFFFC; cpu->Reg[14] = ((tmp + 2) | 1); |