From bd7e691f78d916ed6ae5396b2d646d9b3a053dd7 Mon Sep 17 00:00:00 2001 From: bunnei Date: Wed, 12 Aug 2015 00:00:44 -0400 Subject: x64: Refactor to remove fake interfaces and general cleanups. --- src/common/CMakeLists.txt | 18 +- src/common/abi.cpp | 680 ---------- src/common/abi.h | 78 -- src/common/common_funcs.h | 2 +- src/common/cpu_detect.cpp | 229 ++++ src/common/cpu_detect_generic.cpp | 19 - src/common/cpu_detect_x86.cpp | 229 ---- src/common/fake_emitter.h | 465 ------- src/common/platform.h | 2 +- src/common/x64/abi.cpp | 680 ++++++++++ src/common/x64/abi.h | 78 ++ src/common/x64/emitter.cpp | 1989 +++++++++++++++++++++++++++++ src/common/x64/emitter.h | 1067 ++++++++++++++++ src/common/x64_emitter.cpp | 1989 ----------------------------- src/common/x64_emitter.h | 1067 ---------------- src/video_core/CMakeLists.txt | 10 +- src/video_core/shader/shader.cpp | 9 +- src/video_core/shader/shader_jit.cpp | 36 - src/video_core/shader/shader_jit.h | 85 -- src/video_core/shader/shader_jit_fake.cpp | 91 -- src/video_core/shader/shader_jit_x64.cpp | 20 +- src/video_core/shader/shader_jit_x64.h | 79 ++ 22 files changed, 4154 insertions(+), 4768 deletions(-) delete mode 100644 src/common/abi.cpp delete mode 100644 src/common/abi.h create mode 100644 src/common/cpu_detect.cpp delete mode 100644 src/common/cpu_detect_generic.cpp delete mode 100644 src/common/cpu_detect_x86.cpp delete mode 100644 src/common/fake_emitter.h create mode 100644 src/common/x64/abi.cpp create mode 100644 src/common/x64/abi.h create mode 100644 src/common/x64/emitter.cpp create mode 100644 src/common/x64/emitter.h delete mode 100644 src/common/x64_emitter.cpp delete mode 100644 src/common/x64_emitter.h delete mode 100644 src/video_core/shader/shader_jit.cpp delete mode 100644 src/video_core/shader/shader_jit.h delete mode 100644 src/video_core/shader/shader_jit_fake.cpp create mode 100644 src/video_core/shader/shader_jit_x64.h diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index af387c6c8..fe82f71e8 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -2,8 +2,8 @@ configure_file("${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp" @ONLY) set(SRCS - abi.cpp break_points.cpp + cpu_detect.cpp emu_window.cpp file_util.cpp hash.cpp @@ -22,7 +22,6 @@ set(SRCS ) set(HEADERS - abi.h assert.h bit_field.h break_points.h @@ -61,19 +60,14 @@ set(HEADERS vector_math.h ) -if(_M_X86_64) +if(ARCHITECTURE_X64) set(SRCS ${SRCS} - cpu_detect_x86.cpp - x64_emitter.cpp) + x64/abi.cpp + x64/emitter.cpp) set(HEADERS ${HEADERS} - x64_emitter.h) -else() - set(SRCS ${SRCS} - cpu_detect_generic.cpp) - - set(HEADERS ${HEADERS} - fake_emitter.h) + x64/abi.h + x64/emitter.h) endif() create_directory_groups(${SRCS} ${HEADERS}) diff --git a/src/common/abi.cpp b/src/common/abi.cpp deleted file mode 100644 index d1892ad48..000000000 --- a/src/common/abi.cpp +++ /dev/null @@ -1,680 +0,0 @@ -// Copyright (C) 2003 Dolphin Project. - -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, version 2.0 or later versions. - -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License 2.0 for more details. - -// A copy of the GPL 2.0 should have been included with the program. -// If not, see http://www.gnu.org/licenses/ - -// Official SVN repository and contact information can be found at -// http://code.google.com/p/dolphin-emu/ - -#include "x64_emitter.h" -#include "abi.h" - -using namespace Gen; - -// Shared code between Win64 and Unix64 - -// Sets up a __cdecl function. -void XEmitter::ABI_EmitPrologue(int maxCallParams) -{ -#ifdef _M_IX86 - // Don't really need to do anything -#elif defined(_M_X86_64) -#if _WIN32 - int stacksize = ((maxCallParams + 1) & ~1) * 8 + 8; - // Set up a stack frame so that we can call functions - // TODO: use maxCallParams - SUB(64, R(RSP), Imm8(stacksize)); -#endif -#else -#error Arch not supported -#endif -} - -void XEmitter::ABI_EmitEpilogue(int maxCallParams) -{ -#ifdef _M_IX86 - RET(); -#elif defined(_M_X86_64) -#ifdef _WIN32 - int stacksize = ((maxCallParams+1)&~1)*8 + 8; - ADD(64, R(RSP), Imm8(stacksize)); -#endif - RET(); -#else -#error Arch not supported - - -#endif -} - -#ifdef _M_IX86 // All32 - -// Shared code between Win32 and Unix32 -void XEmitter::ABI_CallFunction(const void *func) { - ABI_AlignStack(0); - CALL(func); - ABI_RestoreStack(0); -} - -void XEmitter::ABI_CallFunctionC16(const void *func, u16 param1) { - ABI_AlignStack(1 * 2); - PUSH(16, Imm16(param1)); - CALL(func); - ABI_RestoreStack(1 * 2); -} - -void XEmitter::ABI_CallFunctionCC16(const void *func, u32 param1, u16 param2) { - ABI_AlignStack(1 * 2 + 1 * 4); - PUSH(16, Imm16(param2)); - PUSH(32, Imm32(param1)); - CALL(func); - ABI_RestoreStack(1 * 2 + 1 * 4); -} - -void XEmitter::ABI_CallFunctionC(const void *func, u32 param1) { - ABI_AlignStack(1 * 4); - PUSH(32, Imm32(param1)); - CALL(func); - ABI_RestoreStack(1 * 4); -} - -void XEmitter::ABI_CallFunctionCC(const void *func, u32 param1, u32 param2) { - ABI_AlignStack(2 * 4); - PUSH(32, Imm32(param2)); - PUSH(32, Imm32(param1)); - CALL(func); - ABI_RestoreStack(2 * 4); -} - -void XEmitter::ABI_CallFunctionCCC(const void *func, u32 param1, u32 param2, u32 param3) { - ABI_AlignStack(3 * 4); - PUSH(32, Imm32(param3)); - PUSH(32, Imm32(param2)); - PUSH(32, Imm32(param1)); - CALL(func); - ABI_RestoreStack(3 * 4); -} - -void XEmitter::ABI_CallFunctionCCP(const void *func, u32 param1, u32 param2, void *param3) { - ABI_AlignStack(3 * 4); - PUSH(32, ImmPtr(param3)); - PUSH(32, Imm32(param2)); - PUSH(32, Imm32(param1)); - CALL(func); - ABI_RestoreStack(3 * 4); -} - -void XEmitter::ABI_CallFunctionCCCP(const void *func, u32 param1, u32 param2,u32 param3, void *param4) { - ABI_AlignStack(4 * 4); - PUSH(32, ImmPtr(param4)); - PUSH(32, Imm32(param3)); - PUSH(32, Imm32(param2)); - PUSH(32, Imm32(param1)); - CALL(func); - ABI_RestoreStack(4 * 4); -} - -void XEmitter::ABI_CallFunctionP(const void *func, void *param1) { - ABI_AlignStack(1 * 4); - PUSH(32, ImmPtr(param1)); - CALL(func); - ABI_RestoreStack(1 * 4); -} - -void XEmitter::ABI_CallFunctionPA(const void *func, void *param1, const Gen::OpArg &arg2) { - ABI_AlignStack(2 * 4); - PUSH(32, arg2); - PUSH(32, ImmPtr(param1)); - CALL(func); - ABI_RestoreStack(2 * 4); -} - -void XEmitter::ABI_CallFunctionPAA(const void *func, void *param1, const Gen::OpArg &arg2, const Gen::OpArg &arg3) { - ABI_AlignStack(3 * 4); - PUSH(32, arg3); - PUSH(32, arg2); - PUSH(32, ImmPtr(param1)); - CALL(func); - ABI_RestoreStack(3 * 4); -} - -void XEmitter::ABI_CallFunctionPPC(const void *func, void *param1, void *param2, u32 param3) { - ABI_AlignStack(3 * 4); - PUSH(32, Imm32(param3)); - PUSH(32, ImmPtr(param2)); - PUSH(32, ImmPtr(param1)); - CALL(func); - ABI_RestoreStack(3 * 4); -} - -// Pass a register as a parameter. -void XEmitter::ABI_CallFunctionR(const void *func, X64Reg reg1) { - ABI_AlignStack(1 * 4); - PUSH(32, R(reg1)); - CALL(func); - ABI_RestoreStack(1 * 4); -} - -// Pass two registers as parameters. -void XEmitter::ABI_CallFunctionRR(const void *func, Gen::X64Reg reg1, Gen::X64Reg reg2) -{ - ABI_AlignStack(2 * 4); - PUSH(32, R(reg2)); - PUSH(32, R(reg1)); - CALL(func); - ABI_RestoreStack(2 * 4); -} - -void XEmitter::ABI_CallFunctionAC(const void *func, const Gen::OpArg &arg1, u32 param2) -{ - ABI_AlignStack(2 * 4); - PUSH(32, Imm32(param2)); - PUSH(32, arg1); - CALL(func); - ABI_RestoreStack(2 * 4); -} - -void XEmitter::ABI_CallFunctionACC(const void *func, const Gen::OpArg &arg1, u32 param2, u32 param3) -{ - ABI_AlignStack(3 * 4); - PUSH(32, Imm32(param3)); - PUSH(32, Imm32(param2)); - PUSH(32, arg1); - CALL(func); - ABI_RestoreStack(3 * 4); -} - -void XEmitter::ABI_CallFunctionA(const void *func, const Gen::OpArg &arg1) -{ - ABI_AlignStack(1 * 4); - PUSH(32, arg1); - CALL(func); - ABI_RestoreStack(1 * 4); -} - -void XEmitter::ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, const Gen::OpArg &arg2) -{ - ABI_AlignStack(2 * 4); - PUSH(32, arg2); - PUSH(32, arg1); - CALL(func); - ABI_RestoreStack(2 * 4); -} - -void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() { - // Note: 4 * 4 = 16 bytes, so alignment is preserved. - PUSH(EBP); - PUSH(EBX); - PUSH(ESI); - PUSH(EDI); -} - -void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() { - POP(EDI); - POP(ESI); - POP(EBX); - POP(EBP); -} - -unsigned int XEmitter::ABI_GetAlignedFrameSize(unsigned int frameSize) { - frameSize += 4; // reserve space for return address - unsigned int alignedSize = -#ifdef __GNUC__ - (frameSize + 15) & -16; -#else - (frameSize + 3) & -4; -#endif - return alignedSize; -} - - -void XEmitter::ABI_AlignStack(unsigned int frameSize) { -// Mac OS X requires the stack to be 16-byte aligned before every call. -// Linux requires the stack to be 16-byte aligned before calls that put SSE -// vectors on the stack, but since we do not keep track of which calls do that, -// it is effectively every call as well. -// Windows binaries compiled with MSVC do not have such a restriction*, but I -// expect that GCC on Windows acts the same as GCC on Linux in this respect. -// It would be nice if someone could verify this. -// *However, the MSVC optimizing compiler assumes a 4-byte-aligned stack at times. - unsigned int fillSize = - ABI_GetAlignedFrameSize(frameSize) - (frameSize + 4); - if (fillSize != 0) { - SUB(32, R(ESP), Imm8(fillSize)); - } -} - -void XEmitter::ABI_RestoreStack(unsigned int frameSize) { - unsigned int alignedSize = ABI_GetAlignedFrameSize(frameSize); - alignedSize -= 4; // return address is POPped at end of call - if (alignedSize != 0) { - ADD(32, R(ESP), Imm8(alignedSize)); - } -} - -#else //64bit - -// Common functions -void XEmitter::ABI_CallFunction(const void *func) { - u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL - && distance < 0xFFFFFFFF80000000ULL) { - // Far call - MOV(64, R(RAX), ImmPtr(func)); - CALLptr(R(RAX)); - } else { - CALL(func); - } -} - -void XEmitter::ABI_CallFunctionC16(const void *func, u16 param1) { - MOV(32, R(ABI_PARAM1), Imm32((u32)param1)); - u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL - && distance < 0xFFFFFFFF80000000ULL) { - // Far call - MOV(64, R(RAX), ImmPtr(func)); - CALLptr(R(RAX)); - } else { - CALL(func); - } -} - -void XEmitter::ABI_CallFunctionCC16(const void *func, u32 param1, u16 param2) { - MOV(32, R(ABI_PARAM1), Imm32(param1)); - MOV(32, R(ABI_PARAM2), Imm32((u32)param2)); - u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL - && distance < 0xFFFFFFFF80000000ULL) { - // Far call - MOV(64, R(RAX), ImmPtr(func)); - CALLptr(R(RAX)); - } else { - CALL(func); - } -} - -void XEmitter::ABI_CallFunctionC(const void *func, u32 param1) { - MOV(32, R(ABI_PARAM1), Imm32(param1)); - u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL - && distance < 0xFFFFFFFF80000000ULL) { - // Far call - MOV(64, R(RAX), ImmPtr(func)); - CALLptr(R(RAX)); - } else { - CALL(func); - } -} - -void XEmitter::ABI_CallFunctionCC(const void *func, u32 param1, u32 param2) { - MOV(32, R(ABI_PARAM1), Imm32(param1)); - MOV(32, R(ABI_PARAM2), Imm32(param2)); - u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL - && distance < 0xFFFFFFFF80000000ULL) { - // Far call - MOV(64, R(RAX), ImmPtr(func)); - CALLptr(R(RAX)); - } else { - CALL(func); - } -} - -void XEmitter::ABI_CallFunctionCCC(const void *func, u32 param1, u32 param2, u32 param3) { - MOV(32, R(ABI_PARAM1), Imm32(param1)); - MOV(32, R(ABI_PARAM2), Imm32(param2)); - MOV(32, R(ABI_PARAM3), Imm32(param3)); - u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL - && distance < 0xFFFFFFFF80000000ULL) { - // Far call - MOV(64, R(RAX), ImmPtr(func)); - CALLptr(R(RAX)); - } else { - CALL(func); - } -} - -void XEmitter::ABI_CallFunctionCCP(const void *func, u32 param1, u32 param2, void *param3) { - MOV(32, R(ABI_PARAM1), Imm32(param1)); - MOV(32, R(ABI_PARAM2), Imm32(param2)); - MOV(64, R(ABI_PARAM3), ImmPtr(param3)); - u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL - && distance < 0xFFFFFFFF80000000ULL) { - // Far call - MOV(64, R(RAX), ImmPtr(func)); - CALLptr(R(RAX)); - } else { - CALL(func); - } -} - -void XEmitter::ABI_CallFunctionCCCP(const void *func, u32 param1, u32 param2, u32 param3, void *param4) { - MOV(32, R(ABI_PARAM1), Imm32(param1)); - MOV(32, R(ABI_PARAM2), Imm32(param2)); - MOV(32, R(ABI_PARAM3), Imm32(param3)); - MOV(64, R(ABI_PARAM4), ImmPtr(param4)); - u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL - && distance < 0xFFFFFFFF80000000ULL) { - // Far call - MOV(64, R(RAX), ImmPtr(func)); - CALLptr(R(RAX)); - } else { - CALL(func); - } -} - -void XEmitter::ABI_CallFunctionP(const void *func, void *param1) { - MOV(64, R(ABI_PARAM1), ImmPtr(param1)); - u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL - && distance < 0xFFFFFFFF80000000ULL) { - // Far call - MOV(64, R(RAX), ImmPtr(func)); - CALLptr(R(RAX)); - } else { - CALL(func); - } -} - -void XEmitter::ABI_CallFunctionPA(const void *func, void *param1, const Gen::OpArg &arg2) { - MOV(64, R(ABI_PARAM1), ImmPtr(param1)); - if (!arg2.IsSimpleReg(ABI_PARAM2)) - MOV(32, R(ABI_PARAM2), arg2); - u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL - && distance < 0xFFFFFFFF80000000ULL) { - // Far call - MOV(64, R(RAX), ImmPtr(func)); - CALLptr(R(RAX)); - } else { - CALL(func); - } -} - -void XEmitter::ABI_CallFunctionPAA(const void *func, void *param1, const Gen::OpArg &arg2, const Gen::OpArg &arg3) { - MOV(64, R(ABI_PARAM1), ImmPtr(param1)); - if (!arg2.IsSimpleReg(ABI_PARAM2)) - MOV(32, R(ABI_PARAM2), arg2); - if (!arg3.IsSimpleReg(ABI_PARAM3)) - MOV(32, R(ABI_PARAM3), arg3); - u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL - && distance < 0xFFFFFFFF80000000ULL) { - // Far call - MOV(64, R(RAX), ImmPtr(func)); - CALLptr(R(RAX)); - } else { - CALL(func); - } -} - -void XEmitter::ABI_CallFunctionPPC(const void *func, void *param1, void *param2, u32 param3) { - MOV(64, R(ABI_PARAM1), ImmPtr(param1)); - MOV(64, R(ABI_PARAM2), ImmPtr(param2)); - MOV(32, R(ABI_PARAM3), Imm32(param3)); - u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL - && distance < 0xFFFFFFFF80000000ULL) { - // Far call - MOV(64, R(RAX), ImmPtr(func)); - CALLptr(R(RAX)); - } else { - CALL(func); - } -} - -// Pass a register as a parameter. -void XEmitter::ABI_CallFunctionR(const void *func, X64Reg reg1) { - if (reg1 != ABI_PARAM1) - MOV(32, R(ABI_PARAM1), R(reg1)); - u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL - && distance < 0xFFFFFFFF80000000ULL) { - // Far call - MOV(64, R(RAX), ImmPtr(func)); - CALLptr(R(RAX)); - } else { - CALL(func); - } -} - -// Pass two registers as parameters. -void XEmitter::ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2) { - if (reg2 != ABI_PARAM1) { - if (reg1 != ABI_PARAM1) - MOV(64, R(ABI_PARAM1), R(reg1)); - if (reg2 != ABI_PARAM2) - MOV(64, R(ABI_PARAM2), R(reg2)); - } else { - if (reg2 != ABI_PARAM2) - MOV(64, R(ABI_PARAM2), R(reg2)); - if (reg1 != ABI_PARAM1) - MOV(64, R(ABI_PARAM1), R(reg1)); - } - u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL - && distance < 0xFFFFFFFF80000000ULL) { - // Far call - MOV(64, R(RAX), ImmPtr(func)); - CALLptr(R(RAX)); - } else { - CALL(func); - } -} - -void XEmitter::ABI_CallFunctionAC(const void *func, const Gen::OpArg &arg1, u32 param2) -{ - if (!arg1.IsSimpleReg(ABI_PARAM1)) - MOV(32, R(ABI_PARAM1), arg1); - MOV(32, R(ABI_PARAM2), Imm32(param2)); - u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL - && distance < 0xFFFFFFFF80000000ULL) { - // Far call - MOV(64, R(RAX), ImmPtr(func)); - CALLptr(R(RAX)); - } else { - CALL(func); - } -} - -void XEmitter::ABI_CallFunctionACC(const void *func, const Gen::OpArg &arg1, u32 param2, u32 param3) -{ - if (!arg1.IsSimpleReg(ABI_PARAM1)) - MOV(32, R(ABI_PARAM1), arg1); - MOV(32, R(ABI_PARAM2), Imm32(param2)); - MOV(64, R(ABI_PARAM3), Imm64(param3)); - u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL - && distance < 0xFFFFFFFF80000000ULL) { - // Far call - MOV(64, R(RAX), ImmPtr(func)); - CALLptr(R(RAX)); - } else { - CALL(func); - } -} - -void XEmitter::ABI_CallFunctionA(const void *func, const Gen::OpArg &arg1) -{ - if (!arg1.IsSimpleReg(ABI_PARAM1)) - MOV(32, R(ABI_PARAM1), arg1); - u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL - && distance < 0xFFFFFFFF80000000ULL) { - // Far call - MOV(64, R(RAX), ImmPtr(func)); - CALLptr(R(RAX)); - } else { - CALL(func); - } -} - -void XEmitter::ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, const Gen::OpArg &arg2) -{ - if (!arg1.IsSimpleReg(ABI_PARAM1)) - MOV(32, R(ABI_PARAM1), arg1); - if (!arg2.IsSimpleReg(ABI_PARAM2)) - MOV(32, R(ABI_PARAM2), arg2); - u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL - && distance < 0xFFFFFFFF80000000ULL) { - // Far call - MOV(64, R(RAX), ImmPtr(func)); - CALLptr(R(RAX)); - } else { - CALL(func); - } -} - -unsigned int XEmitter::ABI_GetAlignedFrameSize(unsigned int frameSize) { - return frameSize; -} - -#ifdef _WIN32 - -// The Windows x64 ABI requires XMM6 - XMM15 to be callee saved. 10 regs. -// But, not saving XMM4 and XMM5 breaks things in VS 2010, even though they are volatile regs. -// Let's just save all 16. -const int XMM_STACK_SPACE = 16 * 16; - -// Win64 Specific Code -void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() { - //we only want to do this once - PUSH(RBX); - PUSH(RSI); - PUSH(RDI); - PUSH(RBP); - PUSH(R12); - PUSH(R13); - PUSH(R14); - PUSH(R15); - ABI_AlignStack(0); - - // Do this after aligning, because before it's offset by 8. - SUB(64, R(RSP), Imm32(XMM_STACK_SPACE)); - for (int i = 0; i < 16; ++i) - MOVAPS(MDisp(RSP, i * 16), (X64Reg)(XMM0 + i)); -} - -void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() { - for (int i = 0; i < 16; ++i) - MOVAPS((X64Reg)(XMM0 + i), MDisp(RSP, i * 16)); - ADD(64, R(RSP), Imm32(XMM_STACK_SPACE)); - - ABI_RestoreStack(0); - POP(R15); - POP(R14); - POP(R13); - POP(R12); - POP(RBP); - POP(RDI); - POP(RSI); - POP(RBX); -} - -// Win64 Specific Code -void XEmitter::ABI_PushAllCallerSavedRegsAndAdjustStack() { - PUSH(RCX); - PUSH(RDX); - PUSH(RSI); - PUSH(RDI); - PUSH(R8); - PUSH(R9); - PUSH(R10); - PUSH(R11); - // TODO: Callers preserve XMM4-5 (XMM0-3 are args.) - ABI_AlignStack(0); -} - -void XEmitter::ABI_PopAllCallerSavedRegsAndAdjustStack() { - ABI_RestoreStack(0); - POP(R11); - POP(R10); - POP(R9); - POP(R8); - POP(RDI); - POP(RSI); - POP(RDX); - POP(RCX); -} - -void XEmitter::ABI_AlignStack(unsigned int /*frameSize*/) { - SUB(64, R(RSP), Imm8(0x28)); -} - -void XEmitter::ABI_RestoreStack(unsigned int /*frameSize*/) { - ADD(64, R(RSP), Imm8(0x28)); -} - -#else -// Unix64 Specific Code -void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() { - PUSH(RBX); - PUSH(RBP); - PUSH(R12); - PUSH(R13); - PUSH(R14); - PUSH(R15); - PUSH(R15); //just to align stack. duped push/pop doesn't hurt. - // TODO: XMM? -} - -void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() { - POP(R15); - POP(R15); - POP(R14); - POP(R13); - POP(R12); - POP(RBP); - POP(RBX); -} - -void XEmitter::ABI_PushAllCallerSavedRegsAndAdjustStack() { - PUSH(RCX); - PUSH(RDX); - PUSH(RSI); - PUSH(RDI); - PUSH(R8); - PUSH(R9); - PUSH(R10); - PUSH(R11); - PUSH(R11); -} - -void XEmitter::ABI_PopAllCallerSavedRegsAndAdjustStack() { - POP(R11); - POP(R11); - POP(R10); - POP(R9); - POP(R8); - POP(RDI); - POP(RSI); - POP(RDX); - POP(RCX); -} - -void XEmitter::ABI_AlignStack(unsigned int /*frameSize*/) { - SUB(64, R(RSP), Imm8(0x08)); -} - -void XEmitter::ABI_RestoreStack(unsigned int /*frameSize*/) { - ADD(64, R(RSP), Imm8(0x08)); -} - -#endif // WIN32 - -#endif // 32bit diff --git a/src/common/abi.h b/src/common/abi.h deleted file mode 100644 index bb9f7c95f..000000000 --- a/src/common/abi.h +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright (C) 2003 Dolphin Project. - -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, version 2.0 or later versions. - -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License 2.0 for more details. - -// A copy of the GPL 2.0 should have been included with the program. -// If not, see http://www.gnu.org/licenses/ - -// Official SVN repository and contact information can be found at -// http://code.google.com/p/dolphin-emu/ - -#pragma once - -#include "common_types.h" - -// x86/x64 ABI:s, and helpers to help follow them when JIT-ing code. -// All convensions return values in EAX (+ possibly EDX). - -// Linux 32-bit, Windows 32-bit (cdecl, System V): -// * Caller pushes left to right -// * Caller fixes stack after call -// * function subtract from stack for local storage only. -// Scratch: EAX ECX EDX -// Callee-save: EBX ESI EDI EBP -// Parameters: - - -// Windows 64-bit -// * 4-reg "fastcall" variant, very new-skool stack handling -// * Callee moves stack pointer, to make room for shadow regs for the biggest function _it itself calls_ -// * Parameters passed in RCX, RDX, ... further parameters are MOVed into the allocated stack space. -// Scratch: RAX RCX RDX R8 R9 R10 R11 -// Callee-save: RBX RSI RDI RBP R12 R13 R14 R15 -// Parameters: RCX RDX R8 R9, further MOV-ed - -// Linux 64-bit -// * 6-reg "fastcall" variant, old skool stack handling (parameters are pushed) -// Scratch: RAX RCX RDX RSI RDI R8 R9 R10 R11 -// Callee-save: RBX RBP R12 R13 R14 R15 -// Parameters: RDI RSI RDX RCX R8 R9 - -#ifdef _M_IX86 // 32 bit calling convention, shared by all - -// 32-bit don't pass parameters in regs, but these are convenient to have anyway when we have to -// choose regs to put stuff in. -#define ABI_PARAM1 RCX -#define ABI_PARAM2 RDX - -// There are no ABI_PARAM* here, since args are pushed. -// 32-bit bog standard cdecl, shared between linux and windows -// MacOSX 32-bit is same as System V with a few exceptions that we probably don't care much about. - -#elif _M_X86_64 // 64 bit calling convention - -#ifdef _WIN32 // 64-bit Windows - the really exotic calling convention - -#define ABI_PARAM1 RCX -#define ABI_PARAM2 RDX -#define ABI_PARAM3 R8 -#define ABI_PARAM4 R9 - -#else //64-bit Unix (hopefully MacOSX too) - -#define ABI_PARAM1 RDI -#define ABI_PARAM2 RSI -#define ABI_PARAM3 RDX -#define ABI_PARAM4 RCX -#define ABI_PARAM5 R8 -#define ABI_PARAM6 R9 - -#endif // WIN32 - -#endif // X86 diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h index 6fd2b06b2..948070e62 100644 --- a/src/common/common_funcs.h +++ b/src/common/common_funcs.h @@ -35,7 +35,7 @@ #ifndef _MSC_VER -#if defined(__x86_64__) || defined(_M_X86_64) +#if defined(__x86_64__) || defined(ARCHITECTURE_X64) #define Crash() __asm__ __volatile__("int $3") #elif defined(_M_ARM) #define Crash() __asm__ __volatile__("trap") diff --git a/src/common/cpu_detect.cpp b/src/common/cpu_detect.cpp new file mode 100644 index 000000000..1d612829e --- /dev/null +++ b/src/common/cpu_detect.cpp @@ -0,0 +1,229 @@ +// Copyright 2008 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include +#include + +#include "common_types.h" +#include "cpu_detect.h" +#include "hash.h" + +#ifndef _WIN32 + +#ifdef __FreeBSD__ +#include +#include +#endif + +static inline void __cpuidex(int info[4], int function_id, int subfunction_id) +{ +#ifdef __FreeBSD__ + // Despite the name, this is just do_cpuid() with ECX as second input. + cpuid_count((u_int)function_id, (u_int)subfunction_id, (u_int*)info); +#else + info[0] = function_id; // eax + info[2] = subfunction_id; // ecx + __asm__( + "cpuid" + : "=a" (info[0]), + "=b" (info[1]), + "=c" (info[2]), + "=d" (info[3]) + : "a" (function_id), + "c" (subfunction_id) + ); +#endif +} + +static inline void __cpuid(int info[4], int function_id) +{ + return __cpuidex(info, function_id, 0); +} + +#define _XCR_XFEATURE_ENABLED_MASK 0 +static u64 _xgetbv(u32 index) +{ + u32 eax, edx; + __asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index)); + return ((u64)edx << 32) | eax; +} + +#endif // ifndef _WIN32 + +namespace Common { + +CPUInfo cpu_info; + +CPUInfo::CPUInfo() { + Detect(); +} + +// Detects the various CPU features +void CPUInfo::Detect() { + memset(this, 0, sizeof(*this)); +#ifdef ARCHITECTURE_X64 + Mode64bit = true; + OS64bit = true; +#endif + num_cores = 1; + + // Set obvious defaults, for extra safety + if (Mode64bit) { + bSSE = true; + bSSE2 = true; + bLongMode = true; + } + + // Assume CPU supports the CPUID instruction. Those that don't can barely + // boot modern OS:es anyway. + int cpu_id[4]; + memset(brand_string, 0, sizeof(brand_string)); + + // Detect CPU's CPUID capabilities, and grab CPU string + __cpuid(cpu_id, 0x00000000); + u32 max_std_fn = cpu_id[0]; // EAX + *((int *)brand_string) = cpu_id[1]; + *((int *)(brand_string + 4)) = cpu_id[3]; + *((int *)(brand_string + 8)) = cpu_id[2]; + __cpuid(cpu_id, 0x80000000); + u32 max_ex_fn = cpu_id[0]; + if (!strcmp(brand_string, "GenuineIntel")) + vendor = VENDOR_INTEL; + else if (!strcmp(brand_string, "AuthenticAMD")) + vendor = VENDOR_AMD; + else + vendor = VENDOR_OTHER; + + // Set reasonable default brand string even if brand string not available. + strcpy(cpu_string, brand_string); + + // Detect family and other misc stuff. + bool ht = false; + HTT = ht; + logical_cpu_count = 1; + if (max_std_fn >= 1) { + __cpuid(cpu_id, 0x00000001); + int family = ((cpu_id[0] >> 8) & 0xf) + ((cpu_id[0] >> 20) & 0xff); + int model = ((cpu_id[0] >> 4) & 0xf) + ((cpu_id[0] >> 12) & 0xf0); + // Detect people unfortunate enough to be running Dolphin on an Atom + if (family == 6 && (model == 0x1C || model == 0x26 || model == 0x27 || model == 0x35 || model == 0x36 || + model == 0x37 || model == 0x4A || model == 0x4D || model == 0x5A || model == 0x5D)) + bAtom = true; + logical_cpu_count = (cpu_id[1] >> 16) & 0xFF; + ht = (cpu_id[3] >> 28) & 1; + + if ((cpu_id[3] >> 25) & 1) bSSE = true; + if ((cpu_id[3] >> 26) & 1) bSSE2 = true; + if ((cpu_id[2]) & 1) bSSE3 = true; + if ((cpu_id[2] >> 9) & 1) bSSSE3 = true; + if ((cpu_id[2] >> 19) & 1) bSSE4_1 = true; + if ((cpu_id[2] >> 20) & 1) bSSE4_2 = true; + if ((cpu_id[2] >> 22) & 1) bMOVBE = true; + if ((cpu_id[2] >> 25) & 1) bAES = true; + + if ((cpu_id[3] >> 24) & 1) + { + // We can use FXSAVE. + bFXSR = true; + } + + // AVX support requires 3 separate checks: + // - Is the AVX bit set in CPUID? + // - Is the XSAVE bit set in CPUID? + // - XGETBV result has the XCR bit set. + if (((cpu_id[2] >> 28) & 1) && ((cpu_id[2] >> 27) & 1)) { + if ((_xgetbv(_XCR_XFEATURE_ENABLED_MASK) & 0x6) == 0x6) { + bAVX = true; + if ((cpu_id[2] >> 12) & 1) + bFMA = true; + } + } + + if (max_std_fn >= 7) { + __cpuidex(cpu_id, 0x00000007, 0x00000000); + // careful; we can't enable AVX2 unless the XSAVE/XGETBV checks above passed + if ((cpu_id[1] >> 5) & 1) + bAVX2 = bAVX; + if ((cpu_id[1] >> 3) & 1) + bBMI1 = true; + if ((cpu_id[1] >> 8) & 1) + bBMI2 = true; + } + } + + bFlushToZero = bSSE; + + if (max_ex_fn >= 0x80000004) { + // Extract CPU model string + __cpuid(cpu_id, 0x80000002); + memcpy(cpu_string, cpu_id, sizeof(cpu_id)); + __cpuid(cpu_id, 0x80000003); + memcpy(cpu_string + 16, cpu_id, sizeof(cpu_id)); + __cpuid(cpu_id, 0x80000004); + memcpy(cpu_string + 32, cpu_id, sizeof(cpu_id)); + } + if (max_ex_fn >= 0x80000001) { + // Check for more features. + __cpuid(cpu_id, 0x80000001); + if (cpu_id[2] & 1) bLAHFSAHF64 = true; + if ((cpu_id[2] >> 5) & 1) bLZCNT = true; + if ((cpu_id[2] >> 16) & 1) bFMA4 = true; + if ((cpu_id[3] >> 29) & 1) bLongMode = true; + } + + num_cores = (logical_cpu_count == 0) ? 1 : logical_cpu_count; + + if (max_ex_fn >= 0x80000008) { + // Get number of cores. This is a bit complicated. Following AMD manual here. + __cpuid(cpu_id, 0x80000008); + int apic_id_core_id_size = (cpu_id[2] >> 12) & 0xF; + if (apic_id_core_id_size == 0) { + if (ht) { + // New mechanism for modern Intel CPUs. + if (vendor == VENDOR_INTEL) { + __cpuidex(cpu_id, 0x00000004, 0x00000000); + int cores_x_package = ((cpu_id[0] >> 26) & 0x3F) + 1; + HTT = (cores_x_package < logical_cpu_count); + cores_x_package = ((logical_cpu_count % cores_x_package) == 0) ? cores_x_package : 1; + num_cores = (cores_x_package > 1) ? cores_x_package : num_cores; + logical_cpu_count /= cores_x_package; + } + } + } else { + // Use AMD's new method. + num_cores = (cpu_id[2] & 0xFF) + 1; + } + } +} + +// Turn the CPU info into a string we can show +std::string CPUInfo::Summarize() { + std::string sum(cpu_string); + sum += " ("; + sum += brand_string; + sum += ")"; + + if (bSSE) sum += ", SSE"; + if (bSSE2) { + sum += ", SSE2"; + if (!bFlushToZero) + sum += " (but not DAZ!)"; + } + if (bSSE3) sum += ", SSE3"; + if (bSSSE3) sum += ", SSSE3"; + if (bSSE4_1) sum += ", SSE4.1"; + if (bSSE4_2) sum += ", SSE4.2"; + if (HTT) sum += ", HTT"; + if (bAVX) sum += ", AVX"; + if (bAVX2) sum += ", AVX2"; + if (bBMI1) sum += ", BMI1"; + if (bBMI2) sum += ", BMI2"; + if (bFMA) sum += ", FMA"; + if (bAES) sum += ", AES"; + if (bMOVBE) sum += ", MOVBE"; + if (bLongMode) sum += ", 64-bit support"; + return sum; +} + +} // namespace Common diff --git a/src/common/cpu_detect_generic.cpp b/src/common/cpu_detect_generic.cpp deleted file mode 100644 index ccec324d9..000000000 --- a/src/common/cpu_detect_generic.cpp +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright 2014 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include "cpu_detect.h" -#include "hash.h" - -namespace Common { - -CPUInfo cpu_info; - -CPUInfo::CPUInfo() { -} - -std::string CPUInfo::Summarize() { - return "Generic"; -} - -} // namespace Common diff --git a/src/common/cpu_detect_x86.cpp b/src/common/cpu_detect_x86.cpp deleted file mode 100644 index 0bcff726d..000000000 --- a/src/common/cpu_detect_x86.cpp +++ /dev/null @@ -1,229 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include -#include - -#include "common_types.h" -#include "cpu_detect.h" -#include "hash.h" - -#ifndef _WIN32 - -#ifdef __FreeBSD__ -#include -#include -#endif - -static inline void __cpuidex(int info[4], int function_id, int subfunction_id) -{ -#ifdef __FreeBSD__ - // Despite the name, this is just do_cpuid() with ECX as second input. - cpuid_count((u_int)function_id, (u_int)subfunction_id, (u_int*)info); -#else - info[0] = function_id; // eax - info[2] = subfunction_id; // ecx - __asm__( - "cpuid" - : "=a" (info[0]), - "=b" (info[1]), - "=c" (info[2]), - "=d" (info[3]) - : "a" (function_id), - "c" (subfunction_id) - ); -#endif -} - -static inline void __cpuid(int info[4], int function_id) -{ - return __cpuidex(info, function_id, 0); -} - -#define _XCR_XFEATURE_ENABLED_MASK 0 -static u64 _xgetbv(u32 index) -{ - u32 eax, edx; - __asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index)); - return ((u64)edx << 32) | eax; -} - -#endif // ifndef _WIN32 - -namespace Common { - -CPUInfo cpu_info; - -CPUInfo::CPUInfo() { - Detect(); -} - -// Detects the various CPU features -void CPUInfo::Detect() { - memset(this, 0, sizeof(*this)); -#ifdef _M_X86_64 - Mode64bit = true; - OS64bit = true; -#endif - num_cores = 1; - - // Set obvious defaults, for extra safety - if (Mode64bit) { - bSSE = true; - bSSE2 = true; - bLongMode = true; - } - - // Assume CPU supports the CPUID instruction. Those that don't can barely - // boot modern OS:es anyway. - int cpu_id[4]; - memset(brand_string, 0, sizeof(brand_string)); - - // Detect CPU's CPUID capabilities, and grab CPU string - __cpuid(cpu_id, 0x00000000); - u32 max_std_fn = cpu_id[0]; // EAX - *((int *)brand_string) = cpu_id[1]; - *((int *)(brand_string + 4)) = cpu_id[3]; - *((int *)(brand_string + 8)) = cpu_id[2]; - __cpuid(cpu_id, 0x80000000); - u32 max_ex_fn = cpu_id[0]; - if (!strcmp(brand_string, "GenuineIntel")) - vendor = VENDOR_INTEL; - else if (!strcmp(brand_string, "AuthenticAMD")) - vendor = VENDOR_AMD; - else - vendor = VENDOR_OTHER; - - // Set reasonable default brand string even if brand string not available. - strcpy(cpu_string, brand_string); - - // Detect family and other misc stuff. - bool ht = false; - HTT = ht; - logical_cpu_count = 1; - if (max_std_fn >= 1) { - __cpuid(cpu_id, 0x00000001); - int family = ((cpu_id[0] >> 8) & 0xf) + ((cpu_id[0] >> 20) & 0xff); - int model = ((cpu_id[0] >> 4) & 0xf) + ((cpu_id[0] >> 12) & 0xf0); - // Detect people unfortunate enough to be running Dolphin on an Atom - if (family == 6 && (model == 0x1C || model == 0x26 || model == 0x27 || model == 0x35 || model == 0x36 || - model == 0x37 || model == 0x4A || model == 0x4D || model == 0x5A || model == 0x5D)) - bAtom = true; - logical_cpu_count = (cpu_id[1] >> 16) & 0xFF; - ht = (cpu_id[3] >> 28) & 1; - - if ((cpu_id[3] >> 25) & 1) bSSE = true; - if ((cpu_id[3] >> 26) & 1) bSSE2 = true; - if ((cpu_id[2]) & 1) bSSE3 = true; - if ((cpu_id[2] >> 9) & 1) bSSSE3 = true; - if ((cpu_id[2] >> 19) & 1) bSSE4_1 = true; - if ((cpu_id[2] >> 20) & 1) bSSE4_2 = true; - if ((cpu_id[2] >> 22) & 1) bMOVBE = true; - if ((cpu_id[2] >> 25) & 1) bAES = true; - - if ((cpu_id[3] >> 24) & 1) - { - // We can use FXSAVE. - bFXSR = true; - } - - // AVX support requires 3 separate checks: - // - Is the AVX bit set in CPUID? - // - Is the XSAVE bit set in CPUID? - // - XGETBV result has the XCR bit set. - if (((cpu_id[2] >> 28) & 1) && ((cpu_id[2] >> 27) & 1)) { - if ((_xgetbv(_XCR_XFEATURE_ENABLED_MASK) & 0x6) == 0x6) { - bAVX = true; - if ((cpu_id[2] >> 12) & 1) - bFMA = true; - } - } - - if (max_std_fn >= 7) { - __cpuidex(cpu_id, 0x00000007, 0x00000000); - // careful; we can't enable AVX2 unless the XSAVE/XGETBV checks above passed - if ((cpu_id[1] >> 5) & 1) - bAVX2 = bAVX; - if ((cpu_id[1] >> 3) & 1) - bBMI1 = true; - if ((cpu_id[1] >> 8) & 1) - bBMI2 = true; - } - } - - bFlushToZero = bSSE; - - if (max_ex_fn >= 0x80000004) { - // Extract CPU model string - __cpuid(cpu_id, 0x80000002); - memcpy(cpu_string, cpu_id, sizeof(cpu_id)); - __cpuid(cpu_id, 0x80000003); - memcpy(cpu_string + 16, cpu_id, sizeof(cpu_id)); - __cpuid(cpu_id, 0x80000004); - memcpy(cpu_string + 32, cpu_id, sizeof(cpu_id)); - } - if (max_ex_fn >= 0x80000001) { - // Check for more features. - __cpuid(cpu_id, 0x80000001); - if (cpu_id[2] & 1) bLAHFSAHF64 = true; - if ((cpu_id[2] >> 5) & 1) bLZCNT = true; - if ((cpu_id[2] >> 16) & 1) bFMA4 = true; - if ((cpu_id[3] >> 29) & 1) bLongMode = true; - } - - num_cores = (logical_cpu_count == 0) ? 1 : logical_cpu_count; - - if (max_ex_fn >= 0x80000008) { - // Get number of cores. This is a bit complicated. Following AMD manual here. - __cpuid(cpu_id, 0x80000008); - int apic_id_core_id_size = (cpu_id[2] >> 12) & 0xF; - if (apic_id_core_id_size == 0) { - if (ht) { - // New mechanism for modern Intel CPUs. - if (vendor == VENDOR_INTEL) { - __cpuidex(cpu_id, 0x00000004, 0x00000000); - int cores_x_package = ((cpu_id[0] >> 26) & 0x3F) + 1; - HTT = (cores_x_package < logical_cpu_count); - cores_x_package = ((logical_cpu_count % cores_x_package) == 0) ? cores_x_package : 1; - num_cores = (cores_x_package > 1) ? cores_x_package : num_cores; - logical_cpu_count /= cores_x_package; - } - } - } else { - // Use AMD's new method. - num_cores = (cpu_id[2] & 0xFF) + 1; - } - } -} - -// Turn the CPU info into a string we can show -std::string CPUInfo::Summarize() { - std::string sum(cpu_string); - sum += " ("; - sum += brand_string; - sum += ")"; - - if (bSSE) sum += ", SSE"; - if (bSSE2) { - sum += ", SSE2"; - if (!bFlushToZero) - sum += " (but not DAZ!)"; - } - if (bSSE3) sum += ", SSE3"; - if (bSSSE3) sum += ", SSSE3"; - if (bSSE4_1) sum += ", SSE4.1"; - if (bSSE4_2) sum += ", SSE4.2"; - if (HTT) sum += ", HTT"; - if (bAVX) sum += ", AVX"; - if (bAVX2) sum += ", AVX2"; - if (bBMI1) sum += ", BMI1"; - if (bBMI2) sum += ", BMI2"; - if (bFMA) sum += ", FMA"; - if (bAES) sum += ", AES"; - if (bMOVBE) sum += ", MOVBE"; - if (bLongMode) sum += ", 64-bit support"; - return sum; -} - -} // namespace Common diff --git a/src/common/fake_emitter.h b/src/common/fake_emitter.h deleted file mode 100644 index d6d96a51e..000000000 --- a/src/common/fake_emitter.h +++ /dev/null @@ -1,465 +0,0 @@ -// Copyright (C) 2003 Dolphin Project. - -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, version 2.0. - -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License 2.0 for more details. - -// A copy of the GPL 2.0 should have been included with the program. -// If not, see http://www.gnu.org/licenses/ - -// Official SVN repository and contact information can be found at -// http://code.google.com/p/dolphin-emu/ - -// WARNING - THIS LIBRARY IS NOT THREAD SAFE!!! - -#pragma once - -#include -#include - -#include "assert.h" -#include "common_types.h" - -// TODO: Check if Pandora still needs signal.h/kill here. Symbian doesn't. - -// VCVT flags -#define TO_FLOAT 0 -#define TO_INT 1 << 0 -#define IS_SIGNED 1 << 1 -#define ROUND_TO_ZERO 1 << 2 - -namespace FakeGen -{ -enum FakeReg -{ - // GPRs - R0 = 0, R1, R2, R3, R4, R5, - R6, R7, R8, R9, R10, R11, - - // SPRs - // R13 - R15 are SP, LR, and PC. - // Almost always referred to by name instead of register number - R12 = 12, R13 = 13, R14 = 14, R15 = 15, - R_IP = 12, R_SP = 13, R_LR = 14, R_PC = 15, - - - // VFP single precision registers - S0, S1, S2, S3, S4, S5, S6, - S7, S8, S9, S10, S11, S12, S13, - S14, S15, S16, S17, S18, S19, S20, - S21, S22, S23, S24, S25, S26, S27, - S28, S29, S30, S31, - - // VFP Double Precision registers - D0, D1, D2, D3, D4, D5, D6, D7, - D8, D9, D10, D11, D12, D13, D14, D15, - D16, D17, D18, D19, D20, D21, D22, D23, - D24, D25, D26, D27, D28, D29, D30, D31, - - // ASIMD Quad-Word registers - Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, - Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, - - // for NEON VLD/VST instructions - REG_UPDATE = R13, - INVALID_REG = 0xFFFFFFFF -}; - -enum CCFlags -{ - CC_EQ = 0, // Equal - CC_NEQ, // Not equal - CC_CS, // Carry Set - CC_CC, // Carry Clear - CC_MI, // Minus (Negative) - CC_PL, // Plus - CC_VS, // Overflow - CC_VC, // No Overflow - CC_HI, // Unsigned higher - CC_LS, // Unsigned lower or same - CC_GE, // Signed greater than or equal - CC_LT, // Signed less than - CC_GT, // Signed greater than - CC_LE, // Signed less than or equal - CC_AL, // Always (unconditional) 14 - CC_HS = CC_CS, // Alias of CC_CS Unsigned higher or same - CC_LO = CC_CC, // Alias of CC_CC Unsigned lower -}; -const u32 NO_COND = 0xE0000000; - -enum ShiftType -{ - ST_LSL = 0, - ST_ASL = 0, - ST_LSR = 1, - ST_ASR = 2, - ST_ROR = 3, - ST_RRX = 4 -}; -enum IntegerSize -{ - I_I8 = 0, - I_I16, - I_I32, - I_I64 -}; - -enum -{ - NUMGPRs = 13, -}; - -class FakeXEmitter; - -enum OpType -{ - TYPE_IMM = 0, - TYPE_REG, - TYPE_IMMSREG, - TYPE_RSR, - TYPE_MEM -}; - -// This is no longer a proper operand2 class. Need to split up. -class Operand2 -{ - friend class FakeXEmitter; -protected: - u32 Value; - -private: - OpType Type; - - // IMM types - u8 Rotation; // Only for u8 values - - // Register types - u8 IndexOrShift; - ShiftType Shift; -public: - OpType GetType() - { - return Type; - } - Operand2() {} - Operand2(u32 imm, OpType type = TYPE_IMM) - { - Type = type; - Value = imm; - Rotation = 0; - } - - Operand2(FakeReg Reg) - { - Type = TYPE_REG; - Value = Reg; - Rotation = 0; - } - Operand2(u8 imm, u8 rotation) - { - Type = TYPE_IMM; - Value = imm; - Rotation = rotation; - } - Operand2(FakeReg base, ShiftType type, FakeReg shift) // RSR - { - Type = TYPE_RSR; - ASSERT_MSG(type != ST_RRX, "Invalid Operand2: RRX does not take a register shift amount"); - IndexOrShift = shift; - Shift = type; - Value = base; - } - - Operand2(FakeReg base, ShiftType type, u8 shift)// For IMM shifted register - { - if(shift == 32) shift = 0; - switch (type) - { - case ST_LSL: - ASSERT_MSG(shift < 32, "Invalid Operand2: LSL %u", shift); - break; - case ST_LSR: - ASSERT_MSG(shift <= 32, "Invalid Operand2: LSR %u", shift); - if (!shift) - type = ST_LSL; - if (shift == 32) - shift = 0; - break; - case ST_ASR: - ASSERT_MSG(shift < 32, "Invalid Operand2: ASR %u", shift); - if (!shift) - type = ST_LSL; - if (shift == 32) - shift = 0; - break; - case ST_ROR: - ASSERT_MSG(shift < 32, "Invalid Operand2: ROR %u", shift); - if (!shift) - type = ST_LSL; - break; - case ST_RRX: - ASSERT_MSG(shift == 0, "Invalid Operand2: RRX does not take an immediate shift amount"); - type = ST_ROR; - break; - } - IndexOrShift = shift; - Shift = type; - Value = base; - Type = TYPE_IMMSREG; - } - u32 GetData() - { - switch(Type) - { - case TYPE_IMM: - return Imm12Mod(); // This'll need to be changed later - case TYPE_REG: - return Rm(); - case TYPE_IMMSREG: - return IMMSR(); - case TYPE_RSR: - return RSR(); - default: - ASSERT_MSG(false, "GetData with Invalid Type"); - return 0; - } - } - u32 IMMSR() // IMM shifted register - { - ASSERT_MSG(Type == TYPE_IMMSREG, "IMMSR must be imm shifted register"); - return ((IndexOrShift & 0x1f) << 7 | (Shift << 5) | Value); - } - u32 RSR() // Register shifted register - { - ASSERT_MSG(Type == TYPE_RSR, "RSR must be RSR Of Course"); - return (IndexOrShift << 8) | (Shift << 5) | 0x10 | Value; - } - u32 Rm() - { - ASSERT_MSG(Type == TYPE_REG, "Rm must be with Reg"); - return Value; - } - - u32 Imm5() - { - ASSERT_MSG((Type == TYPE_IMM), "Imm5 not IMM value"); - return ((Value & 0x0000001F) << 7); - } - u32 Imm8() - { - ASSERT_MSG((Type == TYPE_IMM), "Imm8Rot not IMM value"); - return Value & 0xFF; - } - u32 Imm8Rot() // IMM8 with Rotation - { - ASSERT_MSG((Type == TYPE_IMM), "Imm8Rot not IMM value"); - ASSERT_MSG((Rotation & 0xE1) != 0, "Invalid Operand2: immediate rotation %u", Rotation); - return (1 << 25) | (Rotation << 7) | (Value & 0x000000FF); - } - u32 Imm12() - { - ASSERT_MSG((Type == TYPE_IMM), "Imm12 not IMM"); - return (Value & 0x00000FFF); - } - - u32 Imm12Mod() - { - // This is an IMM12 with the top four bits being rotation and the - // bottom eight being an IMM. This is for instructions that need to - // expand a 8bit IMM to a 32bit value and gives you some rotation as - // well. - // Each rotation rotates to the right by 2 bits - ASSERT_MSG((Type == TYPE_IMM), "Imm12Mod not IMM"); - return ((Rotation & 0xF) << 8) | (Value & 0xFF); - } - u32 Imm16() - { - ASSERT_MSG((Type == TYPE_IMM), "Imm16 not IMM"); - return ( (Value & 0xF000) << 4) | (Value & 0x0FFF); - } - u32 Imm16Low() - { - return Imm16(); - } - u32 Imm16High() // Returns high 16bits - { - ASSERT_MSG((Type == TYPE_IMM), "Imm16 not IMM"); - return ( ((Value >> 16) & 0xF000) << 4) | ((Value >> 16) & 0x0FFF); - } - u32 Imm24() - { - ASSERT_MSG((Type == TYPE_IMM), "Imm16 not IMM"); - return (Value & 0x0FFFFFFF); - } -}; - -// Use these when you don't know if an imm can be represented as an operand2. -// This lets you generate both an optimal and a fallback solution by checking -// the return value, which will be false if these fail to find a Operand2 that -// represents your 32-bit imm value. -bool TryMakeOperand2(u32 imm, Operand2 &op2); -bool TryMakeOperand2_AllowInverse(u32 imm, Operand2 &op2, bool *inverse); -bool TryMakeOperand2_AllowNegation(s32 imm, Operand2 &op2, bool *negated); - -// Use this only when you know imm can be made into an Operand2. -Operand2 AssumeMakeOperand2(u32 imm); - -inline Operand2 R(FakeReg Reg) { return Operand2(Reg, TYPE_REG); } -inline Operand2 IMM(u32 Imm) { return Operand2(Imm, TYPE_IMM); } -inline Operand2 Mem(void *ptr) { return Operand2((u32)(uintptr_t)ptr, TYPE_IMM); } -//usage: struct {int e;} s; STRUCT_OFFSET(s,e) -#define STRUCT_OFF(str,elem) ((u32)((u32)&(str).elem-(u32)&(str))) - - -struct FixupBranch -{ - u8 *ptr; - u32 condition; // Remembers our codition at the time - int type; //0 = B 1 = BL -}; - -typedef const u8* JumpTarget; - -// XXX: Stop polluting the global namespace -const u32 I_8 = (1 << 0); -const u32 I_16 = (1 << 1); -const u32 I_32 = (1 << 2); -const u32 I_64 = (1 << 3); -const u32 I_SIGNED = (1 << 4); -const u32 I_UNSIGNED = (1 << 5); -const u32 F_32 = (1 << 6); -const u32 I_POLYNOMIAL = (1 << 7); // Only used in VMUL/VMULL - -u32 EncodeVd(FakeReg Vd); -u32 EncodeVn(FakeReg Vn); -u32 EncodeVm(FakeReg Vm); - -u32 encodedSize(u32 value); - -// Subtracts the base from the register to give us the real one -FakeReg SubBase(FakeReg Reg); - -// See A.7.1 in the Fakev7-A -// VMUL F32 scalars can only be up to D15[0], D15[1] - higher scalars cannot be individually addressed -FakeReg DScalar(FakeReg dreg, int subScalar); -FakeReg QScalar(FakeReg qreg, int subScalar); - -enum NEONAlignment { - ALIGN_NONE = 0, - ALIGN_64 = 1, - ALIGN_128 = 2, - ALIGN_256 = 3 -}; - - -class NEONXEmitter; - -class FakeXEmitter -{ - friend struct OpArg; // for Write8 etc -private: - u8 *code, *startcode; - u8 *lastCacheFlushEnd; - u32 condition; - -protected: - inline void Write32(u32 value) {*(u32*)code = value; code+=4;} - -public: - FakeXEmitter() : code(0), startcode(0), lastCacheFlushEnd(0) { - condition = CC_AL << 28; - } - FakeXEmitter(u8 *code_ptr) { - code = code_ptr; - lastCacheFlushEnd = code_ptr; - startcode = code_ptr; - condition = CC_AL << 28; - } - virtual ~FakeXEmitter() {} - - void SetCodePtr(u8 *ptr) {} - void ReserveCodeSpace(u32 bytes) {} - const u8 *AlignCode16() { return nullptr; } - const u8 *AlignCodePage() { return nullptr; } - const u8 *GetCodePtr() const { return nullptr; } - void FlushIcache() {} - void FlushIcacheSection(u8 *start, u8 *end) {} - u8 *GetWritableCodePtr() { return nullptr; } - - CCFlags GetCC() { return CCFlags(condition >> 28); } - void SetCC(CCFlags cond = CC_AL) {} - - // Special purpose instructions - - // Do nothing - void NOP(int count = 1) {} //nop padding - TODO: fast nop slides, for amd and intel (check their manuals) - -#ifdef CALL -#undef CALL -#endif - - void QuickCallFunction(FakeReg scratchreg, const void *func); - template void QuickCallFunction(FakeReg scratchreg, T func) { - QuickCallFunction(scratchreg, (const void *)func); - } -}; // class FakeXEmitter - - -// Everything that needs to generate machine code should inherit from this. -// You get memory management for free, plus, you can use all the MOV etc functions without -// having to prefix them with gen-> or something similar. -class FakeXCodeBlock : public FakeXEmitter -{ -protected: - u8 *region; - size_t region_size; - -public: - FakeXCodeBlock() : region(NULL), region_size(0) {} - virtual ~FakeXCodeBlock() { if (region) FreeCodeSpace(); } - - // Call this before you generate any code. - void AllocCodeSpace(int size) { } - - // Always clear code space with breakpoints, so that if someone accidentally executes - // uninitialized, it just breaks into the debugger. - void ClearCodeSpace() { } - - // Call this when shutting down. Don't rely on the destructor, even though it'll do the job. - void FreeCodeSpace() { } - - bool IsInSpace(const u8 *ptr) const - { - return ptr >= region && ptr < region + region_size; - } - - // Cannot currently be undone. Will write protect the entire code region. - // Start over if you need to change the code (call FreeCodeSpace(), AllocCodeSpace()). - void WriteProtect() { } - void UnWriteProtect() { } - - void ResetCodePtr() - { - SetCodePtr(region); - } - - size_t GetSpaceLeft() const - { - return region_size - (GetCodePtr() - region); - } - - u8 *GetBasePtr() { - return region; - } - - size_t GetOffset(const u8 *ptr) const { - return ptr - region; - } -}; - -} // namespace diff --git a/src/common/platform.h b/src/common/platform.h index 08aaa03a4..118a9ed3b 100644 --- a/src/common/platform.h +++ b/src/common/platform.h @@ -27,7 +27,7 @@ //////////////////////////////////////////////////////////////////////////////////////////////////// // Platform detection -#if defined(__x86_64__) || defined(_M_X86_64) || defined(__aarch64__) +#if defined(__x86_64__) || defined(ARCHITECTURE_X64) || defined(__aarch64__) #define EMU_ARCH_BITS 64 #elif defined(__i386) || defined(_M_IX86) || defined(__arm__) || defined(_M_ARM) #define EMU_ARCH_BITS 32 diff --git a/src/common/x64/abi.cpp b/src/common/x64/abi.cpp new file mode 100644 index 000000000..598e7f335 --- /dev/null +++ b/src/common/x64/abi.cpp @@ -0,0 +1,680 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#include "abi.h" +#include "emitter.h" + +using namespace Gen; + +// Shared code between Win64 and Unix64 + +// Sets up a __cdecl function. +void XEmitter::ABI_EmitPrologue(int maxCallParams) +{ +#ifdef _M_IX86 + // Don't really need to do anything +#elif defined(ARCHITECTURE_X64) +#if _WIN32 + int stacksize = ((maxCallParams + 1) & ~1) * 8 + 8; + // Set up a stack frame so that we can call functions + // TODO: use maxCallParams + SUB(64, R(RSP), Imm8(stacksize)); +#endif +#else +#error Arch not supported +#endif +} + +void XEmitter::ABI_EmitEpilogue(int maxCallParams) +{ +#ifdef _M_IX86 + RET(); +#elif defined(ARCHITECTURE_X64) +#ifdef _WIN32 + int stacksize = ((maxCallParams+1)&~1)*8 + 8; + ADD(64, R(RSP), Imm8(stacksize)); +#endif + RET(); +#else +#error Arch not supported + + +#endif +} + +#ifdef _M_IX86 // All32 + +// Shared code between Win32 and Unix32 +void XEmitter::ABI_CallFunction(const void *func) { + ABI_AlignStack(0); + CALL(func); + ABI_RestoreStack(0); +} + +void XEmitter::ABI_CallFunctionC16(const void *func, u16 param1) { + ABI_AlignStack(1 * 2); + PUSH(16, Imm16(param1)); + CALL(func); + ABI_RestoreStack(1 * 2); +} + +void XEmitter::ABI_CallFunctionCC16(const void *func, u32 param1, u16 param2) { + ABI_AlignStack(1 * 2 + 1 * 4); + PUSH(16, Imm16(param2)); + PUSH(32, Imm32(param1)); + CALL(func); + ABI_RestoreStack(1 * 2 + 1 * 4); +} + +void XEmitter::ABI_CallFunctionC(const void *func, u32 param1) { + ABI_AlignStack(1 * 4); + PUSH(32, Imm32(param1)); + CALL(func); + ABI_RestoreStack(1 * 4); +} + +void XEmitter::ABI_CallFunctionCC(const void *func, u32 param1, u32 param2) { + ABI_AlignStack(2 * 4); + PUSH(32, Imm32(param2)); + PUSH(32, Imm32(param1)); + CALL(func); + ABI_RestoreStack(2 * 4); +} + +void XEmitter::ABI_CallFunctionCCC(const void *func, u32 param1, u32 param2, u32 param3) { + ABI_AlignStack(3 * 4); + PUSH(32, Imm32(param3)); + PUSH(32, Imm32(param2)); + PUSH(32, Imm32(param1)); + CALL(func); + ABI_RestoreStack(3 * 4); +} + +void XEmitter::ABI_CallFunctionCCP(const void *func, u32 param1, u32 param2, void *param3) { + ABI_AlignStack(3 * 4); + PUSH(32, ImmPtr(param3)); + PUSH(32, Imm32(param2)); + PUSH(32, Imm32(param1)); + CALL(func); + ABI_RestoreStack(3 * 4); +} + +void XEmitter::ABI_CallFunctionCCCP(const void *func, u32 param1, u32 param2,u32 param3, void *param4) { + ABI_AlignStack(4 * 4); + PUSH(32, ImmPtr(param4)); + PUSH(32, Imm32(param3)); + PUSH(32, Imm32(param2)); + PUSH(32, Imm32(param1)); + CALL(func); + ABI_RestoreStack(4 * 4); +} + +void XEmitter::ABI_CallFunctionP(const void *func, void *param1) { + ABI_AlignStack(1 * 4); + PUSH(32, ImmPtr(param1)); + CALL(func); + ABI_RestoreStack(1 * 4); +} + +void XEmitter::ABI_CallFunctionPA(const void *func, void *param1, const Gen::OpArg &arg2) { + ABI_AlignStack(2 * 4); + PUSH(32, arg2); + PUSH(32, ImmPtr(param1)); + CALL(func); + ABI_RestoreStack(2 * 4); +} + +void XEmitter::ABI_CallFunctionPAA(const void *func, void *param1, const Gen::OpArg &arg2, const Gen::OpArg &arg3) { + ABI_AlignStack(3 * 4); + PUSH(32, arg3); + PUSH(32, arg2); + PUSH(32, ImmPtr(param1)); + CALL(func); + ABI_RestoreStack(3 * 4); +} + +void XEmitter::ABI_CallFunctionPPC(const void *func, void *param1, void *param2, u32 param3) { + ABI_AlignStack(3 * 4); + PUSH(32, Imm32(param3)); + PUSH(32, ImmPtr(param2)); + PUSH(32, ImmPtr(param1)); + CALL(func); + ABI_RestoreStack(3 * 4); +} + +// Pass a register as a parameter. +void XEmitter::ABI_CallFunctionR(const void *func, X64Reg reg1) { + ABI_AlignStack(1 * 4); + PUSH(32, R(reg1)); + CALL(func); + ABI_RestoreStack(1 * 4); +} + +// Pass two registers as parameters. +void XEmitter::ABI_CallFunctionRR(const void *func, Gen::X64Reg reg1, Gen::X64Reg reg2) +{ + ABI_AlignStack(2 * 4); + PUSH(32, R(reg2)); + PUSH(32, R(reg1)); + CALL(func); + ABI_RestoreStack(2 * 4); +} + +void XEmitter::ABI_CallFunctionAC(const void *func, const Gen::OpArg &arg1, u32 param2) +{ + ABI_AlignStack(2 * 4); + PUSH(32, Imm32(param2)); + PUSH(32, arg1); + CALL(func); + ABI_RestoreStack(2 * 4); +} + +void XEmitter::ABI_CallFunctionACC(const void *func, const Gen::OpArg &arg1, u32 param2, u32 param3) +{ + ABI_AlignStack(3 * 4); + PUSH(32, Imm32(param3)); + PUSH(32, Imm32(param2)); + PUSH(32, arg1); + CALL(func); + ABI_RestoreStack(3 * 4); +} + +void XEmitter::ABI_CallFunctionA(const void *func, const Gen::OpArg &arg1) +{ + ABI_AlignStack(1 * 4); + PUSH(32, arg1); + CALL(func); + ABI_RestoreStack(1 * 4); +} + +void XEmitter::ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, const Gen::OpArg &arg2) +{ + ABI_AlignStack(2 * 4); + PUSH(32, arg2); + PUSH(32, arg1); + CALL(func); + ABI_RestoreStack(2 * 4); +} + +void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() { + // Note: 4 * 4 = 16 bytes, so alignment is preserved. + PUSH(EBP); + PUSH(EBX); + PUSH(ESI); + PUSH(EDI); +} + +void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() { + POP(EDI); + POP(ESI); + POP(EBX); + POP(EBP); +} + +unsigned int XEmitter::ABI_GetAlignedFrameSize(unsigned int frameSize) { + frameSize += 4; // reserve space for return address + unsigned int alignedSize = +#ifdef __GNUC__ + (frameSize + 15) & -16; +#else + (frameSize + 3) & -4; +#endif + return alignedSize; +} + + +void XEmitter::ABI_AlignStack(unsigned int frameSize) { +// Mac OS X requires the stack to be 16-byte aligned before every call. +// Linux requires the stack to be 16-byte aligned before calls that put SSE +// vectors on the stack, but since we do not keep track of which calls do that, +// it is effectively every call as well. +// Windows binaries compiled with MSVC do not have such a restriction*, but I +// expect that GCC on Windows acts the same as GCC on Linux in this respect. +// It would be nice if someone could verify this. +// *However, the MSVC optimizing compiler assumes a 4-byte-aligned stack at times. + unsigned int fillSize = + ABI_GetAlignedFrameSize(frameSize) - (frameSize + 4); + if (fillSize != 0) { + SUB(32, R(ESP), Imm8(fillSize)); + } +} + +void XEmitter::ABI_RestoreStack(unsigned int frameSize) { + unsigned int alignedSize = ABI_GetAlignedFrameSize(frameSize); + alignedSize -= 4; // return address is POPped at end of call + if (alignedSize != 0) { + ADD(32, R(ESP), Imm8(alignedSize)); + } +} + +#else //64bit + +// Common functions +void XEmitter::ABI_CallFunction(const void *func) { + u64 distance = u64(func) - (u64(code) + 5); + if (distance >= 0x0000000080000000ULL + && distance < 0xFFFFFFFF80000000ULL) { + // Far call + MOV(64, R(RAX), ImmPtr(func)); + CALLptr(R(RAX)); + } else { + CALL(func); + } +} + +void XEmitter::ABI_CallFunctionC16(const void *func, u16 param1) { + MOV(32, R(ABI_PARAM1), Imm32((u32)param1)); + u64 distance = u64(func) - (u64(code) + 5); + if (distance >= 0x0000000080000000ULL + && distance < 0xFFFFFFFF80000000ULL) { + // Far call + MOV(64, R(RAX), ImmPtr(func)); + CALLptr(R(RAX)); + } else { + CALL(func); + } +} + +void XEmitter::ABI_CallFunctionCC16(const void *func, u32 param1, u16 param2) { + MOV(32, R(ABI_PARAM1), Imm32(param1)); + MOV(32, R(ABI_PARAM2), Imm32((u32)param2)); + u64 distance = u64(func) - (u64(code) + 5); + if (distance >= 0x0000000080000000ULL + && distance < 0xFFFFFFFF80000000ULL) { + // Far call + MOV(64, R(RAX), ImmPtr(func)); + CALLptr(R(RAX)); + } else { + CALL(func); + } +} + +void XEmitter::ABI_CallFunctionC(const void *func, u32 param1) { + MOV(32, R(ABI_PARAM1), Imm32(param1)); + u64 distance = u64(func) - (u64(code) + 5); + if (distance >= 0x0000000080000000ULL + && distance < 0xFFFFFFFF80000000ULL) { + // Far call + MOV(64, R(RAX), ImmPtr(func)); + CALLptr(R(RAX)); + } else { + CALL(func); + } +} + +void XEmitter::ABI_CallFunctionCC(const void *func, u32 param1, u32 param2) { + MOV(32, R(ABI_PARAM1), Imm32(param1)); + MOV(32, R(ABI_PARAM2), Imm32(param2)); + u64 distance = u64(func) - (u64(code) + 5); + if (distance >= 0x0000000080000000ULL + && distance < 0xFFFFFFFF80000000ULL) { + // Far call + MOV(64, R(RAX), ImmPtr(func)); + CALLptr(R(RAX)); + } else { + CALL(func); + } +} + +void XEmitter::ABI_CallFunctionCCC(const void *func, u32 param1, u32 param2, u32 param3) { + MOV(32, R(ABI_PARAM1), Imm32(param1)); + MOV(32, R(ABI_PARAM2), Imm32(param2)); + MOV(32, R(ABI_PARAM3), Imm32(param3)); + u64 distance = u64(func) - (u64(code) + 5); + if (distance >= 0x0000000080000000ULL + && distance < 0xFFFFFFFF80000000ULL) { + // Far call + MOV(64, R(RAX), ImmPtr(func)); + CALLptr(R(RAX)); + } else { + CALL(func); + } +} + +void XEmitter::ABI_CallFunctionCCP(const void *func, u32 param1, u32 param2, void *param3) { + MOV(32, R(ABI_PARAM1), Imm32(param1)); + MOV(32, R(ABI_PARAM2), Imm32(param2)); + MOV(64, R(ABI_PARAM3), ImmPtr(param3)); + u64 distance = u64(func) - (u64(code) + 5); + if (distance >= 0x0000000080000000ULL + && distance < 0xFFFFFFFF80000000ULL) { + // Far call + MOV(64, R(RAX), ImmPtr(func)); + CALLptr(R(RAX)); + } else { + CALL(func); + } +} + +void XEmitter::ABI_CallFunctionCCCP(const void *func, u32 param1, u32 param2, u32 param3, void *param4) { + MOV(32, R(ABI_PARAM1), Imm32(param1)); + MOV(32, R(ABI_PARAM2), Imm32(param2)); + MOV(32, R(ABI_PARAM3), Imm32(param3)); + MOV(64, R(ABI_PARAM4), ImmPtr(param4)); + u64 distance = u64(func) - (u64(code) + 5); + if (distance >= 0x0000000080000000ULL + && distance < 0xFFFFFFFF80000000ULL) { + // Far call + MOV(64, R(RAX), ImmPtr(func)); + CALLptr(R(RAX)); + } else { + CALL(func); + } +} + +void XEmitter::ABI_CallFunctionP(const void *func, void *param1) { + MOV(64, R(ABI_PARAM1), ImmPtr(param1)); + u64 distance = u64(func) - (u64(code) + 5); + if (distance >= 0x0000000080000000ULL + && distance < 0xFFFFFFFF80000000ULL) { + // Far call + MOV(64, R(RAX), ImmPtr(func)); + CALLptr(R(RAX)); + } else { + CALL(func); + } +} + +void XEmitter::ABI_CallFunctionPA(const void *func, void *param1, const Gen::OpArg &arg2) { + MOV(64, R(ABI_PARAM1), ImmPtr(param1)); + if (!arg2.IsSimpleReg(ABI_PARAM2)) + MOV(32, R(ABI_PARAM2), arg2); + u64 distance = u64(func) - (u64(code) + 5); + if (distance >= 0x0000000080000000ULL + && distance < 0xFFFFFFFF80000000ULL) { + // Far call + MOV(64, R(RAX), ImmPtr(func)); + CALLptr(R(RAX)); + } else { + CALL(func); + } +} + +void XEmitter::ABI_CallFunctionPAA(const void *func, void *param1, const Gen::OpArg &arg2, const Gen::OpArg &arg3) { + MOV(64, R(ABI_PARAM1), ImmPtr(param1)); + if (!arg2.IsSimpleReg(ABI_PARAM2)) + MOV(32, R(ABI_PARAM2), arg2); + if (!arg3.IsSimpleReg(ABI_PARAM3)) + MOV(32, R(ABI_PARAM3), arg3); + u64 distance = u64(func) - (u64(code) + 5); + if (distance >= 0x0000000080000000ULL + && distance < 0xFFFFFFFF80000000ULL) { + // Far call + MOV(64, R(RAX), ImmPtr(func)); + CALLptr(R(RAX)); + } else { + CALL(func); + } +} + +void XEmitter::ABI_CallFunctionPPC(const void *func, void *param1, void *param2, u32 param3) { + MOV(64, R(ABI_PARAM1), ImmPtr(param1)); + MOV(64, R(ABI_PARAM2), ImmPtr(param2)); + MOV(32, R(ABI_PARAM3), Imm32(param3)); + u64 distance = u64(func) - (u64(code) + 5); + if (distance >= 0x0000000080000000ULL + && distance < 0xFFFFFFFF80000000ULL) { + // Far call + MOV(64, R(RAX), ImmPtr(func)); + CALLptr(R(RAX)); + } else { + CALL(func); + } +} + +// Pass a register as a parameter. +void XEmitter::ABI_CallFunctionR(const void *func, X64Reg reg1) { + if (reg1 != ABI_PARAM1) + MOV(32, R(ABI_PARAM1), R(reg1)); + u64 distance = u64(func) - (u64(code) + 5); + if (distance >= 0x0000000080000000ULL + && distance < 0xFFFFFFFF80000000ULL) { + // Far call + MOV(64, R(RAX), ImmPtr(func)); + CALLptr(R(RAX)); + } else { + CALL(func); + } +} + +// Pass two registers as parameters. +void XEmitter::ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2) { + if (reg2 != ABI_PARAM1) { + if (reg1 != ABI_PARAM1) + MOV(64, R(ABI_PARAM1), R(reg1)); + if (reg2 != ABI_PARAM2) + MOV(64, R(ABI_PARAM2), R(reg2)); + } else { + if (reg2 != ABI_PARAM2) + MOV(64, R(ABI_PARAM2), R(reg2)); + if (reg1 != ABI_PARAM1) + MOV(64, R(ABI_PARAM1), R(reg1)); + } + u64 distance = u64(func) - (u64(code) + 5); + if (distance >= 0x0000000080000000ULL + && distance < 0xFFFFFFFF80000000ULL) { + // Far call + MOV(64, R(RAX), ImmPtr(func)); + CALLptr(R(RAX)); + } else { + CALL(func); + } +} + +void XEmitter::ABI_CallFunctionAC(const void *func, const Gen::OpArg &arg1, u32 param2) +{ + if (!arg1.IsSimpleReg(ABI_PARAM1)) + MOV(32, R(ABI_PARAM1), arg1); + MOV(32, R(ABI_PARAM2), Imm32(param2)); + u64 distance = u64(func) - (u64(code) + 5); + if (distance >= 0x0000000080000000ULL + && distance < 0xFFFFFFFF80000000ULL) { + // Far call + MOV(64, R(RAX), ImmPtr(func)); + CALLptr(R(RAX)); + } else { + CALL(func); + } +} + +void XEmitter::ABI_CallFunctionACC(const void *func, const Gen::OpArg &arg1, u32 param2, u32 param3) +{ + if (!arg1.IsSimpleReg(ABI_PARAM1)) + MOV(32, R(ABI_PARAM1), arg1); + MOV(32, R(ABI_PARAM2), Imm32(param2)); + MOV(64, R(ABI_PARAM3), Imm64(param3)); + u64 distance = u64(func) - (u64(code) + 5); + if (distance >= 0x0000000080000000ULL + && distance < 0xFFFFFFFF80000000ULL) { + // Far call + MOV(64, R(RAX), ImmPtr(func)); + CALLptr(R(RAX)); + } else { + CALL(func); + } +} + +void XEmitter::ABI_CallFunctionA(const void *func, const Gen::OpArg &arg1) +{ + if (!arg1.IsSimpleReg(ABI_PARAM1)) + MOV(32, R(ABI_PARAM1), arg1); + u64 distance = u64(func) - (u64(code) + 5); + if (distance >= 0x0000000080000000ULL + && distance < 0xFFFFFFFF80000000ULL) { + // Far call + MOV(64, R(RAX), ImmPtr(func)); + CALLptr(R(RAX)); + } else { + CALL(func); + } +} + +void XEmitter::ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, const Gen::OpArg &arg2) +{ + if (!arg1.IsSimpleReg(ABI_PARAM1)) + MOV(32, R(ABI_PARAM1), arg1); + if (!arg2.IsSimpleReg(ABI_PARAM2)) + MOV(32, R(ABI_PARAM2), arg2); + u64 distance = u64(func) - (u64(code) + 5); + if (distance >= 0x0000000080000000ULL + && distance < 0xFFFFFFFF80000000ULL) { + // Far call + MOV(64, R(RAX), ImmPtr(func)); + CALLptr(R(RAX)); + } else { + CALL(func); + } +} + +unsigned int XEmitter::ABI_GetAlignedFrameSize(unsigned int frameSize) { + return frameSize; +} + +#ifdef _WIN32 + +// The Windows x64 ABI requires XMM6 - XMM15 to be callee saved. 10 regs. +// But, not saving XMM4 and XMM5 breaks things in VS 2010, even though they are volatile regs. +// Let's just save all 16. +const int XMM_STACK_SPACE = 16 * 16; + +// Win64 Specific Code +void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() { + //we only want to do this once + PUSH(RBX); + PUSH(RSI); + PUSH(RDI); + PUSH(RBP); + PUSH(R12); + PUSH(R13); + PUSH(R14); + PUSH(R15); + ABI_AlignStack(0); + + // Do this after aligning, because before it's offset by 8. + SUB(64, R(RSP), Imm32(XMM_STACK_SPACE)); + for (int i = 0; i < 16; ++i) + MOVAPS(MDisp(RSP, i * 16), (X64Reg)(XMM0 + i)); +} + +void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() { + for (int i = 0; i < 16; ++i) + MOVAPS((X64Reg)(XMM0 + i), MDisp(RSP, i * 16)); + ADD(64, R(RSP), Imm32(XMM_STACK_SPACE)); + + ABI_RestoreStack(0); + POP(R15); + POP(R14); + POP(R13); + POP(R12); + POP(RBP); + POP(RDI); + POP(RSI); + POP(RBX); +} + +// Win64 Specific Code +void XEmitter::ABI_PushAllCallerSavedRegsAndAdjustStack() { + PUSH(RCX); + PUSH(RDX); + PUSH(RSI); + PUSH(RDI); + PUSH(R8); + PUSH(R9); + PUSH(R10); + PUSH(R11); + // TODO: Callers preserve XMM4-5 (XMM0-3 are args.) + ABI_AlignStack(0); +} + +void XEmitter::ABI_PopAllCallerSavedRegsAndAdjustStack() { + ABI_RestoreStack(0); + POP(R11); + POP(R10); + POP(R9); + POP(R8); + POP(RDI); + POP(RSI); + POP(RDX); + POP(RCX); +} + +void XEmitter::ABI_AlignStack(unsigned int /*frameSize*/) { + SUB(64, R(RSP), Imm8(0x28)); +} + +void XEmitter::ABI_RestoreStack(unsigned int /*frameSize*/) { + ADD(64, R(RSP), Imm8(0x28)); +} + +#else +// Unix64 Specific Code +void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() { + PUSH(RBX); + PUSH(RBP); + PUSH(R12); + PUSH(R13); + PUSH(R14); + PUSH(R15); + PUSH(R15); //just to align stack. duped push/pop doesn't hurt. + // TODO: XMM? +} + +void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() { + POP(R15); + POP(R15); + POP(R14); + POP(R13); + POP(R12); + POP(RBP); + POP(RBX); +} + +void XEmitter::ABI_PushAllCallerSavedRegsAndAdjustStack() { + PUSH(RCX); + PUSH(RDX); + PUSH(RSI); + PUSH(RDI); + PUSH(R8); + PUSH(R9); + PUSH(R10); + PUSH(R11); + PUSH(R11); +} + +void XEmitter::ABI_PopAllCallerSavedRegsAndAdjustStack() { + POP(R11); + POP(R11); + POP(R10); + POP(R9); + POP(R8); + POP(RDI); + POP(RSI); + POP(RDX); + POP(RCX); +} + +void XEmitter::ABI_AlignStack(unsigned int /*frameSize*/) { + SUB(64, R(RSP), Imm8(0x08)); +} + +void XEmitter::ABI_RestoreStack(unsigned int /*frameSize*/) { + ADD(64, R(RSP), Imm8(0x08)); +} + +#endif // WIN32 + +#endif // 32bit diff --git a/src/common/x64/abi.h b/src/common/x64/abi.h new file mode 100644 index 000000000..0ee189d45 --- /dev/null +++ b/src/common/x64/abi.h @@ -0,0 +1,78 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#pragma once + +#include "common/common_types.h" + +// x86/x64 ABI:s, and helpers to help follow them when JIT-ing code. +// All convensions return values in EAX (+ possibly EDX). + +// Linux 32-bit, Windows 32-bit (cdecl, System V): +// * Caller pushes left to right +// * Caller fixes stack after call +// * function subtract from stack for local storage only. +// Scratch: EAX ECX EDX +// Callee-save: EBX ESI EDI EBP +// Parameters: - + +// Windows 64-bit +// * 4-reg "fastcall" variant, very new-skool stack handling +// * Callee moves stack pointer, to make room for shadow regs for the biggest function _it itself calls_ +// * Parameters passed in RCX, RDX, ... further parameters are MOVed into the allocated stack space. +// Scratch: RAX RCX RDX R8 R9 R10 R11 +// Callee-save: RBX RSI RDI RBP R12 R13 R14 R15 +// Parameters: RCX RDX R8 R9, further MOV-ed + +// Linux 64-bit +// * 6-reg "fastcall" variant, old skool stack handling (parameters are pushed) +// Scratch: RAX RCX RDX RSI RDI R8 R9 R10 R11 +// Callee-save: RBX RBP R12 R13 R14 R15 +// Parameters: RDI RSI RDX RCX R8 R9 + +#ifdef _M_IX86 // 32 bit calling convention, shared by all + +// 32-bit don't pass parameters in regs, but these are convenient to have anyway when we have to +// choose regs to put stuff in. +#define ABI_PARAM1 RCX +#define ABI_PARAM2 RDX + +// There are no ABI_PARAM* here, since args are pushed. +// 32-bit bog standard cdecl, shared between linux and windows +// MacOSX 32-bit is same as System V with a few exceptions that we probably don't care much about. + +#elif ARCHITECTURE_X64 // 64 bit calling convention + +#ifdef _WIN32 // 64-bit Windows - the really exotic calling convention + +#define ABI_PARAM1 RCX +#define ABI_PARAM2 RDX +#define ABI_PARAM3 R8 +#define ABI_PARAM4 R9 + +#else //64-bit Unix (hopefully MacOSX too) + +#define ABI_PARAM1 RDI +#define ABI_PARAM2 RSI +#define ABI_PARAM3 RDX +#define ABI_PARAM4 RCX +#define ABI_PARAM5 R8 +#define ABI_PARAM6 R9 + +#endif // WIN32 + +#endif // X86 diff --git a/src/common/x64/emitter.cpp b/src/common/x64/emitter.cpp new file mode 100644 index 000000000..4e1c43d6c --- /dev/null +++ b/src/common/x64/emitter.cpp @@ -0,0 +1,1989 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#include + +#include "common/assert.h" +#include "common/cpu_detect.h" +#include "common/logging/log.h" +#include "common/memory_util.h" + +#include "abi.h" +#include "emitter.h" + +#define PRIx64 "llx" + +// Minimize the diff against Dolphin +#define DYNA_REC JIT + +namespace Gen +{ + +struct NormalOpDef +{ + u8 toRm8, toRm32, fromRm8, fromRm32, imm8, imm32, simm8, eaximm8, eaximm32, ext; +}; + +// 0xCC is code for invalid combination of immediates +static const NormalOpDef normalops[11] = +{ + {0x00, 0x01, 0x02, 0x03, 0x80, 0x81, 0x83, 0x04, 0x05, 0}, //ADD + {0x10, 0x11, 0x12, 0x13, 0x80, 0x81, 0x83, 0x14, 0x15, 2}, //ADC + + {0x28, 0x29, 0x2A, 0x2B, 0x80, 0x81, 0x83, 0x2C, 0x2D, 5}, //SUB + {0x18, 0x19, 0x1A, 0x1B, 0x80, 0x81, 0x83, 0x1C, 0x1D, 3}, //SBB + + {0x20, 0x21, 0x22, 0x23, 0x80, 0x81, 0x83, 0x24, 0x25, 4}, //AND + {0x08, 0x09, 0x0A, 0x0B, 0x80, 0x81, 0x83, 0x0C, 0x0D, 1}, //OR + + {0x30, 0x31, 0x32, 0x33, 0x80, 0x81, 0x83, 0x34, 0x35, 6}, //XOR + {0x88, 0x89, 0x8A, 0x8B, 0xC6, 0xC7, 0xCC, 0xCC, 0xCC, 0}, //MOV + + {0x84, 0x85, 0x84, 0x85, 0xF6, 0xF7, 0xCC, 0xA8, 0xA9, 0}, //TEST (to == from) + {0x38, 0x39, 0x3A, 0x3B, 0x80, 0x81, 0x83, 0x3C, 0x3D, 7}, //CMP + + {0x86, 0x87, 0x86, 0x87, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 7}, //XCHG +}; + +enum NormalSSEOps +{ + sseCMP = 0xC2, + sseADD = 0x58, //ADD + sseSUB = 0x5C, //SUB + sseAND = 0x54, //AND + sseANDN = 0x55, //ANDN + sseOR = 0x56, + sseXOR = 0x57, + sseMUL = 0x59, //MUL + sseDIV = 0x5E, //DIV + sseMIN = 0x5D, //MIN + sseMAX = 0x5F, //MAX + sseCOMIS = 0x2F, //COMIS + sseUCOMIS = 0x2E, //UCOMIS + sseSQRT = 0x51, //SQRT + sseRSQRT = 0x52, //RSQRT (NO DOUBLE PRECISION!!!) + sseRCP = 0x53, //RCP + sseMOVAPfromRM = 0x28, //MOVAP from RM + sseMOVAPtoRM = 0x29, //MOVAP to RM + sseMOVUPfromRM = 0x10, //MOVUP from RM + sseMOVUPtoRM = 0x11, //MOVUP to RM + sseMOVLPfromRM= 0x12, + sseMOVLPtoRM = 0x13, + sseMOVHPfromRM= 0x16, + sseMOVHPtoRM = 0x17, + sseMOVHLPS = 0x12, + sseMOVLHPS = 0x16, + sseMOVDQfromRM = 0x6F, + sseMOVDQtoRM = 0x7F, + sseMASKMOVDQU = 0xF7, + sseLDDQU = 0xF0, + sseSHUF = 0xC6, + sseMOVNTDQ = 0xE7, + sseMOVNTP = 0x2B, + sseHADD = 0x7C, +}; + + +void XEmitter::SetCodePtr(u8 *ptr) +{ + code = ptr; +} + +const u8 *XEmitter::GetCodePtr() const +{ + return code; +} + +u8 *XEmitter::GetWritableCodePtr() +{ + return code; +} + +void XEmitter::ReserveCodeSpace(int bytes) +{ + for (int i = 0; i < bytes; i++) + *code++ = 0xCC; +} + +const u8 *XEmitter::AlignCode4() +{ + int c = int((u64)code & 3); + if (c) + ReserveCodeSpace(4-c); + return code; +} + +const u8 *XEmitter::AlignCode16() +{ + int c = int((u64)code & 15); + if (c) + ReserveCodeSpace(16-c); + return code; +} + +const u8 *XEmitter::AlignCodePage() +{ + int c = int((u64)code & 4095); + if (c) + ReserveCodeSpace(4096-c); + return code; +} + +// This operation modifies flags; check to see the flags are locked. +// If the flags are locked, we should immediately and loudly fail before +// causing a subtle JIT bug. +void XEmitter::CheckFlags() +{ + ASSERT_MSG(!flags_locked, "Attempt to modify flags while flags locked!"); +} + +void XEmitter::WriteModRM(int mod, int reg, int rm) +{ + Write8((u8)((mod << 6) | ((reg & 7) << 3) | (rm & 7))); +} + +void XEmitter::WriteSIB(int scale, int index, int base) +{ + Write8((u8)((scale << 6) | ((index & 7) << 3) | (base & 7))); +} + +void OpArg::WriteRex(XEmitter *emit, int opBits, int bits, int customOp) const +{ + if (customOp == -1) customOp = operandReg; +#ifdef ARCHITECTURE_X64 + u8 op = 0x40; + // REX.W (whether operation is a 64-bit operation) + if (opBits == 64) op |= 8; + // REX.R (whether ModR/M reg field refers to R8-R15. + if (customOp & 8) op |= 4; + // REX.X (whether ModR/M SIB index field refers to R8-R15) + if (indexReg & 8) op |= 2; + // REX.B (whether ModR/M rm or SIB base or opcode reg field refers to R8-R15) + if (offsetOrBaseReg & 8) op |= 1; + // Write REX if wr have REX bits to write, or if the operation accesses + // SIL, DIL, BPL, or SPL. + if (op != 0x40 || + (scale == SCALE_NONE && bits == 8 && (offsetOrBaseReg & 0x10c) == 4) || + (opBits == 8 && (customOp & 0x10c) == 4)) + { + emit->Write8(op); + // Check the operation doesn't access AH, BH, CH, or DH. + DEBUG_ASSERT((offsetOrBaseReg & 0x100) == 0); + DEBUG_ASSERT((customOp & 0x100) == 0); + } +#else + DEBUG_ASSERT(opBits != 64); + DEBUG_ASSERT((customOp & 8) == 0 || customOp == -1); + DEBUG_ASSERT((indexReg & 8) == 0); + DEBUG_ASSERT((offsetOrBaseReg & 8) == 0); + DEBUG_ASSERT(opBits != 8 || (customOp & 0x10c) != 4 || customOp == -1); + DEBUG_ASSERT(scale == SCALE_ATREG || bits != 8 || (offsetOrBaseReg & 0x10c) != 4); +#endif +} + +void OpArg::WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm, int W) const +{ + int R = !(regOp1 & 8); + int X = !(indexReg & 8); + int B = !(offsetOrBaseReg & 8); + + int vvvv = (regOp2 == X64Reg::INVALID_REG) ? 0xf : (regOp2 ^ 0xf); + + // do we need any VEX fields that only appear in the three-byte form? + if (X == 1 && B == 1 && W == 0 && mmmmm == 1) + { + u8 RvvvvLpp = (R << 7) | (vvvv << 3) | (L << 1) | pp; + emit->Write8(0xC5); + emit->Write8(RvvvvLpp); + } + else + { + u8 RXBmmmmm = (R << 7) | (X << 6) | (B << 5) | mmmmm; + u8 WvvvvLpp = (W << 7) | (vvvv << 3) | (L << 1) | pp; + emit->Write8(0xC4); + emit->Write8(RXBmmmmm); + emit->Write8(WvvvvLpp); + } +} + +void OpArg::WriteRest(XEmitter *emit, int extraBytes, X64Reg _operandReg, + bool warn_64bit_offset) const +{ + if (_operandReg == INVALID_REG) + _operandReg = (X64Reg)this->operandReg; + int mod = 0; + int ireg = indexReg; + bool SIB = false; + int _offsetOrBaseReg = this->offsetOrBaseReg; + + if (scale == SCALE_RIP) //Also, on 32-bit, just an immediate address + { + // Oh, RIP addressing. + _offsetOrBaseReg = 5; + emit->WriteModRM(0, _operandReg, _offsetOrBaseReg); + //TODO : add some checks +#ifdef ARCHITECTURE_X64 + u64 ripAddr = (u64)emit->GetCodePtr() + 4 + extraBytes; + s64 distance = (s64)offset - (s64)ripAddr; + ASSERT_MSG( + (distance < 0x80000000LL && + distance >= -0x80000000LL) || + !warn_64bit_offset, + "WriteRest: op out of range (0x%" PRIx64 " uses 0x%" PRIx64 ")", + ripAddr, offset); + s32 offs = (s32)distance; + emit->Write32((u32)offs); +#else + emit->Write32((u32)offset); +#endif + return; + } + + if (scale == 0) + { + // Oh, no memory, Just a reg. + mod = 3; //11 + } + else if (scale >= 1) + { + //Ah good, no scaling. + if (scale == SCALE_ATREG && !((_offsetOrBaseReg & 7) == 4 || (_offsetOrBaseReg & 7) == 5)) + { + //Okay, we're good. No SIB necessary. + int ioff = (int)offset; + if (ioff == 0) + { + mod = 0; + } + else if (ioff<-128 || ioff>127) + { + mod = 2; //32-bit displacement + } + else + { + mod = 1; //8-bit displacement + } + } + else if (scale >= SCALE_NOBASE_2 && scale <= SCALE_NOBASE_8) + { + SIB = true; + mod = 0; + _offsetOrBaseReg = 5; + } + else //if (scale != SCALE_ATREG) + { + if ((_offsetOrBaseReg & 7) == 4) //this would occupy the SIB encoding :( + { + //So we have to fake it with SIB encoding :( + SIB = true; + } + + if (scale >= SCALE_1 && scale < SCALE_ATREG) + { + SIB = true; + } + + if (scale == SCALE_ATREG && ((_offsetOrBaseReg & 7) == 4)) + { + SIB = true; + ireg = _offsetOrBaseReg; + } + + //Okay, we're fine. Just disp encoding. + //We need displacement. Which size? + int ioff = (int)(s64)offset; + if (ioff < -128 || ioff > 127) + { + mod = 2; //32-bit displacement + } + else + { + mod = 1; //8-bit displacement + } + } + } + + // Okay. Time to do the actual writing + // ModRM byte: + int oreg = _offsetOrBaseReg; + if (SIB) + oreg = 4; + + // TODO(ector): WTF is this if about? I don't remember writing it :-) + //if (RIP) + // oreg = 5; + + emit->WriteModRM(mod, _operandReg&7, oreg&7); + + if (SIB) + { + //SIB byte + int ss; + switch (scale) + { + case SCALE_NONE: _offsetOrBaseReg = 4; ss = 0; break; //RSP + case SCALE_1: ss = 0; break; + case SCALE_2: ss = 1; break; + case SCALE_4: ss = 2; break; + case SCALE_8: ss = 3; break; + case SCALE_NOBASE_2: ss = 1; break; + case SCALE_NOBASE_4: ss = 2; break; + case SCALE_NOBASE_8: ss = 3; break; + case SCALE_ATREG: ss = 0; break; + default: ASSERT_MSG(0, "Invalid scale for SIB byte"); ss = 0; break; + } + emit->Write8((u8)((ss << 6) | ((ireg&7)<<3) | (_offsetOrBaseReg&7))); + } + + if (mod == 1) //8-bit disp + { + emit->Write8((u8)(s8)(s32)offset); + } + else if (mod == 2 || (scale >= SCALE_NOBASE_2 && scale <= SCALE_NOBASE_8)) //32-bit disp + { + emit->Write32((u32)offset); + } +} + +// W = operand extended width (1 if 64-bit) +// R = register# upper bit +// X = scale amnt upper bit +// B = base register# upper bit +void XEmitter::Rex(int w, int r, int x, int b) +{ + w = w ? 1 : 0; + r = r ? 1 : 0; + x = x ? 1 : 0; + b = b ? 1 : 0; + u8 rx = (u8)(0x40 | (w << 3) | (r << 2) | (x << 1) | (b)); + if (rx != 0x40) + Write8(rx); +} + +void XEmitter::JMP(const u8 *addr, bool force5Bytes) +{ + u64 fn = (u64)addr; + if (!force5Bytes) + { + s64 distance = (s64)(fn - ((u64)code + 2)); + ASSERT_MSG(distance >= -0x80 && distance < 0x80, + "Jump target too far away, needs force5Bytes = true"); + //8 bits will do + Write8(0xEB); + Write8((u8)(s8)distance); + } + else + { + s64 distance = (s64)(fn - ((u64)code + 5)); + + ASSERT_MSG( + distance >= -0x80000000LL && distance < 0x80000000LL, + "Jump target too far away, needs indirect register"); + Write8(0xE9); + Write32((u32)(s32)distance); + } +} + +void XEmitter::JMPptr(const OpArg &arg2) +{ + OpArg arg = arg2; + if (arg.IsImm()) ASSERT_MSG(0, "JMPptr - Imm argument"); + arg.operandReg = 4; + arg.WriteRex(this, 0, 0); + Write8(0xFF); + arg.WriteRest(this); +} + +//Can be used to trap other processors, before overwriting their code +// not used in dolphin +void XEmitter::JMPself() +{ + Write8(0xEB); + Write8(0xFE); +} + +void XEmitter::CALLptr(OpArg arg) +{ + if (arg.IsImm()) ASSERT_MSG(0, "CALLptr - Imm argument"); + arg.operandReg = 2; + arg.WriteRex(this, 0, 0); + Write8(0xFF); + arg.WriteRest(this); +} + +void XEmitter::CALL(const void *fnptr) +{ + u64 distance = u64(fnptr) - (u64(code) + 5); + ASSERT_MSG( + distance < 0x0000000080000000ULL || + distance >= 0xFFFFFFFF80000000ULL, + "CALL out of range (%p calls %p)", code, fnptr); + Write8(0xE8); + Write32(u32(distance)); +} + +FixupBranch XEmitter::J(bool force5bytes) +{ + FixupBranch branch; + branch.type = force5bytes ? 1 : 0; + branch.ptr = code + (force5bytes ? 5 : 2); + if (!force5bytes) + { + //8 bits will do + Write8(0xEB); + Write8(0); + } + else + { + Write8(0xE9); + Write32(0); + } + return branch; +} + +FixupBranch XEmitter::J_CC(CCFlags conditionCode, bool force5bytes) +{ + FixupBranch branch; + branch.type = force5bytes ? 1 : 0; + branch.ptr = code + (force5bytes ? 6 : 2); + if (!force5bytes) + { + //8 bits will do + Write8(0x70 + conditionCode); + Write8(0); + } + else + { + Write8(0x0F); + Write8(0x80 + conditionCode); + Write32(0); + } + return branch; +} + +void XEmitter::J_CC(CCFlags conditionCode, const u8* addr, bool force5bytes) +{ + u64 fn = (u64)addr; + s64 distance = (s64)(fn - ((u64)code + 2)); + if (distance < -0x80 || distance >= 0x80 || force5bytes) + { + distance = (s64)(fn - ((u64)code + 6)); + ASSERT_MSG( + distance >= -0x80000000LL && distance < 0x80000000LL, + "Jump target too far away, needs indirect register"); + Write8(0x0F); + Write8(0x80 + conditionCode); + Write32((u32)(s32)distance); + } + else + { + Write8(0x70 + conditionCode); + Write8((u8)(s8)distance); + } +} + +void XEmitter::SetJumpTarget(const FixupBranch &branch) +{ + if (branch.type == 0) + { + s64 distance = (s64)(code - branch.ptr); + ASSERT_MSG(distance >= -0x80 && distance < 0x80, "Jump target too far away, needs force5Bytes = true"); + branch.ptr[-1] = (u8)(s8)distance; + } + else if (branch.type == 1) + { + s64 distance = (s64)(code - branch.ptr); + ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL, "Jump target too far away, needs indirect register"); + ((s32*)branch.ptr)[-1] = (s32)distance; + } +} + +// INC/DEC considered harmful on newer CPUs due to partial flag set. +// Use ADD, SUB instead. + +/* +void XEmitter::INC(int bits, OpArg arg) +{ + if (arg.IsImm()) ASSERT_MSG(0, "INC - Imm argument"); + arg.operandReg = 0; + if (bits == 16) {Write8(0x66);} + arg.WriteRex(this, bits, bits); + Write8(bits == 8 ? 0xFE : 0xFF); + arg.WriteRest(this); +} +void XEmitter::DEC(int bits, OpArg arg) +{ + if (arg.IsImm()) ASSERT_MSG(0, "DEC - Imm argument"); + arg.operandReg = 1; + if (bits == 16) {Write8(0x66);} + arg.WriteRex(this, bits, bits); + Write8(bits == 8 ? 0xFE : 0xFF); + arg.WriteRest(this); +} +*/ + +//Single byte opcodes +//There is no PUSHAD/POPAD in 64-bit mode. +void XEmitter::INT3() {Write8(0xCC);} +void XEmitter::RET() {Write8(0xC3);} +void XEmitter::RET_FAST() {Write8(0xF3); Write8(0xC3);} //two-byte return (rep ret) - recommended by AMD optimization manual for the case of jumping to a ret + +// The first sign of decadence: optimized NOPs. +void XEmitter::NOP(size_t size) +{ + DEBUG_ASSERT((int)size > 0); + while (true) + { + switch (size) + { + case 0: + return; + case 1: + Write8(0x90); + return; + case 2: + Write8(0x66); Write8(0x90); + return; + case 3: + Write8(0x0F); Write8(0x1F); Write8(0x00); + return; + case 4: + Write8(0x0F); Write8(0x1F); Write8(0x40); Write8(0x00); + return; + case 5: + Write8(0x0F); Write8(0x1F); Write8(0x44); Write8(0x00); + Write8(0x00); + return; + case 6: + Write8(0x66); Write8(0x0F); Write8(0x1F); Write8(0x44); + Write8(0x00); Write8(0x00); + return; + case 7: + Write8(0x0F); Write8(0x1F); Write8(0x80); Write8(0x00); + Write8(0x00); Write8(0x00); Write8(0x00); + return; + case 8: + Write8(0x0F); Write8(0x1F); Write8(0x84); Write8(0x00); + Write8(0x00); Write8(0x00); Write8(0x00); Write8(0x00); + return; + case 9: + Write8(0x66); Write8(0x0F); Write8(0x1F); Write8(0x84); + Write8(0x00); Write8(0x00); Write8(0x00); Write8(0x00); + Write8(0x00); + return; + case 10: + Write8(0x66); Write8(0x66); Write8(0x0F); Write8(0x1F); + Write8(0x84); Write8(0x00); Write8(0x00); Write8(0x00); + Write8(0x00); Write8(0x00); + return; + default: + // Even though x86 instructions are allowed to be up to 15 bytes long, + // AMD advises against using NOPs longer than 11 bytes because they + // carry a performance penalty on CPUs older than AMD family 16h. + Write8(0x66); Write8(0x66); Write8(0x66); Write8(0x0F); + Write8(0x1F); Write8(0x84); Write8(0x00); Write8(0x00); + Write8(0x00); Write8(0x00); Write8(0x00); + size -= 11; + continue; + } + } +} + +void XEmitter::PAUSE() {Write8(0xF3); NOP();} //use in tight spinloops for energy saving on some cpu +void XEmitter::CLC() {CheckFlags(); Write8(0xF8);} //clear carry +void XEmitter::CMC() {CheckFlags(); Write8(0xF5);} //flip carry +void XEmitter::STC() {CheckFlags(); Write8(0xF9);} //set carry + +//TODO: xchg ah, al ??? +void XEmitter::XCHG_AHAL() +{ + Write8(0x86); + Write8(0xe0); + // alt. 86 c4 +} + +//These two can not be executed on early Intel 64-bit CPU:s, only on AMD! +void XEmitter::LAHF() {Write8(0x9F);} +void XEmitter::SAHF() {CheckFlags(); Write8(0x9E);} + +void XEmitter::PUSHF() {Write8(0x9C);} +void XEmitter::POPF() {CheckFlags(); Write8(0x9D);} + +void XEmitter::LFENCE() {Write8(0x0F); Write8(0xAE); Write8(0xE8);} +void XEmitter::MFENCE() {Write8(0x0F); Write8(0xAE); Write8(0xF0);} +void XEmitter::SFENCE() {Write8(0x0F); Write8(0xAE); Write8(0xF8);} + +void XEmitter::WriteSimple1Byte(int bits, u8 byte, X64Reg reg) +{ + if (bits == 16) + Write8(0x66); + Rex(bits == 64, 0, 0, (int)reg >> 3); + Write8(byte + ((int)reg & 7)); +} + +void XEmitter::WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg) +{ + if (bits == 16) + Write8(0x66); + Rex(bits==64, 0, 0, (int)reg >> 3); + Write8(byte1); + Write8(byte2 + ((int)reg & 7)); +} + +void XEmitter::CWD(int bits) +{ + if (bits == 16) + Write8(0x66); + Rex(bits == 64, 0, 0, 0); + Write8(0x99); +} + +void XEmitter::CBW(int bits) +{ + if (bits == 8) + Write8(0x66); + Rex(bits == 32, 0, 0, 0); + Write8(0x98); +} + +//Simple opcodes + + +//push/pop do not need wide to be 64-bit +void XEmitter::PUSH(X64Reg reg) {WriteSimple1Byte(32, 0x50, reg);} +void XEmitter::POP(X64Reg reg) {WriteSimple1Byte(32, 0x58, reg);} + +void XEmitter::PUSH(int bits, const OpArg ®) +{ + if (reg.IsSimpleReg()) + PUSH(reg.GetSimpleReg()); + else if (reg.IsImm()) + { + switch (reg.GetImmBits()) + { + case 8: + Write8(0x6A); + Write8((u8)(s8)reg.offset); + break; + case 16: + Write8(0x66); + Write8(0x68); + Write16((u16)(s16)(s32)reg.offset); + break; + case 32: + Write8(0x68); + Write32((u32)reg.offset); + break; + default: + ASSERT_MSG(0, "PUSH - Bad imm bits"); + break; + } + } + else + { + if (bits == 16) + Write8(0x66); + reg.WriteRex(this, bits, bits); + Write8(0xFF); + reg.WriteRest(this, 0, (X64Reg)6); + } +} + +void XEmitter::POP(int /*bits*/, const OpArg ®) +{ + if (reg.IsSimpleReg()) + POP(reg.GetSimpleReg()); + else + ASSERT_MSG(0, "POP - Unsupported encoding"); +} + +void XEmitter::BSWAP(int bits, X64Reg reg) +{ + if (bits >= 32) + { + WriteSimple2Byte(bits, 0x0F, 0xC8, reg); + } + else if (bits == 16) + { + ROL(16, R(reg), Imm8(8)); + } + else if (bits == 8) + { + // Do nothing - can't bswap a single byte... + } + else + { + ASSERT_MSG(0, "BSWAP - Wrong number of bits"); + } +} + +// Undefined opcode - reserved +// If we ever need a way to always cause a non-breakpoint hard exception... +void XEmitter::UD2() +{ + Write8(0x0F); + Write8(0x0B); +} + +void XEmitter::PREFETCH(PrefetchLevel level, OpArg arg) +{ + ASSERT_MSG(!arg.IsImm(), "PREFETCH - Imm argument"); + arg.operandReg = (u8)level; + arg.WriteRex(this, 0, 0); + Write8(0x0F); + Write8(0x18); + arg.WriteRest(this); +} + +void XEmitter::SETcc(CCFlags flag, OpArg dest) +{ + ASSERT_MSG(!dest.IsImm(), "SETcc - Imm argument"); + dest.operandReg = 0; + dest.WriteRex(this, 0, 8); + Write8(0x0F); + Write8(0x90 + (u8)flag); + dest.WriteRest(this); +} + +void XEmitter::CMOVcc(int bits, X64Reg dest, OpArg src, CCFlags flag) +{ + ASSERT_MSG(!src.IsImm(), "CMOVcc - Imm argument"); + ASSERT_MSG(bits != 8, "CMOVcc - 8 bits unsupported"); + if (bits == 16) + Write8(0x66); + src.operandReg = dest; + src.WriteRex(this, bits, bits); + Write8(0x0F); + Write8(0x40 + (u8)flag); + src.WriteRest(this); +} + +void XEmitter::WriteMulDivType(int bits, OpArg src, int ext) +{ + ASSERT_MSG(!src.IsImm(), "WriteMulDivType - Imm argument"); + CheckFlags(); + src.operandReg = ext; + if (bits == 16) + Write8(0x66); + src.WriteRex(this, bits, bits, 0); + if (bits == 8) + { + Write8(0xF6); + } + else + { + Write8(0xF7); + } + src.WriteRest(this); +} + +void XEmitter::MUL(int bits, OpArg src) {WriteMulDivType(bits, src, 4);} +void XEmitter::DIV(int bits, OpArg src) {WriteMulDivType(bits, src, 6);} +void XEmitter::IMUL(int bits, OpArg src) {WriteMulDivType(bits, src, 5);} +void XEmitter::IDIV(int bits, OpArg src) {WriteMulDivType(bits, src, 7);} +void XEmitter::NEG(int bits, OpArg src) {WriteMulDivType(bits, src, 3);} +void XEmitter::NOT(int bits, OpArg src) {WriteMulDivType(bits, src, 2);} + +void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep) +{ + ASSERT_MSG(!src.IsImm(), "WriteBitSearchType - Imm argument"); + CheckFlags(); + src.operandReg = (u8)dest; + if (bits == 16) + Write8(0x66); + if (rep) + Write8(0xF3); + src.WriteRex(this, bits, bits); + Write8(0x0F); + Write8(byte2); + src.WriteRest(this); +} + +void XEmitter::MOVNTI(int bits, OpArg dest, X64Reg src) +{ + if (bits <= 16) + ASSERT_MSG(0, "MOVNTI - bits<=16"); + WriteBitSearchType(bits, src, dest, 0xC3); +} + +void XEmitter::BSF(int bits, X64Reg dest, OpArg src) {WriteBitSearchType(bits,dest,src,0xBC);} //bottom bit to top bit +void XEmitter::BSR(int bits, X64Reg dest, OpArg src) {WriteBitSearchType(bits,dest,src,0xBD);} //top bit to bottom bit + +void XEmitter::TZCNT(int bits, X64Reg dest, OpArg src) +{ + CheckFlags(); + if (!Common::cpu_info.bBMI1) + ASSERT_MSG(0, "Trying to use BMI1 on a system that doesn't support it. Bad programmer."); + WriteBitSearchType(bits, dest, src, 0xBC, true); +} +void XEmitter::LZCNT(int bits, X64Reg dest, OpArg src) +{ + CheckFlags(); + if (!Common::cpu_info.bLZCNT) + ASSERT_MSG(0, "Trying to use LZCNT on a system that doesn't support it. Bad programmer."); + WriteBitSearchType(bits, dest, src, 0xBD, true); +} + +void XEmitter::MOVSX(int dbits, int sbits, X64Reg dest, OpArg src) +{ + ASSERT_MSG(!src.IsImm(), "MOVSX - Imm argument"); + if (dbits == sbits) + { + MOV(dbits, R(dest), src); + return; + } + src.operandReg = (u8)dest; + if (dbits == 16) + Write8(0x66); + src.WriteRex(this, dbits, sbits); + if (sbits == 8) + { + Write8(0x0F); + Write8(0xBE); + } + else if (sbits == 16) + { + Write8(0x0F); + Write8(0xBF); + } + else if (sbits == 32 && dbits == 64) + { + Write8(0x63); + } + else + { + Crash(); + } + src.WriteRest(this); +} + +void XEmitter::MOVZX(int dbits, int sbits, X64Reg dest, OpArg src) +{ + ASSERT_MSG(!src.IsImm(), "MOVZX - Imm argument"); + if (dbits == sbits) + { + MOV(dbits, R(dest), src); + return; + } + src.operandReg = (u8)dest; + if (dbits == 16) + Write8(0x66); + //the 32bit result is automatically zero extended to 64bit + src.WriteRex(this, dbits == 64 ? 32 : dbits, sbits); + if (sbits == 8) + { + Write8(0x0F); + Write8(0xB6); + } + else if (sbits == 16) + { + Write8(0x0F); + Write8(0xB7); + } + else if (sbits == 32 && dbits == 64) + { + Write8(0x8B); + } + else + { + ASSERT_MSG(0, "MOVZX - Invalid size"); + } + src.WriteRest(this); +} + +void XEmitter::MOVBE(int bits, const OpArg& dest, const OpArg& src) +{ + ASSERT_MSG(Common::cpu_info.bMOVBE, "Generating MOVBE on a system that does not support it."); + if (bits == 8) + { + MOV(bits, dest, src); + return; + } + + if (bits == 16) + Write8(0x66); + + if (dest.IsSimpleReg()) + { + ASSERT_MSG(!src.IsSimpleReg() && !src.IsImm(), "MOVBE: Loading from !mem"); + src.WriteRex(this, bits, bits, dest.GetSimpleReg()); + Write8(0x0F); Write8(0x38); Write8(0xF0); + src.WriteRest(this, 0, dest.GetSimpleReg()); + } + else if (src.IsSimpleReg()) + { + ASSERT_MSG(!dest.IsSimpleReg() && !dest.IsImm(), "MOVBE: Storing to !mem"); + dest.WriteRex(this, bits, bits, src.GetSimpleReg()); + Write8(0x0F); Write8(0x38); Write8(0xF1); + dest.WriteRest(this, 0, src.GetSimpleReg()); + } + else + { + ASSERT_MSG(0, "MOVBE: Not loading or storing to mem"); + } +} + + +void XEmitter::LEA(int bits, X64Reg dest, OpArg src) +{ + ASSERT_MSG(!src.IsImm(), "LEA - Imm argument"); + src.operandReg = (u8)dest; + if (bits == 16) + Write8(0x66); //TODO: performance warning + src.WriteRex(this, bits, bits); + Write8(0x8D); + src.WriteRest(this, 0, INVALID_REG, bits == 64); +} + +//shift can be either imm8 or cl +void XEmitter::WriteShift(int bits, OpArg dest, OpArg &shift, int ext) +{ + CheckFlags(); + bool writeImm = false; + if (dest.IsImm()) + { + ASSERT_MSG(0, "WriteShift - can't shift imms"); + } + if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) || (shift.IsImm() && shift.GetImmBits() != 8)) + { + ASSERT_MSG(0, "WriteShift - illegal argument"); + } + dest.operandReg = ext; + if (bits == 16) + Write8(0x66); + dest.WriteRex(this, bits, bits, 0); + if (shift.GetImmBits() == 8) + { + //ok an imm + u8 imm = (u8)shift.offset; + if (imm == 1) + { + Write8(bits == 8 ? 0xD0 : 0xD1); + } + else + { + writeImm = true; + Write8(bits == 8 ? 0xC0 : 0xC1); + } + } + else + { + Write8(bits == 8 ? 0xD2 : 0xD3); + } + dest.WriteRest(this, writeImm ? 1 : 0); + if (writeImm) + Write8((u8)shift.offset); +} + +// large rotates and shift are slower on intel than amd +// intel likes to rotate by 1, and the op is smaller too +void XEmitter::ROL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 0);} +void XEmitter::ROR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 1);} +void XEmitter::RCL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 2);} +void XEmitter::RCR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 3);} +void XEmitter::SHL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 4);} +void XEmitter::SHR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 5);} +void XEmitter::SAR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 7);} + +// index can be either imm8 or register, don't use memory destination because it's slow +void XEmitter::WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext) +{ + CheckFlags(); + if (dest.IsImm()) + { + ASSERT_MSG(0, "WriteBitTest - can't test imms"); + } + if ((index.IsImm() && index.GetImmBits() != 8)) + { + ASSERT_MSG(0, "WriteBitTest - illegal argument"); + } + if (bits == 16) + Write8(0x66); + if (index.IsImm()) + { + dest.WriteRex(this, bits, bits); + Write8(0x0F); Write8(0xBA); + dest.WriteRest(this, 1, (X64Reg)ext); + Write8((u8)index.offset); + } + else + { + X64Reg operand = index.GetSimpleReg(); + dest.WriteRex(this, bits, bits, operand); + Write8(0x0F); Write8(0x83 + 8*ext); + dest.WriteRest(this, 1, operand); + } +} + +void XEmitter::BT(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 4);} +void XEmitter::BTS(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 5);} +void XEmitter::BTR(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 6);} +void XEmitter::BTC(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 7);} + +//shift can be either imm8 or cl +void XEmitter::SHRD(int bits, OpArg dest, OpArg src, OpArg shift) +{ + CheckFlags(); + if (dest.IsImm()) + { + ASSERT_MSG(0, "SHRD - can't use imms as destination"); + } + if (!src.IsSimpleReg()) + { + ASSERT_MSG(0, "SHRD - must use simple register as source"); + } + if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) || (shift.IsImm() && shift.GetImmBits() != 8)) + { + ASSERT_MSG(0, "SHRD - illegal shift"); + } + if (bits == 16) + Write8(0x66); + X64Reg operand = src.GetSimpleReg(); + dest.WriteRex(this, bits, bits, operand); + if (shift.GetImmBits() == 8) + { + Write8(0x0F); Write8(0xAC); + dest.WriteRest(this, 1, operand); + Write8((u8)shift.offset); + } + else + { + Write8(0x0F); Write8(0xAD); + dest.WriteRest(this, 0, operand); + } +} + +void XEmitter::SHLD(int bits, OpArg dest, OpArg src, OpArg shift) +{ + CheckFlags(); + if (dest.IsImm()) + { + ASSERT_MSG(0, "SHLD - can't use imms as destination"); + } + if (!src.IsSimpleReg()) + { + ASSERT_MSG(0, "SHLD - must use simple register as source"); + } + if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) || (shift.IsImm() && shift.GetImmBits() != 8)) + { + ASSERT_MSG(0, "SHLD - illegal shift"); + } + if (bits == 16) + Write8(0x66); + X64Reg operand = src.GetSimpleReg(); + dest.WriteRex(this, bits, bits, operand); + if (shift.GetImmBits() == 8) + { + Write8(0x0F); Write8(0xA4); + dest.WriteRest(this, 1, operand); + Write8((u8)shift.offset); + } + else + { + Write8(0x0F); Write8(0xA5); + dest.WriteRest(this, 0, operand); + } +} + +void OpArg::WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg _operandReg, int bits) +{ + if (bits == 16) + emit->Write8(0x66); + + this->operandReg = (u8)_operandReg; + WriteRex(emit, bits, bits); + emit->Write8(op); + WriteRest(emit); +} + +//operand can either be immediate or register +void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg &operand, int bits) const +{ + X64Reg _operandReg; + if (IsImm()) + { + ASSERT_MSG(0, "WriteNormalOp - Imm argument, wrong order"); + } + + if (bits == 16) + emit->Write8(0x66); + + int immToWrite = 0; + + if (operand.IsImm()) + { + WriteRex(emit, bits, bits); + + if (!toRM) + { + ASSERT_MSG(0, "WriteNormalOp - Writing to Imm (!toRM)"); + } + + if (operand.scale == SCALE_IMM8 && bits == 8) + { + // op al, imm8 + if (!scale && offsetOrBaseReg == AL && normalops[op].eaximm8 != 0xCC) + { + emit->Write8(normalops[op].eaximm8); + emit->Write8((u8)operand.offset); + return; + } + // mov reg, imm8 + if (!scale && op == nrmMOV) + { + emit->Write8(0xB0 + (offsetOrBaseReg & 7)); + emit->Write8((u8)operand.offset); + return; + } + // op r/m8, imm8 + emit->Write8(normalops[op].imm8); + immToWrite = 8; + } + else if ((operand.scale == SCALE_IMM16 && bits == 16) || + (operand.scale == SCALE_IMM32 && bits == 32) || + (operand.scale == SCALE_IMM32 && bits == 64)) + { + // Try to save immediate size if we can, but first check to see + // if the instruction supports simm8. + // op r/m, imm8 + if (normalops[op].simm8 != 0xCC && + ((operand.scale == SCALE_IMM16 && (s16)operand.offset == (s8)operand.offset) || + (operand.scale == SCALE_IMM32 && (s32)operand.offset == (s8)operand.offset))) + { + emit->Write8(normalops[op].simm8); + immToWrite = 8; + } + else + { + // mov reg, imm + if (!scale && op == nrmMOV && bits != 64) + { + emit->Write8(0xB8 + (offsetOrBaseReg & 7)); + if (bits == 16) + emit->Write16((u16)operand.offset); + else + emit->Write32((u32)operand.offset); + return; + } + // op eax, imm + if (!scale && offsetOrBaseReg == EAX && normalops[op].eaximm32 != 0xCC) + { + emit->Write8(normalops[op].eaximm32); + if (bits == 16) + emit->Write16((u16)operand.offset); + else + emit->Write32((u32)operand.offset); + return; + } + // op r/m, imm + emit->Write8(normalops[op].imm32); + immToWrite = bits == 16 ? 16 : 32; + } + } + else if ((operand.scale == SCALE_IMM8 && bits == 16) || + (operand.scale == SCALE_IMM8 && bits == 32) || + (operand.scale == SCALE_IMM8 && bits == 64)) + { + // op r/m, imm8 + emit->Write8(normalops[op].simm8); + immToWrite = 8; + } + else if (operand.scale == SCALE_IMM64 && bits == 64) + { + if (scale) + { + ASSERT_MSG(0, "WriteNormalOp - MOV with 64-bit imm requres register destination"); + } + // mov reg64, imm64 + else if (op == nrmMOV) + { + emit->Write8(0xB8 + (offsetOrBaseReg & 7)); + emit->Write64((u64)operand.offset); + return; + } + ASSERT_MSG(0, "WriteNormalOp - Only MOV can take 64-bit imm"); + } + else + { + ASSERT_MSG(0, "WriteNormalOp - Unhandled case"); + } + _operandReg = (X64Reg)normalops[op].ext; //pass extension in REG of ModRM + } + else + { + _operandReg = (X64Reg)operand.offsetOrBaseReg; + WriteRex(emit, bits, bits, _operandReg); + // op r/m, reg + if (toRM) + { + emit->Write8(bits == 8 ? normalops[op].toRm8 : normalops[op].toRm32); + } + // op reg, r/m + else + { + emit->Write8(bits == 8 ? normalops[op].fromRm8 : normalops[op].fromRm32); + } + } + WriteRest(emit, immToWrite >> 3, _operandReg); + switch (immToWrite) + { + case 0: + break; + case 8: + emit->Write8((u8)operand.offset); + break; + case 16: + emit->Write16((u16)operand.offset); + break; + case 32: + emit->Write32((u32)operand.offset); + break; + default: + ASSERT_MSG(0, "WriteNormalOp - Unhandled case"); + } +} + +void XEmitter::WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2) +{ + if (a1.IsImm()) + { + //Booh! Can't write to an imm + ASSERT_MSG(0, "WriteNormalOp - a1 cannot be imm"); + return; + } + if (a2.IsImm()) + { + a1.WriteNormalOp(emit, true, op, a2, bits); + } + else + { + if (a1.IsSimpleReg()) + { + a2.WriteNormalOp(emit, false, op, a1, bits); + } + else + { + ASSERT_MSG(a2.IsSimpleReg() || a2.IsImm(), "WriteNormalOp - a1 and a2 cannot both be memory"); + a1.WriteNormalOp(emit, true, op, a2, bits); + } + } +} + +void XEmitter::ADD (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADD, a1, a2);} +void XEmitter::ADC (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADC, a1, a2);} +void XEmitter::SUB (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSUB, a1, a2);} +void XEmitter::SBB (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSBB, a1, a2);} +void XEmitter::AND (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmAND, a1, a2);} +void XEmitter::OR (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmOR , a1, a2);} +void XEmitter::XOR (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmXOR, a1, a2);} +void XEmitter::MOV (int bits, const OpArg &a1, const OpArg &a2) +{ + if (a1.IsSimpleReg() && a2.IsSimpleReg() && a1.GetSimpleReg() == a2.GetSimpleReg()) + LOG_ERROR(Common, "Redundant MOV @ %p - bug in JIT?", code); + WriteNormalOp(this, bits, nrmMOV, a1, a2); +} +void XEmitter::TEST(int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmTEST, a1, a2);} +void XEmitter::CMP (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmCMP, a1, a2);} +void XEmitter::XCHG(int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(this, bits, nrmXCHG, a1, a2);} + +void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a1, OpArg a2) +{ + CheckFlags(); + if (bits == 8) + { + ASSERT_MSG(0, "IMUL - illegal bit size!"); + return; + } + + if (a1.IsImm()) + { + ASSERT_MSG(0, "IMUL - second arg cannot be imm!"); + return; + } + + if (!a2.IsImm()) + { + ASSERT_MSG(0, "IMUL - third arg must be imm!"); + return; + } + + if (bits == 16) + Write8(0x66); + a1.WriteRex(this, bits, bits, regOp); + + if (a2.GetImmBits() == 8 || + (a2.GetImmBits() == 16 && (s8)a2.offset == (s16)a2.offset) || + (a2.GetImmBits() == 32 && (s8)a2.offset == (s32)a2.offset)) + { + Write8(0x6B); + a1.WriteRest(this, 1, regOp); + Write8((u8)a2.offset); + } + else + { + Write8(0x69); + if (a2.GetImmBits() == 16 && bits == 16) + { + a1.WriteRest(this, 2, regOp); + Write16((u16)a2.offset); + } + else if (a2.GetImmBits() == 32 && (bits == 32 || bits == 64)) + { + a1.WriteRest(this, 4, regOp); + Write32((u32)a2.offset); + } + else + { + ASSERT_MSG(0, "IMUL - unhandled case!"); + } + } +} + +void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a) +{ + CheckFlags(); + if (bits == 8) + { + ASSERT_MSG(0, "IMUL - illegal bit size!"); + return; + } + + if (a.IsImm()) + { + IMUL(bits, regOp, R(regOp), a) ; + return; + } + + if (bits == 16) + Write8(0x66); + a.WriteRex(this, bits, bits, regOp); + Write8(0x0F); + Write8(0xAF); + a.WriteRest(this, 0, regOp); +} + + +void XEmitter::WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes) +{ + if (opPrefix) + Write8(opPrefix); + arg.operandReg = regOp; + arg.WriteRex(this, 0, 0); + Write8(0x0F); + if (op > 0xFF) + Write8((op >> 8) & 0xFF); + Write8(op & 0xFF); + arg.WriteRest(this, extrabytes); +} + +void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes) +{ + WriteAVXOp(opPrefix, op, regOp, INVALID_REG, arg, extrabytes); +} + +static int GetVEXmmmmm(u16 op) +{ + // Currently, only 0x38 and 0x3A are used as secondary escape byte. + if ((op >> 8) == 0x3A) + return 3; + else if ((op >> 8) == 0x38) + return 2; + else + return 1; +} + +static int GetVEXpp(u8 opPrefix) +{ + if (opPrefix == 0x66) + return 1; + else if (opPrefix == 0xF3) + return 2; + else if (opPrefix == 0xF2) + return 3; + else + return 0; +} + +void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes) +{ + if (!Common::cpu_info.bAVX) + ASSERT_MSG(0, "Trying to use AVX on a system that doesn't support it. Bad programmer."); + int mmmmm = GetVEXmmmmm(op); + int pp = GetVEXpp(opPrefix); + // FIXME: we currently don't support 256-bit instructions, and "size" is not the vector size here + arg.WriteVex(this, regOp1, regOp2, 0, pp, mmmmm); + Write8(op & 0xFF); + arg.WriteRest(this, extrabytes, regOp1); +} + +// Like the above, but more general; covers GPR-based VEX operations, like BMI1/2 +void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes) +{ + if (size != 32 && size != 64) + ASSERT_MSG(0, "VEX GPR instructions only support 32-bit and 64-bit modes!"); + int mmmmm = GetVEXmmmmm(op); + int pp = GetVEXpp(opPrefix); + arg.WriteVex(this, regOp1, regOp2, 0, pp, mmmmm, size == 64); + Write8(op & 0xFF); + arg.WriteRest(this, extrabytes, regOp1); +} + +void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes) +{ + CheckFlags(); + if (!Common::cpu_info.bBMI1) + ASSERT_MSG(0, "Trying to use BMI1 on a system that doesn't support it. Bad programmer."); + WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes); +} + +void XEmitter::WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes) +{ + CheckFlags(); + if (!Common::cpu_info.bBMI2) + ASSERT_MSG(0, "Trying to use BMI2 on a system that doesn't support it. Bad programmer."); + WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes); +} + +void XEmitter::MOVD_xmm(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x6E, dest, arg, 0);} +void XEmitter::MOVD_xmm(const OpArg &arg, X64Reg src) {WriteSSEOp(0x66, 0x7E, src, arg, 0);} + +void XEmitter::MOVQ_xmm(X64Reg dest, OpArg arg) +{ +#ifdef ARCHITECTURE_X64 + // Alternate encoding + // This does not display correctly in MSVC's debugger, it thinks it's a MOVD + arg.operandReg = dest; + Write8(0x66); + arg.WriteRex(this, 64, 0); + Write8(0x0f); + Write8(0x6E); + arg.WriteRest(this, 0); +#else + arg.operandReg = dest; + Write8(0xF3); + Write8(0x0f); + Write8(0x7E); + arg.WriteRest(this, 0); +#endif +} + +void XEmitter::MOVQ_xmm(OpArg arg, X64Reg src) +{ + if (src > 7 || arg.IsSimpleReg()) + { + // Alternate encoding + // This does not display correctly in MSVC's debugger, it thinks it's a MOVD + arg.operandReg = src; + Write8(0x66); + arg.WriteRex(this, 64, 0); + Write8(0x0f); + Write8(0x7E); + arg.WriteRest(this, 0); + } + else + { + arg.operandReg = src; + arg.WriteRex(this, 0, 0); + Write8(0x66); + Write8(0x0f); + Write8(0xD6); + arg.WriteRest(this, 0); + } +} + +void XEmitter::WriteMXCSR(OpArg arg, int ext) +{ + if (arg.IsImm() || arg.IsSimpleReg()) + ASSERT_MSG(0, "MXCSR - invalid operand"); + + arg.operandReg = ext; + arg.WriteRex(this, 0, 0); + Write8(0x0F); + Write8(0xAE); + arg.WriteRest(this); +} + +void XEmitter::STMXCSR(OpArg memloc) {WriteMXCSR(memloc, 3);} +void XEmitter::LDMXCSR(OpArg memloc) {WriteMXCSR(memloc, 2);} + +void XEmitter::MOVNTDQ(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTDQ, regOp, arg);} +void XEmitter::MOVNTPS(OpArg arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVNTP, regOp, arg);} +void XEmitter::MOVNTPD(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTP, regOp, arg);} + +void XEmitter::ADDSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseADD, regOp, arg);} +void XEmitter::ADDSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseADD, regOp, arg);} +void XEmitter::SUBSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseSUB, regOp, arg);} +void XEmitter::SUBSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseSUB, regOp, arg);} +void XEmitter::CMPSS(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0xF3, sseCMP, regOp, arg, 1); Write8(compare);} +void XEmitter::CMPSD(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0xF2, sseCMP, regOp, arg, 1); Write8(compare);} +void XEmitter::MULSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMUL, regOp, arg);} +void XEmitter::MULSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMUL, regOp, arg);} +void XEmitter::DIVSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseDIV, regOp, arg);} +void XEmitter::DIVSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseDIV, regOp, arg);} +void XEmitter::MINSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMIN, regOp, arg);} +void XEmitter::MINSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMIN, regOp, arg);} +void XEmitter::MAXSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMAX, regOp, arg);} +void XEmitter::MAXSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMAX, regOp, arg);} +void XEmitter::SQRTSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseSQRT, regOp, arg);} +void XEmitter::SQRTSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseSQRT, regOp, arg);} +void XEmitter::RSQRTSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseRSQRT, regOp, arg);} + +void XEmitter::ADDPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseADD, regOp, arg);} +void XEmitter::ADDPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseADD, regOp, arg);} +void XEmitter::SUBPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseSUB, regOp, arg);} +void XEmitter::SUBPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseSUB, regOp, arg);} +void XEmitter::CMPPS(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0x00, sseCMP, regOp, arg, 1); Write8(compare);} +void XEmitter::CMPPD(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0x66, sseCMP, regOp, arg, 1); Write8(compare);} +void XEmitter::ANDPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseAND, regOp, arg);} +void XEmitter::ANDPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseAND, regOp, arg);} +void XEmitter::ANDNPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseANDN, regOp, arg);} +void XEmitter::ANDNPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseANDN, regOp, arg);} +void XEmitter::ORPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseOR, regOp, arg);} +void XEmitter::ORPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseOR, regOp, arg);} +void XEmitter::XORPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseXOR, regOp, arg);} +void XEmitter::XORPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseXOR, regOp, arg);} +void XEmitter::MULPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMUL, regOp, arg);} +void XEmitter::MULPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMUL, regOp, arg);} +void XEmitter::DIVPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseDIV, regOp, arg);} +void XEmitter::DIVPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseDIV, regOp, arg);} +void XEmitter::MINPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMIN, regOp, arg);} +void XEmitter::MINPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMIN, regOp, arg);} +void XEmitter::MAXPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMAX, regOp, arg);} +void XEmitter::MAXPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMAX, regOp, arg);} +void XEmitter::SQRTPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseSQRT, regOp, arg);} +void XEmitter::SQRTPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseSQRT, regOp, arg);} +void XEmitter::RCPPS(X64Reg regOp, OpArg arg) { WriteSSEOp(0x00, sseRCP, regOp, arg); } +void XEmitter::RSQRTPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseRSQRT, regOp, arg);} +void XEmitter::SHUFPS(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0x00, sseSHUF, regOp, arg,1); Write8(shuffle);} +void XEmitter::SHUFPD(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0x66, sseSHUF, regOp, arg,1); Write8(shuffle);} + +void XEmitter::HADDPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseHADD, regOp, arg);} + +void XEmitter::COMISS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseCOMIS, regOp, arg);} //weird that these should be packed +void XEmitter::COMISD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseCOMIS, regOp, arg);} //ordered +void XEmitter::UCOMISS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseUCOMIS, regOp, arg);} //unordered +void XEmitter::UCOMISD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseUCOMIS, regOp, arg);} + +void XEmitter::MOVAPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMOVAPfromRM, regOp, arg);} +void XEmitter::MOVAPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMOVAPfromRM, regOp, arg);} +void XEmitter::MOVAPS(OpArg arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVAPtoRM, regOp, arg);} +void XEmitter::MOVAPD(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVAPtoRM, regOp, arg);} + +void XEmitter::MOVUPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMOVUPfromRM, regOp, arg);} +void XEmitter::MOVUPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMOVUPfromRM, regOp, arg);} +void XEmitter::MOVUPS(OpArg arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVUPtoRM, regOp, arg);} +void XEmitter::MOVUPD(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVUPtoRM, regOp, arg);} + +void XEmitter::MOVDQA(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMOVDQfromRM, regOp, arg);} +void XEmitter::MOVDQA(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVDQtoRM, regOp, arg);} +void XEmitter::MOVDQU(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMOVDQfromRM, regOp, arg);} +void XEmitter::MOVDQU(OpArg arg, X64Reg regOp) {WriteSSEOp(0xF3, sseMOVDQtoRM, regOp, arg);} + +void XEmitter::MOVSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMOVUPfromRM, regOp, arg);} +void XEmitter::MOVSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMOVUPfromRM, regOp, arg);} +void XEmitter::MOVSS(OpArg arg, X64Reg regOp) {WriteSSEOp(0xF3, sseMOVUPtoRM, regOp, arg);} +void XEmitter::MOVSD(OpArg arg, X64Reg regOp) {WriteSSEOp(0xF2, sseMOVUPtoRM, regOp, arg);} + +void XEmitter::MOVLPS(X64Reg regOp, OpArg arg) { WriteSSEOp(0x00, sseMOVLPfromRM, regOp, arg); } +void XEmitter::MOVLPD(X64Reg regOp, OpArg arg) { WriteSSEOp(0x66, sseMOVLPfromRM, regOp, arg); } +void XEmitter::MOVLPS(OpArg arg, X64Reg regOp) { WriteSSEOp(0x00, sseMOVLPtoRM, regOp, arg); } +void XEmitter::MOVLPD(OpArg arg, X64Reg regOp) { WriteSSEOp(0x66, sseMOVLPtoRM, regOp, arg); } + +void XEmitter::MOVHPS(X64Reg regOp, OpArg arg) { WriteSSEOp(0x00, sseMOVHPfromRM, regOp, arg); } +void XEmitter::MOVHPD(X64Reg regOp, OpArg arg) { WriteSSEOp(0x66, sseMOVHPfromRM, regOp, arg); } +void XEmitter::MOVHPS(OpArg arg, X64Reg regOp) { WriteSSEOp(0x00, sseMOVHPtoRM, regOp, arg); } +void XEmitter::MOVHPD(OpArg arg, X64Reg regOp) { WriteSSEOp(0x66, sseMOVHPtoRM, regOp, arg); } + +void XEmitter::MOVHLPS(X64Reg regOp1, X64Reg regOp2) {WriteSSEOp(0x00, sseMOVHLPS, regOp1, R(regOp2));} +void XEmitter::MOVLHPS(X64Reg regOp1, X64Reg regOp2) {WriteSSEOp(0x00, sseMOVLHPS, regOp1, R(regOp2));} + +void XEmitter::CVTPS2PD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, 0x5A, regOp, arg);} +void XEmitter::CVTPD2PS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0x5A, regOp, arg);} + +void XEmitter::CVTSD2SS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x5A, regOp, arg);} +void XEmitter::CVTSS2SD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x5A, regOp, arg);} +void XEmitter::CVTSD2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2D, regOp, arg);} +void XEmitter::CVTSS2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2D, regOp, arg);} +void XEmitter::CVTSI2SD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2A, regOp, arg);} +void XEmitter::CVTSI2SS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2A, regOp, arg);} + +void XEmitter::CVTDQ2PD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0xE6, regOp, arg);} +void XEmitter::CVTDQ2PS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, 0x5B, regOp, arg);} +void XEmitter::CVTPD2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0xE6, regOp, arg);} +void XEmitter::CVTPS2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0x5B, regOp, arg);} + +void XEmitter::CVTTSD2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2C, regOp, arg);} +void XEmitter::CVTTSS2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2C, regOp, arg);} +void XEmitter::CVTTPS2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x5B, regOp, arg);} +void XEmitter::CVTTPD2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0xE6, regOp, arg);} + +void XEmitter::MASKMOVDQU(X64Reg dest, X64Reg src) {WriteSSEOp(0x66, sseMASKMOVDQU, dest, R(src));} + +void XEmitter::MOVMSKPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x50, dest, arg);} +void XEmitter::MOVMSKPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x50, dest, arg);} + +void XEmitter::LDDQU(X64Reg dest, OpArg arg) {WriteSSEOp(0xF2, sseLDDQU, dest, arg);} // For integer data only + +// THESE TWO ARE UNTESTED. +void XEmitter::UNPCKLPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x14, dest, arg);} +void XEmitter::UNPCKHPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x15, dest, arg);} + +void XEmitter::UNPCKLPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x14, dest, arg);} +void XEmitter::UNPCKHPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x15, dest, arg);} + +void XEmitter::MOVDDUP(X64Reg regOp, OpArg arg) +{ + if (Common::cpu_info.bSSE3) + { + WriteSSEOp(0xF2, 0x12, regOp, arg); //SSE3 movddup + } + else + { + // Simulate this instruction with SSE2 instructions + if (!arg.IsSimpleReg(regOp)) + MOVSD(regOp, arg); + UNPCKLPD(regOp, R(regOp)); + } +} + +//There are a few more left + +// Also some integer instructions are missing +void XEmitter::PACKSSDW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x6B, dest, arg);} +void XEmitter::PACKSSWB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x63, dest, arg);} +void XEmitter::PACKUSWB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x67, dest, arg);} + +void XEmitter::PUNPCKLBW(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x60, dest, arg);} +void XEmitter::PUNPCKLWD(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x61, dest, arg);} +void XEmitter::PUNPCKLDQ(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x62, dest, arg);} +void XEmitter::PUNPCKLQDQ(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x6C, dest, arg);} + +void XEmitter::PSRLW(X64Reg reg, int shift) +{ + WriteSSEOp(0x66, 0x71, (X64Reg)2, R(reg)); + Write8(shift); +} + +void XEmitter::PSRLD(X64Reg reg, int shift) +{ + WriteSSEOp(0x66, 0x72, (X64Reg)2, R(reg)); + Write8(shift); +} + +void XEmitter::PSRLQ(X64Reg reg, int shift) +{ + WriteSSEOp(0x66, 0x73, (X64Reg)2, R(reg)); + Write8(shift); +} + +void XEmitter::PSRLQ(X64Reg reg, OpArg arg) +{ + WriteSSEOp(0x66, 0xd3, reg, arg); +} + +void XEmitter::PSRLDQ(X64Reg reg, int shift) { + WriteSSEOp(0x66, 0x73, (X64Reg)3, R(reg)); + Write8(shift); +} + +void XEmitter::PSLLW(X64Reg reg, int shift) +{ + WriteSSEOp(0x66, 0x71, (X64Reg)6, R(reg)); + Write8(shift); +} + +void XEmitter::PSLLD(X64Reg reg, int shift) +{ + WriteSSEOp(0x66, 0x72, (X64Reg)6, R(reg)); + Write8(shift); +} + +void XEmitter::PSLLQ(X64Reg reg, int shift) +{ + WriteSSEOp(0x66, 0x73, (X64Reg)6, R(reg)); + Write8(shift); +} + +void XEmitter::PSLLDQ(X64Reg reg, int shift) { + WriteSSEOp(0x66, 0x73, (X64Reg)7, R(reg)); + Write8(shift); +} + +void XEmitter::PSRAW(X64Reg reg, int shift) +{ + WriteSSEOp(0x66, 0x71, (X64Reg)4, R(reg)); + Write8(shift); +} + +void XEmitter::PSRAD(X64Reg reg, int shift) +{ + WriteSSEOp(0x66, 0x72, (X64Reg)4, R(reg)); + Write8(shift); +} + +void XEmitter::WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes) +{ + if (!Common::cpu_info.bSSSE3) + ASSERT_MSG(0, "Trying to use SSSE3 on a system that doesn't support it. Bad programmer."); + WriteSSEOp(opPrefix, op, regOp, arg, extrabytes); +} + +void XEmitter::WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes) +{ + if (!Common::cpu_info.bSSE4_1) + ASSERT_MSG(0, "Trying to use SSE4.1 on a system that doesn't support it. Bad programmer."); + WriteSSEOp(opPrefix, op, regOp, arg, extrabytes); +} + +void XEmitter::PSHUFB(X64Reg dest, OpArg arg) {WriteSSSE3Op(0x66, 0x3800, dest, arg);} +void XEmitter::PTEST(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3817, dest, arg);} +void XEmitter::PACKUSDW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x382b, dest, arg);} +void XEmitter::DPPS(X64Reg dest, OpArg arg, u8 mask) {WriteSSE41Op(0x66, 0x3A40, dest, arg, 1); Write8(mask);} + +void XEmitter::PMINSB(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3838, dest, arg);} +void XEmitter::PMINSD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3839, dest, arg);} +void XEmitter::PMINUW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383a, dest, arg);} +void XEmitter::PMINUD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383b, dest, arg);} +void XEmitter::PMAXSB(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383c, dest, arg);} +void XEmitter::PMAXSD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383d, dest, arg);} +void XEmitter::PMAXUW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383e, dest, arg);} +void XEmitter::PMAXUD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383f, dest, arg);} + +void XEmitter::PMOVSXBW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3820, dest, arg);} +void XEmitter::PMOVSXBD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3821, dest, arg);} +void XEmitter::PMOVSXBQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3822, dest, arg);} +void XEmitter::PMOVSXWD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3823, dest, arg);} +void XEmitter::PMOVSXWQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3824, dest, arg);} +void XEmitter::PMOVSXDQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3825, dest, arg);} +void XEmitter::PMOVZXBW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3830, dest, arg);} +void XEmitter::PMOVZXBD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3831, dest, arg);} +void XEmitter::PMOVZXBQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3832, dest, arg);} +void XEmitter::PMOVZXWD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3833, dest, arg);} +void XEmitter::PMOVZXWQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3834, dest, arg);} +void XEmitter::PMOVZXDQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3835, dest, arg);} + +void XEmitter::PBLENDVB(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3810, dest, arg);} +void XEmitter::BLENDVPS(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3814, dest, arg);} +void XEmitter::BLENDVPD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3815, dest, arg);} +void XEmitter::BLENDPS(X64Reg dest, const OpArg& arg, u8 blend) { WriteSSE41Op(0x66, 0x3A0C, dest, arg, 1); Write8(blend); } +void XEmitter::BLENDPD(X64Reg dest, const OpArg& arg, u8 blend) { WriteSSE41Op(0x66, 0x3A0D, dest, arg, 1); Write8(blend); } + +void XEmitter::ROUNDSS(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0A, dest, arg, 1); Write8(mode);} +void XEmitter::ROUNDSD(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0B, dest, arg, 1); Write8(mode);} +void XEmitter::ROUNDPS(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A08, dest, arg, 1); Write8(mode);} +void XEmitter::ROUNDPD(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A09, dest, arg, 1); Write8(mode);} + +void XEmitter::PAND(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDB, dest, arg);} +void XEmitter::PANDN(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDF, dest, arg);} +void XEmitter::PXOR(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEF, dest, arg);} +void XEmitter::POR(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEB, dest, arg);} + +void XEmitter::PADDB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFC, dest, arg);} +void XEmitter::PADDW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFD, dest, arg);} +void XEmitter::PADDD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFE, dest, arg);} +void XEmitter::PADDQ(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD4, dest, arg);} + +void XEmitter::PADDSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEC, dest, arg);} +void XEmitter::PADDSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xED, dest, arg);} +void XEmitter::PADDUSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDC, dest, arg);} +void XEmitter::PADDUSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDD, dest, arg);} + +void XEmitter::PSUBB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF8, dest, arg);} +void XEmitter::PSUBW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF9, dest, arg);} +void XEmitter::PSUBD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFA, dest, arg);} +void XEmitter::PSUBQ(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFB, dest, arg);} + +void XEmitter::PSUBSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE8, dest, arg);} +void XEmitter::PSUBSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE9, dest, arg);} +void XEmitter::PSUBUSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD8, dest, arg);} +void XEmitter::PSUBUSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD9, dest, arg);} + +void XEmitter::PAVGB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE0, dest, arg);} +void XEmitter::PAVGW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE3, dest, arg);} + +void XEmitter::PCMPEQB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x74, dest, arg);} +void XEmitter::PCMPEQW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x75, dest, arg);} +void XEmitter::PCMPEQD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x76, dest, arg);} + +void XEmitter::PCMPGTB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x64, dest, arg);} +void XEmitter::PCMPGTW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x65, dest, arg);} +void XEmitter::PCMPGTD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x66, dest, arg);} + +void XEmitter::PEXTRW(X64Reg dest, OpArg arg, u8 subreg) {WriteSSEOp(0x66, 0xC5, dest, arg, 1); Write8(subreg);} +void XEmitter::PINSRW(X64Reg dest, OpArg arg, u8 subreg) {WriteSSEOp(0x66, 0xC4, dest, arg, 1); Write8(subreg);} + +void XEmitter::PMADDWD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF5, dest, arg); } +void XEmitter::PSADBW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF6, dest, arg);} + +void XEmitter::PMAXSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEE, dest, arg); } +void XEmitter::PMAXUB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDE, dest, arg); } +void XEmitter::PMINSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEA, dest, arg); } +void XEmitter::PMINUB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDA, dest, arg); } + +void XEmitter::PMOVMSKB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD7, dest, arg); } +void XEmitter::PSHUFD(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0x66, 0x70, regOp, arg, 1); Write8(shuffle);} +void XEmitter::PSHUFLW(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0xF2, 0x70, regOp, arg, 1); Write8(shuffle);} +void XEmitter::PSHUFHW(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0xF3, 0x70, regOp, arg, 1); Write8(shuffle);} + +// VEX +void XEmitter::VADDSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseADD, regOp1, regOp2, arg);} +void XEmitter::VSUBSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseSUB, regOp1, regOp2, arg);} +void XEmitter::VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseMUL, regOp1, regOp2, arg);} +void XEmitter::VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseDIV, regOp1, regOp2, arg);} +void XEmitter::VADDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseADD, regOp1, regOp2, arg);} +void XEmitter::VSUBPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseSUB, regOp1, regOp2, arg);} +void XEmitter::VMULPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseMUL, regOp1, regOp2, arg);} +void XEmitter::VDIVPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseDIV, regOp1, regOp2, arg);} +void XEmitter::VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseSQRT, regOp1, regOp2, arg);} +void XEmitter::VSHUFPD(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 shuffle) {WriteAVXOp(0x66, sseSHUF, regOp1, regOp2, arg, 1); Write8(shuffle);} +void XEmitter::VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, OpArg arg){WriteAVXOp(0x66, 0x14, regOp1, regOp2, arg);} +void XEmitter::VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, OpArg arg){WriteAVXOp(0x66, 0x15, regOp1, regOp2, arg);} + +void XEmitter::VANDPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseAND, regOp1, regOp2, arg); } +void XEmitter::VANDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseAND, regOp1, regOp2, arg); } +void XEmitter::VANDNPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseANDN, regOp1, regOp2, arg); } +void XEmitter::VANDNPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseANDN, regOp1, regOp2, arg); } +void XEmitter::VORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseOR, regOp1, regOp2, arg); } +void XEmitter::VORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseOR, regOp1, regOp2, arg); } +void XEmitter::VXORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseXOR, regOp1, regOp2, arg); } +void XEmitter::VXORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseXOR, regOp1, regOp2, arg); } + +void XEmitter::VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xDB, regOp1, regOp2, arg); } +void XEmitter::VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xDF, regOp1, regOp2, arg); } +void XEmitter::VPOR(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xEB, regOp1, regOp2, arg); } +void XEmitter::VPXOR(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xEF, regOp1, regOp2, arg); } + +void XEmitter::VFMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg); } +void XEmitter::VFMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg); } +void XEmitter::VFMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg); } +void XEmitter::VFMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg, 1); } +void XEmitter::VFMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg, 1); } +void XEmitter::VFMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg, 1); } +void XEmitter::VFMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg); } +void XEmitter::VFMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg); } +void XEmitter::VFMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg); } +void XEmitter::VFMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg, 1); } +void XEmitter::VFMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg, 1); } +void XEmitter::VFMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg, 1); } +void XEmitter::VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg); } +void XEmitter::VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg); } +void XEmitter::VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg); } +void XEmitter::VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg, 1); } +void XEmitter::VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg, 1); } +void XEmitter::VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg, 1); } +void XEmitter::VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg); } +void XEmitter::VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg); } +void XEmitter::VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg); } +void XEmitter::VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg, 1); } +void XEmitter::VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg, 1); } +void XEmitter::VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg, 1); } +void XEmitter::VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg); } +void XEmitter::VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg); } +void XEmitter::VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg); } +void XEmitter::VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg, 1); } +void XEmitter::VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg, 1); } +void XEmitter::VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg, 1); } +void XEmitter::VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg); } +void XEmitter::VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg); } +void XEmitter::VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg); } +void XEmitter::VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg, 1); } +void XEmitter::VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg, 1); } +void XEmitter::VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg, 1); } +void XEmitter::VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg); } +void XEmitter::VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg); } +void XEmitter::VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg); } +void XEmitter::VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg, 1); } +void XEmitter::VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg, 1); } +void XEmitter::VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg, 1); } +void XEmitter::VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg); } +void XEmitter::VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg); } +void XEmitter::VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg); } +void XEmitter::VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg, 1); } +void XEmitter::VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg, 1); } +void XEmitter::VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg, 1); } +void XEmitter::VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg); } +void XEmitter::VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg); } +void XEmitter::VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg); } +void XEmitter::VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg, 1); } +void XEmitter::VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg, 1); } +void XEmitter::VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg, 1); } +void XEmitter::VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg); } +void XEmitter::VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg); } +void XEmitter::VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg); } +void XEmitter::VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg, 1); } +void XEmitter::VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg, 1); } +void XEmitter::VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg, 1); } + +void XEmitter::SARX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF3, 0x38F7, regOp1, regOp2, arg);} +void XEmitter::SHLX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x66, 0x38F7, regOp1, regOp2, arg);} +void XEmitter::SHRX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF2, 0x38F7, regOp1, regOp2, arg);} +void XEmitter::RORX(int bits, X64Reg regOp, OpArg arg, u8 rotate) {WriteBMI2Op(bits, 0xF2, 0x3AF0, regOp, INVALID_REG, arg, 1); Write8(rotate);} +void XEmitter::PEXT(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF3, 0x38F5, regOp1, regOp2, arg);} +void XEmitter::PDEP(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF2, 0x38F5, regOp1, regOp2, arg);} +void XEmitter::MULX(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF2, 0x38F6, regOp2, regOp1, arg);} +void XEmitter::BZHI(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x00, 0x38F5, regOp1, regOp2, arg);} +void XEmitter::BLSR(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x1, regOp, arg);} +void XEmitter::BLSMSK(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x2, regOp, arg);} +void XEmitter::BLSI(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x3, regOp, arg);} +void XEmitter::BEXTR(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2){WriteBMI1Op(bits, 0x00, 0x38F7, regOp1, regOp2, arg);} +void XEmitter::ANDN(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F2, regOp1, regOp2, arg);} + +// Prefixes + +void XEmitter::LOCK() { Write8(0xF0); } +void XEmitter::REP() { Write8(0xF3); } +void XEmitter::REPNE() { Write8(0xF2); } +void XEmitter::FSOverride() { Write8(0x64); } +void XEmitter::GSOverride() { Write8(0x65); } + +void XEmitter::FWAIT() +{ + Write8(0x9B); +} + +// TODO: make this more generic +void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg) +{ + int mf = 0; + ASSERT_MSG(!(bits == 80 && op_80b == floatINVALID), "WriteFloatLoadStore: 80 bits not supported for this instruction"); + switch (bits) + { + case 32: mf = 0; break; + case 64: mf = 4; break; + case 80: mf = 2; break; + default: ASSERT_MSG(0, "WriteFloatLoadStore: invalid bits (should be 32/64/80)"); + } + Write8(0xd9 | mf); + // x87 instructions use the reg field of the ModR/M byte as opcode: + if (bits == 80) + op = op_80b; + arg.WriteRest(this, 0, (X64Reg) op); +} + +void XEmitter::FLD(int bits, OpArg src) {WriteFloatLoadStore(bits, floatLD, floatLD80, src);} +void XEmitter::FST(int bits, OpArg dest) {WriteFloatLoadStore(bits, floatST, floatINVALID, dest);} +void XEmitter::FSTP(int bits, OpArg dest) {WriteFloatLoadStore(bits, floatSTP, floatSTP80, dest);} +void XEmitter::FNSTSW_AX() { Write8(0xDF); Write8(0xE0); } + +void XEmitter::RDTSC() { Write8(0x0F); Write8(0x31); } + +void XCodeBlock::PoisonMemory() { + // x86/64: 0xCC = breakpoint + memset(region, 0xCC, region_size); +} + +} diff --git a/src/common/x64/emitter.h b/src/common/x64/emitter.h new file mode 100644 index 000000000..312e9dc19 --- /dev/null +++ b/src/common/x64/emitter.h @@ -0,0 +1,1067 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#pragma once + +#include "common/assert.h" +#include "common/common_types.h" +#include "common/code_block.h" + +#if defined(ARCHITECTURE_X64) && !defined(_ARCH_64) +#define _ARCH_64 +#endif + +#ifdef _ARCH_64 +#define PTRBITS 64 +#else +#define PTRBITS 32 +#endif + +namespace Gen +{ + +enum X64Reg +{ + EAX = 0, EBX = 3, ECX = 1, EDX = 2, + ESI = 6, EDI = 7, EBP = 5, ESP = 4, + + RAX = 0, RBX = 3, RCX = 1, RDX = 2, + RSI = 6, RDI = 7, RBP = 5, RSP = 4, + R8 = 8, R9 = 9, R10 = 10,R11 = 11, + R12 = 12,R13 = 13,R14 = 14,R15 = 15, + + AL = 0, BL = 3, CL = 1, DL = 2, + SIL = 6, DIL = 7, BPL = 5, SPL = 4, + AH = 0x104, BH = 0x107, CH = 0x105, DH = 0x106, + + AX = 0, BX = 3, CX = 1, DX = 2, + SI = 6, DI = 7, BP = 5, SP = 4, + + XMM0=0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, + + YMM0=0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, + YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15, + + INVALID_REG = 0xFFFFFFFF +}; + +enum CCFlags +{ + CC_O = 0, + CC_NO = 1, + CC_B = 2, CC_C = 2, CC_NAE = 2, + CC_NB = 3, CC_NC = 3, CC_AE = 3, + CC_Z = 4, CC_E = 4, + CC_NZ = 5, CC_NE = 5, + CC_BE = 6, CC_NA = 6, + CC_NBE = 7, CC_A = 7, + CC_S = 8, + CC_NS = 9, + CC_P = 0xA, CC_PE = 0xA, + CC_NP = 0xB, CC_PO = 0xB, + CC_L = 0xC, CC_NGE = 0xC, + CC_NL = 0xD, CC_GE = 0xD, + CC_LE = 0xE, CC_NG = 0xE, + CC_NLE = 0xF, CC_G = 0xF +}; + +enum +{ + NUMGPRs = 16, + NUMXMMs = 16, +}; + +enum +{ + SCALE_NONE = 0, + SCALE_1 = 1, + SCALE_2 = 2, + SCALE_4 = 4, + SCALE_8 = 8, + SCALE_ATREG = 16, + //SCALE_NOBASE_1 is not supported and can be replaced with SCALE_ATREG + SCALE_NOBASE_2 = 34, + SCALE_NOBASE_4 = 36, + SCALE_NOBASE_8 = 40, + SCALE_RIP = 0xFF, + SCALE_IMM8 = 0xF0, + SCALE_IMM16 = 0xF1, + SCALE_IMM32 = 0xF2, + SCALE_IMM64 = 0xF3, +}; + +enum NormalOp { + nrmADD, + nrmADC, + nrmSUB, + nrmSBB, + nrmAND, + nrmOR , + nrmXOR, + nrmMOV, + nrmTEST, + nrmCMP, + nrmXCHG, +}; + +enum { + CMP_EQ = 0, + CMP_LT = 1, + CMP_LE = 2, + CMP_UNORD = 3, + CMP_NEQ = 4, + CMP_NLT = 5, + CMP_NLE = 6, + CMP_ORD = 7, +}; + +enum FloatOp { + floatLD = 0, + floatST = 2, + floatSTP = 3, + floatLD80 = 5, + floatSTP80 = 7, + + floatINVALID = -1, +}; + +enum FloatRound { + FROUND_NEAREST = 0, + FROUND_FLOOR = 1, + FROUND_CEIL = 2, + FROUND_ZERO = 3, + FROUND_MXCSR = 4, + + FROUND_RAISE_PRECISION = 0, + FROUND_IGNORE_PRECISION = 8, +}; + +class XEmitter; + +// RIP addressing does not benefit from micro op fusion on Core arch +struct OpArg +{ + OpArg() {} // dummy op arg, used for storage + OpArg(u64 _offset, int _scale, X64Reg rmReg = RAX, X64Reg scaledReg = RAX) + { + operandReg = 0; + scale = (u8)_scale; + offsetOrBaseReg = (u16)rmReg; + indexReg = (u16)scaledReg; + //if scale == 0 never mind offsetting + offset = _offset; + } + bool operator==(const OpArg &b) const + { + return operandReg == b.operandReg && scale == b.scale && offsetOrBaseReg == b.offsetOrBaseReg && + indexReg == b.indexReg && offset == b.offset; + } + void WriteRex(XEmitter *emit, int opBits, int bits, int customOp = -1) const; + void WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm, int W = 0) const; + void WriteRest(XEmitter *emit, int extraBytes=0, X64Reg operandReg=INVALID_REG, bool warn_64bit_offset = true) const; + void WriteFloatModRM(XEmitter *emit, FloatOp op); + void WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg operandReg, int bits); + // This one is public - must be written to + u64 offset; // use RIP-relative as much as possible - 64-bit immediates are not available. + u16 operandReg; + + void WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg &operand, int bits) const; + bool IsImm() const {return scale == SCALE_IMM8 || scale == SCALE_IMM16 || scale == SCALE_IMM32 || scale == SCALE_IMM64;} + bool IsSimpleReg() const {return scale == SCALE_NONE;} + bool IsSimpleReg(X64Reg reg) const + { + if (!IsSimpleReg()) + return false; + return GetSimpleReg() == reg; + } + + bool CanDoOpWith(const OpArg &other) const + { + if (IsSimpleReg()) return true; + if (!IsSimpleReg() && !other.IsSimpleReg() && !other.IsImm()) return false; + return true; + } + + int GetImmBits() const + { + switch (scale) + { + case SCALE_IMM8: return 8; + case SCALE_IMM16: return 16; + case SCALE_IMM32: return 32; + case SCALE_IMM64: return 64; + default: return -1; + } + } + + void SetImmBits(int bits) { + switch (bits) + { + case 8: scale = SCALE_IMM8; break; + case 16: scale = SCALE_IMM16; break; + case 32: scale = SCALE_IMM32; break; + case 64: scale = SCALE_IMM64; break; + } + } + + X64Reg GetSimpleReg() const + { + if (scale == SCALE_NONE) + return (X64Reg)offsetOrBaseReg; + else + return INVALID_REG; + } + + u32 GetImmValue() const { + return (u32)offset; + } + + // For loops. + void IncreaseOffset(int sz) { + offset += sz; + } + +private: + u8 scale; + u16 offsetOrBaseReg; + u16 indexReg; +}; + +inline OpArg M(const void *ptr) {return OpArg((u64)ptr, (int)SCALE_RIP);} +template +inline OpArg M(const T *ptr) {return OpArg((u64)(const void *)ptr, (int)SCALE_RIP);} +inline OpArg R(X64Reg value) {return OpArg(0, SCALE_NONE, value);} +inline OpArg MatR(X64Reg value) {return OpArg(0, SCALE_ATREG, value);} + +inline OpArg MDisp(X64Reg value, int offset) +{ + return OpArg((u32)offset, SCALE_ATREG, value); +} + +inline OpArg MComplex(X64Reg base, X64Reg scaled, int scale, int offset) +{ + return OpArg(offset, scale, base, scaled); +} + +inline OpArg MScaled(X64Reg scaled, int scale, int offset) +{ + if (scale == SCALE_1) + return OpArg(offset, SCALE_ATREG, scaled); + else + return OpArg(offset, scale | 0x20, RAX, scaled); +} + +inline OpArg MRegSum(X64Reg base, X64Reg offset) +{ + return MComplex(base, offset, 1, 0); +} + +inline OpArg Imm8 (u8 imm) {return OpArg(imm, SCALE_IMM8);} +inline OpArg Imm16(u16 imm) {return OpArg(imm, SCALE_IMM16);} //rarely used +inline OpArg Imm32(u32 imm) {return OpArg(imm, SCALE_IMM32);} +inline OpArg Imm64(u64 imm) {return OpArg(imm, SCALE_IMM64);} +inline OpArg UImmAuto(u32 imm) { + return OpArg(imm, imm >= 128 ? SCALE_IMM32 : SCALE_IMM8); +} +inline OpArg SImmAuto(s32 imm) { + return OpArg(imm, (imm >= 128 || imm < -128) ? SCALE_IMM32 : SCALE_IMM8); +} + +#ifdef _ARCH_64 +inline OpArg ImmPtr(const void* imm) {return Imm64((u64)imm);} +#else +inline OpArg ImmPtr(const void* imm) {return Imm32((u32)imm);} +#endif + +inline u32 PtrOffset(const void* ptr, const void* base) +{ +#ifdef _ARCH_64 + s64 distance = (s64)ptr-(s64)base; + if (distance >= 0x80000000LL || + distance < -0x80000000LL) + { + ASSERT_MSG(0, "pointer offset out of range"); + return 0; + } + + return (u32)distance; +#else + return (u32)ptr-(u32)base; +#endif +} + +//usage: int a[]; ARRAY_OFFSET(a,10) +#define ARRAY_OFFSET(array,index) ((u32)((u64)&(array)[index]-(u64)&(array)[0])) +//usage: struct {int e;} s; STRUCT_OFFSET(s,e) +#define STRUCT_OFFSET(str,elem) ((u32)((u64)&(str).elem-(u64)&(str))) + +struct FixupBranch +{ + u8 *ptr; + int type; //0 = 8bit 1 = 32bit +}; + +enum SSECompare +{ + EQ = 0, + LT, + LE, + UNORD, + NEQ, + NLT, + NLE, + ORD, +}; + +typedef const u8* JumpTarget; + +class XEmitter +{ + friend struct OpArg; // for Write8 etc +private: + u8 *code; + bool flags_locked; + + void CheckFlags(); + + void Rex(int w, int r, int x, int b); + void WriteSimple1Byte(int bits, u8 byte, X64Reg reg); + void WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg); + void WriteMulDivType(int bits, OpArg src, int ext); + void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep = false); + void WriteShift(int bits, OpArg dest, OpArg &shift, int ext); + void WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext); + void WriteMXCSR(OpArg arg, int ext); + void WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); + void WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); + void WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); + void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); + void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); + void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); + void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); + void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); + void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg); + void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2); + + void ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp); + +protected: + inline void Write8(u8 value) {*code++ = value;} + inline void Write16(u16 value) {*(u16*)code = (value); code += 2;} + inline void Write32(u32 value) {*(u32*)code = (value); code += 4;} + inline void Write64(u64 value) {*(u64*)code = (value); code += 8;} + +public: + XEmitter() { code = nullptr; flags_locked = false; } + XEmitter(u8 *code_ptr) { code = code_ptr; flags_locked = false; } + virtual ~XEmitter() {} + + void WriteModRM(int mod, int rm, int reg); + void WriteSIB(int scale, int index, int base); + + void SetCodePtr(u8 *ptr); + void ReserveCodeSpace(int bytes); + const u8 *AlignCode4(); + const u8 *AlignCode16(); + const u8 *AlignCodePage(); + const u8 *GetCodePtr() const; + u8 *GetWritableCodePtr(); + + void LockFlags() { flags_locked = true; } + void UnlockFlags() { flags_locked = false; } + + // Looking for one of these? It's BANNED!! Some instructions are slow on modern CPU + // INC, DEC, LOOP, LOOPNE, LOOPE, ENTER, LEAVE, XCHG, XLAT, REP MOVSB/MOVSD, REP SCASD + other string instr., + // INC and DEC are slow on Intel Core, but not on AMD. They create a + // false flag dependency because they only update a subset of the flags. + // XCHG is SLOW and should be avoided. + + // Debug breakpoint + void INT3(); + + // Do nothing + void NOP(size_t count = 1); + + // Save energy in wait-loops on P4 only. Probably not too useful. + void PAUSE(); + + // Flag control + void STC(); + void CLC(); + void CMC(); + + // These two can not be executed in 64-bit mode on early Intel 64-bit CPU:s, only on Core2 and AMD! + void LAHF(); // 3 cycle vector path + void SAHF(); // direct path fast + + + // Stack control + void PUSH(X64Reg reg); + void POP(X64Reg reg); + void PUSH(int bits, const OpArg ®); + void POP(int bits, const OpArg ®); + void PUSHF(); + void POPF(); + + // Flow control + void RET(); + void RET_FAST(); + void UD2(); + FixupBranch J(bool force5bytes = false); + + void JMP(const u8 * addr, bool force5Bytes = false); + void JMP(OpArg arg); + void JMPptr(const OpArg &arg); + void JMPself(); //infinite loop! +#ifdef CALL +#undef CALL +#endif + void CALL(const void *fnptr); + void CALLptr(OpArg arg); + + FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false); + //void J_CC(CCFlags conditionCode, JumpTarget target); + void J_CC(CCFlags conditionCode, const u8 * addr, bool force5Bytes = false); + + void SetJumpTarget(const FixupBranch &branch); + + void SETcc(CCFlags flag, OpArg dest); + // Note: CMOV brings small if any benefit on current cpus. + void CMOVcc(int bits, X64Reg dest, OpArg src, CCFlags flag); + + // Fences + void LFENCE(); + void MFENCE(); + void SFENCE(); + + // Bit scan + void BSF(int bits, X64Reg dest, OpArg src); //bottom bit to top bit + void BSR(int bits, X64Reg dest, OpArg src); //top bit to bottom bit + + // Cache control + enum PrefetchLevel + { + PF_NTA, //Non-temporal (data used once and only once) + PF_T0, //All cache levels + PF_T1, //Levels 2+ (aliased to T0 on AMD) + PF_T2, //Levels 3+ (aliased to T0 on AMD) + }; + void PREFETCH(PrefetchLevel level, OpArg arg); + void MOVNTI(int bits, OpArg dest, X64Reg src); + void MOVNTDQ(OpArg arg, X64Reg regOp); + void MOVNTPS(OpArg arg, X64Reg regOp); + void MOVNTPD(OpArg arg, X64Reg regOp); + + // Multiplication / division + void MUL(int bits, OpArg src); //UNSIGNED + void IMUL(int bits, OpArg src); //SIGNED + void IMUL(int bits, X64Reg regOp, OpArg src); + void IMUL(int bits, X64Reg regOp, OpArg src, OpArg imm); + void DIV(int bits, OpArg src); + void IDIV(int bits, OpArg src); + + // Shift + void ROL(int bits, OpArg dest, OpArg shift); + void ROR(int bits, OpArg dest, OpArg shift); + void RCL(int bits, OpArg dest, OpArg shift); + void RCR(int bits, OpArg dest, OpArg shift); + void SHL(int bits, OpArg dest, OpArg shift); + void SHR(int bits, OpArg dest, OpArg shift); + void SAR(int bits, OpArg dest, OpArg shift); + + // Bit Test + void BT(int bits, OpArg dest, OpArg index); + void BTS(int bits, OpArg dest, OpArg index); + void BTR(int bits, OpArg dest, OpArg index); + void BTC(int bits, OpArg dest, OpArg index); + + // Double-Precision Shift + void SHRD(int bits, OpArg dest, OpArg src, OpArg shift); + void SHLD(int bits, OpArg dest, OpArg src, OpArg shift); + + // Extend EAX into EDX in various ways + void CWD(int bits = 16); + inline void CDQ() {CWD(32);} + inline void CQO() {CWD(64);} + void CBW(int bits = 8); + inline void CWDE() {CBW(16);} + inline void CDQE() {CBW(32);} + + // Load effective address + void LEA(int bits, X64Reg dest, OpArg src); + + // Integer arithmetic + void NEG (int bits, OpArg src); + void ADD (int bits, const OpArg &a1, const OpArg &a2); + void ADC (int bits, const OpArg &a1, const OpArg &a2); + void SUB (int bits, const OpArg &a1, const OpArg &a2); + void SBB (int bits, const OpArg &a1, const OpArg &a2); + void AND (int bits, const OpArg &a1, const OpArg &a2); + void CMP (int bits, const OpArg &a1, const OpArg &a2); + + // Bit operations + void NOT (int bits, OpArg src); + void OR (int bits, const OpArg &a1, const OpArg &a2); + void XOR (int bits, const OpArg &a1, const OpArg &a2); + void MOV (int bits, const OpArg &a1, const OpArg &a2); + void TEST(int bits, const OpArg &a1, const OpArg &a2); + + // Are these useful at all? Consider removing. + void XCHG(int bits, const OpArg &a1, const OpArg &a2); + void XCHG_AHAL(); + + // Byte swapping (32 and 64-bit only). + void BSWAP(int bits, X64Reg reg); + + // Sign/zero extension + void MOVSX(int dbits, int sbits, X64Reg dest, OpArg src); //automatically uses MOVSXD if necessary + void MOVZX(int dbits, int sbits, X64Reg dest, OpArg src); + + // Available only on Atom or >= Haswell so far. Test with cpu_info.bMOVBE. + void MOVBE(int dbits, const OpArg& dest, const OpArg& src); + + // Available only on AMD >= Phenom or Intel >= Haswell + void LZCNT(int bits, X64Reg dest, OpArg src); + // Note: this one is actually part of BMI1 + void TZCNT(int bits, X64Reg dest, OpArg src); + + // WARNING - These two take 11-13 cycles and are VectorPath! (AMD64) + void STMXCSR(OpArg memloc); + void LDMXCSR(OpArg memloc); + + // Prefixes + void LOCK(); + void REP(); + void REPNE(); + void FSOverride(); + void GSOverride(); + + // x87 + enum x87StatusWordBits { + x87_InvalidOperation = 0x1, + x87_DenormalizedOperand = 0x2, + x87_DivisionByZero = 0x4, + x87_Overflow = 0x8, + x87_Underflow = 0x10, + x87_Precision = 0x20, + x87_StackFault = 0x40, + x87_ErrorSummary = 0x80, + x87_C0 = 0x100, + x87_C1 = 0x200, + x87_C2 = 0x400, + x87_TopOfStack = 0x2000 | 0x1000 | 0x800, + x87_C3 = 0x4000, + x87_FPUBusy = 0x8000, + }; + + void FLD(int bits, OpArg src); + void FST(int bits, OpArg dest); + void FSTP(int bits, OpArg dest); + void FNSTSW_AX(); + void FWAIT(); + + // SSE/SSE2: Floating point arithmetic + void ADDSS(X64Reg regOp, OpArg arg); + void ADDSD(X64Reg regOp, OpArg arg); + void SUBSS(X64Reg regOp, OpArg arg); + void SUBSD(X64Reg regOp, OpArg arg); + void MULSS(X64Reg regOp, OpArg arg); + void MULSD(X64Reg regOp, OpArg arg); + void DIVSS(X64Reg regOp, OpArg arg); + void DIVSD(X64Reg regOp, OpArg arg); + void MINSS(X64Reg regOp, OpArg arg); + void MINSD(X64Reg regOp, OpArg arg); + void MAXSS(X64Reg regOp, OpArg arg); + void MAXSD(X64Reg regOp, OpArg arg); + void SQRTSS(X64Reg regOp, OpArg arg); + void SQRTSD(X64Reg regOp, OpArg arg); + void RSQRTSS(X64Reg regOp, OpArg arg); + + // SSE/SSE2: Floating point bitwise (yes) + void CMPSS(X64Reg regOp, OpArg arg, u8 compare); + void CMPSD(X64Reg regOp, OpArg arg, u8 compare); + + inline void CMPEQSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_EQ); } + inline void CMPLTSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_LT); } + inline void CMPLESS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_LE); } + inline void CMPUNORDSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_UNORD); } + inline void CMPNEQSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_NEQ); } + inline void CMPNLTSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_NLT); } + inline void CMPORDSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_ORD); } + + // SSE/SSE2: Floating point packed arithmetic (x4 for float, x2 for double) + void ADDPS(X64Reg regOp, OpArg arg); + void ADDPD(X64Reg regOp, OpArg arg); + void SUBPS(X64Reg regOp, OpArg arg); + void SUBPD(X64Reg regOp, OpArg arg); + void CMPPS(X64Reg regOp, OpArg arg, u8 compare); + void CMPPD(X64Reg regOp, OpArg arg, u8 compare); + void MULPS(X64Reg regOp, OpArg arg); + void MULPD(X64Reg regOp, OpArg arg); + void DIVPS(X64Reg regOp, OpArg arg); + void DIVPD(X64Reg regOp, OpArg arg); + void MINPS(X64Reg regOp, OpArg arg); + void MINPD(X64Reg regOp, OpArg arg); + void MAXPS(X64Reg regOp, OpArg arg); + void MAXPD(X64Reg regOp, OpArg arg); + void SQRTPS(X64Reg regOp, OpArg arg); + void SQRTPD(X64Reg regOp, OpArg arg); + void RCPPS(X64Reg regOp, OpArg arg); + void RSQRTPS(X64Reg regOp, OpArg arg); + + // SSE/SSE2: Floating point packed bitwise (x4 for float, x2 for double) + void ANDPS(X64Reg regOp, OpArg arg); + void ANDPD(X64Reg regOp, OpArg arg); + void ANDNPS(X64Reg regOp, OpArg arg); + void ANDNPD(X64Reg regOp, OpArg arg); + void ORPS(X64Reg regOp, OpArg arg); + void ORPD(X64Reg regOp, OpArg arg); + void XORPS(X64Reg regOp, OpArg arg); + void XORPD(X64Reg regOp, OpArg arg); + + // SSE/SSE2: Shuffle components. These are tricky - see Intel documentation. + void SHUFPS(X64Reg regOp, OpArg arg, u8 shuffle); + void SHUFPD(X64Reg regOp, OpArg arg, u8 shuffle); + + // SSE/SSE2: Useful alternative to shuffle in some cases. + void MOVDDUP(X64Reg regOp, OpArg arg); + + // TODO: Actually implement +#if 0 + // SSE3: Horizontal operations in SIMD registers. Could be useful for various VFPU things like dot products... + void ADDSUBPS(X64Reg dest, OpArg src); + void ADDSUBPD(X64Reg dest, OpArg src); + void HADDPD(X64Reg dest, OpArg src); + void HSUBPS(X64Reg dest, OpArg src); + void HSUBPD(X64Reg dest, OpArg src); + + // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask". + void DPPD(X64Reg dest, OpArg src, u8 arg); + + // These are probably useful for VFPU emulation. + void INSERTPS(X64Reg dest, OpArg src, u8 arg); + void EXTRACTPS(OpArg dest, X64Reg src, u8 arg); +#endif + + // SSE3: Horizontal operations in SIMD registers. Very slow! shufps-based code beats it handily on Ivy. + void HADDPS(X64Reg dest, OpArg src); + + // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask". + void DPPS(X64Reg dest, OpArg src, u8 arg); + + void UNPCKLPS(X64Reg dest, OpArg src); + void UNPCKHPS(X64Reg dest, OpArg src); + void UNPCKLPD(X64Reg dest, OpArg src); + void UNPCKHPD(X64Reg dest, OpArg src); + + // SSE/SSE2: Compares. + void COMISS(X64Reg regOp, OpArg arg); + void COMISD(X64Reg regOp, OpArg arg); + void UCOMISS(X64Reg regOp, OpArg arg); + void UCOMISD(X64Reg regOp, OpArg arg); + + // SSE/SSE2: Moves. Use the right data type for your data, in most cases. + void MOVAPS(X64Reg regOp, OpArg arg); + void MOVAPD(X64Reg regOp, OpArg arg); + void MOVAPS(OpArg arg, X64Reg regOp); + void MOVAPD(OpArg arg, X64Reg regOp); + + void MOVUPS(X64Reg regOp, OpArg arg); + void MOVUPD(X64Reg regOp, OpArg arg); + void MOVUPS(OpArg arg, X64Reg regOp); + void MOVUPD(OpArg arg, X64Reg regOp); + + void MOVDQA(X64Reg regOp, OpArg arg); + void MOVDQA(OpArg arg, X64Reg regOp); + void MOVDQU(X64Reg regOp, OpArg arg); + void MOVDQU(OpArg arg, X64Reg regOp); + + void MOVSS(X64Reg regOp, OpArg arg); + void MOVSD(X64Reg regOp, OpArg arg); + void MOVSS(OpArg arg, X64Reg regOp); + void MOVSD(OpArg arg, X64Reg regOp); + + void MOVLPS(X64Reg regOp, OpArg arg); + void MOVLPD(X64Reg regOp, OpArg arg); + void MOVLPS(OpArg arg, X64Reg regOp); + void MOVLPD(OpArg arg, X64Reg regOp); + + void MOVHPS(X64Reg regOp, OpArg arg); + void MOVHPD(X64Reg regOp, OpArg arg); + void MOVHPS(OpArg arg, X64Reg regOp); + void MOVHPD(OpArg arg, X64Reg regOp); + + void MOVHLPS(X64Reg regOp1, X64Reg regOp2); + void MOVLHPS(X64Reg regOp1, X64Reg regOp2); + + void MOVD_xmm(X64Reg dest, const OpArg &arg); + void MOVQ_xmm(X64Reg dest, OpArg arg); + void MOVD_xmm(const OpArg &arg, X64Reg src); + void MOVQ_xmm(OpArg arg, X64Reg src); + + // SSE/SSE2: Generates a mask from the high bits of the components of the packed register in question. + void MOVMSKPS(X64Reg dest, OpArg arg); + void MOVMSKPD(X64Reg dest, OpArg arg); + + // SSE2: Selective byte store, mask in src register. EDI/RDI specifies store address. This is a weird one. + void MASKMOVDQU(X64Reg dest, X64Reg src); + void LDDQU(X64Reg dest, OpArg src); + + // SSE/SSE2: Data type conversions. + void CVTPS2PD(X64Reg dest, OpArg src); + void CVTPD2PS(X64Reg dest, OpArg src); + void CVTSS2SD(X64Reg dest, OpArg src); + void CVTSI2SS(X64Reg dest, OpArg src); + void CVTSD2SS(X64Reg dest, OpArg src); + void CVTSI2SD(X64Reg dest, OpArg src); + void CVTDQ2PD(X64Reg regOp, OpArg arg); + void CVTPD2DQ(X64Reg regOp, OpArg arg); + void CVTDQ2PS(X64Reg regOp, OpArg arg); + void CVTPS2DQ(X64Reg regOp, OpArg arg); + + void CVTTPS2DQ(X64Reg regOp, OpArg arg); + void CVTTPD2DQ(X64Reg regOp, OpArg arg); + + // Destinations are X64 regs (rax, rbx, ...) for these instructions. + void CVTSS2SI(X64Reg xregdest, OpArg src); + void CVTSD2SI(X64Reg xregdest, OpArg src); + void CVTTSS2SI(X64Reg xregdest, OpArg arg); + void CVTTSD2SI(X64Reg xregdest, OpArg arg); + + // SSE2: Packed integer instructions + void PACKSSDW(X64Reg dest, OpArg arg); + void PACKSSWB(X64Reg dest, OpArg arg); + void PACKUSDW(X64Reg dest, OpArg arg); + void PACKUSWB(X64Reg dest, OpArg arg); + + void PUNPCKLBW(X64Reg dest, const OpArg &arg); + void PUNPCKLWD(X64Reg dest, const OpArg &arg); + void PUNPCKLDQ(X64Reg dest, const OpArg &arg); + void PUNPCKLQDQ(X64Reg dest, const OpArg &arg); + + void PTEST(X64Reg dest, OpArg arg); + void PAND(X64Reg dest, OpArg arg); + void PANDN(X64Reg dest, OpArg arg); + void PXOR(X64Reg dest, OpArg arg); + void POR(X64Reg dest, OpArg arg); + + void PADDB(X64Reg dest, OpArg arg); + void PADDW(X64Reg dest, OpArg arg); + void PADDD(X64Reg dest, OpArg arg); + void PADDQ(X64Reg dest, OpArg arg); + + void PADDSB(X64Reg dest, OpArg arg); + void PADDSW(X64Reg dest, OpArg arg); + void PADDUSB(X64Reg dest, OpArg arg); + void PADDUSW(X64Reg dest, OpArg arg); + + void PSUBB(X64Reg dest, OpArg arg); + void PSUBW(X64Reg dest, OpArg arg); + void PSUBD(X64Reg dest, OpArg arg); + void PSUBQ(X64Reg dest, OpArg arg); + + void PSUBSB(X64Reg dest, OpArg arg); + void PSUBSW(X64Reg dest, OpArg arg); + void PSUBUSB(X64Reg dest, OpArg arg); + void PSUBUSW(X64Reg dest, OpArg arg); + + void PAVGB(X64Reg dest, OpArg arg); + void PAVGW(X64Reg dest, OpArg arg); + + void PCMPEQB(X64Reg dest, OpArg arg); + void PCMPEQW(X64Reg dest, OpArg arg); + void PCMPEQD(X64Reg dest, OpArg arg); + + void PCMPGTB(X64Reg dest, OpArg arg); + void PCMPGTW(X64Reg dest, OpArg arg); + void PCMPGTD(X64Reg dest, OpArg arg); + + void PEXTRW(X64Reg dest, OpArg arg, u8 subreg); + void PINSRW(X64Reg dest, OpArg arg, u8 subreg); + + void PMADDWD(X64Reg dest, OpArg arg); + void PSADBW(X64Reg dest, OpArg arg); + + void PMAXSW(X64Reg dest, OpArg arg); + void PMAXUB(X64Reg dest, OpArg arg); + void PMINSW(X64Reg dest, OpArg arg); + void PMINUB(X64Reg dest, OpArg arg); + // SSE4: More MAX/MIN instructions. + void PMINSB(X64Reg dest, OpArg arg); + void PMINSD(X64Reg dest, OpArg arg); + void PMINUW(X64Reg dest, OpArg arg); + void PMINUD(X64Reg dest, OpArg arg); + void PMAXSB(X64Reg dest, OpArg arg); + void PMAXSD(X64Reg dest, OpArg arg); + void PMAXUW(X64Reg dest, OpArg arg); + void PMAXUD(X64Reg dest, OpArg arg); + + void PMOVMSKB(X64Reg dest, OpArg arg); + void PSHUFD(X64Reg dest, OpArg arg, u8 shuffle); + void PSHUFB(X64Reg dest, OpArg arg); + + void PSHUFLW(X64Reg dest, OpArg arg, u8 shuffle); + void PSHUFHW(X64Reg dest, OpArg arg, u8 shuffle); + + void PSRLW(X64Reg reg, int shift); + void PSRLD(X64Reg reg, int shift); + void PSRLQ(X64Reg reg, int shift); + void PSRLQ(X64Reg reg, OpArg arg); + void PSRLDQ(X64Reg reg, int shift); + + void PSLLW(X64Reg reg, int shift); + void PSLLD(X64Reg reg, int shift); + void PSLLQ(X64Reg reg, int shift); + void PSLLDQ(X64Reg reg, int shift); + + void PSRAW(X64Reg reg, int shift); + void PSRAD(X64Reg reg, int shift); + + // SSE4: data type conversions + void PMOVSXBW(X64Reg dest, OpArg arg); + void PMOVSXBD(X64Reg dest, OpArg arg); + void PMOVSXBQ(X64Reg dest, OpArg arg); + void PMOVSXWD(X64Reg dest, OpArg arg); + void PMOVSXWQ(X64Reg dest, OpArg arg); + void PMOVSXDQ(X64Reg dest, OpArg arg); + void PMOVZXBW(X64Reg dest, OpArg arg); + void PMOVZXBD(X64Reg dest, OpArg arg); + void PMOVZXBQ(X64Reg dest, OpArg arg); + void PMOVZXWD(X64Reg dest, OpArg arg); + void PMOVZXWQ(X64Reg dest, OpArg arg); + void PMOVZXDQ(X64Reg dest, OpArg arg); + + // SSE4: variable blend instructions (xmm0 implicit argument) + void PBLENDVB(X64Reg dest, OpArg arg); + void BLENDVPS(X64Reg dest, OpArg arg); + void BLENDVPD(X64Reg dest, OpArg arg); + void BLENDPS(X64Reg dest, const OpArg& arg, u8 blend); + void BLENDPD(X64Reg dest, const OpArg& arg, u8 blend); + + // SSE4: rounding (see FloatRound for mode or use ROUNDNEARSS, etc. helpers.) + void ROUNDSS(X64Reg dest, OpArg arg, u8 mode); + void ROUNDSD(X64Reg dest, OpArg arg, u8 mode); + void ROUNDPS(X64Reg dest, OpArg arg, u8 mode); + void ROUNDPD(X64Reg dest, OpArg arg, u8 mode); + + inline void ROUNDNEARSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_NEAREST); } + inline void ROUNDFLOORSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_FLOOR); } + inline void ROUNDCEILSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_CEIL); } + inline void ROUNDZEROSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_ZERO); } + + inline void ROUNDNEARSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_NEAREST); } + inline void ROUNDFLOORSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_FLOOR); } + inline void ROUNDCEILSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_CEIL); } + inline void ROUNDZEROSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_ZERO); } + + inline void ROUNDNEARPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_NEAREST); } + inline void ROUNDFLOORPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_FLOOR); } + inline void ROUNDCEILPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_CEIL); } + inline void ROUNDZEROPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_ZERO); } + + inline void ROUNDNEARPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_NEAREST); } + inline void ROUNDFLOORPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_FLOOR); } + inline void ROUNDCEILPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_CEIL); } + inline void ROUNDZEROPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_ZERO); } + + // AVX + void VADDSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VSUBSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VADDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VSUBPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VMULPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VDIVPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VSHUFPD(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 shuffle); + void VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + + void VANDPS(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VANDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VANDNPS(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VANDNPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VXORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VXORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + + void VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VPOR(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VPXOR(X64Reg regOp1, X64Reg regOp2, OpArg arg); + + // FMA3 + void VFMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + + // VEX GPR instructions + void SARX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); + void SHLX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); + void SHRX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); + void RORX(int bits, X64Reg regOp, OpArg arg, u8 rotate); + void PEXT(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); + void PDEP(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); + void MULX(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); + void BZHI(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); + void BLSR(int bits, X64Reg regOp, OpArg arg); + void BLSMSK(int bits, X64Reg regOp, OpArg arg); + void BLSI(int bits, X64Reg regOp, OpArg arg); + void BEXTR(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); + void ANDN(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); + + void RDTSC(); + + // Utility functions + // The difference between this and CALL is that this aligns the stack + // where appropriate. + void ABI_CallFunction(const void *func); + template + void ABI_CallFunction(T (*func)()) { + ABI_CallFunction((const void *)func); + } + + void ABI_CallFunction(const u8 *func) { + ABI_CallFunction((const void *)func); + } + void ABI_CallFunctionC16(const void *func, u16 param1); + void ABI_CallFunctionCC16(const void *func, u32 param1, u16 param2); + + + // These only support u32 parameters, but that's enough for a lot of uses. + // These will destroy the 1 or 2 first "parameter regs". + void ABI_CallFunctionC(const void *func, u32 param1); + void ABI_CallFunctionCC(const void *func, u32 param1, u32 param2); + void ABI_CallFunctionCCC(const void *func, u32 param1, u32 param2, u32 param3); + void ABI_CallFunctionCCP(const void *func, u32 param1, u32 param2, void *param3); + void ABI_CallFunctionCCCP(const void *func, u32 param1, u32 param2, u32 param3, void *param4); + void ABI_CallFunctionP(const void *func, void *param1); + void ABI_CallFunctionPA(const void *func, void *param1, const Gen::OpArg &arg2); + void ABI_CallFunctionPAA(const void *func, void *param1, const Gen::OpArg &arg2, const Gen::OpArg &arg3); + void ABI_CallFunctionPPC(const void *func, void *param1, void *param2, u32 param3); + void ABI_CallFunctionAC(const void *func, const Gen::OpArg &arg1, u32 param2); + void ABI_CallFunctionACC(const void *func, const Gen::OpArg &arg1, u32 param2, u32 param3); + void ABI_CallFunctionA(const void *func, const Gen::OpArg &arg1); + void ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, const Gen::OpArg &arg2); + + // Pass a register as a parameter. + void ABI_CallFunctionR(const void *func, X64Reg reg1); + void ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2); + + template + void ABI_CallFunctionC(Tr (*func)(T1), u32 param1) { + ABI_CallFunctionC((const void *)func, param1); + } + + // A function that doesn't have any control over what it will do to regs, + // such as the dispatcher, should be surrounded by these. + void ABI_PushAllCalleeSavedRegsAndAdjustStack(); + void ABI_PopAllCalleeSavedRegsAndAdjustStack(); + + // A function that doesn't know anything about it's surroundings, should + // be surrounded by these to establish a safe environment, where it can roam free. + // An example is a backpatch injected function. + void ABI_PushAllCallerSavedRegsAndAdjustStack(); + void ABI_PopAllCallerSavedRegsAndAdjustStack(); + + unsigned int ABI_GetAlignedFrameSize(unsigned int frameSize); + void ABI_AlignStack(unsigned int frameSize); + void ABI_RestoreStack(unsigned int frameSize); + + // Sets up a __cdecl function. + // Only x64 really needs the parameter count. + void ABI_EmitPrologue(int maxCallParams); + void ABI_EmitEpilogue(int maxCallParams); + + #ifdef _M_IX86 + inline int ABI_GetNumXMMRegs() { return 8; } + #else + inline int ABI_GetNumXMMRegs() { return 16; } + #endif +}; // class XEmitter + + +// Everything that needs to generate X86 code should inherit from this. +// You get memory management for free, plus, you can use all the MOV etc functions without +// having to prefix them with gen-> or something similar. + +class XCodeBlock : public CodeBlock { +public: + void PoisonMemory() override; +}; + +} // namespace diff --git a/src/common/x64_emitter.cpp b/src/common/x64_emitter.cpp deleted file mode 100644 index 19db2e484..000000000 --- a/src/common/x64_emitter.cpp +++ /dev/null @@ -1,1989 +0,0 @@ -// Copyright (C) 2003 Dolphin Project. - -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, version 2.0 or later versions. - -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License 2.0 for more details. - -// A copy of the GPL 2.0 should have been included with the program. -// If not, see http://www.gnu.org/licenses/ - -// Official SVN repository and contact information can be found at -// http://code.google.com/p/dolphin-emu/ - -#include - -#include "logging/log.h" - -#include "assert.h" -#include "x64_emitter.h" -#include "abi.h" -#include "cpu_detect.h" -#include "memory_util.h" - -#define PRIx64 "llx" - -// Minimize the diff against Dolphin -#define DYNA_REC JIT - -namespace Gen -{ - -struct NormalOpDef -{ - u8 toRm8, toRm32, fromRm8, fromRm32, imm8, imm32, simm8, eaximm8, eaximm32, ext; -}; - -// 0xCC is code for invalid combination of immediates -static const NormalOpDef normalops[11] = -{ - {0x00, 0x01, 0x02, 0x03, 0x80, 0x81, 0x83, 0x04, 0x05, 0}, //ADD - {0x10, 0x11, 0x12, 0x13, 0x80, 0x81, 0x83, 0x14, 0x15, 2}, //ADC - - {0x28, 0x29, 0x2A, 0x2B, 0x80, 0x81, 0x83, 0x2C, 0x2D, 5}, //SUB - {0x18, 0x19, 0x1A, 0x1B, 0x80, 0x81, 0x83, 0x1C, 0x1D, 3}, //SBB - - {0x20, 0x21, 0x22, 0x23, 0x80, 0x81, 0x83, 0x24, 0x25, 4}, //AND - {0x08, 0x09, 0x0A, 0x0B, 0x80, 0x81, 0x83, 0x0C, 0x0D, 1}, //OR - - {0x30, 0x31, 0x32, 0x33, 0x80, 0x81, 0x83, 0x34, 0x35, 6}, //XOR - {0x88, 0x89, 0x8A, 0x8B, 0xC6, 0xC7, 0xCC, 0xCC, 0xCC, 0}, //MOV - - {0x84, 0x85, 0x84, 0x85, 0xF6, 0xF7, 0xCC, 0xA8, 0xA9, 0}, //TEST (to == from) - {0x38, 0x39, 0x3A, 0x3B, 0x80, 0x81, 0x83, 0x3C, 0x3D, 7}, //CMP - - {0x86, 0x87, 0x86, 0x87, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 7}, //XCHG -}; - -enum NormalSSEOps -{ - sseCMP = 0xC2, - sseADD = 0x58, //ADD - sseSUB = 0x5C, //SUB - sseAND = 0x54, //AND - sseANDN = 0x55, //ANDN - sseOR = 0x56, - sseXOR = 0x57, - sseMUL = 0x59, //MUL - sseDIV = 0x5E, //DIV - sseMIN = 0x5D, //MIN - sseMAX = 0x5F, //MAX - sseCOMIS = 0x2F, //COMIS - sseUCOMIS = 0x2E, //UCOMIS - sseSQRT = 0x51, //SQRT - sseRSQRT = 0x52, //RSQRT (NO DOUBLE PRECISION!!!) - sseRCP = 0x53, //RCP - sseMOVAPfromRM = 0x28, //MOVAP from RM - sseMOVAPtoRM = 0x29, //MOVAP to RM - sseMOVUPfromRM = 0x10, //MOVUP from RM - sseMOVUPtoRM = 0x11, //MOVUP to RM - sseMOVLPfromRM= 0x12, - sseMOVLPtoRM = 0x13, - sseMOVHPfromRM= 0x16, - sseMOVHPtoRM = 0x17, - sseMOVHLPS = 0x12, - sseMOVLHPS = 0x16, - sseMOVDQfromRM = 0x6F, - sseMOVDQtoRM = 0x7F, - sseMASKMOVDQU = 0xF7, - sseLDDQU = 0xF0, - sseSHUF = 0xC6, - sseMOVNTDQ = 0xE7, - sseMOVNTP = 0x2B, - sseHADD = 0x7C, -}; - - -void XEmitter::SetCodePtr(u8 *ptr) -{ - code = ptr; -} - -const u8 *XEmitter::GetCodePtr() const -{ - return code; -} - -u8 *XEmitter::GetWritableCodePtr() -{ - return code; -} - -void XEmitter::ReserveCodeSpace(int bytes) -{ - for (int i = 0; i < bytes; i++) - *code++ = 0xCC; -} - -const u8 *XEmitter::AlignCode4() -{ - int c = int((u64)code & 3); - if (c) - ReserveCodeSpace(4-c); - return code; -} - -const u8 *XEmitter::AlignCode16() -{ - int c = int((u64)code & 15); - if (c) - ReserveCodeSpace(16-c); - return code; -} - -const u8 *XEmitter::AlignCodePage() -{ - int c = int((u64)code & 4095); - if (c) - ReserveCodeSpace(4096-c); - return code; -} - -// This operation modifies flags; check to see the flags are locked. -// If the flags are locked, we should immediately and loudly fail before -// causing a subtle JIT bug. -void XEmitter::CheckFlags() -{ - ASSERT_MSG(!flags_locked, "Attempt to modify flags while flags locked!"); -} - -void XEmitter::WriteModRM(int mod, int reg, int rm) -{ - Write8((u8)((mod << 6) | ((reg & 7) << 3) | (rm & 7))); -} - -void XEmitter::WriteSIB(int scale, int index, int base) -{ - Write8((u8)((scale << 6) | ((index & 7) << 3) | (base & 7))); -} - -void OpArg::WriteRex(XEmitter *emit, int opBits, int bits, int customOp) const -{ - if (customOp == -1) customOp = operandReg; -#ifdef _M_X86_64 - u8 op = 0x40; - // REX.W (whether operation is a 64-bit operation) - if (opBits == 64) op |= 8; - // REX.R (whether ModR/M reg field refers to R8-R15. - if (customOp & 8) op |= 4; - // REX.X (whether ModR/M SIB index field refers to R8-R15) - if (indexReg & 8) op |= 2; - // REX.B (whether ModR/M rm or SIB base or opcode reg field refers to R8-R15) - if (offsetOrBaseReg & 8) op |= 1; - // Write REX if wr have REX bits to write, or if the operation accesses - // SIL, DIL, BPL, or SPL. - if (op != 0x40 || - (scale == SCALE_NONE && bits == 8 && (offsetOrBaseReg & 0x10c) == 4) || - (opBits == 8 && (customOp & 0x10c) == 4)) - { - emit->Write8(op); - // Check the operation doesn't access AH, BH, CH, or DH. - DEBUG_ASSERT((offsetOrBaseReg & 0x100) == 0); - DEBUG_ASSERT((customOp & 0x100) == 0); - } -#else - DEBUG_ASSERT(opBits != 64); - DEBUG_ASSERT((customOp & 8) == 0 || customOp == -1); - DEBUG_ASSERT((indexReg & 8) == 0); - DEBUG_ASSERT((offsetOrBaseReg & 8) == 0); - DEBUG_ASSERT(opBits != 8 || (customOp & 0x10c) != 4 || customOp == -1); - DEBUG_ASSERT(scale == SCALE_ATREG || bits != 8 || (offsetOrBaseReg & 0x10c) != 4); -#endif -} - -void OpArg::WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm, int W) const -{ - int R = !(regOp1 & 8); - int X = !(indexReg & 8); - int B = !(offsetOrBaseReg & 8); - - int vvvv = (regOp2 == X64Reg::INVALID_REG) ? 0xf : (regOp2 ^ 0xf); - - // do we need any VEX fields that only appear in the three-byte form? - if (X == 1 && B == 1 && W == 0 && mmmmm == 1) - { - u8 RvvvvLpp = (R << 7) | (vvvv << 3) | (L << 1) | pp; - emit->Write8(0xC5); - emit->Write8(RvvvvLpp); - } - else - { - u8 RXBmmmmm = (R << 7) | (X << 6) | (B << 5) | mmmmm; - u8 WvvvvLpp = (W << 7) | (vvvv << 3) | (L << 1) | pp; - emit->Write8(0xC4); - emit->Write8(RXBmmmmm); - emit->Write8(WvvvvLpp); - } -} - -void OpArg::WriteRest(XEmitter *emit, int extraBytes, X64Reg _operandReg, - bool warn_64bit_offset) const -{ - if (_operandReg == INVALID_REG) - _operandReg = (X64Reg)this->operandReg; - int mod = 0; - int ireg = indexReg; - bool SIB = false; - int _offsetOrBaseReg = this->offsetOrBaseReg; - - if (scale == SCALE_RIP) //Also, on 32-bit, just an immediate address - { - // Oh, RIP addressing. - _offsetOrBaseReg = 5; - emit->WriteModRM(0, _operandReg, _offsetOrBaseReg); - //TODO : add some checks -#ifdef _M_X86_64 - u64 ripAddr = (u64)emit->GetCodePtr() + 4 + extraBytes; - s64 distance = (s64)offset - (s64)ripAddr; - ASSERT_MSG( - (distance < 0x80000000LL && - distance >= -0x80000000LL) || - !warn_64bit_offset, - "WriteRest: op out of range (0x%" PRIx64 " uses 0x%" PRIx64 ")", - ripAddr, offset); - s32 offs = (s32)distance; - emit->Write32((u32)offs); -#else - emit->Write32((u32)offset); -#endif - return; - } - - if (scale == 0) - { - // Oh, no memory, Just a reg. - mod = 3; //11 - } - else if (scale >= 1) - { - //Ah good, no scaling. - if (scale == SCALE_ATREG && !((_offsetOrBaseReg & 7) == 4 || (_offsetOrBaseReg & 7) == 5)) - { - //Okay, we're good. No SIB necessary. - int ioff = (int)offset; - if (ioff == 0) - { - mod = 0; - } - else if (ioff<-128 || ioff>127) - { - mod = 2; //32-bit displacement - } - else - { - mod = 1; //8-bit displacement - } - } - else if (scale >= SCALE_NOBASE_2 && scale <= SCALE_NOBASE_8) - { - SIB = true; - mod = 0; - _offsetOrBaseReg = 5; - } - else //if (scale != SCALE_ATREG) - { - if ((_offsetOrBaseReg & 7) == 4) //this would occupy the SIB encoding :( - { - //So we have to fake it with SIB encoding :( - SIB = true; - } - - if (scale >= SCALE_1 && scale < SCALE_ATREG) - { - SIB = true; - } - - if (scale == SCALE_ATREG && ((_offsetOrBaseReg & 7) == 4)) - { - SIB = true; - ireg = _offsetOrBaseReg; - } - - //Okay, we're fine. Just disp encoding. - //We need displacement. Which size? - int ioff = (int)(s64)offset; - if (ioff < -128 || ioff > 127) - { - mod = 2; //32-bit displacement - } - else - { - mod = 1; //8-bit displacement - } - } - } - - // Okay. Time to do the actual writing - // ModRM byte: - int oreg = _offsetOrBaseReg; - if (SIB) - oreg = 4; - - // TODO(ector): WTF is this if about? I don't remember writing it :-) - //if (RIP) - // oreg = 5; - - emit->WriteModRM(mod, _operandReg&7, oreg&7); - - if (SIB) - { - //SIB byte - int ss; - switch (scale) - { - case SCALE_NONE: _offsetOrBaseReg = 4; ss = 0; break; //RSP - case SCALE_1: ss = 0; break; - case SCALE_2: ss = 1; break; - case SCALE_4: ss = 2; break; - case SCALE_8: ss = 3; break; - case SCALE_NOBASE_2: ss = 1; break; - case SCALE_NOBASE_4: ss = 2; break; - case SCALE_NOBASE_8: ss = 3; break; - case SCALE_ATREG: ss = 0; break; - default: ASSERT_MSG(0, "Invalid scale for SIB byte"); ss = 0; break; - } - emit->Write8((u8)((ss << 6) | ((ireg&7)<<3) | (_offsetOrBaseReg&7))); - } - - if (mod == 1) //8-bit disp - { - emit->Write8((u8)(s8)(s32)offset); - } - else if (mod == 2 || (scale >= SCALE_NOBASE_2 && scale <= SCALE_NOBASE_8)) //32-bit disp - { - emit->Write32((u32)offset); - } -} - -// W = operand extended width (1 if 64-bit) -// R = register# upper bit -// X = scale amnt upper bit -// B = base register# upper bit -void XEmitter::Rex(int w, int r, int x, int b) -{ - w = w ? 1 : 0; - r = r ? 1 : 0; - x = x ? 1 : 0; - b = b ? 1 : 0; - u8 rx = (u8)(0x40 | (w << 3) | (r << 2) | (x << 1) | (b)); - if (rx != 0x40) - Write8(rx); -} - -void XEmitter::JMP(const u8 *addr, bool force5Bytes) -{ - u64 fn = (u64)addr; - if (!force5Bytes) - { - s64 distance = (s64)(fn - ((u64)code + 2)); - ASSERT_MSG(distance >= -0x80 && distance < 0x80, - "Jump target too far away, needs force5Bytes = true"); - //8 bits will do - Write8(0xEB); - Write8((u8)(s8)distance); - } - else - { - s64 distance = (s64)(fn - ((u64)code + 5)); - - ASSERT_MSG( - distance >= -0x80000000LL && distance < 0x80000000LL, - "Jump target too far away, needs indirect register"); - Write8(0xE9); - Write32((u32)(s32)distance); - } -} - -void XEmitter::JMPptr(const OpArg &arg2) -{ - OpArg arg = arg2; - if (arg.IsImm()) ASSERT_MSG(0, "JMPptr - Imm argument"); - arg.operandReg = 4; - arg.WriteRex(this, 0, 0); - Write8(0xFF); - arg.WriteRest(this); -} - -//Can be used to trap other processors, before overwriting their code -// not used in dolphin -void XEmitter::JMPself() -{ - Write8(0xEB); - Write8(0xFE); -} - -void XEmitter::CALLptr(OpArg arg) -{ - if (arg.IsImm()) ASSERT_MSG(0, "CALLptr - Imm argument"); - arg.operandReg = 2; - arg.WriteRex(this, 0, 0); - Write8(0xFF); - arg.WriteRest(this); -} - -void XEmitter::CALL(const void *fnptr) -{ - u64 distance = u64(fnptr) - (u64(code) + 5); - ASSERT_MSG( - distance < 0x0000000080000000ULL || - distance >= 0xFFFFFFFF80000000ULL, - "CALL out of range (%p calls %p)", code, fnptr); - Write8(0xE8); - Write32(u32(distance)); -} - -FixupBranch XEmitter::J(bool force5bytes) -{ - FixupBranch branch; - branch.type = force5bytes ? 1 : 0; - branch.ptr = code + (force5bytes ? 5 : 2); - if (!force5bytes) - { - //8 bits will do - Write8(0xEB); - Write8(0); - } - else - { - Write8(0xE9); - Write32(0); - } - return branch; -} - -FixupBranch XEmitter::J_CC(CCFlags conditionCode, bool force5bytes) -{ - FixupBranch branch; - branch.type = force5bytes ? 1 : 0; - branch.ptr = code + (force5bytes ? 6 : 2); - if (!force5bytes) - { - //8 bits will do - Write8(0x70 + conditionCode); - Write8(0); - } - else - { - Write8(0x0F); - Write8(0x80 + conditionCode); - Write32(0); - } - return branch; -} - -void XEmitter::J_CC(CCFlags conditionCode, const u8* addr, bool force5bytes) -{ - u64 fn = (u64)addr; - s64 distance = (s64)(fn - ((u64)code + 2)); - if (distance < -0x80 || distance >= 0x80 || force5bytes) - { - distance = (s64)(fn - ((u64)code + 6)); - ASSERT_MSG( - distance >= -0x80000000LL && distance < 0x80000000LL, - "Jump target too far away, needs indirect register"); - Write8(0x0F); - Write8(0x80 + conditionCode); - Write32((u32)(s32)distance); - } - else - { - Write8(0x70 + conditionCode); - Write8((u8)(s8)distance); - } -} - -void XEmitter::SetJumpTarget(const FixupBranch &branch) -{ - if (branch.type == 0) - { - s64 distance = (s64)(code - branch.ptr); - ASSERT_MSG(distance >= -0x80 && distance < 0x80, "Jump target too far away, needs force5Bytes = true"); - branch.ptr[-1] = (u8)(s8)distance; - } - else if (branch.type == 1) - { - s64 distance = (s64)(code - branch.ptr); - ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL, "Jump target too far away, needs indirect register"); - ((s32*)branch.ptr)[-1] = (s32)distance; - } -} - -// INC/DEC considered harmful on newer CPUs due to partial flag set. -// Use ADD, SUB instead. - -/* -void XEmitter::INC(int bits, OpArg arg) -{ - if (arg.IsImm()) ASSERT_MSG(0, "INC - Imm argument"); - arg.operandReg = 0; - if (bits == 16) {Write8(0x66);} - arg.WriteRex(this, bits, bits); - Write8(bits == 8 ? 0xFE : 0xFF); - arg.WriteRest(this); -} -void XEmitter::DEC(int bits, OpArg arg) -{ - if (arg.IsImm()) ASSERT_MSG(0, "DEC - Imm argument"); - arg.operandReg = 1; - if (bits == 16) {Write8(0x66);} - arg.WriteRex(this, bits, bits); - Write8(bits == 8 ? 0xFE : 0xFF); - arg.WriteRest(this); -} -*/ - -//Single byte opcodes -//There is no PUSHAD/POPAD in 64-bit mode. -void XEmitter::INT3() {Write8(0xCC);} -void XEmitter::RET() {Write8(0xC3);} -void XEmitter::RET_FAST() {Write8(0xF3); Write8(0xC3);} //two-byte return (rep ret) - recommended by AMD optimization manual for the case of jumping to a ret - -// The first sign of decadence: optimized NOPs. -void XEmitter::NOP(size_t size) -{ - DEBUG_ASSERT((int)size > 0); - while (true) - { - switch (size) - { - case 0: - return; - case 1: - Write8(0x90); - return; - case 2: - Write8(0x66); Write8(0x90); - return; - case 3: - Write8(0x0F); Write8(0x1F); Write8(0x00); - return; - case 4: - Write8(0x0F); Write8(0x1F); Write8(0x40); Write8(0x00); - return; - case 5: - Write8(0x0F); Write8(0x1F); Write8(0x44); Write8(0x00); - Write8(0x00); - return; - case 6: - Write8(0x66); Write8(0x0F); Write8(0x1F); Write8(0x44); - Write8(0x00); Write8(0x00); - return; - case 7: - Write8(0x0F); Write8(0x1F); Write8(0x80); Write8(0x00); - Write8(0x00); Write8(0x00); Write8(0x00); - return; - case 8: - Write8(0x0F); Write8(0x1F); Write8(0x84); Write8(0x00); - Write8(0x00); Write8(0x00); Write8(0x00); Write8(0x00); - return; - case 9: - Write8(0x66); Write8(0x0F); Write8(0x1F); Write8(0x84); - Write8(0x00); Write8(0x00); Write8(0x00); Write8(0x00); - Write8(0x00); - return; - case 10: - Write8(0x66); Write8(0x66); Write8(0x0F); Write8(0x1F); - Write8(0x84); Write8(0x00); Write8(0x00); Write8(0x00); - Write8(0x00); Write8(0x00); - return; - default: - // Even though x86 instructions are allowed to be up to 15 bytes long, - // AMD advises against using NOPs longer than 11 bytes because they - // carry a performance penalty on CPUs older than AMD family 16h. - Write8(0x66); Write8(0x66); Write8(0x66); Write8(0x0F); - Write8(0x1F); Write8(0x84); Write8(0x00); Write8(0x00); - Write8(0x00); Write8(0x00); Write8(0x00); - size -= 11; - continue; - } - } -} - -void XEmitter::PAUSE() {Write8(0xF3); NOP();} //use in tight spinloops for energy saving on some cpu -void XEmitter::CLC() {CheckFlags(); Write8(0xF8);} //clear carry -void XEmitter::CMC() {CheckFlags(); Write8(0xF5);} //flip carry -void XEmitter::STC() {CheckFlags(); Write8(0xF9);} //set carry - -//TODO: xchg ah, al ??? -void XEmitter::XCHG_AHAL() -{ - Write8(0x86); - Write8(0xe0); - // alt. 86 c4 -} - -//These two can not be executed on early Intel 64-bit CPU:s, only on AMD! -void XEmitter::LAHF() {Write8(0x9F);} -void XEmitter::SAHF() {CheckFlags(); Write8(0x9E);} - -void XEmitter::PUSHF() {Write8(0x9C);} -void XEmitter::POPF() {CheckFlags(); Write8(0x9D);} - -void XEmitter::LFENCE() {Write8(0x0F); Write8(0xAE); Write8(0xE8);} -void XEmitter::MFENCE() {Write8(0x0F); Write8(0xAE); Write8(0xF0);} -void XEmitter::SFENCE() {Write8(0x0F); Write8(0xAE); Write8(0xF8);} - -void XEmitter::WriteSimple1Byte(int bits, u8 byte, X64Reg reg) -{ - if (bits == 16) - Write8(0x66); - Rex(bits == 64, 0, 0, (int)reg >> 3); - Write8(byte + ((int)reg & 7)); -} - -void XEmitter::WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg) -{ - if (bits == 16) - Write8(0x66); - Rex(bits==64, 0, 0, (int)reg >> 3); - Write8(byte1); - Write8(byte2 + ((int)reg & 7)); -} - -void XEmitter::CWD(int bits) -{ - if (bits == 16) - Write8(0x66); - Rex(bits == 64, 0, 0, 0); - Write8(0x99); -} - -void XEmitter::CBW(int bits) -{ - if (bits == 8) - Write8(0x66); - Rex(bits == 32, 0, 0, 0); - Write8(0x98); -} - -//Simple opcodes - - -//push/pop do not need wide to be 64-bit -void XEmitter::PUSH(X64Reg reg) {WriteSimple1Byte(32, 0x50, reg);} -void XEmitter::POP(X64Reg reg) {WriteSimple1Byte(32, 0x58, reg);} - -void XEmitter::PUSH(int bits, const OpArg ®) -{ - if (reg.IsSimpleReg()) - PUSH(reg.GetSimpleReg()); - else if (reg.IsImm()) - { - switch (reg.GetImmBits()) - { - case 8: - Write8(0x6A); - Write8((u8)(s8)reg.offset); - break; - case 16: - Write8(0x66); - Write8(0x68); - Write16((u16)(s16)(s32)reg.offset); - break; - case 32: - Write8(0x68); - Write32((u32)reg.offset); - break; - default: - ASSERT_MSG(0, "PUSH - Bad imm bits"); - break; - } - } - else - { - if (bits == 16) - Write8(0x66); - reg.WriteRex(this, bits, bits); - Write8(0xFF); - reg.WriteRest(this, 0, (X64Reg)6); - } -} - -void XEmitter::POP(int /*bits*/, const OpArg ®) -{ - if (reg.IsSimpleReg()) - POP(reg.GetSimpleReg()); - else - ASSERT_MSG(0, "POP - Unsupported encoding"); -} - -void XEmitter::BSWAP(int bits, X64Reg reg) -{ - if (bits >= 32) - { - WriteSimple2Byte(bits, 0x0F, 0xC8, reg); - } - else if (bits == 16) - { - ROL(16, R(reg), Imm8(8)); - } - else if (bits == 8) - { - // Do nothing - can't bswap a single byte... - } - else - { - ASSERT_MSG(0, "BSWAP - Wrong number of bits"); - } -} - -// Undefined opcode - reserved -// If we ever need a way to always cause a non-breakpoint hard exception... -void XEmitter::UD2() -{ - Write8(0x0F); - Write8(0x0B); -} - -void XEmitter::PREFETCH(PrefetchLevel level, OpArg arg) -{ - ASSERT_MSG(!arg.IsImm(), "PREFETCH - Imm argument"); - arg.operandReg = (u8)level; - arg.WriteRex(this, 0, 0); - Write8(0x0F); - Write8(0x18); - arg.WriteRest(this); -} - -void XEmitter::SETcc(CCFlags flag, OpArg dest) -{ - ASSERT_MSG(!dest.IsImm(), "SETcc - Imm argument"); - dest.operandReg = 0; - dest.WriteRex(this, 0, 8); - Write8(0x0F); - Write8(0x90 + (u8)flag); - dest.WriteRest(this); -} - -void XEmitter::CMOVcc(int bits, X64Reg dest, OpArg src, CCFlags flag) -{ - ASSERT_MSG(!src.IsImm(), "CMOVcc - Imm argument"); - ASSERT_MSG(bits != 8, "CMOVcc - 8 bits unsupported"); - if (bits == 16) - Write8(0x66); - src.operandReg = dest; - src.WriteRex(this, bits, bits); - Write8(0x0F); - Write8(0x40 + (u8)flag); - src.WriteRest(this); -} - -void XEmitter::WriteMulDivType(int bits, OpArg src, int ext) -{ - ASSERT_MSG(!src.IsImm(), "WriteMulDivType - Imm argument"); - CheckFlags(); - src.operandReg = ext; - if (bits == 16) - Write8(0x66); - src.WriteRex(this, bits, bits, 0); - if (bits == 8) - { - Write8(0xF6); - } - else - { - Write8(0xF7); - } - src.WriteRest(this); -} - -void XEmitter::MUL(int bits, OpArg src) {WriteMulDivType(bits, src, 4);} -void XEmitter::DIV(int bits, OpArg src) {WriteMulDivType(bits, src, 6);} -void XEmitter::IMUL(int bits, OpArg src) {WriteMulDivType(bits, src, 5);} -void XEmitter::IDIV(int bits, OpArg src) {WriteMulDivType(bits, src, 7);} -void XEmitter::NEG(int bits, OpArg src) {WriteMulDivType(bits, src, 3);} -void XEmitter::NOT(int bits, OpArg src) {WriteMulDivType(bits, src, 2);} - -void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep) -{ - ASSERT_MSG(!src.IsImm(), "WriteBitSearchType - Imm argument"); - CheckFlags(); - src.operandReg = (u8)dest; - if (bits == 16) - Write8(0x66); - if (rep) - Write8(0xF3); - src.WriteRex(this, bits, bits); - Write8(0x0F); - Write8(byte2); - src.WriteRest(this); -} - -void XEmitter::MOVNTI(int bits, OpArg dest, X64Reg src) -{ - if (bits <= 16) - ASSERT_MSG(0, "MOVNTI - bits<=16"); - WriteBitSearchType(bits, src, dest, 0xC3); -} - -void XEmitter::BSF(int bits, X64Reg dest, OpArg src) {WriteBitSearchType(bits,dest,src,0xBC);} //bottom bit to top bit -void XEmitter::BSR(int bits, X64Reg dest, OpArg src) {WriteBitSearchType(bits,dest,src,0xBD);} //top bit to bottom bit - -void XEmitter::TZCNT(int bits, X64Reg dest, OpArg src) -{ - CheckFlags(); - if (!Common::cpu_info.bBMI1) - ASSERT_MSG(0, "Trying to use BMI1 on a system that doesn't support it. Bad programmer."); - WriteBitSearchType(bits, dest, src, 0xBC, true); -} -void XEmitter::LZCNT(int bits, X64Reg dest, OpArg src) -{ - CheckFlags(); - if (!Common::cpu_info.bLZCNT) - ASSERT_MSG(0, "Trying to use LZCNT on a system that doesn't support it. Bad programmer."); - WriteBitSearchType(bits, dest, src, 0xBD, true); -} - -void XEmitter::MOVSX(int dbits, int sbits, X64Reg dest, OpArg src) -{ - ASSERT_MSG(!src.IsImm(), "MOVSX - Imm argument"); - if (dbits == sbits) - { - MOV(dbits, R(dest), src); - return; - } - src.operandReg = (u8)dest; - if (dbits == 16) - Write8(0x66); - src.WriteRex(this, dbits, sbits); - if (sbits == 8) - { - Write8(0x0F); - Write8(0xBE); - } - else if (sbits == 16) - { - Write8(0x0F); - Write8(0xBF); - } - else if (sbits == 32 && dbits == 64) - { - Write8(0x63); - } - else - { - Crash(); - } - src.WriteRest(this); -} - -void XEmitter::MOVZX(int dbits, int sbits, X64Reg dest, OpArg src) -{ - ASSERT_MSG(!src.IsImm(), "MOVZX - Imm argument"); - if (dbits == sbits) - { - MOV(dbits, R(dest), src); - return; - } - src.operandReg = (u8)dest; - if (dbits == 16) - Write8(0x66); - //the 32bit result is automatically zero extended to 64bit - src.WriteRex(this, dbits == 64 ? 32 : dbits, sbits); - if (sbits == 8) - { - Write8(0x0F); - Write8(0xB6); - } - else if (sbits == 16) - { - Write8(0x0F); - Write8(0xB7); - } - else if (sbits == 32 && dbits == 64) - { - Write8(0x8B); - } - else - { - ASSERT_MSG(0, "MOVZX - Invalid size"); - } - src.WriteRest(this); -} - -void XEmitter::MOVBE(int bits, const OpArg& dest, const OpArg& src) -{ - ASSERT_MSG(Common::cpu_info.bMOVBE, "Generating MOVBE on a system that does not support it."); - if (bits == 8) - { - MOV(bits, dest, src); - return; - } - - if (bits == 16) - Write8(0x66); - - if (dest.IsSimpleReg()) - { - ASSERT_MSG(!src.IsSimpleReg() && !src.IsImm(), "MOVBE: Loading from !mem"); - src.WriteRex(this, bits, bits, dest.GetSimpleReg()); - Write8(0x0F); Write8(0x38); Write8(0xF0); - src.WriteRest(this, 0, dest.GetSimpleReg()); - } - else if (src.IsSimpleReg()) - { - ASSERT_MSG(!dest.IsSimpleReg() && !dest.IsImm(), "MOVBE: Storing to !mem"); - dest.WriteRex(this, bits, bits, src.GetSimpleReg()); - Write8(0x0F); Write8(0x38); Write8(0xF1); - dest.WriteRest(this, 0, src.GetSimpleReg()); - } - else - { - ASSERT_MSG(0, "MOVBE: Not loading or storing to mem"); - } -} - - -void XEmitter::LEA(int bits, X64Reg dest, OpArg src) -{ - ASSERT_MSG(!src.IsImm(), "LEA - Imm argument"); - src.operandReg = (u8)dest; - if (bits == 16) - Write8(0x66); //TODO: performance warning - src.WriteRex(this, bits, bits); - Write8(0x8D); - src.WriteRest(this, 0, INVALID_REG, bits == 64); -} - -//shift can be either imm8 or cl -void XEmitter::WriteShift(int bits, OpArg dest, OpArg &shift, int ext) -{ - CheckFlags(); - bool writeImm = false; - if (dest.IsImm()) - { - ASSERT_MSG(0, "WriteShift - can't shift imms"); - } - if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) || (shift.IsImm() && shift.GetImmBits() != 8)) - { - ASSERT_MSG(0, "WriteShift - illegal argument"); - } - dest.operandReg = ext; - if (bits == 16) - Write8(0x66); - dest.WriteRex(this, bits, bits, 0); - if (shift.GetImmBits() == 8) - { - //ok an imm - u8 imm = (u8)shift.offset; - if (imm == 1) - { - Write8(bits == 8 ? 0xD0 : 0xD1); - } - else - { - writeImm = true; - Write8(bits == 8 ? 0xC0 : 0xC1); - } - } - else - { - Write8(bits == 8 ? 0xD2 : 0xD3); - } - dest.WriteRest(this, writeImm ? 1 : 0); - if (writeImm) - Write8((u8)shift.offset); -} - -// large rotates and shift are slower on intel than amd -// intel likes to rotate by 1, and the op is smaller too -void XEmitter::ROL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 0);} -void XEmitter::ROR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 1);} -void XEmitter::RCL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 2);} -void XEmitter::RCR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 3);} -void XEmitter::SHL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 4);} -void XEmitter::SHR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 5);} -void XEmitter::SAR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 7);} - -// index can be either imm8 or register, don't use memory destination because it's slow -void XEmitter::WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext) -{ - CheckFlags(); - if (dest.IsImm()) - { - ASSERT_MSG(0, "WriteBitTest - can't test imms"); - } - if ((index.IsImm() && index.GetImmBits() != 8)) - { - ASSERT_MSG(0, "WriteBitTest - illegal argument"); - } - if (bits == 16) - Write8(0x66); - if (index.IsImm()) - { - dest.WriteRex(this, bits, bits); - Write8(0x0F); Write8(0xBA); - dest.WriteRest(this, 1, (X64Reg)ext); - Write8((u8)index.offset); - } - else - { - X64Reg operand = index.GetSimpleReg(); - dest.WriteRex(this, bits, bits, operand); - Write8(0x0F); Write8(0x83 + 8*ext); - dest.WriteRest(this, 1, operand); - } -} - -void XEmitter::BT(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 4);} -void XEmitter::BTS(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 5);} -void XEmitter::BTR(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 6);} -void XEmitter::BTC(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 7);} - -//shift can be either imm8 or cl -void XEmitter::SHRD(int bits, OpArg dest, OpArg src, OpArg shift) -{ - CheckFlags(); - if (dest.IsImm()) - { - ASSERT_MSG(0, "SHRD - can't use imms as destination"); - } - if (!src.IsSimpleReg()) - { - ASSERT_MSG(0, "SHRD - must use simple register as source"); - } - if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) || (shift.IsImm() && shift.GetImmBits() != 8)) - { - ASSERT_MSG(0, "SHRD - illegal shift"); - } - if (bits == 16) - Write8(0x66); - X64Reg operand = src.GetSimpleReg(); - dest.WriteRex(this, bits, bits, operand); - if (shift.GetImmBits() == 8) - { - Write8(0x0F); Write8(0xAC); - dest.WriteRest(this, 1, operand); - Write8((u8)shift.offset); - } - else - { - Write8(0x0F); Write8(0xAD); - dest.WriteRest(this, 0, operand); - } -} - -void XEmitter::SHLD(int bits, OpArg dest, OpArg src, OpArg shift) -{ - CheckFlags(); - if (dest.IsImm()) - { - ASSERT_MSG(0, "SHLD - can't use imms as destination"); - } - if (!src.IsSimpleReg()) - { - ASSERT_MSG(0, "SHLD - must use simple register as source"); - } - if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) || (shift.IsImm() && shift.GetImmBits() != 8)) - { - ASSERT_MSG(0, "SHLD - illegal shift"); - } - if (bits == 16) - Write8(0x66); - X64Reg operand = src.GetSimpleReg(); - dest.WriteRex(this, bits, bits, operand); - if (shift.GetImmBits() == 8) - { - Write8(0x0F); Write8(0xA4); - dest.WriteRest(this, 1, operand); - Write8((u8)shift.offset); - } - else - { - Write8(0x0F); Write8(0xA5); - dest.WriteRest(this, 0, operand); - } -} - -void OpArg::WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg _operandReg, int bits) -{ - if (bits == 16) - emit->Write8(0x66); - - this->operandReg = (u8)_operandReg; - WriteRex(emit, bits, bits); - emit->Write8(op); - WriteRest(emit); -} - -//operand can either be immediate or register -void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg &operand, int bits) const -{ - X64Reg _operandReg; - if (IsImm()) - { - ASSERT_MSG(0, "WriteNormalOp - Imm argument, wrong order"); - } - - if (bits == 16) - emit->Write8(0x66); - - int immToWrite = 0; - - if (operand.IsImm()) - { - WriteRex(emit, bits, bits); - - if (!toRM) - { - ASSERT_MSG(0, "WriteNormalOp - Writing to Imm (!toRM)"); - } - - if (operand.scale == SCALE_IMM8 && bits == 8) - { - // op al, imm8 - if (!scale && offsetOrBaseReg == AL && normalops[op].eaximm8 != 0xCC) - { - emit->Write8(normalops[op].eaximm8); - emit->Write8((u8)operand.offset); - return; - } - // mov reg, imm8 - if (!scale && op == nrmMOV) - { - emit->Write8(0xB0 + (offsetOrBaseReg & 7)); - emit->Write8((u8)operand.offset); - return; - } - // op r/m8, imm8 - emit->Write8(normalops[op].imm8); - immToWrite = 8; - } - else if ((operand.scale == SCALE_IMM16 && bits == 16) || - (operand.scale == SCALE_IMM32 && bits == 32) || - (operand.scale == SCALE_IMM32 && bits == 64)) - { - // Try to save immediate size if we can, but first check to see - // if the instruction supports simm8. - // op r/m, imm8 - if (normalops[op].simm8 != 0xCC && - ((operand.scale == SCALE_IMM16 && (s16)operand.offset == (s8)operand.offset) || - (operand.scale == SCALE_IMM32 && (s32)operand.offset == (s8)operand.offset))) - { - emit->Write8(normalops[op].simm8); - immToWrite = 8; - } - else - { - // mov reg, imm - if (!scale && op == nrmMOV && bits != 64) - { - emit->Write8(0xB8 + (offsetOrBaseReg & 7)); - if (bits == 16) - emit->Write16((u16)operand.offset); - else - emit->Write32((u32)operand.offset); - return; - } - // op eax, imm - if (!scale && offsetOrBaseReg == EAX && normalops[op].eaximm32 != 0xCC) - { - emit->Write8(normalops[op].eaximm32); - if (bits == 16) - emit->Write16((u16)operand.offset); - else - emit->Write32((u32)operand.offset); - return; - } - // op r/m, imm - emit->Write8(normalops[op].imm32); - immToWrite = bits == 16 ? 16 : 32; - } - } - else if ((operand.scale == SCALE_IMM8 && bits == 16) || - (operand.scale == SCALE_IMM8 && bits == 32) || - (operand.scale == SCALE_IMM8 && bits == 64)) - { - // op r/m, imm8 - emit->Write8(normalops[op].simm8); - immToWrite = 8; - } - else if (operand.scale == SCALE_IMM64 && bits == 64) - { - if (scale) - { - ASSERT_MSG(0, "WriteNormalOp - MOV with 64-bit imm requres register destination"); - } - // mov reg64, imm64 - else if (op == nrmMOV) - { - emit->Write8(0xB8 + (offsetOrBaseReg & 7)); - emit->Write64((u64)operand.offset); - return; - } - ASSERT_MSG(0, "WriteNormalOp - Only MOV can take 64-bit imm"); - } - else - { - ASSERT_MSG(0, "WriteNormalOp - Unhandled case"); - } - _operandReg = (X64Reg)normalops[op].ext; //pass extension in REG of ModRM - } - else - { - _operandReg = (X64Reg)operand.offsetOrBaseReg; - WriteRex(emit, bits, bits, _operandReg); - // op r/m, reg - if (toRM) - { - emit->Write8(bits == 8 ? normalops[op].toRm8 : normalops[op].toRm32); - } - // op reg, r/m - else - { - emit->Write8(bits == 8 ? normalops[op].fromRm8 : normalops[op].fromRm32); - } - } - WriteRest(emit, immToWrite >> 3, _operandReg); - switch (immToWrite) - { - case 0: - break; - case 8: - emit->Write8((u8)operand.offset); - break; - case 16: - emit->Write16((u16)operand.offset); - break; - case 32: - emit->Write32((u32)operand.offset); - break; - default: - ASSERT_MSG(0, "WriteNormalOp - Unhandled case"); - } -} - -void XEmitter::WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2) -{ - if (a1.IsImm()) - { - //Booh! Can't write to an imm - ASSERT_MSG(0, "WriteNormalOp - a1 cannot be imm"); - return; - } - if (a2.IsImm()) - { - a1.WriteNormalOp(emit, true, op, a2, bits); - } - else - { - if (a1.IsSimpleReg()) - { - a2.WriteNormalOp(emit, false, op, a1, bits); - } - else - { - ASSERT_MSG(a2.IsSimpleReg() || a2.IsImm(), "WriteNormalOp - a1 and a2 cannot both be memory"); - a1.WriteNormalOp(emit, true, op, a2, bits); - } - } -} - -void XEmitter::ADD (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADD, a1, a2);} -void XEmitter::ADC (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADC, a1, a2);} -void XEmitter::SUB (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSUB, a1, a2);} -void XEmitter::SBB (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSBB, a1, a2);} -void XEmitter::AND (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmAND, a1, a2);} -void XEmitter::OR (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmOR , a1, a2);} -void XEmitter::XOR (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmXOR, a1, a2);} -void XEmitter::MOV (int bits, const OpArg &a1, const OpArg &a2) -{ - if (a1.IsSimpleReg() && a2.IsSimpleReg() && a1.GetSimpleReg() == a2.GetSimpleReg()) - LOG_ERROR(Common, "Redundant MOV @ %p - bug in JIT?", code); - WriteNormalOp(this, bits, nrmMOV, a1, a2); -} -void XEmitter::TEST(int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmTEST, a1, a2);} -void XEmitter::CMP (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmCMP, a1, a2);} -void XEmitter::XCHG(int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(this, bits, nrmXCHG, a1, a2);} - -void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a1, OpArg a2) -{ - CheckFlags(); - if (bits == 8) - { - ASSERT_MSG(0, "IMUL - illegal bit size!"); - return; - } - - if (a1.IsImm()) - { - ASSERT_MSG(0, "IMUL - second arg cannot be imm!"); - return; - } - - if (!a2.IsImm()) - { - ASSERT_MSG(0, "IMUL - third arg must be imm!"); - return; - } - - if (bits == 16) - Write8(0x66); - a1.WriteRex(this, bits, bits, regOp); - - if (a2.GetImmBits() == 8 || - (a2.GetImmBits() == 16 && (s8)a2.offset == (s16)a2.offset) || - (a2.GetImmBits() == 32 && (s8)a2.offset == (s32)a2.offset)) - { - Write8(0x6B); - a1.WriteRest(this, 1, regOp); - Write8((u8)a2.offset); - } - else - { - Write8(0x69); - if (a2.GetImmBits() == 16 && bits == 16) - { - a1.WriteRest(this, 2, regOp); - Write16((u16)a2.offset); - } - else if (a2.GetImmBits() == 32 && (bits == 32 || bits == 64)) - { - a1.WriteRest(this, 4, regOp); - Write32((u32)a2.offset); - } - else - { - ASSERT_MSG(0, "IMUL - unhandled case!"); - } - } -} - -void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a) -{ - CheckFlags(); - if (bits == 8) - { - ASSERT_MSG(0, "IMUL - illegal bit size!"); - return; - } - - if (a.IsImm()) - { - IMUL(bits, regOp, R(regOp), a) ; - return; - } - - if (bits == 16) - Write8(0x66); - a.WriteRex(this, bits, bits, regOp); - Write8(0x0F); - Write8(0xAF); - a.WriteRest(this, 0, regOp); -} - - -void XEmitter::WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes) -{ - if (opPrefix) - Write8(opPrefix); - arg.operandReg = regOp; - arg.WriteRex(this, 0, 0); - Write8(0x0F); - if (op > 0xFF) - Write8((op >> 8) & 0xFF); - Write8(op & 0xFF); - arg.WriteRest(this, extrabytes); -} - -void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes) -{ - WriteAVXOp(opPrefix, op, regOp, INVALID_REG, arg, extrabytes); -} - -static int GetVEXmmmmm(u16 op) -{ - // Currently, only 0x38 and 0x3A are used as secondary escape byte. - if ((op >> 8) == 0x3A) - return 3; - else if ((op >> 8) == 0x38) - return 2; - else - return 1; -} - -static int GetVEXpp(u8 opPrefix) -{ - if (opPrefix == 0x66) - return 1; - else if (opPrefix == 0xF3) - return 2; - else if (opPrefix == 0xF2) - return 3; - else - return 0; -} - -void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes) -{ - if (!Common::cpu_info.bAVX) - ASSERT_MSG(0, "Trying to use AVX on a system that doesn't support it. Bad programmer."); - int mmmmm = GetVEXmmmmm(op); - int pp = GetVEXpp(opPrefix); - // FIXME: we currently don't support 256-bit instructions, and "size" is not the vector size here - arg.WriteVex(this, regOp1, regOp2, 0, pp, mmmmm); - Write8(op & 0xFF); - arg.WriteRest(this, extrabytes, regOp1); -} - -// Like the above, but more general; covers GPR-based VEX operations, like BMI1/2 -void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes) -{ - if (size != 32 && size != 64) - ASSERT_MSG(0, "VEX GPR instructions only support 32-bit and 64-bit modes!"); - int mmmmm = GetVEXmmmmm(op); - int pp = GetVEXpp(opPrefix); - arg.WriteVex(this, regOp1, regOp2, 0, pp, mmmmm, size == 64); - Write8(op & 0xFF); - arg.WriteRest(this, extrabytes, regOp1); -} - -void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes) -{ - CheckFlags(); - if (!Common::cpu_info.bBMI1) - ASSERT_MSG(0, "Trying to use BMI1 on a system that doesn't support it. Bad programmer."); - WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes); -} - -void XEmitter::WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes) -{ - CheckFlags(); - if (!Common::cpu_info.bBMI2) - ASSERT_MSG(0, "Trying to use BMI2 on a system that doesn't support it. Bad programmer."); - WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes); -} - -void XEmitter::MOVD_xmm(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x6E, dest, arg, 0);} -void XEmitter::MOVD_xmm(const OpArg &arg, X64Reg src) {WriteSSEOp(0x66, 0x7E, src, arg, 0);} - -void XEmitter::MOVQ_xmm(X64Reg dest, OpArg arg) -{ -#ifdef _M_X86_64 - // Alternate encoding - // This does not display correctly in MSVC's debugger, it thinks it's a MOVD - arg.operandReg = dest; - Write8(0x66); - arg.WriteRex(this, 64, 0); - Write8(0x0f); - Write8(0x6E); - arg.WriteRest(this, 0); -#else - arg.operandReg = dest; - Write8(0xF3); - Write8(0x0f); - Write8(0x7E); - arg.WriteRest(this, 0); -#endif -} - -void XEmitter::MOVQ_xmm(OpArg arg, X64Reg src) -{ - if (src > 7 || arg.IsSimpleReg()) - { - // Alternate encoding - // This does not display correctly in MSVC's debugger, it thinks it's a MOVD - arg.operandReg = src; - Write8(0x66); - arg.WriteRex(this, 64, 0); - Write8(0x0f); - Write8(0x7E); - arg.WriteRest(this, 0); - } - else - { - arg.operandReg = src; - arg.WriteRex(this, 0, 0); - Write8(0x66); - Write8(0x0f); - Write8(0xD6); - arg.WriteRest(this, 0); - } -} - -void XEmitter::WriteMXCSR(OpArg arg, int ext) -{ - if (arg.IsImm() || arg.IsSimpleReg()) - ASSERT_MSG(0, "MXCSR - invalid operand"); - - arg.operandReg = ext; - arg.WriteRex(this, 0, 0); - Write8(0x0F); - Write8(0xAE); - arg.WriteRest(this); -} - -void XEmitter::STMXCSR(OpArg memloc) {WriteMXCSR(memloc, 3);} -void XEmitter::LDMXCSR(OpArg memloc) {WriteMXCSR(memloc, 2);} - -void XEmitter::MOVNTDQ(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTDQ, regOp, arg);} -void XEmitter::MOVNTPS(OpArg arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVNTP, regOp, arg);} -void XEmitter::MOVNTPD(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTP, regOp, arg);} - -void XEmitter::ADDSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseADD, regOp, arg);} -void XEmitter::ADDSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseADD, regOp, arg);} -void XEmitter::SUBSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseSUB, regOp, arg);} -void XEmitter::SUBSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseSUB, regOp, arg);} -void XEmitter::CMPSS(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0xF3, sseCMP, regOp, arg, 1); Write8(compare);} -void XEmitter::CMPSD(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0xF2, sseCMP, regOp, arg, 1); Write8(compare);} -void XEmitter::MULSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMUL, regOp, arg);} -void XEmitter::MULSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMUL, regOp, arg);} -void XEmitter::DIVSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseDIV, regOp, arg);} -void XEmitter::DIVSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseDIV, regOp, arg);} -void XEmitter::MINSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMIN, regOp, arg);} -void XEmitter::MINSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMIN, regOp, arg);} -void XEmitter::MAXSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMAX, regOp, arg);} -void XEmitter::MAXSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMAX, regOp, arg);} -void XEmitter::SQRTSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseSQRT, regOp, arg);} -void XEmitter::SQRTSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseSQRT, regOp, arg);} -void XEmitter::RSQRTSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseRSQRT, regOp, arg);} - -void XEmitter::ADDPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseADD, regOp, arg);} -void XEmitter::ADDPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseADD, regOp, arg);} -void XEmitter::SUBPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseSUB, regOp, arg);} -void XEmitter::SUBPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseSUB, regOp, arg);} -void XEmitter::CMPPS(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0x00, sseCMP, regOp, arg, 1); Write8(compare);} -void XEmitter::CMPPD(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0x66, sseCMP, regOp, arg, 1); Write8(compare);} -void XEmitter::ANDPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseAND, regOp, arg);} -void XEmitter::ANDPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseAND, regOp, arg);} -void XEmitter::ANDNPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseANDN, regOp, arg);} -void XEmitter::ANDNPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseANDN, regOp, arg);} -void XEmitter::ORPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseOR, regOp, arg);} -void XEmitter::ORPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseOR, regOp, arg);} -void XEmitter::XORPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseXOR, regOp, arg);} -void XEmitter::XORPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseXOR, regOp, arg);} -void XEmitter::MULPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMUL, regOp, arg);} -void XEmitter::MULPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMUL, regOp, arg);} -void XEmitter::DIVPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseDIV, regOp, arg);} -void XEmitter::DIVPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseDIV, regOp, arg);} -void XEmitter::MINPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMIN, regOp, arg);} -void XEmitter::MINPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMIN, regOp, arg);} -void XEmitter::MAXPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMAX, regOp, arg);} -void XEmitter::MAXPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMAX, regOp, arg);} -void XEmitter::SQRTPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseSQRT, regOp, arg);} -void XEmitter::SQRTPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseSQRT, regOp, arg);} -void XEmitter::RCPPS(X64Reg regOp, OpArg arg) { WriteSSEOp(0x00, sseRCP, regOp, arg); } -void XEmitter::RSQRTPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseRSQRT, regOp, arg);} -void XEmitter::SHUFPS(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0x00, sseSHUF, regOp, arg,1); Write8(shuffle);} -void XEmitter::SHUFPD(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0x66, sseSHUF, regOp, arg,1); Write8(shuffle);} - -void XEmitter::HADDPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseHADD, regOp, arg);} - -void XEmitter::COMISS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseCOMIS, regOp, arg);} //weird that these should be packed -void XEmitter::COMISD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseCOMIS, regOp, arg);} //ordered -void XEmitter::UCOMISS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseUCOMIS, regOp, arg);} //unordered -void XEmitter::UCOMISD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseUCOMIS, regOp, arg);} - -void XEmitter::MOVAPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMOVAPfromRM, regOp, arg);} -void XEmitter::MOVAPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMOVAPfromRM, regOp, arg);} -void XEmitter::MOVAPS(OpArg arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVAPtoRM, regOp, arg);} -void XEmitter::MOVAPD(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVAPtoRM, regOp, arg);} - -void XEmitter::MOVUPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMOVUPfromRM, regOp, arg);} -void XEmitter::MOVUPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMOVUPfromRM, regOp, arg);} -void XEmitter::MOVUPS(OpArg arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVUPtoRM, regOp, arg);} -void XEmitter::MOVUPD(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVUPtoRM, regOp, arg);} - -void XEmitter::MOVDQA(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMOVDQfromRM, regOp, arg);} -void XEmitter::MOVDQA(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVDQtoRM, regOp, arg);} -void XEmitter::MOVDQU(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMOVDQfromRM, regOp, arg);} -void XEmitter::MOVDQU(OpArg arg, X64Reg regOp) {WriteSSEOp(0xF3, sseMOVDQtoRM, regOp, arg);} - -void XEmitter::MOVSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMOVUPfromRM, regOp, arg);} -void XEmitter::MOVSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMOVUPfromRM, regOp, arg);} -void XEmitter::MOVSS(OpArg arg, X64Reg regOp) {WriteSSEOp(0xF3, sseMOVUPtoRM, regOp, arg);} -void XEmitter::MOVSD(OpArg arg, X64Reg regOp) {WriteSSEOp(0xF2, sseMOVUPtoRM, regOp, arg);} - -void XEmitter::MOVLPS(X64Reg regOp, OpArg arg) { WriteSSEOp(0x00, sseMOVLPfromRM, regOp, arg); } -void XEmitter::MOVLPD(X64Reg regOp, OpArg arg) { WriteSSEOp(0x66, sseMOVLPfromRM, regOp, arg); } -void XEmitter::MOVLPS(OpArg arg, X64Reg regOp) { WriteSSEOp(0x00, sseMOVLPtoRM, regOp, arg); } -void XEmitter::MOVLPD(OpArg arg, X64Reg regOp) { WriteSSEOp(0x66, sseMOVLPtoRM, regOp, arg); } - -void XEmitter::MOVHPS(X64Reg regOp, OpArg arg) { WriteSSEOp(0x00, sseMOVHPfromRM, regOp, arg); } -void XEmitter::MOVHPD(X64Reg regOp, OpArg arg) { WriteSSEOp(0x66, sseMOVHPfromRM, regOp, arg); } -void XEmitter::MOVHPS(OpArg arg, X64Reg regOp) { WriteSSEOp(0x00, sseMOVHPtoRM, regOp, arg); } -void XEmitter::MOVHPD(OpArg arg, X64Reg regOp) { WriteSSEOp(0x66, sseMOVHPtoRM, regOp, arg); } - -void XEmitter::MOVHLPS(X64Reg regOp1, X64Reg regOp2) {WriteSSEOp(0x00, sseMOVHLPS, regOp1, R(regOp2));} -void XEmitter::MOVLHPS(X64Reg regOp1, X64Reg regOp2) {WriteSSEOp(0x00, sseMOVLHPS, regOp1, R(regOp2));} - -void XEmitter::CVTPS2PD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, 0x5A, regOp, arg);} -void XEmitter::CVTPD2PS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0x5A, regOp, arg);} - -void XEmitter::CVTSD2SS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x5A, regOp, arg);} -void XEmitter::CVTSS2SD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x5A, regOp, arg);} -void XEmitter::CVTSD2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2D, regOp, arg);} -void XEmitter::CVTSS2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2D, regOp, arg);} -void XEmitter::CVTSI2SD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2A, regOp, arg);} -void XEmitter::CVTSI2SS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2A, regOp, arg);} - -void XEmitter::CVTDQ2PD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0xE6, regOp, arg);} -void XEmitter::CVTDQ2PS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, 0x5B, regOp, arg);} -void XEmitter::CVTPD2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0xE6, regOp, arg);} -void XEmitter::CVTPS2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0x5B, regOp, arg);} - -void XEmitter::CVTTSD2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2C, regOp, arg);} -void XEmitter::CVTTSS2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2C, regOp, arg);} -void XEmitter::CVTTPS2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x5B, regOp, arg);} -void XEmitter::CVTTPD2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0xE6, regOp, arg);} - -void XEmitter::MASKMOVDQU(X64Reg dest, X64Reg src) {WriteSSEOp(0x66, sseMASKMOVDQU, dest, R(src));} - -void XEmitter::MOVMSKPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x50, dest, arg);} -void XEmitter::MOVMSKPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x50, dest, arg);} - -void XEmitter::LDDQU(X64Reg dest, OpArg arg) {WriteSSEOp(0xF2, sseLDDQU, dest, arg);} // For integer data only - -// THESE TWO ARE UNTESTED. -void XEmitter::UNPCKLPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x14, dest, arg);} -void XEmitter::UNPCKHPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x15, dest, arg);} - -void XEmitter::UNPCKLPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x14, dest, arg);} -void XEmitter::UNPCKHPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x15, dest, arg);} - -void XEmitter::MOVDDUP(X64Reg regOp, OpArg arg) -{ - if (Common::cpu_info.bSSE3) - { - WriteSSEOp(0xF2, 0x12, regOp, arg); //SSE3 movddup - } - else - { - // Simulate this instruction with SSE2 instructions - if (!arg.IsSimpleReg(regOp)) - MOVSD(regOp, arg); - UNPCKLPD(regOp, R(regOp)); - } -} - -//There are a few more left - -// Also some integer instructions are missing -void XEmitter::PACKSSDW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x6B, dest, arg);} -void XEmitter::PACKSSWB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x63, dest, arg);} -void XEmitter::PACKUSWB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x67, dest, arg);} - -void XEmitter::PUNPCKLBW(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x60, dest, arg);} -void XEmitter::PUNPCKLWD(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x61, dest, arg);} -void XEmitter::PUNPCKLDQ(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x62, dest, arg);} -void XEmitter::PUNPCKLQDQ(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x6C, dest, arg);} - -void XEmitter::PSRLW(X64Reg reg, int shift) -{ - WriteSSEOp(0x66, 0x71, (X64Reg)2, R(reg)); - Write8(shift); -} - -void XEmitter::PSRLD(X64Reg reg, int shift) -{ - WriteSSEOp(0x66, 0x72, (X64Reg)2, R(reg)); - Write8(shift); -} - -void XEmitter::PSRLQ(X64Reg reg, int shift) -{ - WriteSSEOp(0x66, 0x73, (X64Reg)2, R(reg)); - Write8(shift); -} - -void XEmitter::PSRLQ(X64Reg reg, OpArg arg) -{ - WriteSSEOp(0x66, 0xd3, reg, arg); -} - -void XEmitter::PSRLDQ(X64Reg reg, int shift) { - WriteSSEOp(0x66, 0x73, (X64Reg)3, R(reg)); - Write8(shift); -} - -void XEmitter::PSLLW(X64Reg reg, int shift) -{ - WriteSSEOp(0x66, 0x71, (X64Reg)6, R(reg)); - Write8(shift); -} - -void XEmitter::PSLLD(X64Reg reg, int shift) -{ - WriteSSEOp(0x66, 0x72, (X64Reg)6, R(reg)); - Write8(shift); -} - -void XEmitter::PSLLQ(X64Reg reg, int shift) -{ - WriteSSEOp(0x66, 0x73, (X64Reg)6, R(reg)); - Write8(shift); -} - -void XEmitter::PSLLDQ(X64Reg reg, int shift) { - WriteSSEOp(0x66, 0x73, (X64Reg)7, R(reg)); - Write8(shift); -} - -void XEmitter::PSRAW(X64Reg reg, int shift) -{ - WriteSSEOp(0x66, 0x71, (X64Reg)4, R(reg)); - Write8(shift); -} - -void XEmitter::PSRAD(X64Reg reg, int shift) -{ - WriteSSEOp(0x66, 0x72, (X64Reg)4, R(reg)); - Write8(shift); -} - -void XEmitter::WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes) -{ - if (!Common::cpu_info.bSSSE3) - ASSERT_MSG(0, "Trying to use SSSE3 on a system that doesn't support it. Bad programmer."); - WriteSSEOp(opPrefix, op, regOp, arg, extrabytes); -} - -void XEmitter::WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes) -{ - if (!Common::cpu_info.bSSE4_1) - ASSERT_MSG(0, "Trying to use SSE4.1 on a system that doesn't support it. Bad programmer."); - WriteSSEOp(opPrefix, op, regOp, arg, extrabytes); -} - -void XEmitter::PSHUFB(X64Reg dest, OpArg arg) {WriteSSSE3Op(0x66, 0x3800, dest, arg);} -void XEmitter::PTEST(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3817, dest, arg);} -void XEmitter::PACKUSDW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x382b, dest, arg);} -void XEmitter::DPPS(X64Reg dest, OpArg arg, u8 mask) {WriteSSE41Op(0x66, 0x3A40, dest, arg, 1); Write8(mask);} - -void XEmitter::PMINSB(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3838, dest, arg);} -void XEmitter::PMINSD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3839, dest, arg);} -void XEmitter::PMINUW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383a, dest, arg);} -void XEmitter::PMINUD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383b, dest, arg);} -void XEmitter::PMAXSB(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383c, dest, arg);} -void XEmitter::PMAXSD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383d, dest, arg);} -void XEmitter::PMAXUW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383e, dest, arg);} -void XEmitter::PMAXUD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383f, dest, arg);} - -void XEmitter::PMOVSXBW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3820, dest, arg);} -void XEmitter::PMOVSXBD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3821, dest, arg);} -void XEmitter::PMOVSXBQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3822, dest, arg);} -void XEmitter::PMOVSXWD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3823, dest, arg);} -void XEmitter::PMOVSXWQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3824, dest, arg);} -void XEmitter::PMOVSXDQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3825, dest, arg);} -void XEmitter::PMOVZXBW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3830, dest, arg);} -void XEmitter::PMOVZXBD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3831, dest, arg);} -void XEmitter::PMOVZXBQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3832, dest, arg);} -void XEmitter::PMOVZXWD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3833, dest, arg);} -void XEmitter::PMOVZXWQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3834, dest, arg);} -void XEmitter::PMOVZXDQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3835, dest, arg);} - -void XEmitter::PBLENDVB(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3810, dest, arg);} -void XEmitter::BLENDVPS(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3814, dest, arg);} -void XEmitter::BLENDVPD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3815, dest, arg);} -void XEmitter::BLENDPS(X64Reg dest, const OpArg& arg, u8 blend) { WriteSSE41Op(0x66, 0x3A0C, dest, arg, 1); Write8(blend); } -void XEmitter::BLENDPD(X64Reg dest, const OpArg& arg, u8 blend) { WriteSSE41Op(0x66, 0x3A0D, dest, arg, 1); Write8(blend); } - -void XEmitter::ROUNDSS(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0A, dest, arg, 1); Write8(mode);} -void XEmitter::ROUNDSD(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0B, dest, arg, 1); Write8(mode);} -void XEmitter::ROUNDPS(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A08, dest, arg, 1); Write8(mode);} -void XEmitter::ROUNDPD(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A09, dest, arg, 1); Write8(mode);} - -void XEmitter::PAND(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDB, dest, arg);} -void XEmitter::PANDN(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDF, dest, arg);} -void XEmitter::PXOR(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEF, dest, arg);} -void XEmitter::POR(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEB, dest, arg);} - -void XEmitter::PADDB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFC, dest, arg);} -void XEmitter::PADDW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFD, dest, arg);} -void XEmitter::PADDD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFE, dest, arg);} -void XEmitter::PADDQ(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD4, dest, arg);} - -void XEmitter::PADDSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEC, dest, arg);} -void XEmitter::PADDSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xED, dest, arg);} -void XEmitter::PADDUSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDC, dest, arg);} -void XEmitter::PADDUSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDD, dest, arg);} - -void XEmitter::PSUBB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF8, dest, arg);} -void XEmitter::PSUBW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF9, dest, arg);} -void XEmitter::PSUBD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFA, dest, arg);} -void XEmitter::PSUBQ(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFB, dest, arg);} - -void XEmitter::PSUBSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE8, dest, arg);} -void XEmitter::PSUBSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE9, dest, arg);} -void XEmitter::PSUBUSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD8, dest, arg);} -void XEmitter::PSUBUSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD9, dest, arg);} - -void XEmitter::PAVGB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE0, dest, arg);} -void XEmitter::PAVGW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE3, dest, arg);} - -void XEmitter::PCMPEQB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x74, dest, arg);} -void XEmitter::PCMPEQW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x75, dest, arg);} -void XEmitter::PCMPEQD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x76, dest, arg);} - -void XEmitter::PCMPGTB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x64, dest, arg);} -void XEmitter::PCMPGTW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x65, dest, arg);} -void XEmitter::PCMPGTD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x66, dest, arg);} - -void XEmitter::PEXTRW(X64Reg dest, OpArg arg, u8 subreg) {WriteSSEOp(0x66, 0xC5, dest, arg, 1); Write8(subreg);} -void XEmitter::PINSRW(X64Reg dest, OpArg arg, u8 subreg) {WriteSSEOp(0x66, 0xC4, dest, arg, 1); Write8(subreg);} - -void XEmitter::PMADDWD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF5, dest, arg); } -void XEmitter::PSADBW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF6, dest, arg);} - -void XEmitter::PMAXSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEE, dest, arg); } -void XEmitter::PMAXUB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDE, dest, arg); } -void XEmitter::PMINSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEA, dest, arg); } -void XEmitter::PMINUB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDA, dest, arg); } - -void XEmitter::PMOVMSKB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD7, dest, arg); } -void XEmitter::PSHUFD(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0x66, 0x70, regOp, arg, 1); Write8(shuffle);} -void XEmitter::PSHUFLW(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0xF2, 0x70, regOp, arg, 1); Write8(shuffle);} -void XEmitter::PSHUFHW(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0xF3, 0x70, regOp, arg, 1); Write8(shuffle);} - -// VEX -void XEmitter::VADDSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseADD, regOp1, regOp2, arg);} -void XEmitter::VSUBSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseSUB, regOp1, regOp2, arg);} -void XEmitter::VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseMUL, regOp1, regOp2, arg);} -void XEmitter::VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseDIV, regOp1, regOp2, arg);} -void XEmitter::VADDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseADD, regOp1, regOp2, arg);} -void XEmitter::VSUBPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseSUB, regOp1, regOp2, arg);} -void XEmitter::VMULPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseMUL, regOp1, regOp2, arg);} -void XEmitter::VDIVPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseDIV, regOp1, regOp2, arg);} -void XEmitter::VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseSQRT, regOp1, regOp2, arg);} -void XEmitter::VSHUFPD(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 shuffle) {WriteAVXOp(0x66, sseSHUF, regOp1, regOp2, arg, 1); Write8(shuffle);} -void XEmitter::VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, OpArg arg){WriteAVXOp(0x66, 0x14, regOp1, regOp2, arg);} -void XEmitter::VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, OpArg arg){WriteAVXOp(0x66, 0x15, regOp1, regOp2, arg);} - -void XEmitter::VANDPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseAND, regOp1, regOp2, arg); } -void XEmitter::VANDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseAND, regOp1, regOp2, arg); } -void XEmitter::VANDNPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseANDN, regOp1, regOp2, arg); } -void XEmitter::VANDNPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseANDN, regOp1, regOp2, arg); } -void XEmitter::VORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseOR, regOp1, regOp2, arg); } -void XEmitter::VORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseOR, regOp1, regOp2, arg); } -void XEmitter::VXORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseXOR, regOp1, regOp2, arg); } -void XEmitter::VXORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseXOR, regOp1, regOp2, arg); } - -void XEmitter::VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xDB, regOp1, regOp2, arg); } -void XEmitter::VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xDF, regOp1, regOp2, arg); } -void XEmitter::VPOR(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xEB, regOp1, regOp2, arg); } -void XEmitter::VPXOR(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xEF, regOp1, regOp2, arg); } - -void XEmitter::VFMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg); } -void XEmitter::VFMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg); } -void XEmitter::VFMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg); } -void XEmitter::VFMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg, 1); } -void XEmitter::VFMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg, 1); } -void XEmitter::VFMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg, 1); } -void XEmitter::VFMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg); } -void XEmitter::VFMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg); } -void XEmitter::VFMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg); } -void XEmitter::VFMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg, 1); } -void XEmitter::VFMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg, 1); } -void XEmitter::VFMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg, 1); } -void XEmitter::VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg); } -void XEmitter::VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg); } -void XEmitter::VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg); } -void XEmitter::VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg, 1); } -void XEmitter::VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg, 1); } -void XEmitter::VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg, 1); } -void XEmitter::VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg); } -void XEmitter::VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg); } -void XEmitter::VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg); } -void XEmitter::VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg, 1); } -void XEmitter::VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg, 1); } -void XEmitter::VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg); } -void XEmitter::VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg); } -void XEmitter::VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg); } -void XEmitter::VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg); } -void XEmitter::VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg); } -void XEmitter::VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg); } -void XEmitter::VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg); } -void XEmitter::VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg); } -void XEmitter::VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg); } -void XEmitter::VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg); } -void XEmitter::VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg); } -void XEmitter::VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg); } -void XEmitter::VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg, 1); } -void XEmitter::VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg); } -void XEmitter::VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg); } -void XEmitter::VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg); } -void XEmitter::VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg, 1); } -void XEmitter::VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg, 1); } -void XEmitter::VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg, 1); } -void XEmitter::VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg); } -void XEmitter::VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg); } -void XEmitter::VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg); } -void XEmitter::VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg, 1); } -void XEmitter::VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg, 1); } -void XEmitter::VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg, 1); } - -void XEmitter::SARX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF3, 0x38F7, regOp1, regOp2, arg);} -void XEmitter::SHLX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x66, 0x38F7, regOp1, regOp2, arg);} -void XEmitter::SHRX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF2, 0x38F7, regOp1, regOp2, arg);} -void XEmitter::RORX(int bits, X64Reg regOp, OpArg arg, u8 rotate) {WriteBMI2Op(bits, 0xF2, 0x3AF0, regOp, INVALID_REG, arg, 1); Write8(rotate);} -void XEmitter::PEXT(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF3, 0x38F5, regOp1, regOp2, arg);} -void XEmitter::PDEP(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF2, 0x38F5, regOp1, regOp2, arg);} -void XEmitter::MULX(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF2, 0x38F6, regOp2, regOp1, arg);} -void XEmitter::BZHI(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x00, 0x38F5, regOp1, regOp2, arg);} -void XEmitter::BLSR(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x1, regOp, arg);} -void XEmitter::BLSMSK(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x2, regOp, arg);} -void XEmitter::BLSI(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x3, regOp, arg);} -void XEmitter::BEXTR(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2){WriteBMI1Op(bits, 0x00, 0x38F7, regOp1, regOp2, arg);} -void XEmitter::ANDN(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F2, regOp1, regOp2, arg);} - -// Prefixes - -void XEmitter::LOCK() { Write8(0xF0); } -void XEmitter::REP() { Write8(0xF3); } -void XEmitter::REPNE() { Write8(0xF2); } -void XEmitter::FSOverride() { Write8(0x64); } -void XEmitter::GSOverride() { Write8(0x65); } - -void XEmitter::FWAIT() -{ - Write8(0x9B); -} - -// TODO: make this more generic -void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg) -{ - int mf = 0; - ASSERT_MSG(!(bits == 80 && op_80b == floatINVALID), "WriteFloatLoadStore: 80 bits not supported for this instruction"); - switch (bits) - { - case 32: mf = 0; break; - case 64: mf = 4; break; - case 80: mf = 2; break; - default: ASSERT_MSG(0, "WriteFloatLoadStore: invalid bits (should be 32/64/80)"); - } - Write8(0xd9 | mf); - // x87 instructions use the reg field of the ModR/M byte as opcode: - if (bits == 80) - op = op_80b; - arg.WriteRest(this, 0, (X64Reg) op); -} - -void XEmitter::FLD(int bits, OpArg src) {WriteFloatLoadStore(bits, floatLD, floatLD80, src);} -void XEmitter::FST(int bits, OpArg dest) {WriteFloatLoadStore(bits, floatST, floatINVALID, dest);} -void XEmitter::FSTP(int bits, OpArg dest) {WriteFloatLoadStore(bits, floatSTP, floatSTP80, dest);} -void XEmitter::FNSTSW_AX() { Write8(0xDF); Write8(0xE0); } - -void XEmitter::RDTSC() { Write8(0x0F); Write8(0x31); } - -void XCodeBlock::PoisonMemory() { - // x86/64: 0xCC = breakpoint - memset(region, 0xCC, region_size); -} - -} diff --git a/src/common/x64_emitter.h b/src/common/x64_emitter.h deleted file mode 100644 index 369bfaa08..000000000 --- a/src/common/x64_emitter.h +++ /dev/null @@ -1,1067 +0,0 @@ -// Copyright (C) 2003 Dolphin Project. - -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, version 2.0 or later versions. - -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License 2.0 for more details. - -// A copy of the GPL 2.0 should have been included with the program. -// If not, see http://www.gnu.org/licenses/ - -// Official SVN repository and contact information can be found at -// http://code.google.com/p/dolphin-emu/ - -#pragma once - -#include "assert.h" -#include "common_types.h" -#include "code_block.h" - -#if defined(_M_X86_64) && !defined(_ARCH_64) -#define _ARCH_64 -#endif - -#ifdef _ARCH_64 -#define PTRBITS 64 -#else -#define PTRBITS 32 -#endif - -namespace Gen -{ - -enum X64Reg -{ - EAX = 0, EBX = 3, ECX = 1, EDX = 2, - ESI = 6, EDI = 7, EBP = 5, ESP = 4, - - RAX = 0, RBX = 3, RCX = 1, RDX = 2, - RSI = 6, RDI = 7, RBP = 5, RSP = 4, - R8 = 8, R9 = 9, R10 = 10,R11 = 11, - R12 = 12,R13 = 13,R14 = 14,R15 = 15, - - AL = 0, BL = 3, CL = 1, DL = 2, - SIL = 6, DIL = 7, BPL = 5, SPL = 4, - AH = 0x104, BH = 0x107, CH = 0x105, DH = 0x106, - - AX = 0, BX = 3, CX = 1, DX = 2, - SI = 6, DI = 7, BP = 5, SP = 4, - - XMM0=0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, - - YMM0=0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, - YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15, - - INVALID_REG = 0xFFFFFFFF -}; - -enum CCFlags -{ - CC_O = 0, - CC_NO = 1, - CC_B = 2, CC_C = 2, CC_NAE = 2, - CC_NB = 3, CC_NC = 3, CC_AE = 3, - CC_Z = 4, CC_E = 4, - CC_NZ = 5, CC_NE = 5, - CC_BE = 6, CC_NA = 6, - CC_NBE = 7, CC_A = 7, - CC_S = 8, - CC_NS = 9, - CC_P = 0xA, CC_PE = 0xA, - CC_NP = 0xB, CC_PO = 0xB, - CC_L = 0xC, CC_NGE = 0xC, - CC_NL = 0xD, CC_GE = 0xD, - CC_LE = 0xE, CC_NG = 0xE, - CC_NLE = 0xF, CC_G = 0xF -}; - -enum -{ - NUMGPRs = 16, - NUMXMMs = 16, -}; - -enum -{ - SCALE_NONE = 0, - SCALE_1 = 1, - SCALE_2 = 2, - SCALE_4 = 4, - SCALE_8 = 8, - SCALE_ATREG = 16, - //SCALE_NOBASE_1 is not supported and can be replaced with SCALE_ATREG - SCALE_NOBASE_2 = 34, - SCALE_NOBASE_4 = 36, - SCALE_NOBASE_8 = 40, - SCALE_RIP = 0xFF, - SCALE_IMM8 = 0xF0, - SCALE_IMM16 = 0xF1, - SCALE_IMM32 = 0xF2, - SCALE_IMM64 = 0xF3, -}; - -enum NormalOp { - nrmADD, - nrmADC, - nrmSUB, - nrmSBB, - nrmAND, - nrmOR , - nrmXOR, - nrmMOV, - nrmTEST, - nrmCMP, - nrmXCHG, -}; - -enum { - CMP_EQ = 0, - CMP_LT = 1, - CMP_LE = 2, - CMP_UNORD = 3, - CMP_NEQ = 4, - CMP_NLT = 5, - CMP_NLE = 6, - CMP_ORD = 7, -}; - -enum FloatOp { - floatLD = 0, - floatST = 2, - floatSTP = 3, - floatLD80 = 5, - floatSTP80 = 7, - - floatINVALID = -1, -}; - -enum FloatRound { - FROUND_NEAREST = 0, - FROUND_FLOOR = 1, - FROUND_CEIL = 2, - FROUND_ZERO = 3, - FROUND_MXCSR = 4, - - FROUND_RAISE_PRECISION = 0, - FROUND_IGNORE_PRECISION = 8, -}; - -class XEmitter; - -// RIP addressing does not benefit from micro op fusion on Core arch -struct OpArg -{ - OpArg() {} // dummy op arg, used for storage - OpArg(u64 _offset, int _scale, X64Reg rmReg = RAX, X64Reg scaledReg = RAX) - { - operandReg = 0; - scale = (u8)_scale; - offsetOrBaseReg = (u16)rmReg; - indexReg = (u16)scaledReg; - //if scale == 0 never mind offsetting - offset = _offset; - } - bool operator==(const OpArg &b) const - { - return operandReg == b.operandReg && scale == b.scale && offsetOrBaseReg == b.offsetOrBaseReg && - indexReg == b.indexReg && offset == b.offset; - } - void WriteRex(XEmitter *emit, int opBits, int bits, int customOp = -1) const; - void WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm, int W = 0) const; - void WriteRest(XEmitter *emit, int extraBytes=0, X64Reg operandReg=INVALID_REG, bool warn_64bit_offset = true) const; - void WriteFloatModRM(XEmitter *emit, FloatOp op); - void WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg operandReg, int bits); - // This one is public - must be written to - u64 offset; // use RIP-relative as much as possible - 64-bit immediates are not available. - u16 operandReg; - - void WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg &operand, int bits) const; - bool IsImm() const {return scale == SCALE_IMM8 || scale == SCALE_IMM16 || scale == SCALE_IMM32 || scale == SCALE_IMM64;} - bool IsSimpleReg() const {return scale == SCALE_NONE;} - bool IsSimpleReg(X64Reg reg) const - { - if (!IsSimpleReg()) - return false; - return GetSimpleReg() == reg; - } - - bool CanDoOpWith(const OpArg &other) const - { - if (IsSimpleReg()) return true; - if (!IsSimpleReg() && !other.IsSimpleReg() && !other.IsImm()) return false; - return true; - } - - int GetImmBits() const - { - switch (scale) - { - case SCALE_IMM8: return 8; - case SCALE_IMM16: return 16; - case SCALE_IMM32: return 32; - case SCALE_IMM64: return 64; - default: return -1; - } - } - - void SetImmBits(int bits) { - switch (bits) - { - case 8: scale = SCALE_IMM8; break; - case 16: scale = SCALE_IMM16; break; - case 32: scale = SCALE_IMM32; break; - case 64: scale = SCALE_IMM64; break; - } - } - - X64Reg GetSimpleReg() const - { - if (scale == SCALE_NONE) - return (X64Reg)offsetOrBaseReg; - else - return INVALID_REG; - } - - u32 GetImmValue() const { - return (u32)offset; - } - - // For loops. - void IncreaseOffset(int sz) { - offset += sz; - } - -private: - u8 scale; - u16 offsetOrBaseReg; - u16 indexReg; -}; - -inline OpArg M(const void *ptr) {return OpArg((u64)ptr, (int)SCALE_RIP);} -template -inline OpArg M(const T *ptr) {return OpArg((u64)(const void *)ptr, (int)SCALE_RIP);} -inline OpArg R(X64Reg value) {return OpArg(0, SCALE_NONE, value);} -inline OpArg MatR(X64Reg value) {return OpArg(0, SCALE_ATREG, value);} - -inline OpArg MDisp(X64Reg value, int offset) -{ - return OpArg((u32)offset, SCALE_ATREG, value); -} - -inline OpArg MComplex(X64Reg base, X64Reg scaled, int scale, int offset) -{ - return OpArg(offset, scale, base, scaled); -} - -inline OpArg MScaled(X64Reg scaled, int scale, int offset) -{ - if (scale == SCALE_1) - return OpArg(offset, SCALE_ATREG, scaled); - else - return OpArg(offset, scale | 0x20, RAX, scaled); -} - -inline OpArg MRegSum(X64Reg base, X64Reg offset) -{ - return MComplex(base, offset, 1, 0); -} - -inline OpArg Imm8 (u8 imm) {return OpArg(imm, SCALE_IMM8);} -inline OpArg Imm16(u16 imm) {return OpArg(imm, SCALE_IMM16);} //rarely used -inline OpArg Imm32(u32 imm) {return OpArg(imm, SCALE_IMM32);} -inline OpArg Imm64(u64 imm) {return OpArg(imm, SCALE_IMM64);} -inline OpArg UImmAuto(u32 imm) { - return OpArg(imm, imm >= 128 ? SCALE_IMM32 : SCALE_IMM8); -} -inline OpArg SImmAuto(s32 imm) { - return OpArg(imm, (imm >= 128 || imm < -128) ? SCALE_IMM32 : SCALE_IMM8); -} - -#ifdef _ARCH_64 -inline OpArg ImmPtr(const void* imm) {return Imm64((u64)imm);} -#else -inline OpArg ImmPtr(const void* imm) {return Imm32((u32)imm);} -#endif - -inline u32 PtrOffset(const void* ptr, const void* base) -{ -#ifdef _ARCH_64 - s64 distance = (s64)ptr-(s64)base; - if (distance >= 0x80000000LL || - distance < -0x80000000LL) - { - ASSERT_MSG(0, "pointer offset out of range"); - return 0; - } - - return (u32)distance; -#else - return (u32)ptr-(u32)base; -#endif -} - -//usage: int a[]; ARRAY_OFFSET(a,10) -#define ARRAY_OFFSET(array,index) ((u32)((u64)&(array)[index]-(u64)&(array)[0])) -//usage: struct {int e;} s; STRUCT_OFFSET(s,e) -#define STRUCT_OFFSET(str,elem) ((u32)((u64)&(str).elem-(u64)&(str))) - -struct FixupBranch -{ - u8 *ptr; - int type; //0 = 8bit 1 = 32bit -}; - -enum SSECompare -{ - EQ = 0, - LT, - LE, - UNORD, - NEQ, - NLT, - NLE, - ORD, -}; - -typedef const u8* JumpTarget; - -class XEmitter -{ - friend struct OpArg; // for Write8 etc -private: - u8 *code; - bool flags_locked; - - void CheckFlags(); - - void Rex(int w, int r, int x, int b); - void WriteSimple1Byte(int bits, u8 byte, X64Reg reg); - void WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg); - void WriteMulDivType(int bits, OpArg src, int ext); - void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep = false); - void WriteShift(int bits, OpArg dest, OpArg &shift, int ext); - void WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext); - void WriteMXCSR(OpArg arg, int ext); - void WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); - void WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); - void WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); - void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); - void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); - void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); - void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); - void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); - void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg); - void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2); - - void ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp); - -protected: - inline void Write8(u8 value) {*code++ = value;} - inline void Write16(u16 value) {*(u16*)code = (value); code += 2;} - inline void Write32(u32 value) {*(u32*)code = (value); code += 4;} - inline void Write64(u64 value) {*(u64*)code = (value); code += 8;} - -public: - XEmitter() { code = nullptr; flags_locked = false; } - XEmitter(u8 *code_ptr) { code = code_ptr; flags_locked = false; } - virtual ~XEmitter() {} - - void WriteModRM(int mod, int rm, int reg); - void WriteSIB(int scale, int index, int base); - - void SetCodePtr(u8 *ptr); - void ReserveCodeSpace(int bytes); - const u8 *AlignCode4(); - const u8 *AlignCode16(); - const u8 *AlignCodePage(); - const u8 *GetCodePtr() const; - u8 *GetWritableCodePtr(); - - void LockFlags() { flags_locked = true; } - void UnlockFlags() { flags_locked = false; } - - // Looking for one of these? It's BANNED!! Some instructions are slow on modern CPU - // INC, DEC, LOOP, LOOPNE, LOOPE, ENTER, LEAVE, XCHG, XLAT, REP MOVSB/MOVSD, REP SCASD + other string instr., - // INC and DEC are slow on Intel Core, but not on AMD. They create a - // false flag dependency because they only update a subset of the flags. - // XCHG is SLOW and should be avoided. - - // Debug breakpoint - void INT3(); - - // Do nothing - void NOP(size_t count = 1); - - // Save energy in wait-loops on P4 only. Probably not too useful. - void PAUSE(); - - // Flag control - void STC(); - void CLC(); - void CMC(); - - // These two can not be executed in 64-bit mode on early Intel 64-bit CPU:s, only on Core2 and AMD! - void LAHF(); // 3 cycle vector path - void SAHF(); // direct path fast - - - // Stack control - void PUSH(X64Reg reg); - void POP(X64Reg reg); - void PUSH(int bits, const OpArg ®); - void POP(int bits, const OpArg ®); - void PUSHF(); - void POPF(); - - // Flow control - void RET(); - void RET_FAST(); - void UD2(); - FixupBranch J(bool force5bytes = false); - - void JMP(const u8 * addr, bool force5Bytes = false); - void JMP(OpArg arg); - void JMPptr(const OpArg &arg); - void JMPself(); //infinite loop! -#ifdef CALL -#undef CALL -#endif - void CALL(const void *fnptr); - void CALLptr(OpArg arg); - - FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false); - //void J_CC(CCFlags conditionCode, JumpTarget target); - void J_CC(CCFlags conditionCode, const u8 * addr, bool force5Bytes = false); - - void SetJumpTarget(const FixupBranch &branch); - - void SETcc(CCFlags flag, OpArg dest); - // Note: CMOV brings small if any benefit on current cpus. - void CMOVcc(int bits, X64Reg dest, OpArg src, CCFlags flag); - - // Fences - void LFENCE(); - void MFENCE(); - void SFENCE(); - - // Bit scan - void BSF(int bits, X64Reg dest, OpArg src); //bottom bit to top bit - void BSR(int bits, X64Reg dest, OpArg src); //top bit to bottom bit - - // Cache control - enum PrefetchLevel - { - PF_NTA, //Non-temporal (data used once and only once) - PF_T0, //All cache levels - PF_T1, //Levels 2+ (aliased to T0 on AMD) - PF_T2, //Levels 3+ (aliased to T0 on AMD) - }; - void PREFETCH(PrefetchLevel level, OpArg arg); - void MOVNTI(int bits, OpArg dest, X64Reg src); - void MOVNTDQ(OpArg arg, X64Reg regOp); - void MOVNTPS(OpArg arg, X64Reg regOp); - void MOVNTPD(OpArg arg, X64Reg regOp); - - // Multiplication / division - void MUL(int bits, OpArg src); //UNSIGNED - void IMUL(int bits, OpArg src); //SIGNED - void IMUL(int bits, X64Reg regOp, OpArg src); - void IMUL(int bits, X64Reg regOp, OpArg src, OpArg imm); - void DIV(int bits, OpArg src); - void IDIV(int bits, OpArg src); - - // Shift - void ROL(int bits, OpArg dest, OpArg shift); - void ROR(int bits, OpArg dest, OpArg shift); - void RCL(int bits, OpArg dest, OpArg shift); - void RCR(int bits, OpArg dest, OpArg shift); - void SHL(int bits, OpArg dest, OpArg shift); - void SHR(int bits, OpArg dest, OpArg shift); - void SAR(int bits, OpArg dest, OpArg shift); - - // Bit Test - void BT(int bits, OpArg dest, OpArg index); - void BTS(int bits, OpArg dest, OpArg index); - void BTR(int bits, OpArg dest, OpArg index); - void BTC(int bits, OpArg dest, OpArg index); - - // Double-Precision Shift - void SHRD(int bits, OpArg dest, OpArg src, OpArg shift); - void SHLD(int bits, OpArg dest, OpArg src, OpArg shift); - - // Extend EAX into EDX in various ways - void CWD(int bits = 16); - inline void CDQ() {CWD(32);} - inline void CQO() {CWD(64);} - void CBW(int bits = 8); - inline void CWDE() {CBW(16);} - inline void CDQE() {CBW(32);} - - // Load effective address - void LEA(int bits, X64Reg dest, OpArg src); - - // Integer arithmetic - void NEG (int bits, OpArg src); - void ADD (int bits, const OpArg &a1, const OpArg &a2); - void ADC (int bits, const OpArg &a1, const OpArg &a2); - void SUB (int bits, const OpArg &a1, const OpArg &a2); - void SBB (int bits, const OpArg &a1, const OpArg &a2); - void AND (int bits, const OpArg &a1, const OpArg &a2); - void CMP (int bits, const OpArg &a1, const OpArg &a2); - - // Bit operations - void NOT (int bits, OpArg src); - void OR (int bits, const OpArg &a1, const OpArg &a2); - void XOR (int bits, const OpArg &a1, const OpArg &a2); - void MOV (int bits, const OpArg &a1, const OpArg &a2); - void TEST(int bits, const OpArg &a1, const OpArg &a2); - - // Are these useful at all? Consider removing. - void XCHG(int bits, const OpArg &a1, const OpArg &a2); - void XCHG_AHAL(); - - // Byte swapping (32 and 64-bit only). - void BSWAP(int bits, X64Reg reg); - - // Sign/zero extension - void MOVSX(int dbits, int sbits, X64Reg dest, OpArg src); //automatically uses MOVSXD if necessary - void MOVZX(int dbits, int sbits, X64Reg dest, OpArg src); - - // Available only on Atom or >= Haswell so far. Test with cpu_info.bMOVBE. - void MOVBE(int dbits, const OpArg& dest, const OpArg& src); - - // Available only on AMD >= Phenom or Intel >= Haswell - void LZCNT(int bits, X64Reg dest, OpArg src); - // Note: this one is actually part of BMI1 - void TZCNT(int bits, X64Reg dest, OpArg src); - - // WARNING - These two take 11-13 cycles and are VectorPath! (AMD64) - void STMXCSR(OpArg memloc); - void LDMXCSR(OpArg memloc); - - // Prefixes - void LOCK(); - void REP(); - void REPNE(); - void FSOverride(); - void GSOverride(); - - // x87 - enum x87StatusWordBits { - x87_InvalidOperation = 0x1, - x87_DenormalizedOperand = 0x2, - x87_DivisionByZero = 0x4, - x87_Overflow = 0x8, - x87_Underflow = 0x10, - x87_Precision = 0x20, - x87_StackFault = 0x40, - x87_ErrorSummary = 0x80, - x87_C0 = 0x100, - x87_C1 = 0x200, - x87_C2 = 0x400, - x87_TopOfStack = 0x2000 | 0x1000 | 0x800, - x87_C3 = 0x4000, - x87_FPUBusy = 0x8000, - }; - - void FLD(int bits, OpArg src); - void FST(int bits, OpArg dest); - void FSTP(int bits, OpArg dest); - void FNSTSW_AX(); - void FWAIT(); - - // SSE/SSE2: Floating point arithmetic - void ADDSS(X64Reg regOp, OpArg arg); - void ADDSD(X64Reg regOp, OpArg arg); - void SUBSS(X64Reg regOp, OpArg arg); - void SUBSD(X64Reg regOp, OpArg arg); - void MULSS(X64Reg regOp, OpArg arg); - void MULSD(X64Reg regOp, OpArg arg); - void DIVSS(X64Reg regOp, OpArg arg); - void DIVSD(X64Reg regOp, OpArg arg); - void MINSS(X64Reg regOp, OpArg arg); - void MINSD(X64Reg regOp, OpArg arg); - void MAXSS(X64Reg regOp, OpArg arg); - void MAXSD(X64Reg regOp, OpArg arg); - void SQRTSS(X64Reg regOp, OpArg arg); - void SQRTSD(X64Reg regOp, OpArg arg); - void RSQRTSS(X64Reg regOp, OpArg arg); - - // SSE/SSE2: Floating point bitwise (yes) - void CMPSS(X64Reg regOp, OpArg arg, u8 compare); - void CMPSD(X64Reg regOp, OpArg arg, u8 compare); - - inline void CMPEQSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_EQ); } - inline void CMPLTSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_LT); } - inline void CMPLESS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_LE); } - inline void CMPUNORDSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_UNORD); } - inline void CMPNEQSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_NEQ); } - inline void CMPNLTSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_NLT); } - inline void CMPORDSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_ORD); } - - // SSE/SSE2: Floating point packed arithmetic (x4 for float, x2 for double) - void ADDPS(X64Reg regOp, OpArg arg); - void ADDPD(X64Reg regOp, OpArg arg); - void SUBPS(X64Reg regOp, OpArg arg); - void SUBPD(X64Reg regOp, OpArg arg); - void CMPPS(X64Reg regOp, OpArg arg, u8 compare); - void CMPPD(X64Reg regOp, OpArg arg, u8 compare); - void MULPS(X64Reg regOp, OpArg arg); - void MULPD(X64Reg regOp, OpArg arg); - void DIVPS(X64Reg regOp, OpArg arg); - void DIVPD(X64Reg regOp, OpArg arg); - void MINPS(X64Reg regOp, OpArg arg); - void MINPD(X64Reg regOp, OpArg arg); - void MAXPS(X64Reg regOp, OpArg arg); - void MAXPD(X64Reg regOp, OpArg arg); - void SQRTPS(X64Reg regOp, OpArg arg); - void SQRTPD(X64Reg regOp, OpArg arg); - void RCPPS(X64Reg regOp, OpArg arg); - void RSQRTPS(X64Reg regOp, OpArg arg); - - // SSE/SSE2: Floating point packed bitwise (x4 for float, x2 for double) - void ANDPS(X64Reg regOp, OpArg arg); - void ANDPD(X64Reg regOp, OpArg arg); - void ANDNPS(X64Reg regOp, OpArg arg); - void ANDNPD(X64Reg regOp, OpArg arg); - void ORPS(X64Reg regOp, OpArg arg); - void ORPD(X64Reg regOp, OpArg arg); - void XORPS(X64Reg regOp, OpArg arg); - void XORPD(X64Reg regOp, OpArg arg); - - // SSE/SSE2: Shuffle components. These are tricky - see Intel documentation. - void SHUFPS(X64Reg regOp, OpArg arg, u8 shuffle); - void SHUFPD(X64Reg regOp, OpArg arg, u8 shuffle); - - // SSE/SSE2: Useful alternative to shuffle in some cases. - void MOVDDUP(X64Reg regOp, OpArg arg); - - // TODO: Actually implement -#if 0 - // SSE3: Horizontal operations in SIMD registers. Could be useful for various VFPU things like dot products... - void ADDSUBPS(X64Reg dest, OpArg src); - void ADDSUBPD(X64Reg dest, OpArg src); - void HADDPD(X64Reg dest, OpArg src); - void HSUBPS(X64Reg dest, OpArg src); - void HSUBPD(X64Reg dest, OpArg src); - - // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask". - void DPPD(X64Reg dest, OpArg src, u8 arg); - - // These are probably useful for VFPU emulation. - void INSERTPS(X64Reg dest, OpArg src, u8 arg); - void EXTRACTPS(OpArg dest, X64Reg src, u8 arg); -#endif - - // SSE3: Horizontal operations in SIMD registers. Very slow! shufps-based code beats it handily on Ivy. - void HADDPS(X64Reg dest, OpArg src); - - // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask". - void DPPS(X64Reg dest, OpArg src, u8 arg); - - void UNPCKLPS(X64Reg dest, OpArg src); - void UNPCKHPS(X64Reg dest, OpArg src); - void UNPCKLPD(X64Reg dest, OpArg src); - void UNPCKHPD(X64Reg dest, OpArg src); - - // SSE/SSE2: Compares. - void COMISS(X64Reg regOp, OpArg arg); - void COMISD(X64Reg regOp, OpArg arg); - void UCOMISS(X64Reg regOp, OpArg arg); - void UCOMISD(X64Reg regOp, OpArg arg); - - // SSE/SSE2: Moves. Use the right data type for your data, in most cases. - void MOVAPS(X64Reg regOp, OpArg arg); - void MOVAPD(X64Reg regOp, OpArg arg); - void MOVAPS(OpArg arg, X64Reg regOp); - void MOVAPD(OpArg arg, X64Reg regOp); - - void MOVUPS(X64Reg regOp, OpArg arg); - void MOVUPD(X64Reg regOp, OpArg arg); - void MOVUPS(OpArg arg, X64Reg regOp); - void MOVUPD(OpArg arg, X64Reg regOp); - - void MOVDQA(X64Reg regOp, OpArg arg); - void MOVDQA(OpArg arg, X64Reg regOp); - void MOVDQU(X64Reg regOp, OpArg arg); - void MOVDQU(OpArg arg, X64Reg regOp); - - void MOVSS(X64Reg regOp, OpArg arg); - void MOVSD(X64Reg regOp, OpArg arg); - void MOVSS(OpArg arg, X64Reg regOp); - void MOVSD(OpArg arg, X64Reg regOp); - - void MOVLPS(X64Reg regOp, OpArg arg); - void MOVLPD(X64Reg regOp, OpArg arg); - void MOVLPS(OpArg arg, X64Reg regOp); - void MOVLPD(OpArg arg, X64Reg regOp); - - void MOVHPS(X64Reg regOp, OpArg arg); - void MOVHPD(X64Reg regOp, OpArg arg); - void MOVHPS(OpArg arg, X64Reg regOp); - void MOVHPD(OpArg arg, X64Reg regOp); - - void MOVHLPS(X64Reg regOp1, X64Reg regOp2); - void MOVLHPS(X64Reg regOp1, X64Reg regOp2); - - void MOVD_xmm(X64Reg dest, const OpArg &arg); - void MOVQ_xmm(X64Reg dest, OpArg arg); - void MOVD_xmm(const OpArg &arg, X64Reg src); - void MOVQ_xmm(OpArg arg, X64Reg src); - - // SSE/SSE2: Generates a mask from the high bits of the components of the packed register in question. - void MOVMSKPS(X64Reg dest, OpArg arg); - void MOVMSKPD(X64Reg dest, OpArg arg); - - // SSE2: Selective byte store, mask in src register. EDI/RDI specifies store address. This is a weird one. - void MASKMOVDQU(X64Reg dest, X64Reg src); - void LDDQU(X64Reg dest, OpArg src); - - // SSE/SSE2: Data type conversions. - void CVTPS2PD(X64Reg dest, OpArg src); - void CVTPD2PS(X64Reg dest, OpArg src); - void CVTSS2SD(X64Reg dest, OpArg src); - void CVTSI2SS(X64Reg dest, OpArg src); - void CVTSD2SS(X64Reg dest, OpArg src); - void CVTSI2SD(X64Reg dest, OpArg src); - void CVTDQ2PD(X64Reg regOp, OpArg arg); - void CVTPD2DQ(X64Reg regOp, OpArg arg); - void CVTDQ2PS(X64Reg regOp, OpArg arg); - void CVTPS2DQ(X64Reg regOp, OpArg arg); - - void CVTTPS2DQ(X64Reg regOp, OpArg arg); - void CVTTPD2DQ(X64Reg regOp, OpArg arg); - - // Destinations are X64 regs (rax, rbx, ...) for these instructions. - void CVTSS2SI(X64Reg xregdest, OpArg src); - void CVTSD2SI(X64Reg xregdest, OpArg src); - void CVTTSS2SI(X64Reg xregdest, OpArg arg); - void CVTTSD2SI(X64Reg xregdest, OpArg arg); - - // SSE2: Packed integer instructions - void PACKSSDW(X64Reg dest, OpArg arg); - void PACKSSWB(X64Reg dest, OpArg arg); - void PACKUSDW(X64Reg dest, OpArg arg); - void PACKUSWB(X64Reg dest, OpArg arg); - - void PUNPCKLBW(X64Reg dest, const OpArg &arg); - void PUNPCKLWD(X64Reg dest, const OpArg &arg); - void PUNPCKLDQ(X64Reg dest, const OpArg &arg); - void PUNPCKLQDQ(X64Reg dest, const OpArg &arg); - - void PTEST(X64Reg dest, OpArg arg); - void PAND(X64Reg dest, OpArg arg); - void PANDN(X64Reg dest, OpArg arg); - void PXOR(X64Reg dest, OpArg arg); - void POR(X64Reg dest, OpArg arg); - - void PADDB(X64Reg dest, OpArg arg); - void PADDW(X64Reg dest, OpArg arg); - void PADDD(X64Reg dest, OpArg arg); - void PADDQ(X64Reg dest, OpArg arg); - - void PADDSB(X64Reg dest, OpArg arg); - void PADDSW(X64Reg dest, OpArg arg); - void PADDUSB(X64Reg dest, OpArg arg); - void PADDUSW(X64Reg dest, OpArg arg); - - void PSUBB(X64Reg dest, OpArg arg); - void PSUBW(X64Reg dest, OpArg arg); - void PSUBD(X64Reg dest, OpArg arg); - void PSUBQ(X64Reg dest, OpArg arg); - - void PSUBSB(X64Reg dest, OpArg arg); - void PSUBSW(X64Reg dest, OpArg arg); - void PSUBUSB(X64Reg dest, OpArg arg); - void PSUBUSW(X64Reg dest, OpArg arg); - - void PAVGB(X64Reg dest, OpArg arg); - void PAVGW(X64Reg dest, OpArg arg); - - void PCMPEQB(X64Reg dest, OpArg arg); - void PCMPEQW(X64Reg dest, OpArg arg); - void PCMPEQD(X64Reg dest, OpArg arg); - - void PCMPGTB(X64Reg dest, OpArg arg); - void PCMPGTW(X64Reg dest, OpArg arg); - void PCMPGTD(X64Reg dest, OpArg arg); - - void PEXTRW(X64Reg dest, OpArg arg, u8 subreg); - void PINSRW(X64Reg dest, OpArg arg, u8 subreg); - - void PMADDWD(X64Reg dest, OpArg arg); - void PSADBW(X64Reg dest, OpArg arg); - - void PMAXSW(X64Reg dest, OpArg arg); - void PMAXUB(X64Reg dest, OpArg arg); - void PMINSW(X64Reg dest, OpArg arg); - void PMINUB(X64Reg dest, OpArg arg); - // SSE4: More MAX/MIN instructions. - void PMINSB(X64Reg dest, OpArg arg); - void PMINSD(X64Reg dest, OpArg arg); - void PMINUW(X64Reg dest, OpArg arg); - void PMINUD(X64Reg dest, OpArg arg); - void PMAXSB(X64Reg dest, OpArg arg); - void PMAXSD(X64Reg dest, OpArg arg); - void PMAXUW(X64Reg dest, OpArg arg); - void PMAXUD(X64Reg dest, OpArg arg); - - void PMOVMSKB(X64Reg dest, OpArg arg); - void PSHUFD(X64Reg dest, OpArg arg, u8 shuffle); - void PSHUFB(X64Reg dest, OpArg arg); - - void PSHUFLW(X64Reg dest, OpArg arg, u8 shuffle); - void PSHUFHW(X64Reg dest, OpArg arg, u8 shuffle); - - void PSRLW(X64Reg reg, int shift); - void PSRLD(X64Reg reg, int shift); - void PSRLQ(X64Reg reg, int shift); - void PSRLQ(X64Reg reg, OpArg arg); - void PSRLDQ(X64Reg reg, int shift); - - void PSLLW(X64Reg reg, int shift); - void PSLLD(X64Reg reg, int shift); - void PSLLQ(X64Reg reg, int shift); - void PSLLDQ(X64Reg reg, int shift); - - void PSRAW(X64Reg reg, int shift); - void PSRAD(X64Reg reg, int shift); - - // SSE4: data type conversions - void PMOVSXBW(X64Reg dest, OpArg arg); - void PMOVSXBD(X64Reg dest, OpArg arg); - void PMOVSXBQ(X64Reg dest, OpArg arg); - void PMOVSXWD(X64Reg dest, OpArg arg); - void PMOVSXWQ(X64Reg dest, OpArg arg); - void PMOVSXDQ(X64Reg dest, OpArg arg); - void PMOVZXBW(X64Reg dest, OpArg arg); - void PMOVZXBD(X64Reg dest, OpArg arg); - void PMOVZXBQ(X64Reg dest, OpArg arg); - void PMOVZXWD(X64Reg dest, OpArg arg); - void PMOVZXWQ(X64Reg dest, OpArg arg); - void PMOVZXDQ(X64Reg dest, OpArg arg); - - // SSE4: variable blend instructions (xmm0 implicit argument) - void PBLENDVB(X64Reg dest, OpArg arg); - void BLENDVPS(X64Reg dest, OpArg arg); - void BLENDVPD(X64Reg dest, OpArg arg); - void BLENDPS(X64Reg dest, const OpArg& arg, u8 blend); - void BLENDPD(X64Reg dest, const OpArg& arg, u8 blend); - - // SSE4: rounding (see FloatRound for mode or use ROUNDNEARSS, etc. helpers.) - void ROUNDSS(X64Reg dest, OpArg arg, u8 mode); - void ROUNDSD(X64Reg dest, OpArg arg, u8 mode); - void ROUNDPS(X64Reg dest, OpArg arg, u8 mode); - void ROUNDPD(X64Reg dest, OpArg arg, u8 mode); - - inline void ROUNDNEARSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_NEAREST); } - inline void ROUNDFLOORSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_FLOOR); } - inline void ROUNDCEILSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_CEIL); } - inline void ROUNDZEROSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_ZERO); } - - inline void ROUNDNEARSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_NEAREST); } - inline void ROUNDFLOORSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_FLOOR); } - inline void ROUNDCEILSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_CEIL); } - inline void ROUNDZEROSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_ZERO); } - - inline void ROUNDNEARPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_NEAREST); } - inline void ROUNDFLOORPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_FLOOR); } - inline void ROUNDCEILPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_CEIL); } - inline void ROUNDZEROPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_ZERO); } - - inline void ROUNDNEARPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_NEAREST); } - inline void ROUNDFLOORPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_FLOOR); } - inline void ROUNDCEILPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_CEIL); } - inline void ROUNDZEROPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_ZERO); } - - // AVX - void VADDSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VSUBSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VADDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VSUBPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VMULPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VDIVPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VSHUFPD(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 shuffle); - void VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - - void VANDPS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VANDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VANDNPS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VANDNPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VXORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VXORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - - void VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VPOR(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VPXOR(X64Reg regOp1, X64Reg regOp2, OpArg arg); - - // FMA3 - void VFMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - - // VEX GPR instructions - void SARX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); - void SHLX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); - void SHRX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); - void RORX(int bits, X64Reg regOp, OpArg arg, u8 rotate); - void PEXT(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); - void PDEP(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); - void MULX(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); - void BZHI(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); - void BLSR(int bits, X64Reg regOp, OpArg arg); - void BLSMSK(int bits, X64Reg regOp, OpArg arg); - void BLSI(int bits, X64Reg regOp, OpArg arg); - void BEXTR(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); - void ANDN(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); - - void RDTSC(); - - // Utility functions - // The difference between this and CALL is that this aligns the stack - // where appropriate. - void ABI_CallFunction(const void *func); - template - void ABI_CallFunction(T (*func)()) { - ABI_CallFunction((const void *)func); - } - - void ABI_CallFunction(const u8 *func) { - ABI_CallFunction((const void *)func); - } - void ABI_CallFunctionC16(const void *func, u16 param1); - void ABI_CallFunctionCC16(const void *func, u32 param1, u16 param2); - - - // These only support u32 parameters, but that's enough for a lot of uses. - // These will destroy the 1 or 2 first "parameter regs". - void ABI_CallFunctionC(const void *func, u32 param1); - void ABI_CallFunctionCC(const void *func, u32 param1, u32 param2); - void ABI_CallFunctionCCC(const void *func, u32 param1, u32 param2, u32 param3); - void ABI_CallFunctionCCP(const void *func, u32 param1, u32 param2, void *param3); - void ABI_CallFunctionCCCP(const void *func, u32 param1, u32 param2, u32 param3, void *param4); - void ABI_CallFunctionP(const void *func, void *param1); - void ABI_CallFunctionPA(const void *func, void *param1, const Gen::OpArg &arg2); - void ABI_CallFunctionPAA(const void *func, void *param1, const Gen::OpArg &arg2, const Gen::OpArg &arg3); - void ABI_CallFunctionPPC(const void *func, void *param1, void *param2, u32 param3); - void ABI_CallFunctionAC(const void *func, const Gen::OpArg &arg1, u32 param2); - void ABI_CallFunctionACC(const void *func, const Gen::OpArg &arg1, u32 param2, u32 param3); - void ABI_CallFunctionA(const void *func, const Gen::OpArg &arg1); - void ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, const Gen::OpArg &arg2); - - // Pass a register as a parameter. - void ABI_CallFunctionR(const void *func, X64Reg reg1); - void ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2); - - template - void ABI_CallFunctionC(Tr (*func)(T1), u32 param1) { - ABI_CallFunctionC((const void *)func, param1); - } - - // A function that doesn't have any control over what it will do to regs, - // such as the dispatcher, should be surrounded by these. - void ABI_PushAllCalleeSavedRegsAndAdjustStack(); - void ABI_PopAllCalleeSavedRegsAndAdjustStack(); - - // A function that doesn't know anything about it's surroundings, should - // be surrounded by these to establish a safe environment, where it can roam free. - // An example is a backpatch injected function. - void ABI_PushAllCallerSavedRegsAndAdjustStack(); - void ABI_PopAllCallerSavedRegsAndAdjustStack(); - - unsigned int ABI_GetAlignedFrameSize(unsigned int frameSize); - void ABI_AlignStack(unsigned int frameSize); - void ABI_RestoreStack(unsigned int frameSize); - - // Sets up a __cdecl function. - // Only x64 really needs the parameter count. - void ABI_EmitPrologue(int maxCallParams); - void ABI_EmitEpilogue(int maxCallParams); - - #ifdef _M_IX86 - inline int ABI_GetNumXMMRegs() { return 8; } - #else - inline int ABI_GetNumXMMRegs() { return 16; } - #endif -}; // class XEmitter - - -// Everything that needs to generate X86 code should inherit from this. -// You get memory management for free, plus, you can use all the MOV etc functions without -// having to prefix them with gen-> or something similar. - -class XCodeBlock : public CodeBlock { -public: - void PoisonMemory() override; -}; - -} // namespace diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 544ed0297..221abc160 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -13,7 +13,6 @@ set(SRCS rasterizer.cpp shader/shader.cpp shader/shader_interpreter.cpp - shader/shader_jit.cpp utils.cpp video_core.cpp ) @@ -39,17 +38,16 @@ set(HEADERS renderer_base.h shader/shader.h shader/shader_interpreter.h - shader/shader_jit.h utils.h video_core.h ) -if(_M_X86_64) +if(ARCHITECTURE_X64) set(SRCS ${SRCS} shader/shader_jit_x64.cpp) -else() - set(SRCS ${SRCS} - shader/shader_jit_fake.cpp) + + set(HEADERS ${HEADERS} + shader/shader_jit_x64.h) endif() create_directory_groups(${SRCS} ${HEADERS}) diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index fa1f7cafe..f7459e2ad 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -15,7 +15,10 @@ #include "shader.h" #include "shader_interpreter.h" -#include "shader_jit.h" + +#ifdef ARCHITECTURE_X64 +#include "shader_jit_x64.h" +#endif // ARCHITECTURE_X64 namespace Pica { @@ -43,7 +46,7 @@ void Setup(UnitState& state) { jit_shader = jit.Compile(); shader_map.emplace(cache_key, jit_shader); } - } +#endif // ARCHITECTURE_X64 } void Shutdown() { @@ -92,7 +95,7 @@ OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes) RunInterpreter(state); #else RunInterpreter(state); -#endif +#endif // ARCHITECTURE_X64 #if PICA_DUMP_SHADERS DebugUtils::DumpShader(setup.program_code.data(), state.debug.max_offset, setup.swizzle_data.data(), diff --git a/src/video_core/shader/shader_jit.cpp b/src/video_core/shader/shader_jit.cpp deleted file mode 100644 index 69fb7f6be..000000000 --- a/src/video_core/shader/shader_jit.cpp +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2015 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "video_core/pica.h" - -#include "shader.h" -#include "shader_jit.h" - -namespace Pica { - -namespace Shader { - -JitShader::JitShader() : jitted(nullptr) { -} - -void JitShader::DoJit(JitCompiler& jit) { - jitted = jit.Compile(); -} - -void JitShader::Run(UnitState& state) { - if (jitted) - jitted(&state); -} - -JitCompiler::JitCompiler() { - AllocCodeSpace(1024 * 1024 * 4); -} - -void JitCompiler::Clear() { - ClearCodeSpace(); -} - -} // namespace Shader - -} // namespace Pica diff --git a/src/video_core/shader/shader_jit.h b/src/video_core/shader/shader_jit.h deleted file mode 100644 index f05b64a92..000000000 --- a/src/video_core/shader/shader_jit.h +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright 2015 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include - -#if defined(_M_X86_64) -#include "common/x64_emitter.h" -#else -#include "common/fake_emitter.h" -#endif - -#include "video_core/pica.h" - -#include "shader.h" - -using nihstro::Instruction; -using nihstro::OpCode; -using nihstro::SwizzlePattern; - -namespace Pica { - -namespace Shader { - -using CompiledShader = void(void* state); - -/** - * This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64 - * code that can be executed on the host machine directly. - */ -class JitCompiler : public Gen::XCodeBlock { -public: - JitCompiler(); - - CompiledShader* Compile(); - - void Clear(); - - void Compile_ADD(Instruction instr); - void Compile_DP3(Instruction instr); - void Compile_DP4(Instruction instr); - void Compile_MUL(Instruction instr); - void Compile_FLR(Instruction instr); - void Compile_MAX(Instruction instr); - void Compile_MIN(Instruction instr); - void Compile_RCP(Instruction instr); - void Compile_RSQ(Instruction instr); - void Compile_MOVA(Instruction instr); - void Compile_MOV(Instruction instr); - void Compile_SLTI(Instruction instr); - void Compile_NOP(Instruction instr); - void Compile_END(Instruction instr); - void Compile_CALL(Instruction instr); - void Compile_CALLC(Instruction instr); - void Compile_CALLU(Instruction instr); - void Compile_IF(Instruction instr); - void Compile_LOOP(Instruction instr); - void Compile_JMP(Instruction instr); - void Compile_CMP(Instruction instr); - void Compile_MAD(Instruction instr); - -private: - void Compile_Block(unsigned stop); - void Compile_NextInstr(unsigned* offset); - -#if defined(_M_X86_64) - void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest); - void Compile_DestEnable(Instruction instr, Gen::X64Reg dest); - - void Compile_EvaluateCondition(Instruction instr); - void Compile_UniformCondition(Instruction instr); -#endif - - /// Pointer to the variable that stores the current Pica code offset. Used to handle nested code blocks. - unsigned* offset_ptr = nullptr; - - bool done = false; - bool looping = false; -}; - -} // Shader - -} // Pica diff --git a/src/video_core/shader/shader_jit_fake.cpp b/src/video_core/shader/shader_jit_fake.cpp deleted file mode 100644 index e1e79b733..000000000 --- a/src/video_core/shader/shader_jit_fake.cpp +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright 2015 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/fake_emitter.h" - -#include "video_core/shader/shader.h" -#include "video_core/shader/shader_jit.h" - -namespace Pica { - -namespace Shader { - -using namespace FakeGen; - -void Jit::Comp_ADD(Instruction instr) { -} - -void Jit::Comp_DP3(Instruction instr) { -} - -void Jit::Comp_DP4(Instruction instr) { -} - -void Jit::Comp_MUL(Instruction instr) { -} - -void Jit::Comp_FLR(Instruction instr) { -} - -void Jit::Comp_MAX(Instruction instr) { -} - -void Jit::Comp_MIN(Instruction instr) { -} - -void Jit::Comp_MOVA(Instruction instr) { -} - -void Jit::Comp_MOV(Instruction instr) { -} - -void Jit::Comp_SLTI(Instruction instr) { -} - -void Jit::Comp_RCP(Instruction instr) { -} - -void Jit::Comp_RSQ(Instruction instr) { -} - -void Jit::Comp_NOP(Instruction instr) { -} - -void Jit::Comp_END(Instruction instr) { -} - -void Jit::Comp_CALL(Instruction instr) { -} - -void Jit::Comp_CALLC(Instruction instr) { -} - -void Jit::Comp_CALLU(Instruction instr) { -} - -void Jit::Comp_CMP(Instruction instr) { -} - -void Jit::Comp_MAD(Instruction instr) { -} - -void Jit::Comp_IF(Instruction instr) { -} - -void Jit::Comp_LOOP(Instruction instr) { -} - -void Jit::Comp_JMP(Instruction instr) { -} - -void Jit::Comp_NextInstr(unsigned* offset) { -} - -CompiledShader Jit::Compile() { - return nullptr; -} - -} // namespace Shader - -} // namespace Pica diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index d69f3a549..1e20a06a7 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -4,12 +4,13 @@ #include -#include "common/abi.h" #include "common/cpu_detect.h" -#include "common/x64_emitter.h" + +#include "common/x64/abi.h" +#include "common/x64/emitter.h" #include "shader.h" -#include "shader_jit.h" +#include "shader_jit_x64.h" namespace Pica { @@ -134,7 +135,7 @@ static const u8 NO_DEST_REG_MASK = 0xf; */ void JitCompiler::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, X64Reg dest) { X64Reg src_ptr; - std::size_t src_offset; + int src_offset; if (src_reg.GetRegisterType() == RegisterType::FloatUniform) { src_ptr = UNIFORMS; @@ -451,7 +452,6 @@ void JitCompiler::Compile_NOP(Instruction instr) { void JitCompiler::Compile_END(Instruction instr) { ABI_PopAllCalleeSavedRegsAndAdjustStack(); RET(); - done = true; } void JitCompiler::Compile_CALL(Instruction instr) { @@ -655,7 +655,7 @@ CompiledShader* JitCompiler::Compile() { MOVAPS(NEGBIT, MDisp(RAX, 0)); looping = false; - done = false; + while (offset < g_state.vs.program_code.size()) { Compile_NextInstr(&offset); } @@ -663,6 +663,14 @@ CompiledShader* JitCompiler::Compile() { return (CompiledShader*)start; } +JitCompiler::JitCompiler() { + AllocCodeSpace(1024 * 1024 * 4); +} + +void JitCompiler::Clear() { + ClearCodeSpace(); +} + } // namespace Shader } // namespace Pica diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h new file mode 100644 index 000000000..719a24210 --- /dev/null +++ b/src/video_core/shader/shader_jit_x64.h @@ -0,0 +1,79 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "common/x64/emitter.h" + +#include "video_core/pica.h" + +#include "shader.h" + +using nihstro::Instruction; +using nihstro::OpCode; +using nihstro::SwizzlePattern; + +namespace Pica { + +namespace Shader { + +using CompiledShader = void(void* state); + +/** + * This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64 + * code that can be executed on the host machine directly. + */ +class JitCompiler : public Gen::XCodeBlock { +public: + JitCompiler(); + + CompiledShader* Compile(); + + void Clear(); + + void Compile_ADD(Instruction instr); + void Compile_DP3(Instruction instr); + void Compile_DP4(Instruction instr); + void Compile_MUL(Instruction instr); + void Compile_FLR(Instruction instr); + void Compile_MAX(Instruction instr); + void Compile_MIN(Instruction instr); + void Compile_RCP(Instruction instr); + void Compile_RSQ(Instruction instr); + void Compile_MOVA(Instruction instr); + void Compile_MOV(Instruction instr); + void Compile_SLTI(Instruction instr); + void Compile_NOP(Instruction instr); + void Compile_END(Instruction instr); + void Compile_CALL(Instruction instr); + void Compile_CALLC(Instruction instr); + void Compile_CALLU(Instruction instr); + void Compile_IF(Instruction instr); + void Compile_LOOP(Instruction instr); + void Compile_JMP(Instruction instr); + void Compile_CMP(Instruction instr); + void Compile_MAD(Instruction instr); + +private: + void Compile_Block(unsigned stop); + void Compile_NextInstr(unsigned* offset); + + void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest); + void Compile_DestEnable(Instruction instr, Gen::X64Reg dest); + + void Compile_EvaluateCondition(Instruction instr); + void Compile_UniformCondition(Instruction instr); + + /// Pointer to the variable that stores the current Pica code offset. Used to handle nested code blocks. + unsigned* offset_ptr = nullptr; + + /// Set to true if currently in a loop, used to check for the existence of nested loops + bool looping = false; +}; + +} // Shader + +} // Pica -- cgit v1.2.3