diff --git a/Source/Core/Common/Config/Config.cpp b/Source/Core/Common/Config/Config.cpp index 2211d0d5fb..29004787e4 100644 --- a/Source/Core/Common/Config/Config.cpp +++ b/Source/Core/Common/Config/Config.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include "Common/Config/Config.h" diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp index dc775ef607..0802a06039 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp @@ -133,13 +133,13 @@ const OpArg& Arm64GPRCache::GetGuestGPROpArg(size_t preg) const Arm64GPRCache::GuestRegInfo Arm64GPRCache::GetGuestGPR(size_t preg) { ASSERT(preg < GUEST_GPR_COUNT); - return {32, PPCSTATE_OFF(gpr[preg]), m_guest_registers[GUEST_GPR_OFFSET + preg]}; + return {32, PPCSTATE_OFF_GPR(preg), m_guest_registers[GUEST_GPR_OFFSET + preg]}; } Arm64GPRCache::GuestRegInfo Arm64GPRCache::GetGuestCR(size_t preg) { ASSERT(preg < GUEST_CR_COUNT); - return {64, PPCSTATE_OFF(cr.fields[preg]), m_guest_registers[GUEST_CR_OFFSET + preg]}; + return {64, PPCSTATE_OFF_CR(preg), m_guest_registers[GUEST_CR_OFFSET + preg]}; } Arm64GPRCache::GuestRegInfo Arm64GPRCache::GetGuestByIndex(size_t index) @@ -450,8 +450,9 @@ ARM64Reg Arm64FPRCache::R(size_t preg, RegType type) { // Load the high 64bits from the file and insert them in to the high 64bits of the host // register - ARM64Reg tmp_reg = GetReg(); - m_float_emit->LDR(64, INDEX_UNSIGNED, tmp_reg, PPC_REG, u32(PPCSTATE_OFF(ps[preg].ps1))); + const ARM64Reg tmp_reg = GetReg(); + m_float_emit->LDR(64, INDEX_UNSIGNED, tmp_reg, PPC_REG, + static_cast(PPCSTATE_OFF_PS1(preg))); m_float_emit->INS(64, host_reg, 1, tmp_reg, 0); UnlockRegister(tmp_reg); @@ -505,7 +506,7 @@ ARM64Reg Arm64FPRCache::R(size_t preg, RegType type) } reg.SetDirty(false); m_float_emit->LDR(load_size, INDEX_UNSIGNED, host_reg, PPC_REG, - u32(PPCSTATE_OFF(ps[preg].ps0))); + static_cast(PPCSTATE_OFF_PS0(preg))); return host_reg; } default: @@ -553,7 +554,8 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type) // We are doing a full 128bit store because it takes 2 cycles on a Cortex-A57 to do a 128bit // store. // It would take longer to do an insert to a temporary and a 64bit store than to just do this. - m_float_emit->STR(128, INDEX_UNSIGNED, flush_reg, PPC_REG, u32(PPCSTATE_OFF(ps[preg].ps0))); + m_float_emit->STR(128, INDEX_UNSIGNED, flush_reg, PPC_REG, + static_cast(PPCSTATE_OFF_PS0(preg))); break; case REG_DUP_SINGLE: flush_reg = GetReg(); @@ -561,7 +563,8 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type) [[fallthrough]]; case REG_DUP: // Store PSR1 (which is equal to PSR0) in memory. - m_float_emit->STR(64, INDEX_UNSIGNED, flush_reg, PPC_REG, u32(PPCSTATE_OFF(ps[preg].ps1))); + m_float_emit->STR(64, INDEX_UNSIGNED, flush_reg, PPC_REG, + static_cast(PPCSTATE_OFF_PS1(preg))); break; default: // All other types doesn't store anything in PSR1. @@ -688,7 +691,7 @@ void Arm64FPRCache::FlushRegister(size_t preg, bool maintain_state) if (dirty) { m_float_emit->STR(store_size, INDEX_UNSIGNED, host_reg, PPC_REG, - u32(PPCSTATE_OFF(ps[preg].ps0))); + static_cast(PPCSTATE_OFF_PS0(preg))); } if (!maintain_state) @@ -701,12 +704,18 @@ void Arm64FPRCache::FlushRegister(size_t preg, bool maintain_state) { if (dirty) { - // If the paired registers were at the start of ppcState we could do an STP here. - // Too bad moving them would break savestate compatibility between x86_64 and AArch64 - // m_float_emit->STP(64, INDEX_SIGNED, host_reg, host_reg, PPC_REG, - // PPCSTATE_OFF(ps[preg].ps0)); - m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, PPC_REG, u32(PPCSTATE_OFF(ps[preg].ps0))); - m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, PPC_REG, u32(PPCSTATE_OFF(ps[preg].ps1))); + if (PPCSTATE_OFF_PS0(preg) <= 504) + { + m_float_emit->STP(64, INDEX_SIGNED, host_reg, host_reg, PPC_REG, + static_cast(PPCSTATE_OFF_PS0(preg))); + } + else + { + m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, PPC_REG, + static_cast(PPCSTATE_OFF_PS0(preg))); + m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, PPC_REG, + static_cast(PPCSTATE_OFF_PS1(preg))); + } } if (!maintain_state) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h index 9860e4843e..c1b71f1212 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h @@ -22,6 +22,18 @@ static const Arm64Gen::ARM64Reg DISPATCHER_PC = #define PPCSTATE_OFF(elem) (offsetof(PowerPC::PowerPCState, elem)) +#define PPCSTATE_OFF_ARRAY(elem, i) \ + (offsetof(PowerPC::PowerPCState, elem[0]) + sizeof(PowerPC::PowerPCState::elem[0]) * (i)) + +#define PPCSTATE_OFF_GPR(i) PPCSTATE_OFF_ARRAY(gpr, i) +#define PPCSTATE_OFF_CR(i) PPCSTATE_OFF_ARRAY(cr.fields, i) +#define PPCSTATE_OFF_SR(i) PPCSTATE_OFF_ARRAY(sr, i) +#define PPCSTATE_OFF_SPR(i) PPCSTATE_OFF_ARRAY(spr, i) + +static_assert(std::is_same_v); +#define PPCSTATE_OFF_PS0(i) (PPCSTATE_OFF_ARRAY(ps, i) + offsetof(PowerPC::PairedSingle, ps0)) +#define PPCSTATE_OFF_PS1(i) (PPCSTATE_OFF_ARRAY(ps, i) + offsetof(PowerPC::PairedSingle, ps1)) + // Some asserts to make sure we will be able to load everything static_assert(PPCSTATE_OFF(spr[1023]) <= 16380, "LDR(32bit) can't reach the last SPR"); static_assert((PPCSTATE_OFF(ps[0].ps0) % 8) == 0, diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp index d5889fd62c..427afe8c15 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -111,7 +111,7 @@ void JitArm64::mfsr(UGeckoInstruction inst) JITDISABLE(bJITSystemRegistersOff); gpr.BindToRegister(inst.RD, false); - LDR(INDEX_UNSIGNED, gpr.R(inst.RD), PPC_REG, PPCSTATE_OFF(sr[inst.SR])); + LDR(INDEX_UNSIGNED, gpr.R(inst.RD), PPC_REG, PPCSTATE_OFF_SR(inst.SR)); } void JitArm64::mtsr(UGeckoInstruction inst) @@ -120,7 +120,7 @@ void JitArm64::mtsr(UGeckoInstruction inst) JITDISABLE(bJITSystemRegistersOff); gpr.BindToRegister(inst.RS, true); - STR(INDEX_UNSIGNED, gpr.R(inst.RS), PPC_REG, PPCSTATE_OFF(sr[inst.SR])); + STR(INDEX_UNSIGNED, gpr.R(inst.RS), PPC_REG, PPCSTATE_OFF_SR(inst.SR)); } void JitArm64::mfsrin(UGeckoInstruction inst) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp index f6a63ee2e6..91da6b2643 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp @@ -77,7 +77,7 @@ void JitArm64::psq_l(UGeckoInstruction inst) } else { - LDR(INDEX_UNSIGNED, scale_reg, PPC_REG, PPCSTATE_OFF(spr[SPR_GQR0 + inst.I])); + //LDR(INDEX_UNSIGNED, scale_reg, PPC_REG, PPCSTATE_OFF(spr[SPR_GQR0 + inst.I])); UBFM(type_reg, scale_reg, 16, 18); // Type UBFM(scale_reg, scale_reg, 24, 29); // Scale @@ -179,7 +179,7 @@ void JitArm64::psq_st(UGeckoInstruction inst) m_float_emit.FCVTN(32, D0, VS); } - LDR(INDEX_UNSIGNED, scale_reg, PPC_REG, PPCSTATE_OFF(spr[SPR_GQR0 + inst.I])); + //LDR(INDEX_UNSIGNED, scale_reg, PPC_REG, PPCSTATE_OFF(spr[SPR_GQR0 + inst.I])); UBFM(type_reg, scale_reg, 0, 2); // Type UBFM(scale_reg, scale_reg, 8, 13); // Scale diff --git a/Source/Core/DiscIO/WIACompression.cpp b/Source/Core/DiscIO/WIACompression.cpp index 20d19c4877..10d6d67c22 100644 --- a/Source/Core/DiscIO/WIACompression.cpp +++ b/Source/Core/DiscIO/WIACompression.cpp @@ -165,18 +165,18 @@ bool Bzip2Decompressor::Decompress(const DecompressionBuffer& in, DecompressionB m_started = true; } - constexpr auto clamped_cast = [](size_t x) { - return static_cast( - std::min(std::numeric_limits().max(), x)); - }; + //constexpr auto clamped_cast = [](size_t x) { + //return static_cast( + //std::min(std::numeric_limits().max(), x)); + //}; char* const in_ptr = reinterpret_cast(const_cast(in.data.data() + *in_bytes_read)); m_stream.next_in = in_ptr; - m_stream.avail_in = clamped_cast(in.bytes_written - *in_bytes_read); + //m_stream.avail_in = clamped_cast(in.bytes_written - *in_bytes_read); char* const out_ptr = reinterpret_cast(out->data.data() + out->bytes_written); m_stream.next_out = out_ptr; - m_stream.avail_out = clamped_cast(out->data.size() - out->bytes_written); + //m_stream.avail_out = clamped_cast(out->data.size() - out->bytes_written); const int result = BZ2_bzDecompress(&m_stream);