summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Yuri Kunde Schlesner <yuriks@yuriks.net> 2017-06-29 18:29:40 +0200
committer: GitHub <noreply@github.com> 2017-06-29 18:29:40 +0200
commit: 686fde7e526e024716baa3aa3ba887d1a2479d41 (patch)
tree: 52fd95ec3fc77f42122783663a36ed6734f773db
parent: Merge pull request #2800 from wwylele/fog-lutlutlut (diff)
parent: gpu: add comments for TextureCopy (diff)
download: yuzu-686fde7e526e024716baa3aa3ba887d1a2479d41.tar
yuzu-686fde7e526e024716baa3aa3ba887d1a2479d41.tar.gz
yuzu-686fde7e526e024716baa3aa3ba887d1a2479d41.tar.bz2
yuzu-686fde7e526e024716baa3aa3ba887d1a2479d41.tar.lz
yuzu-686fde7e526e024716baa3aa3ba887d1a2479d41.tar.xz
yuzu-686fde7e526e024716baa3aa3ba887d1a2479d41.tar.zst
yuzu-686fde7e526e024716baa3aa3ba887d1a2479d41.zip
-rw-r--r-- src/core/hw/gpu.cpp 41
-rw-r--r-- src/core/hw/gpu.h 2
2 files changed, 24 insertions, 19 deletions
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index 42809c731..6838e449c 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -5,6 +5,7 @@
#include <cstring>
#include <numeric>
#include <type_traits>
+#include "common/alignment.h"
#include "common/color.h"
#include "common/common_types.h"
#include "common/logging/log.h"
@@ -313,7 +314,7 @@ static void TextureCopy(const Regs::DisplayTransferConfig& config) {
const PAddr src_addr = config.GetPhysicalInputAddress();
const PAddr dst_addr = config.GetPhysicalOutputAddress();
- // TODO: do hwtest with these cases
+ // TODO: do hwtest with invalid addresses
if (!Memory::IsValidPhysicalAddress(src_addr)) {
LOG_CRITICAL(HW_GPU, "invalid input address 0x%08X", src_addr);
return;
@@ -324,31 +325,36 @@ static void TextureCopy(const Regs::DisplayTransferConfig& config) {
return;
}
- if (config.texture_copy.input_width == 0) {
- LOG_CRITICAL(HW_GPU, "zero input width");
+ if (VideoCore::g_renderer->Rasterizer()->AccelerateTextureCopy(config))
return;
- }
- if (config.texture_copy.output_width == 0) {
- LOG_CRITICAL(HW_GPU, "zero output width");
+ u8* src_pointer = Memory::GetPhysicalPointer(src_addr);
+ u8* dst_pointer = Memory::GetPhysicalPointer(dst_addr);
+
+ u32 remaining_size = Common::AlignDown(config.texture_copy.size, 16);
+
+ if (remaining_size == 0) {
+ LOG_CRITICAL(HW_GPU, "zero size. Real hardware freezes on this.");
return;
}
- if (config.texture_copy.size == 0) {
- LOG_CRITICAL(HW_GPU, "zero size");
+ u32 input_gap = config.texture_copy.input_gap * 16;
+ u32 output_gap = config.texture_copy.output_gap * 16;
+
+ // Zero gap means contiguous input/output even if width = 0. To avoid infinite loop below, width
+ // is assigned with the total size if gap = 0.
+ u32 input_width = input_gap == 0 ? remaining_size : config.texture_copy.input_width * 16;
+ u32 output_width = output_gap == 0 ? remaining_size : config.texture_copy.output_width * 16;
+
+ if (input_width == 0) {
+ LOG_CRITICAL(HW_GPU, "zero input width. Real hardware freezes on this.");
return;
}
- if (VideoCore::g_renderer->Rasterizer()->AccelerateTextureCopy(config))
+ if (output_width == 0) {
+ LOG_CRITICAL(HW_GPU, "zero output width. Real hardware freezes on this.");
return;
-
- u8* src_pointer = Memory::GetPhysicalPointer(src_addr);
- u8* dst_pointer = Memory::GetPhysicalPointer(dst_addr);
-
- u32 input_width = config.texture_copy.input_width * 16;
- u32 input_gap = config.texture_copy.input_gap * 16;
- u32 output_width = config.texture_copy.output_width * 16;
- u32 output_gap = config.texture_copy.output_gap * 16;
+ }
size_t contiguous_input_size =
config.texture_copy.size / input_width * (input_width + input_gap);
@@ -360,7 +366,6 @@ static void TextureCopy(const Regs::DisplayTransferConfig& config) {
Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(),
static_cast<u32>(contiguous_output_size));
- u32 remaining_size = config.texture_copy.size;
u32 remaining_input = input_width;
u32 remaining_output = output_width;
while (remaining_size > 0) {
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h
index bdd997b2a..21b127fee 100644
--- a/src/core/hw/gpu.h
+++ b/src/core/hw/gpu.h
@@ -225,7 +225,7 @@ struct Regs {
INSERT_PADDING_WORDS(0x1);
struct {
- u32 size;
+ u32 size; // The lower 4 bits are ignored
union {
u32 input_size;