-rwxr-xr-x  .ci/scripts/clang/docker.sh | 18
-rw-r--r--  .ci/scripts/clang/exec.sh | 8
-rw-r--r--  .ci/scripts/clang/upload.sh | 20
-rwxr-xr-x  .ci/scripts/windows/docker.sh | 5
-rw-r--r--  .ci/templates/build-standard.yml | 5
-rw-r--r--  .gitmodules | 3
-rw-r--r--  CMakeLists.txt | 146
-rw-r--r--  CMakeModules/CopyYuzuFFmpegDeps.cmake | 8
-rw-r--r--  README.md | 2
-rw-r--r--  dist/qt_themes/colorful_dark/icons/index.theme | 2
-rw-r--r--  dist/qt_themes/colorful_midnight_blue/icons/index.theme | 2
-rw-r--r--  dist/qt_themes/qdarkstyle_midnight_blue/style.qss | 4
-rw-r--r--  dist/yuzu.bmp | bin 0 -> 262282 bytes
m---------  externals/dynarmic | 0
m---------  externals/ffmpeg | 0
-rw-r--r--  externals/find-modules/FindFFmpeg.cmake | 247
-rw-r--r--  externals/glad/include/glad/glad.h | 3
-rw-r--r--  externals/glad/src/glad.c | 2
-rw-r--r--  src/audio_core/CMakeLists.txt | 2
-rw-r--r--  src/audio_core/command_generator.cpp | 357
-rw-r--r--  src/audio_core/command_generator.h | 5
-rw-r--r--  src/audio_core/common.h | 23
-rw-r--r--  src/audio_core/delay_line.cpp | 104
-rw-r--r--  src/audio_core/delay_line.h | 46
-rw-r--r--  src/audio_core/effect_context.cpp | 22
-rw-r--r--  src/audio_core/effect_context.h | 31
-rw-r--r--  src/audio_core/stream.cpp | 9
-rw-r--r--  src/common/CMakeLists.txt | 4
-rw-r--r--  src/common/alignment.h | 5
-rw-r--r--  src/common/cityhash.cpp | 178
-rw-r--r--  src/common/cityhash.h | 33
-rw-r--r--  src/common/string_util.cpp | 14
-rw-r--r--  src/common/tiny_mt.h | 250
-rw-r--r--  src/common/uint128.cpp | 71
-rw-r--r--  src/common/uint128.h | 105
-rw-r--r--  src/common/wall_clock.cpp | 2
-rw-r--r--  src/common/x64/native_clock.cpp | 58
-rw-r--r--  src/core/CMakeLists.txt | 50
-rw-r--r--  src/core/core.cpp | 3
-rw-r--r--  src/core/core_timing_util.cpp | 84
-rw-r--r--  src/core/core_timing_util.h | 61
-rw-r--r--  src/core/frontend/applets/controller.h | 1
-rw-r--r--  src/core/hle/kernel/client_port.cpp | 4
-rw-r--r--  src/core/hle/kernel/client_session.cpp | 4
-rw-r--r--  src/core/hle/kernel/errors.h | 43
-rw-r--r--  src/core/hle/kernel/handle_table.cpp | 10
-rw-r--r--  src/core/hle/kernel/hle_ipc.cpp | 2
-rw-r--r--  src/core/hle/kernel/k_address_arbiter.cpp | 28
-rw-r--r--  src/core/hle/kernel/k_address_space_info.cpp (renamed from src/core/hle/kernel/memory/address_space_info.cpp) | 60
-rw-r--r--  src/core/hle/kernel/k_address_space_info.h (renamed from src/core/hle/kernel/memory/address_space_info.h) | 15
-rw-r--r--  src/core/hle/kernel/k_condition_variable.cpp | 20
-rw-r--r--  src/core/hle/kernel/k_memory_block.h (renamed from src/core/hle/kernel/memory/memory_block.h) | 149
-rw-r--r--  src/core/hle/kernel/k_memory_block_manager.cpp (renamed from src/core/hle/kernel/memory/memory_block_manager.cpp) | 60
-rw-r--r--  src/core/hle/kernel/k_memory_block_manager.h (renamed from src/core/hle/kernel/memory/memory_block_manager.h) | 33
-rw-r--r--  src/core/hle/kernel/k_memory_layout.h (renamed from src/core/hle/kernel/memory/memory_layout.h) | 34
-rw-r--r--  src/core/hle/kernel/k_memory_manager.cpp (renamed from src/core/hle/kernel/memory/memory_manager.cpp) | 45
-rw-r--r--  src/core/hle/kernel/k_memory_manager.h (renamed from src/core/hle/kernel/memory/memory_manager.h) | 49
-rw-r--r--  src/core/hle/kernel/k_page_bitmap.h | 279
-rw-r--r--  src/core/hle/kernel/k_page_heap.cpp (renamed from src/core/hle/kernel/memory/page_heap.cpp) | 23
-rw-r--r--  src/core/hle/kernel/k_page_heap.h | 193
-rw-r--r--  src/core/hle/kernel/k_page_linked_list.h (renamed from src/core/hle/kernel/memory/page_linked_list.h) | 14
-rw-r--r--  src/core/hle/kernel/k_page_table.cpp (renamed from src/core/hle/kernel/memory/page_table.cpp) | 675
-rw-r--r--  src/core/hle/kernel/k_page_table.h (renamed from src/core/hle/kernel/memory/page_table.h) | 98
-rw-r--r--  src/core/hle/kernel/k_readable_event.cpp | 3
-rw-r--r--  src/core/hle/kernel/k_resource_limit.cpp | 2
-rw-r--r--  src/core/hle/kernel/k_scoped_resource_reservation.h | 67
-rw-r--r--  src/core/hle/kernel/k_shared_memory.cpp | 65
-rw-r--r--  src/core/hle/kernel/k_shared_memory.h (renamed from src/core/hle/kernel/shared_memory.h) | 24
-rw-r--r--  src/core/hle/kernel/k_slab_heap.h (renamed from src/core/hle/kernel/memory/slab_heap.h) | 21
-rw-r--r--  src/core/hle/kernel/k_spin_lock.cpp | 54
-rw-r--r--  src/core/hle/kernel/k_spin_lock.h | 33
-rw-r--r--  src/core/hle/kernel/k_synchronization_object.cpp | 8
-rw-r--r--  src/core/hle/kernel/k_system_control.cpp (renamed from src/core/hle/kernel/memory/system_control.cpp) | 18
-rw-r--r--  src/core/hle/kernel/k_system_control.h | 19
-rw-r--r--  src/core/hle/kernel/k_thread.cpp | 38
-rw-r--r--  src/core/hle/kernel/kernel.cpp | 114
-rw-r--r--  src/core/hle/kernel/kernel.h | 44
-rw-r--r--  src/core/hle/kernel/memory/page_heap.h | 370
-rw-r--r--  src/core/hle/kernel/memory/system_control.h | 13
-rw-r--r--  src/core/hle/kernel/memory_types.h (renamed from src/core/hle/kernel/memory/memory_types.h) | 4
-rw-r--r--  src/core/hle/kernel/process.cpp | 89
-rw-r--r--  src/core/hle/kernel/process.h | 11
-rw-r--r--  src/core/hle/kernel/process_capability.cpp | 48
-rw-r--r--  src/core/hle/kernel/process_capability.h | 16
-rw-r--r--  src/core/hle/kernel/server_port.cpp | 4
-rw-r--r--  src/core/hle/kernel/session.cpp | 11
-rw-r--r--  src/core/hle/kernel/shared_memory.cpp | 57
-rw-r--r--  src/core/hle/kernel/svc.cpp | 293
-rw-r--r--  src/core/hle/kernel/svc_results.h | 21
-rw-r--r--  src/core/hle/kernel/transfer_memory.cpp | 6
-rw-r--r--  src/core/hle/kernel/transfer_memory.h | 6
-rw-r--r--  src/core/hle/service/acc/acc.cpp | 18
-rw-r--r--  src/core/hle/service/am/am.cpp | 13
-rw-r--r--  src/core/hle/service/am/applets/controller.cpp | 3
-rw-r--r--  src/core/hle/service/am/applets/software_keyboard.cpp | 4
-rw-r--r--  src/core/hle/service/hid/controllers/npad.cpp | 63
-rw-r--r--  src/core/hle/service/hid/controllers/npad.h | 26
-rw-r--r--  src/core/hle/service/hid/hid.cpp | 126
-rw-r--r--  src/core/hle/service/hid/hid.h | 14
-rw-r--r--  src/core/hle/service/hid/irs.cpp | 2
-rw-r--r--  src/core/hle/service/hid/irs.h | 4
-rw-r--r--  src/core/hle/service/ldn/errors.h | 13
-rw-r--r--  src/core/hle/service/ldn/ldn.cpp | 36
-rw-r--r--  src/core/hle/service/ldr/ldr.cpp | 36
-rw-r--r--  src/core/hle/service/nfp/nfp.cpp | 2
-rw-r--r--  src/core/hle/service/ns/pl_u.cpp | 4
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp | 3
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_vic.cpp | 7
-rw-r--r--  src/core/hle/service/olsc/olsc.cpp | 13
-rw-r--r--  src/core/hle/service/sockets/bsd.cpp | 6
-rw-r--r--  src/core/hle/service/time/time_manager.cpp | 4
-rw-r--r--  src/core/hle/service/time/time_manager.h | 2
-rw-r--r--  src/core/hle/service/time/time_sharedmemory.cpp | 2
-rw-r--r--  src/core/hle/service/time/time_sharedmemory.h | 6
-rw-r--r--  src/core/loader/deconstructed_rom_directory.cpp | 2
-rw-r--r--  src/core/loader/elf.cpp | 2
-rw-r--r--  src/core/loader/kip.cpp | 2
-rw-r--r--  src/core/loader/nro.cpp | 2
-rw-r--r--  src/core/loader/nso.cpp | 2
-rw-r--r--  src/core/memory.cpp | 2
-rw-r--r--  src/core/memory.h | 10
-rw-r--r--  src/core/memory/cheat_engine.cpp | 2
-rw-r--r--  src/core/reporter.cpp | 2
-rw-r--r--  src/core/settings.h | 5
-rw-r--r--  src/input_common/mouse/mouse_input.cpp | 48
-rw-r--r--  src/input_common/mouse/mouse_input.h | 7
-rw-r--r--  src/input_common/mouse/mouse_poller.cpp | 3
-rw-r--r--  src/input_common/settings.h | 1
-rw-r--r--  src/input_common/udp/client.cpp | 4
-rw-r--r--  src/input_common/udp/client.h | 3
-rw-r--r--  src/input_common/udp/udp.cpp | 4
-rw-r--r--  src/tests/CMakeLists.txt | 1
-rw-r--r--  src/tests/common/cityhash.cpp | 22
-rw-r--r--  src/tests/video_core/buffer_base.cpp | 76
-rw-r--r--  src/video_core/CMakeLists.txt | 17
-rw-r--r--  src/video_core/buffer_cache/buffer_base.h | 217
-rw-r--r--  src/video_core/buffer_cache/buffer_block.h | 62
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.cpp | 13
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.h | 1656
-rw-r--r--  src/video_core/buffer_cache/map_interval.cpp | 33
-rw-r--r--  src/video_core/buffer_cache/map_interval.h | 93
-rw-r--r--  src/video_core/cdma_pusher.cpp | 63
-rw-r--r--  src/video_core/cdma_pusher.h | 33
-rw-r--r--  src/video_core/command_classes/codecs/codec.cpp | 7
-rw-r--r--  src/video_core/command_classes/nvdec.cpp | 8
-rw-r--r--  src/video_core/command_classes/nvdec.h | 2
-rw-r--r--  src/video_core/command_classes/vic.cpp | 48
-rw-r--r--  src/video_core/command_classes/vic.h | 51
-rw-r--r--  src/video_core/dirty_flags.cpp | 29
-rw-r--r--  src/video_core/dirty_flags.h | 8
-rw-r--r--  src/video_core/dma_pusher.cpp | 2
-rw-r--r--  src/video_core/engines/fermi_2d.cpp | 4
-rw-r--r--  src/video_core/engines/fermi_2d.h | 2
-rw-r--r--  src/video_core/engines/kepler_compute.cpp | 5
-rw-r--r--  src/video_core/engines/kepler_compute.h | 2
-rw-r--r--  src/video_core/engines/kepler_memory.cpp | 1
-rw-r--r--  src/video_core/engines/maxwell_3d.cpp | 21
-rw-r--r--  src/video_core/engines/maxwell_3d.h | 14
-rw-r--r--  src/video_core/engines/maxwell_dma.cpp | 3
-rw-r--r--  src/video_core/fence_manager.h | 4
-rw-r--r--  src/video_core/gpu.cpp | 14
-rw-r--r--  src/video_core/gpu.h | 1
-rw-r--r--  src/video_core/gpu_thread.cpp | 15
-rw-r--r--  src/video_core/gpu_thread.h | 8
-rw-r--r--  src/video_core/host_shaders/CMakeLists.txt | 1
-rw-r--r--  src/video_core/host_shaders/vulkan_quad_array.comp | 28
-rw-r--r--  src/video_core/host_shaders/vulkan_uint8.comp | 9
-rw-r--r--  src/video_core/memory_manager.cpp | 6
-rw-r--r--  src/video_core/memory_manager.h | 4
-rw-r--r--  src/video_core/rasterizer_interface.h | 5
-rw-r--r--  src/video_core/renderer_base.h | 17
-rw-r--r--  src/video_core/renderer_opengl/gl_buffer_cache.cpp | 232
-rw-r--r--  src/video_core/renderer_opengl/gl_buffer_cache.h | 160
-rw-r--r--  src/video_core/renderer_opengl/gl_device.cpp | 15
-rw-r--r--  src/video_core/renderer_opengl/gl_device.h | 11
-rw-r--r--  src/video_core/renderer_opengl/gl_fence_manager.cpp | 2
-rw-r--r--  src/video_core/renderer_opengl/gl_fence_manager.h | 9
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.cpp | 589
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.h | 66
-rw-r--r--  src/video_core/renderer_opengl/gl_resource_manager.cpp | 6
-rw-r--r--  src/video_core/renderer_opengl/gl_resource_manager.h | 3
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.cpp | 4
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 61
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_decompiler.h | 2
-rw-r--r--  src/video_core/renderer_opengl/gl_state_tracker.cpp | 25
-rw-r--r--  src/video_core/renderer_opengl/gl_state_tracker.h | 32
-rw-r--r--  src/video_core/renderer_opengl/gl_stream_buffer.cpp | 94
-rw-r--r--  src/video_core/renderer_opengl/gl_stream_buffer.h | 60
-rw-r--r--  src/video_core/renderer_opengl/gl_texture_cache.cpp | 69
-rw-r--r--  src/video_core/renderer_opengl/gl_texture_cache.h | 36
-rw-r--r--  src/video_core/renderer_opengl/maxwell_to_gl.h | 38
-rw-r--r--  src/video_core/renderer_opengl/renderer_opengl.cpp | 51
-rw-r--r--  src/video_core/renderer_opengl/renderer_opengl.h | 18
-rw-r--r--  src/video_core/renderer_opengl/util_shaders.cpp | 43
-rw-r--r--  src/video_core/renderer_opengl/util_shaders.h | 9
-rw-r--r--  src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | 83
-rw-r--r--  src/video_core/renderer_vulkan/fixed_pipeline_state.h | 18
-rw-r--r--  src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 6
-rw-r--r--  src/video_core/renderer_vulkan/maxwell_to_vk.h | 2
-rw-r--r--  src/video_core/renderer_vulkan/renderer_vulkan.cpp | 153
-rw-r--r--  src/video_core/renderer_vulkan/renderer_vulkan.h | 46
-rw-r--r--  src/video_core/renderer_vulkan/vk_blit_screen.cpp | 20
-rw-r--r--  src/video_core/renderer_vulkan/vk_blit_screen.h | 15
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 394
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.h | 121
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pass.cpp | 144
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pass.h | 27
-rw-r--r--  src/video_core/renderer_vulkan/vk_fence_manager.cpp | 4
-rw-r--r--  src/video_core/renderer_vulkan/vk_fence_manager.h | 11
-rw-r--r--  src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 3
-rw-r--r--  src/video_core/renderer_vulkan/vk_master_semaphore.h | 11
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.cpp | 674
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.h | 67
-rw-r--r--  src/video_core/renderer_vulkan/vk_resource_pool.cpp | 14
-rw-r--r--  src/video_core/renderer_vulkan/vk_resource_pool.h | 2
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.cpp | 14
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.h | 26
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 40
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_decompiler.h | 20
-rw-r--r--  src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp | 156
-rw-r--r--  src/video_core/renderer_vulkan/vk_staging_buffer_pool.h | 24
-rw-r--r--  src/video_core/renderer_vulkan/vk_state_tracker.cpp | 43
-rw-r--r--  src/video_core/renderer_vulkan/vk_state_tracker.h | 5
-rw-r--r--  src/video_core/renderer_vulkan/vk_swapchain.cpp | 7
-rw-r--r--  src/video_core/renderer_vulkan/vk_swapchain.h | 3
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.cpp | 139
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.h | 27
-rw-r--r--  src/video_core/renderer_vulkan/vk_update_descriptor.cpp | 10
-rw-r--r--  src/video_core/renderer_vulkan/vk_update_descriptor.h | 28
-rw-r--r--  src/video_core/shader/async_shaders.cpp | 1
-rw-r--r--  src/video_core/shader/async_shaders.h | 9
-rw-r--r--  src/video_core/shader/decode/memory.cpp | 1
-rw-r--r--  src/video_core/shader/decode/other.cpp | 1
-rw-r--r--  src/video_core/shader/decode/texture.cpp | 11
-rw-r--r--  src/video_core/shader/shader_ir.h | 5
-rw-r--r--  src/video_core/shader_notify.cpp | 1
-rw-r--r--  src/video_core/texture_cache/texture_cache.h | 58
-rw-r--r--  src/video_core/texture_cache/util.cpp | 34
-rw-r--r--  src/video_core/video_core.cpp | 19
-rw-r--r--  src/video_core/vulkan_common/vulkan_device.cpp | 213
-rw-r--r--  src/video_core/vulkan_common/vulkan_device.h | 7
-rw-r--r--  src/video_core/vulkan_common/vulkan_instance.cpp | 6
-rw-r--r--  src/video_core/vulkan_common/vulkan_memory_allocator.cpp | 78
-rw-r--r--  src/video_core/vulkan_common/vulkan_memory_allocator.h | 18
-rw-r--r--  src/video_core/vulkan_common/vulkan_wrapper.cpp | 50
-rw-r--r--  src/video_core/vulkan_common/vulkan_wrapper.h | 37
-rw-r--r--  src/yuzu/CMakeLists.txt | 1
-rw-r--r--  src/yuzu/applets/controller.cpp | 9
-rw-r--r--  src/yuzu/bootmanager.cpp | 16
-rw-r--r--  src/yuzu/configuration/config.cpp | 27
-rw-r--r--  src/yuzu/configuration/config.h | 2
-rw-r--r--  src/yuzu/configuration/configure_filesystem.cpp | 10
-rw-r--r--  src/yuzu/configuration/configure_filesystem.h | 1
-rw-r--r--  src/yuzu/configuration/configure_filesystem.ui | 35
-rw-r--r--  src/yuzu/configuration/configure_graphics.cpp | 22
-rw-r--r--  src/yuzu/configuration/configure_input_advanced.cpp | 5
-rw-r--r--  src/yuzu/configuration/configure_input_advanced.ui | 82
-rw-r--r--  src/yuzu/configuration/configure_input_player.cpp | 106
-rw-r--r--  src/yuzu/configuration/configure_input_player.h | 6
-rw-r--r--  src/yuzu/configuration/configure_input_player_widget.cpp | 282
-rw-r--r--  src/yuzu/configuration/configure_input_player_widget.h | 8
-rw-r--r--  src/yuzu/debugger/controller.cpp | 2
-rw-r--r--  src/yuzu/main.cpp | 39
-rw-r--r--  src/yuzu/main.ui | 8
-rw-r--r--  src/yuzu/yuzu.qrc | 5
-rw-r--r--  src/yuzu_cmd/CMakeLists.txt | 13
-rw-r--r--  src/yuzu_cmd/config.cpp | 5
-rw-r--r--  src/yuzu_cmd/emu_window/emu_window_sdl2.cpp | 20
-rw-r--r--  src/yuzu_cmd/emu_window/emu_window_sdl2.h | 3
-rw-r--r--  src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp | 2
-rw-r--r--  src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp | 2
-rw-r--r--  src/yuzu_cmd/yuzu.cpp | 2
272 files changed, 7926 insertions, 5731 deletions
diff --git a/.ci/scripts/clang/docker.sh b/.ci/scripts/clang/docker.sh
new file mode 100755
index 000000000..885d74e97
--- /dev/null
+++ b/.ci/scripts/clang/docker.sh
@@ -0,0 +1,18 @@
+#!/bin/bash -ex
+
+# Exit on error, rather than continuing with the rest of the script.
+set -e
+
+cd /yuzu
+
+ccache -s
+
+mkdir build || true && cd build
+cmake .. -DDISPLAY_VERSION=$1 -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/clang -DCMAKE_CXX_COMPILER=/usr/lib/ccache/clang++ -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON -DENABLE_QT_TRANSLATION=ON -DCMAKE_INSTALL_PREFIX="/usr"
+
+make -j$(nproc)
+
+ccache -s
+
+ctest -VV -C Release
+
diff --git a/.ci/scripts/clang/exec.sh b/.ci/scripts/clang/exec.sh
new file mode 100644
index 000000000..e56cd4325
--- /dev/null
+++ b/.ci/scripts/clang/exec.sh
@@ -0,0 +1,8 @@
+#!/bin/bash -ex
+
+mkdir -p "ccache" || true
+chmod a+x ./.ci/scripts/clang/docker.sh
+# The UID of the yuzu user inside the container is 1027
+sudo chown -R 1027 ./
+docker run -e ENABLE_COMPATIBILITY_REPORTING -e CCACHE_DIR=/yuzu/ccache -v $(pwd):/yuzu yuzuemu/build-environments:linux-fresh /bin/bash /yuzu/.ci/scripts/clang/docker.sh $1
+sudo chown -R $UID ./
diff --git a/.ci/scripts/clang/upload.sh b/.ci/scripts/clang/upload.sh
new file mode 100644
index 000000000..fe4e6b2ac
--- /dev/null
+++ b/.ci/scripts/clang/upload.sh
@@ -0,0 +1,20 @@
+#!/bin/bash -ex
+
+. .ci/scripts/common/pre-upload.sh
+
+REV_NAME="yuzu-linux-${GITDATE}-${GITREV}"
+ARCHIVE_NAME="${REV_NAME}.tar.xz"
+COMPRESSION_FLAGS="-cJvf"
+
+if [ "${RELEASE_NAME}" = "mainline" ]; then
+ DIR_NAME="${REV_NAME}"
+else
+ DIR_NAME="${REV_NAME}_${RELEASE_NAME}"
+fi
+
+mkdir "$DIR_NAME"
+
+cp build/bin/yuzu-cmd "$DIR_NAME"
+cp build/bin/yuzu "$DIR_NAME"
+
+. .ci/scripts/common/post-upload.sh
diff --git a/.ci/scripts/windows/docker.sh b/.ci/scripts/windows/docker.sh
index 2bc9f36ab..192a01fd8 100755
--- a/.ci/scripts/windows/docker.sh
+++ b/.ci/scripts/windows/docker.sh
@@ -42,3 +42,8 @@ done
pip3 install pefile
python3 .ci/scripts/windows/scan_dll.py package/*.exe "package/"
python3 .ci/scripts/windows/scan_dll.py package/imageformats/*.dll "package/"
+
+# copy FFmpeg libraries
+EXTERNALS_PATH="$(pwd)/build/externals"
+FFMPEG_DLL_PATH="$(find ${EXTERNALS_PATH} -maxdepth 1 -type d | grep ffmpeg)/bin"
+find ${FFMPEG_DLL_PATH} -type f -regex ".*\.dll" -exec cp -v {} package/ ';'
diff --git a/.ci/templates/build-standard.yml b/.ci/templates/build-standard.yml
index 7422c8346..57d36f813 100644
--- a/.ci/templates/build-standard.yml
+++ b/.ci/templates/build-standard.yml
@@ -12,6 +12,9 @@ jobs:
windows:
BuildSuffix: 'windows-mingw'
ScriptFolder: 'windows'
+ clang:
+ BuildSuffix: 'clang'
+ ScriptFolder: 'clang'
linux:
BuildSuffix: 'linux'
ScriptFolder: 'linux'
@@ -24,4 +27,4 @@ jobs:
parameters:
artifactSource: 'false'
cache: $(parameters.cache)
- version: $(parameters.version)
\ No newline at end of file
+ version: $(parameters.version)
diff --git a/.gitmodules b/.gitmodules
index 41022615b..93ba9b930 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -37,3 +37,6 @@
[submodule "opus"]
path = externals/opus/opus
url = https://github.com/xiph/opus.git
+[submodule "ffmpeg"]
+ path = externals/ffmpeg
+ url = https://git.ffmpeg.org/ffmpeg.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 27aa56780..ac7c3ce90 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,6 +18,8 @@ CMAKE_DEPENDENT_OPTION(YUZU_USE_BUNDLED_QT "Download bundled Qt binaries" ON "EN
option(ENABLE_WEB_SERVICE "Enable web services (telemetry, etc.)" ON)
+CMAKE_DEPENDENT_OPTION(YUZU_USE_BUNDLED_FFMPEG "Download/Build bundled FFmpeg" ON "WIN32" OFF)
+
option(YUZU_USE_QT_WEB_ENGINE "Use QtWebEngine for web applet implementation" OFF)
option(YUZU_ENABLE_BOXCAT "Enable the Boxcat service, a yuzu high-level implementation of BCAT" ON)
@@ -384,19 +386,141 @@ if (NOT LIBUSB_FOUND)
set(LIBUSB_LIBRARIES usb)
endif()
-# Use system installed ffmpeg.
-if (NOT MSVC)
- find_package(FFmpeg REQUIRED)
-else()
- set(FFMPEG_EXT_NAME "ffmpeg-4.2.1")
- set(FFMPEG_PATH "${CMAKE_BINARY_DIR}/externals/${FFMPEG_EXT_NAME}")
- download_bundled_external("ffmpeg/" ${FFMPEG_EXT_NAME} "")
- set(FFMPEG_FOUND YES)
- set(FFMPEG_INCLUDE_DIR "${FFMPEG_PATH}/include" CACHE PATH "Path to FFmpeg headers" FORCE)
- set(FFMPEG_LIBRARY_DIR "${FFMPEG_PATH}/bin" CACHE PATH "Path to FFmpeg library" FORCE)
- set(FFMPEG_DLL_DIR "${FFMPEG_PATH}/bin" CACHE PATH "Path to FFmpeg dll's" FORCE)
+# List of all FFmpeg components required
+set(FFmpeg_COMPONENTS
+ avcodec
+ avutil
+ swscale)
+
+if (NOT YUZU_USE_BUNDLED_FFMPEG)
+ # Use system installed FFmpeg
+ find_package(FFmpeg REQUIRED COMPONENTS ${FFmpeg_COMPONENTS})
+
+ if (FFmpeg_FOUND)
+ # Overwrite aggregate defines from FFmpeg module to avoid over-linking libraries.
+ # Prevents shipping too many libraries with the AppImage.
+ set(FFmpeg_LIBRARIES "")
+ set(FFmpeg_INCLUDE_DIR "")
+
+ foreach(COMPONENT ${FFmpeg_COMPONENTS})
+ set(FFmpeg_LIBRARIES ${FFmpeg_LIBRARIES} ${FFmpeg_LIBRARY_${COMPONENT}} CACHE PATH "Paths to FFmpeg libraries" FORCE)
+ set(FFmpeg_INCLUDE_DIR ${FFmpeg_INCLUDE_DIR} ${FFmpeg_INCLUDE_${COMPONENT}} CACHE PATH "Path to FFmpeg headers" FORCE)
+ endforeach()
+ else()
+ message(WARNING "FFmpeg not found, falling back to externals")
+ set(YUZU_USE_BUNDLED_FFMPEG ON)
+ endif()
+endif()
+
+if (YUZU_USE_BUNDLED_FFMPEG)
+ if (NOT WIN32)
+ # Build FFmpeg from externals
+ message(STATUS "Using FFmpeg from externals")
+
+ # FFmpeg has sources that require either nasm or yasm to assemble.
+ # REQUIRED makes this fail at configure time rather than later during compilation.
+ find_program(ASSEMBLER NAMES nasm yasm REQUIRED)
+
+ set(FFmpeg_PREFIX ${PROJECT_SOURCE_DIR}/externals/ffmpeg)
+ set(FFmpeg_BUILD_DIR ${PROJECT_BINARY_DIR}/externals/ffmpeg)
+ set(FFmpeg_MAKEFILE ${FFmpeg_BUILD_DIR}/Makefile)
+ make_directory(${FFmpeg_BUILD_DIR})
+
+ # Read version string from external
+ file(READ ${FFmpeg_PREFIX}/RELEASE FFmpeg_VERSION)
+ set(FFmpeg_FOUND NO)
+ if (NOT FFmpeg_VERSION STREQUAL "")
+ set(FFmpeg_FOUND YES)
+ endif()
+
+ foreach(COMPONENT ${FFmpeg_COMPONENTS})
+ set(FFmpeg_${COMPONENT}_PREFIX "${FFmpeg_BUILD_DIR}/lib${COMPONENT}")
+ set(FFmpeg_${COMPONENT}_LIB_NAME "lib${COMPONENT}.a")
+ set(FFmpeg_${COMPONENT}_LIBRARY "${FFmpeg_${COMPONENT}_PREFIX}/${FFmpeg_${COMPONENT}_LIB_NAME}")
+
+ set(FFmpeg_LIBRARIES
+ ${FFmpeg_LIBRARIES}
+ ${FFmpeg_${COMPONENT}_LIBRARY}
+ CACHE PATH "Paths to FFmpeg libraries" FORCE)
+ endforeach()
+
+ set(FFmpeg_INCLUDE_DIR
+ ${FFmpeg_PREFIX}
+ CACHE PATH "Path to FFmpeg headers" FORCE)
+
+ # The `configure` parameters build only what yuzu needs from FFmpeg
+ # `--disable-{vaapi,vdpau}` is needed to avoid linking issues
+ add_custom_command(
+ OUTPUT
+ ${FFmpeg_MAKEFILE}
+ COMMAND
+ /bin/bash ${FFmpeg_PREFIX}/configure
+ --disable-avdevice
+ --disable-avfilter
+ --disable-avformat
+ --disable-doc
+ --disable-everything
+ --disable-ffmpeg
+ --disable-ffprobe
+ --disable-network
+ --disable-postproc
+ --disable-swresample
+ --disable-vaapi
+ --disable-vdpau
+ --enable-decoder=h264
+ --enable-decoder=vp9
+ WORKING_DIRECTORY
+ ${FFmpeg_BUILD_DIR}
+ )
+
+ # Workaround for Ubuntu 18.04's older make, which cannot invoke a child make
+ # with the jobserver's context. Also helps Ninja users.
+ execute_process(
+ COMMAND
+ nproc
+ OUTPUT_VARIABLE
+ SYSTEM_THREADS OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+ add_custom_command(
+ OUTPUT
+ ${FFmpeg_LIBRARIES}
+ COMMAND
+ make -j${SYSTEM_THREADS}
+ WORKING_DIRECTORY
+ ${FFmpeg_BUILD_DIR}
+ )
+
+ # ALL adds this custom target to the default build,
+ # but it does nothing when its DEPENDS outputs are already up to date
+ add_custom_target(ffmpeg-build ALL DEPENDS ${FFmpeg_LIBRARIES})
+ add_custom_target(ffmpeg-configure ALL DEPENDS ${FFmpeg_MAKEFILE})
+
+ if (FFmpeg_FOUND)
+ message(STATUS "Found FFmpeg version ${FFmpeg_VERSION}")
+
+ add_dependencies(ffmpeg-build ffmpeg-configure)
+ else()
+ message(FATAL_ERROR "FFmpeg not found")
+ endif()
+ else() # WIN32
+ # Use yuzu FFmpeg binaries
+ set(FFmpeg_EXT_NAME "ffmpeg-4.3.1")
+ set(FFmpeg_PATH "${CMAKE_BINARY_DIR}/externals/${FFmpeg_EXT_NAME}")
+ download_bundled_external("ffmpeg/" ${FFmpeg_EXT_NAME} "")
+ set(FFmpeg_FOUND YES)
+ set(FFmpeg_INCLUDE_DIR "${FFmpeg_PATH}/include" CACHE PATH "Path to FFmpeg headers" FORCE)
+ set(FFmpeg_LIBRARY_DIR "${FFmpeg_PATH}/bin" CACHE PATH "Path to FFmpeg library directory" FORCE)
+ set(FFmpeg_DLL_DIR "${FFmpeg_PATH}/bin" CACHE PATH "Path to FFmpeg DLLs" FORCE)
+ set(FFmpeg_LIBRARIES
+ ${FFmpeg_LIBRARY_DIR}/swscale.lib
+ ${FFmpeg_LIBRARY_DIR}/avcodec.lib
+ ${FFmpeg_LIBRARY_DIR}/avutil.lib
+ CACHE PATH "Paths to FFmpeg libraries" FORCE)
+ endif()
endif()
+unset(FFmpeg_COMPONENTS)
+
# Prefer the -pthread flag on Linux.
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
diff --git a/CMakeModules/CopyYuzuFFmpegDeps.cmake b/CMakeModules/CopyYuzuFFmpegDeps.cmake
index cca1eeeab..26384e8b8 100644
--- a/CMakeModules/CopyYuzuFFmpegDeps.cmake
+++ b/CMakeModules/CopyYuzuFFmpegDeps.cmake
@@ -1,10 +1,6 @@
function(copy_yuzu_FFmpeg_deps target_dir)
include(WindowsCopyFiles)
set(DLL_DEST "${CMAKE_BINARY_DIR}/bin/$<CONFIG>/")
- windows_copy_files(${target_dir} ${FFMPEG_DLL_DIR} ${DLL_DEST}
- avcodec-58.dll
- avutil-56.dll
- swresample-3.dll
- swscale-5.dll
- )
+ file(READ "${FFmpeg_PATH}/requirements.txt" FFmpeg_REQUIRED_DLLS)
+ windows_copy_files(${target_dir} ${FFmpeg_DLL_DIR} ${DLL_DEST} ${FFmpeg_REQUIRED_DLLS})
endfunction(copy_yuzu_FFmpeg_deps)
diff --git a/README.md b/README.md
index fbf62eb7c..cb1a64d8c 100644
--- a/README.md
+++ b/README.md
@@ -33,7 +33,7 @@ If you want to contribute to the user interface translation, please check out th
### Support
-We happily accept monetary donations or donated games and hardware. Please see our [donations page](https://yuzu-emu.org/donate/) for more information on how you can contribute to yuzu. Any donations received will go towards things like:
+We happily accept monetary donations, or donated games and hardware. Please see our [donations page](https://yuzu-emu.org/donate/) for more information on how you can contribute to yuzu. Any donations received will go towards things like:
* Switch consoles to explore and reverse-engineer the hardware
* Switch games for testing, reverse-engineering, and implementing new features
* Web hosting and infrastructure setup
diff --git a/dist/qt_themes/colorful_dark/icons/index.theme b/dist/qt_themes/colorful_dark/icons/index.theme
index 94d5ae8aa..19dc0369a 100644
--- a/dist/qt_themes/colorful_dark/icons/index.theme
+++ b/dist/qt_themes/colorful_dark/icons/index.theme
@@ -1,7 +1,7 @@
[Icon Theme]
Name=colorful_dark
Comment=Colorful theme (Dark style)
-Inherits=default
+Inherits=colorful
Directories=16x16
[16x16]
diff --git a/dist/qt_themes/colorful_midnight_blue/icons/index.theme b/dist/qt_themes/colorful_midnight_blue/icons/index.theme
index e23bfe6f9..dcb2c50d6 100644
--- a/dist/qt_themes/colorful_midnight_blue/icons/index.theme
+++ b/dist/qt_themes/colorful_midnight_blue/icons/index.theme
@@ -1,7 +1,7 @@
[Icon Theme]
Name=colorful_midnight_blue
Comment=Colorful theme (Midnight Blue style)
-Inherits=default
+Inherits=colorful
Directories=16x16
[16x16]
diff --git a/dist/qt_themes/qdarkstyle_midnight_blue/style.qss b/dist/qt_themes/qdarkstyle_midnight_blue/style.qss
index 70e540b06..a64037455 100644
--- a/dist/qt_themes/qdarkstyle_midnight_blue/style.qss
+++ b/dist/qt_themes/qdarkstyle_midnight_blue/style.qss
@@ -1257,10 +1257,6 @@ QComboBox::item:alternate {
background: #19232D;
}
-QComboBox::item:checked {
- font-weight: bold;
-}
-
QComboBox::item:selected {
border: 0px solid transparent;
}
diff --git a/dist/yuzu.bmp b/dist/yuzu.bmp
new file mode 100644
index 000000000..66f2f696f
--- /dev/null
+++ b/dist/yuzu.bmp
Binary files differ
diff --git a/externals/dynarmic b/externals/dynarmic
-Subproject 8c09da666aa3f0bb1000b0b6c5d5b0a1876f306
+Subproject cafa687684a3e5dbe86be7150c0f8183d2ad53c
diff --git a/externals/ffmpeg b/externals/ffmpeg
new file mode 160000
+Subproject 6b6b9e593dd4d3aaf75f48d40a13ef03bdef9fd
diff --git a/externals/find-modules/FindFFmpeg.cmake b/externals/find-modules/FindFFmpeg.cmake
index 77b331e00..61b6dc8d2 100644
--- a/externals/find-modules/FindFFmpeg.cmake
+++ b/externals/find-modules/FindFFmpeg.cmake
@@ -1,100 +1,187 @@
-# - Try to find ffmpeg libraries (libavcodec, libavformat and libavutil)
-# Once done this will define
+# FindFFmpeg
+# ----------
#
-# FFMPEG_FOUND - system has ffmpeg or libav
-# FFMPEG_INCLUDE_DIR - the ffmpeg include directory
-# FFMPEG_LIBRARIES - Link these to use ffmpeg
-# FFMPEG_LIBAVCODEC
-# FFMPEG_LIBAVFORMAT
-# FFMPEG_LIBAVUTIL
+# Copyright 2019 Citra Emulator Project
+# Licensed under GPLv2 or any later version
#
-# Copyright (c) 2008 Andreas Schneider <mail@cynapses.org>
-# Modified for other libraries by Lasse Kärkkäinen <tronic>
-# Modified for Hedgewars by Stepik777
-# Modified for FFmpeg-example Tuukka Pasanen 2018
-# Modified for yuzu toastUnlimted 2020
+# Find the native FFmpeg includes and libraries
#
-# Redistribution and use is allowed according to the terms of the New
-# BSD license.
+# This module defines the following variables:
+#
+# FFmpeg_INCLUDE_<component>: where to find <component>.h
+# FFmpeg_LIBRARY_<component>: where to find the <component> library
+# FFmpeg_INCLUDE_DIR: aggregate all the include paths
+# FFmpeg_LIBRARIES: aggregate all the paths to the libraries
+# FFmpeg_FOUND: True if all components have been found
+#
+# This module defines the following targets, which are preferred over variables:
+#
+# FFmpeg::<component>: Target to use <component> directly, with include path,
+# library and dependencies set up. If you are using a static build, you are
+# responsible for adding any external dependencies (such as zlib, bzlib...).
+#
+# <component> can be one of:
+# avcodec
+# avdevice
+# avfilter
+# avformat
+# avutil
+# postproc
+# swresample
+# swscale
#
-include(FindPackageHandleStandardArgs)
-
-find_package_handle_standard_args(FFMPEG
- FOUND_VAR FFMPEG_FOUND
- REQUIRED_VARS
- FFMPEG_LIBRARY
- FFMPEG_INCLUDE_DIR
- VERSION_VAR FFMPEG_VERSION
+set(_FFmpeg_ALL_COMPONENTS
+ avcodec
+ avdevice
+ avfilter
+ avformat
+ avutil
+ postproc
+ swresample
+ swscale
)
-if(FFMPEG_LIBRARIES AND FFMPEG_INCLUDE_DIR)
- # in cache already
- set(FFMPEG_FOUND TRUE)
-else()
- # use pkg-config to get the directories and then use these values
- # in the FIND_PATH() and FIND_LIBRARY() calls
- find_package(PkgConfig)
- if(PKG_CONFIG_FOUND)
- pkg_check_modules(_FFMPEG_AVCODEC libavcodec)
- pkg_check_modules(_FFMPEG_AVUTIL libavutil)
- pkg_check_modules(_FFMPEG_SWSCALE libswscale)
+set(_FFmpeg_DEPS_avcodec avutil)
+set(_FFmpeg_DEPS_avdevice avcodec avformat avutil)
+set(_FFmpeg_DEPS_avfilter avutil)
+set(_FFmpeg_DEPS_avformat avcodec avutil)
+set(_FFmpeg_DEPS_postproc avutil)
+set(_FFmpeg_DEPS_swresample avutil)
+set(_FFmpeg_DEPS_swscale avutil)
+
+function(find_ffmpeg LIBNAME)
+ if(DEFINED ENV{FFMPEG_DIR})
+ set(FFMPEG_DIR $ENV{FFMPEG_DIR})
endif()
- find_path(FFMPEG_AVCODEC_INCLUDE_DIR
- NAMES libavcodec/avcodec.h
- PATHS ${_FFMPEG_AVCODEC_INCLUDE_DIRS}
- /usr/include
- /usr/local/include
- /opt/local/include
- /sw/include
- PATH_SUFFIXES ffmpeg libav)
-
- find_library(FFMPEG_LIBAVCODEC
- NAMES avcodec
- PATHS ${_FFMPEG_AVCODEC_LIBRARY_DIRS}
- /usr/lib
- /usr/local/lib
- /opt/local/lib
- /sw/lib)
+ if(FFMPEG_DIR)
+ list(APPEND INCLUDE_PATHS
+ ${FFMPEG_DIR}
+ ${FFMPEG_DIR}/ffmpeg
+ ${FFMPEG_DIR}/lib${LIBNAME}
+ ${FFMPEG_DIR}/include/lib${LIBNAME}
+ ${FFMPEG_DIR}/include/ffmpeg
+ ${FFMPEG_DIR}/include
+ NO_DEFAULT_PATH
+ NO_CMAKE_FIND_ROOT_PATH
+ )
+ list(APPEND LIB_PATHS
+ ${FFMPEG_DIR}
+ ${FFMPEG_DIR}/lib
+ ${FFMPEG_DIR}/lib${LIBNAME}
+ NO_DEFAULT_PATH
+ NO_CMAKE_FIND_ROOT_PATH
+ )
+ else()
+ list(APPEND INCLUDE_PATHS
+ /usr/local/include/ffmpeg
+ /usr/local/include/lib${LIBNAME}
+ /usr/include/ffmpeg
+ /usr/include/lib${LIBNAME}
+ /usr/include/ffmpeg/lib${LIBNAME}
+ )
- find_library(FFMPEG_LIBAVUTIL
- NAMES avutil
- PATHS ${_FFMPEG_AVUTIL_LIBRARY_DIRS}
- /usr/lib
+ list(APPEND LIB_PATHS
/usr/local/lib
- /opt/local/lib
- /sw/lib)
-
- find_library(FFMPEG_LIBSWSCALE
- NAMES swscale
- PATHS ${_FFMPEG_SWSCALE_LIBRARY_DIRS}
/usr/lib
- /usr/local/lib
- /opt/local/lib
- /sw/lib)
-
- if(FFMPEG_LIBAVCODEC AND FFMPEG_LIBAVUTIL AND FFMPEG_LIBSWSCALE)
- set(FFMPEG_FOUND TRUE)
+ )
endif()
- if(FFMPEG_FOUND)
- set(FFMPEG_INCLUDE_DIR ${FFMPEG_AVCODEC_INCLUDE_DIR})
- set(FFMPEG_LIBRARIES
- ${FFMPEG_LIBAVCODEC}
- ${FFMPEG_LIBAVUTIL}
- ${FFMPEG_LIBSWSCALE})
+ find_path(FFmpeg_INCLUDE_${LIBNAME} lib${LIBNAME}/${LIBNAME}.h
+ HINTS ${INCLUDE_PATHS}
+ )
+
+ find_library(FFmpeg_LIBRARY_${LIBNAME} ${LIBNAME}
+ HINTS ${LIB_PATHS}
+ )
+
+ if(NOT FFMPEG_DIR AND (NOT FFmpeg_LIBRARY_${LIBNAME} OR NOT FFmpeg_INCLUDE_${LIBNAME}))
+ # Didn't find it in the usual paths, try pkg-config
+ find_package(PkgConfig QUIET)
+ pkg_check_modules(FFmpeg_PKGCONFIG_${LIBNAME} QUIET lib${LIBNAME})
+
+ find_path(FFmpeg_INCLUDE_${LIBNAME} lib${LIBNAME}/${LIBNAME}.h
+ ${FFmpeg_PKGCONFIG_${LIBNAME}_INCLUDE_DIRS}
+ )
+
+ find_library(FFmpeg_LIBRARY_${LIBNAME} ${LIBNAME}
+ ${FFmpeg_PKGCONFIG_${LIBNAME}_LIBRARY_DIRS}
+ )
endif()
- if(FFMPEG_FOUND)
- if(NOT FFMPEG_FIND_QUIETLY)
- message(STATUS
- "Found FFMPEG or Libav: ${FFMPEG_LIBRARIES}, ${FFMPEG_INCLUDE_DIR}")
+ if(FFmpeg_INCLUDE_${LIBNAME} AND FFmpeg_LIBRARY_${LIBNAME})
+ set(FFmpeg_INCLUDE_${LIBNAME} "${FFmpeg_INCLUDE_${LIBNAME}}" PARENT_SCOPE)
+ set(FFmpeg_LIBRARY_${LIBNAME} "${FFmpeg_LIBRARY_${LIBNAME}}" PARENT_SCOPE)
+
+ # Extract FFmpeg version from version.h
+ foreach(v MAJOR MINOR MICRO)
+ set(FFmpeg_${LIBNAME}_VERSION_${v} 0)
+ endforeach()
+ string(TOUPPER ${LIBNAME} LIBNAME_UPPER)
+ file(STRINGS "${FFmpeg_INCLUDE_${LIBNAME}}/lib${LIBNAME}/version.h" _FFmpeg_VERSION_H_CONTENTS REGEX "#define LIB${LIBNAME_UPPER}_VERSION_(MAJOR|MINOR|MICRO) ")
+ set(_FFmpeg_VERSION_REGEX "([0-9]+)")
+ foreach(v MAJOR MINOR MICRO)
+ if("${_FFmpeg_VERSION_H_CONTENTS}" MATCHES "#define LIB${LIBNAME_UPPER}_VERSION_${v}[\\t ]+${_FFmpeg_VERSION_REGEX}")
+ set(FFmpeg_${LIBNAME}_VERSION_${v} "${CMAKE_MATCH_1}")
+ endif()
+ endforeach()
+ set(FFmpeg_${LIBNAME}_VERSION "${FFmpeg_${LIBNAME}_VERSION_MAJOR}.${FFmpeg_${LIBNAME}_VERSION_MINOR}.${FFmpeg_${LIBNAME}_VERSION_MICRO}")
+ set(FFmpeg_${LIBNAME}_VERSION "${FFmpeg_${LIBNAME}_VERSION}" PARENT_SCOPE)
+ unset(_FFmpeg_VERSION_REGEX)
+ unset(_FFmpeg_VERSION_H_CONTENTS)
+
+ set(FFmpeg_${LIBNAME}_FOUND TRUE PARENT_SCOPE)
+ if(NOT FFmpeg_FIND_QUIETLY)
+ message("-- Found ${LIBNAME}: ${FFmpeg_INCLUDE_${LIBNAME}} ${FFmpeg_LIBRARY_${LIBNAME}} (version: ${FFmpeg_${LIBNAME}_VERSION})")
endif()
- else()
- if(FFMPEG_FIND_REQUIRED)
- message(FATAL_ERROR
- "Could not find libavcodec or libavutil or libswscale")
+ endif()
+endfunction()
+
+foreach(c ${_FFmpeg_ALL_COMPONENTS})
+ find_ffmpeg(${c})
+endforeach()
+
+foreach(c ${_FFmpeg_ALL_COMPONENTS})
+ if(FFmpeg_${c}_FOUND)
+ list(APPEND FFmpeg_INCLUDE_DIR ${FFmpeg_INCLUDE_${c}})
+ list(APPEND FFmpeg_LIBRARIES ${FFmpeg_LIBRARY_${c}})
+
+ add_library(FFmpeg::${c} IMPORTED UNKNOWN)
+ set_target_properties(FFmpeg::${c} PROPERTIES
+ IMPORTED_LOCATION ${FFmpeg_LIBRARY_${c}}
+ INTERFACE_INCLUDE_DIRECTORIES ${FFmpeg_INCLUDE_${c}}
+ )
+ if(_FFmpeg_DEPS_${c})
+ set(deps)
+ foreach(dep ${_FFmpeg_DEPS_${c}})
+ list(APPEND deps FFmpeg::${dep})
+ endforeach()
+
+ set_target_properties(FFmpeg::${c} PROPERTIES
+ INTERFACE_LINK_LIBRARIES "${deps}"
+ )
+ unset(deps)
endif()
endif()
+endforeach()
+
+if(FFmpeg_INCLUDE_DIR)
+ list(REMOVE_DUPLICATES FFmpeg_INCLUDE_DIR)
endif()
+
+foreach(c ${FFmpeg_FIND_COMPONENTS})
+ list(APPEND _FFmpeg_REQUIRED_VARS FFmpeg_INCLUDE_${c} FFmpeg_LIBRARY_${c})
+endforeach()
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(FFmpeg
+ REQUIRED_VARS ${_FFmpeg_REQUIRED_VARS}
+ HANDLE_COMPONENTS
+)
+
+foreach(c ${_FFmpeg_ALL_COMPONENTS})
+ unset(_FFmpeg_DEPS_${c})
+endforeach()
+unset(_FFmpeg_ALL_COMPONENTS)
+unset(_FFmpeg_REQUIRED_VARS)
diff --git a/externals/glad/include/glad/glad.h b/externals/glad/include/glad/glad.h
index 6e16358ea..191bb9fcb 100644
--- a/externals/glad/include/glad/glad.h
+++ b/externals/glad/include/glad/glad.h
@@ -5156,6 +5156,9 @@ GLAPI PFNGLDEPTHRANGEARRAYVPROC glad_glDepthRangeArrayv;
typedef void (APIENTRYP PFNGLDEPTHRANGEINDEXEDPROC)(GLuint index, GLdouble n, GLdouble f);
GLAPI PFNGLDEPTHRANGEINDEXEDPROC glad_glDepthRangeIndexed;
#define glDepthRangeIndexed glad_glDepthRangeIndexed
+typedef void (APIENTRYP PFNGLDEPTHRANGEINDEXEDDNVPROC)(GLuint index, GLdouble n, GLdouble f);
+GLAPI PFNGLDEPTHRANGEINDEXEDDNVPROC glad_glDepthRangeIndexeddNV;
+#define glDepthRangeIndexeddNV glad_glDepthRangeIndexeddNV
typedef void (APIENTRYP PFNGLGETFLOATI_VPROC)(GLenum target, GLuint index, GLfloat *data);
GLAPI PFNGLGETFLOATI_VPROC glad_glGetFloati_v;
#define glGetFloati_v glad_glGetFloati_v
diff --git a/externals/glad/src/glad.c b/externals/glad/src/glad.c
index d3e13163f..7b24cd68d 100644
--- a/externals/glad/src/glad.c
+++ b/externals/glad/src/glad.c
@@ -1044,6 +1044,7 @@ PFNGLDEPTHMASKPROC glad_glDepthMask = NULL;
PFNGLDEPTHRANGEPROC glad_glDepthRange = NULL;
PFNGLDEPTHRANGEARRAYVPROC glad_glDepthRangeArrayv = NULL;
PFNGLDEPTHRANGEINDEXEDPROC glad_glDepthRangeIndexed = NULL;
+PFNGLDEPTHRANGEINDEXEDDNVPROC glad_glDepthRangeIndexeddNV = NULL;
PFNGLDEPTHRANGEFPROC glad_glDepthRangef = NULL;
PFNGLDETACHSHADERPROC glad_glDetachShader = NULL;
PFNGLDISABLEPROC glad_glDisable = NULL;
@@ -7971,6 +7972,7 @@ static void load_GL_NV_depth_buffer_float(GLADloadproc load) {
glad_glDepthRangedNV = (PFNGLDEPTHRANGEDNVPROC)load("glDepthRangedNV");
glad_glClearDepthdNV = (PFNGLCLEARDEPTHDNVPROC)load("glClearDepthdNV");
glad_glDepthBoundsdNV = (PFNGLDEPTHBOUNDSDNVPROC)load("glDepthBoundsdNV");
+ glad_glDepthRangeIndexeddNV = (PFNGLDEPTHRANGEINDEXEDDNVPROC)load("glDepthRangeIndexeddNV");
}
static void load_GL_NV_draw_texture(GLADloadproc load) {
if(!GLAD_GL_NV_draw_texture) return;
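
The glad changes above add the loader plumbing for glDepthRangeIndexeddNV from GL_NV_depth_buffer_float, which allows per-viewport depth ranges that are not clamped to [0, 1]. A minimal usage sketch (illustrative only, not part of this change):

    // Assumes glad has already been initialized (gladLoadGL or gladLoadGLLoader).
    if (GLAD_GL_NV_depth_buffer_float) {
        // Unclamped, double-precision depth range for viewport 0.
        glDepthRangeIndexeddNV(0, -1.0, 2.0);
    } else {
        // Core fallback: values are clamped to [0, 1].
        glDepthRangeIndexed(0, 0.0, 1.0);
    }
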
diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt
index d1d177b51..a0ae07752 100644
--- a/src/audio_core/CMakeLists.txt
+++ b/src/audio_core/CMakeLists.txt
@@ -15,6 +15,8 @@ add_library(audio_core STATIC
command_generator.cpp
command_generator.h
common.h
+ delay_line.cpp
+ delay_line.h
effect_context.cpp
effect_context.h
info_updater.cpp
diff --git a/src/audio_core/command_generator.cpp b/src/audio_core/command_generator.cpp
index 5b1065520..437cc5ccd 100644
--- a/src/audio_core/command_generator.cpp
+++ b/src/audio_core/command_generator.cpp
@@ -2,6 +2,8 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <cmath>
+#include <numbers>
#include "audio_core/algorithm/interpolate.h"
#include "audio_core/command_generator.h"
#include "audio_core/effect_context.h"
@@ -13,6 +15,20 @@ namespace AudioCore {
namespace {
constexpr std::size_t MIX_BUFFER_SIZE = 0x3f00;
constexpr std::size_t SCALED_MIX_BUFFER_SIZE = MIX_BUFFER_SIZE << 15ULL;
+using DelayLineTimes = std::array<f32, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT>;
+
+constexpr DelayLineTimes FDN_MIN_DELAY_LINE_TIMES{5.0f, 6.0f, 13.0f, 14.0f};
+constexpr DelayLineTimes FDN_MAX_DELAY_LINE_TIMES{45.704f, 82.782f, 149.94f, 271.58f};
+constexpr DelayLineTimes DECAY0_MAX_DELAY_LINE_TIMES{17.0f, 13.0f, 9.0f, 7.0f};
+constexpr DelayLineTimes DECAY1_MAX_DELAY_LINE_TIMES{19.0f, 11.0f, 10.0f, 6.0f};
+constexpr std::array<f32, AudioCommon::I3DL2REVERB_TAPS> EARLY_TAP_TIMES{
+ 0.017136f, 0.059154f, 0.161733f, 0.390186f, 0.425262f, 0.455411f, 0.689737f,
+ 0.745910f, 0.833844f, 0.859502f, 0.000000f, 0.075024f, 0.168788f, 0.299901f,
+ 0.337443f, 0.371903f, 0.599011f, 0.716741f, 0.817859f, 0.851664f};
+constexpr std::array<f32, AudioCommon::I3DL2REVERB_TAPS> EARLY_GAIN{
+ 0.67096f, 0.61027f, 1.0f, 0.35680f, 0.68361f, 0.65978f, 0.51939f,
+ 0.24712f, 0.45945f, 0.45021f, 0.64196f, 0.54879f, 0.92925f, 0.38270f,
+ 0.72867f, 0.69794f, 0.5464f, 0.24563f, 0.45214f, 0.44042f};
template <std::size_t N>
void ApplyMix(s32* output, const s32* input, s32 gain, s32 sample_count) {
@@ -65,6 +81,154 @@ s32 ApplyMixDepop(s32* output, s32 first_sample, s32 delta, s32 sample_count) {
}
}
+float Pow10(float x) {
+ if (x >= 0.0f) {
+ return 1.0f;
+ } else if (x <= -5.3f) {
+ return 0.0f;
+ }
+ return std::pow(10.0f, x);
+}
+
+float SinD(float degrees) {
+ return std::sin(degrees * std::numbers::pi_v<float> / 180.0f);
+}
+
+float CosD(float degrees) {
+ return std::cos(degrees * std::numbers::pi_v<float> / 180.0f);
+}
+
+float ToFloat(s32 sample) {
+ return static_cast<float>(sample) / 65536.f;
+}
+
+s32 ToS32(float sample) {
+ constexpr auto min = -8388608.0f;
+ constexpr auto max = 8388607.f;
+ float rescaled_sample = sample * 65536.0f;
+ if (rescaled_sample < min) {
+ rescaled_sample = min;
+ }
+ if (rescaled_sample > max) {
+ rescaled_sample = max;
+ }
+ return static_cast<s32>(rescaled_sample);
+}
+
+constexpr std::array<std::size_t, 20> REVERB_TAP_INDEX_1CH{0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+constexpr std::array<std::size_t, 20> REVERB_TAP_INDEX_2CH{0, 0, 0, 1, 1, 1, 1, 0, 0, 0,
+ 1, 1, 1, 0, 0, 0, 0, 1, 1, 1};
+
+constexpr std::array<std::size_t, 20> REVERB_TAP_INDEX_4CH{0, 0, 0, 1, 1, 1, 1, 2, 2, 2,
+ 1, 1, 1, 0, 0, 0, 0, 3, 3, 3};
+
+constexpr std::array<std::size_t, 20> REVERB_TAP_INDEX_6CH{4, 0, 0, 1, 1, 1, 1, 2, 2, 2,
+ 1, 1, 1, 0, 0, 0, 0, 3, 3, 3};
+
+template <std::size_t CHANNEL_COUNT>
+void ApplyReverbGeneric(I3dl2ReverbState& state,
+ const std::array<const s32*, AudioCommon::MAX_CHANNEL_COUNT>& input,
+ const std::array<s32*, AudioCommon::MAX_CHANNEL_COUNT>& output,
+ s32 sample_count) {
+
+ auto GetTapLookup = []() {
+ if constexpr (CHANNEL_COUNT == 1) {
+ return REVERB_TAP_INDEX_1CH;
+ } else if constexpr (CHANNEL_COUNT == 2) {
+ return REVERB_TAP_INDEX_2CH;
+ } else if constexpr (CHANNEL_COUNT == 4) {
+ return REVERB_TAP_INDEX_4CH;
+ } else if constexpr (CHANNEL_COUNT == 6) {
+ return REVERB_TAP_INDEX_6CH;
+ }
+ };
+
+ const auto& tap_index_lut = GetTapLookup();
+ for (s32 sample = 0; sample < sample_count; sample++) {
+ std::array<f32, CHANNEL_COUNT> out_samples{};
+ std::array<f32, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> fsamp{};
+ std::array<f32, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> mixed{};
+ std::array<f32, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> osamp{};
+
+ // Mix everything into a single sample
+ s32 temp_mixed_sample = 0;
+ for (std::size_t i = 0; i < CHANNEL_COUNT; i++) {
+ temp_mixed_sample += input[i][sample];
+ }
+ const auto current_sample = ToFloat(temp_mixed_sample);
+ const auto early_tap = state.early_delay_line.TapOut(state.early_to_late_taps);
+
+ for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_TAPS; i++) {
+ const auto tapped_samp =
+ state.early_delay_line.TapOut(state.early_tap_steps[i]) * EARLY_GAIN[i];
+ out_samples[tap_index_lut[i]] += tapped_samp;
+
+ if constexpr (CHANNEL_COUNT == 6) {
+ // handle lfe
+ out_samples[5] += tapped_samp;
+ }
+ }
+
+ state.lowpass_0 = current_sample * state.lowpass_2 + state.lowpass_0 * state.lowpass_1;
+ state.early_delay_line.Tick(state.lowpass_0);
+
+ for (std::size_t i = 0; i < CHANNEL_COUNT; i++) {
+ out_samples[i] *= state.early_gain;
+ }
+
+ // Two-channel output seems to apply a late gain afterwards, so we need to save this
+ f32 filter{};
+ for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT; i++) {
+ filter = state.fdn_delay_line[i].GetOutputSample();
+ const auto computed = filter * state.lpf_coefficients[0][i] + state.shelf_filter[i];
+ state.shelf_filter[i] =
+ filter * state.lpf_coefficients[1][i] + computed * state.lpf_coefficients[2][i];
+ fsamp[i] = computed;
+ }
+
+ // Mixing matrix
+ mixed[0] = fsamp[1] + fsamp[2];
+ mixed[1] = -fsamp[0] - fsamp[3];
+ mixed[2] = fsamp[0] - fsamp[3];
+ mixed[3] = fsamp[1] - fsamp[2];
+
+ if constexpr (CHANNEL_COUNT == 2) {
+ for (auto& mix : mixed) {
+ mix *= (filter * state.late_gain);
+ }
+ }
+
+ for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT; i++) {
+ const auto late = early_tap * state.late_gain;
+ osamp[i] = state.decay_delay_line0[i].Tick(late + mixed[i]);
+ osamp[i] = state.decay_delay_line1[i].Tick(osamp[i]);
+ state.fdn_delay_line[i].Tick(osamp[i]);
+ }
+
+ if constexpr (CHANNEL_COUNT == 1) {
+ output[0][sample] = ToS32(state.dry_gain * ToFloat(input[0][sample]) +
+ (out_samples[0] + osamp[0] + osamp[1]));
+ } else if constexpr (CHANNEL_COUNT == 2 || CHANNEL_COUNT == 4) {
+ for (std::size_t i = 0; i < CHANNEL_COUNT; i++) {
+ output[i][sample] =
+ ToS32(state.dry_gain * ToFloat(input[i][sample]) + (out_samples[i] + osamp[i]));
+ }
+ } else if constexpr (CHANNEL_COUNT == 6) {
+ const auto temp_center = state.center_delay_line.Tick(0.5f * (osamp[2] - osamp[3]));
+ for (std::size_t i = 0; i < 4; i++) {
+ output[i][sample] =
+ ToS32(state.dry_gain * ToFloat(input[i][sample]) + (out_samples[i] + osamp[i]));
+ }
+ output[4][sample] =
+ ToS32(state.dry_gain * ToFloat(input[4][sample]) + (out_samples[4] + temp_center));
+ output[5][sample] =
+ ToS32(state.dry_gain * ToFloat(input[5][sample]) + (out_samples[5] + osamp[3]));
+ }
+ }
+}
+
} // namespace
CommandGenerator::CommandGenerator(AudioCommon::AudioRendererParameter& worker_params_,
@@ -271,11 +435,10 @@ void CommandGenerator::GenerateBiquadFilterCommandForVoice(ServerVoiceInfo& voic
}
// Generate biquad filter
- // GenerateBiquadFilterCommand(mix_buffer_count, biquad_filter,
- // dsp_state.biquad_filter_state,
- // mix_buffer_count + channel, mix_buffer_count +
- // channel, worker_params.sample_count,
- // voice_info.GetInParams().node_id);
+ // GenerateBiquadFilterCommand(mix_buffer_count, biquad_filter,
+ // dsp_state.biquad_filter_state,
+ // mix_buffer_count + channel, mix_buffer_count + channel,
+ // worker_params.sample_count, voice_info.GetInParams().node_id);
}
}
@@ -376,21 +539,54 @@ void CommandGenerator::GenerateEffectCommand(ServerMixInfo& mix_info) {
void CommandGenerator::GenerateI3dl2ReverbEffectCommand(s32 mix_buffer_offset, EffectBase* info,
bool enabled) {
- if (!enabled) {
+ auto* reverb = dynamic_cast<EffectI3dl2Reverb*>(info);
+ const auto& params = reverb->GetParams();
+ auto& state = reverb->GetState();
+ const auto channel_count = params.channel_count;
+
+ if (channel_count != 1 && channel_count != 2 && channel_count != 4 && channel_count != 6) {
return;
}
- const auto& params = dynamic_cast<EffectI3dl2Reverb*>(info)->GetParams();
- const auto channel_count = params.channel_count;
+
+ std::array<const s32*, AudioCommon::MAX_CHANNEL_COUNT> input{};
+ std::array<s32*, AudioCommon::MAX_CHANNEL_COUNT> output{};
+
+ const auto status = params.status;
for (s32 i = 0; i < channel_count; i++) {
- // TODO(ogniK): Actually implement reverb
- /*
- if (params.input[i] != params.output[i]) {
- const auto* input = GetMixBuffer(mix_buffer_offset + params.input[i]);
- auto* output = GetMixBuffer(mix_buffer_offset + params.output[i]);
- ApplyMix<1>(output, input, 32768, worker_params.sample_count);
- }*/
- auto* output = GetMixBuffer(mix_buffer_offset + params.output[i]);
- std::memset(output, 0, worker_params.sample_count * sizeof(s32));
+ input[i] = GetMixBuffer(mix_buffer_offset + params.input[i]);
+ output[i] = GetMixBuffer(mix_buffer_offset + params.output[i]);
+ }
+
+ if (enabled) {
+ if (status == ParameterStatus::Initialized) {
+ InitializeI3dl2Reverb(reverb->GetParams(), state, info->GetWorkBuffer());
+ } else if (status == ParameterStatus::Updating) {
+ UpdateI3dl2Reverb(reverb->GetParams(), state, false);
+ }
+ }
+
+ if (enabled) {
+ switch (channel_count) {
+ case 1:
+ ApplyReverbGeneric<1>(state, input, output, worker_params.sample_count);
+ break;
+ case 2:
+ ApplyReverbGeneric<2>(state, input, output, worker_params.sample_count);
+ break;
+ case 4:
+ ApplyReverbGeneric<4>(state, input, output, worker_params.sample_count);
+ break;
+ case 6:
+ ApplyReverbGeneric<6>(state, input, output, worker_params.sample_count);
+ break;
+ }
+ } else {
+ for (s32 i = 0; i < channel_count; i++) {
+ // Only copy if the buffer input and output do not match!
+ if ((mix_buffer_offset + params.input[i]) != (mix_buffer_offset + params.output[i])) {
+ std::memcpy(output[i], input[i], worker_params.sample_count * sizeof(s32));
+ }
+ }
}
}
@@ -528,6 +724,133 @@ s32 CommandGenerator::ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u3
return sample_count;
}
+void CommandGenerator::InitializeI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state,
+ std::vector<u8>& work_buffer) {
+ // Reset state
+ state.lowpass_0 = 0.0f;
+ state.lowpass_1 = 0.0f;
+ state.lowpass_2 = 0.0f;
+
+ state.early_delay_line.Reset();
+ state.early_tap_steps.fill(0);
+ state.early_gain = 0.0f;
+ state.late_gain = 0.0f;
+ state.early_to_late_taps = 0;
+ for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT; i++) {
+ state.fdn_delay_line[i].Reset();
+ state.decay_delay_line0[i].Reset();
+ state.decay_delay_line1[i].Reset();
+ }
+ state.last_reverb_echo = 0.0f;
+ state.center_delay_line.Reset();
+ for (auto& coef : state.lpf_coefficients) {
+ coef.fill(0.0f);
+ }
+ state.shelf_filter.fill(0.0f);
+ state.dry_gain = 0.0f;
+
+ const auto sample_rate = info.sample_rate / 1000;
+ f32* work_buffer_ptr = reinterpret_cast<f32*>(work_buffer.data());
+
+ s32 delay_samples{};
+ for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT; i++) {
+ delay_samples =
+ AudioCommon::CalculateDelaySamples(sample_rate, FDN_MAX_DELAY_LINE_TIMES[i]);
+ state.fdn_delay_line[i].Initialize(delay_samples, work_buffer_ptr);
+ work_buffer_ptr += delay_samples + 1;
+
+ delay_samples =
+ AudioCommon::CalculateDelaySamples(sample_rate, DECAY0_MAX_DELAY_LINE_TIMES[i]);
+ state.decay_delay_line0[i].Initialize(delay_samples, 0.0f, work_buffer_ptr);
+ work_buffer_ptr += delay_samples + 1;
+
+ delay_samples =
+ AudioCommon::CalculateDelaySamples(sample_rate, DECAY1_MAX_DELAY_LINE_TIMES[i]);
+ state.decay_delay_line1[i].Initialize(delay_samples, 0.0f, work_buffer_ptr);
+ work_buffer_ptr += delay_samples + 1;
+ }
+ delay_samples = AudioCommon::CalculateDelaySamples(sample_rate, 5.0f);
+ state.center_delay_line.Initialize(delay_samples, work_buffer_ptr);
+ work_buffer_ptr += delay_samples + 1;
+
+ delay_samples = AudioCommon::CalculateDelaySamples(sample_rate, 400.0f);
+ state.early_delay_line.Initialize(delay_samples, work_buffer_ptr);
+
+ UpdateI3dl2Reverb(info, state, true);
+}
+
+void CommandGenerator::UpdateI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state,
+ bool should_clear) {
+
+ state.dry_gain = info.dry_gain;
+ state.shelf_filter.fill(0.0f);
+ state.lowpass_0 = 0.0f;
+ state.early_gain = Pow10(std::min(info.room + info.reflection, 5000.0f) / 2000.0f);
+ state.late_gain = Pow10(std::min(info.room + info.reverb, 5000.0f) / 2000.0f);
+
+ const auto sample_rate = info.sample_rate / 1000;
+ const f32 hf_gain = Pow10(info.room_hf / 2000.0f);
+ if (hf_gain >= 1.0f) {
+ state.lowpass_2 = 1.0f;
+ state.lowpass_1 = 0.0f;
+ } else {
+ const auto a = 1.0f - hf_gain;
+ const auto b = 2.0f * (1.0f - hf_gain * CosD(256.0f * info.hf_reference /
+ static_cast<f32>(info.sample_rate)));
+ const auto c = std::sqrt(b * b - 4.0f * a * a);
+
+ state.lowpass_1 = (b - c) / (2.0f * a);
+ state.lowpass_2 = 1.0f - state.lowpass_1;
+ }
+ state.early_to_late_taps = AudioCommon::CalculateDelaySamples(
+ sample_rate, 1000.0f * (info.reflection_delay + info.reverb_delay));
+
+ state.last_reverb_echo = 0.6f * info.diffusion * 0.01f;
+ for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT; i++) {
+ const auto length =
+ FDN_MIN_DELAY_LINE_TIMES[i] +
+ (info.density / 100.0f) * (FDN_MAX_DELAY_LINE_TIMES[i] - FDN_MIN_DELAY_LINE_TIMES[i]);
+ state.fdn_delay_line[i].SetDelay(AudioCommon::CalculateDelaySamples(sample_rate, length));
+
+ const auto delay_sample_counts = state.fdn_delay_line[i].GetDelay() +
+ state.decay_delay_line0[i].GetDelay() +
+ state.decay_delay_line1[i].GetDelay();
+
+ float a = (-60.0f * static_cast<f32>(delay_sample_counts)) /
+ (info.decay_time * static_cast<f32>(info.sample_rate));
+ float b = a / info.hf_decay_ratio;
+ float c = CosD(128.0f * 0.5f * info.hf_reference / static_cast<f32>(info.sample_rate)) /
+ SinD(128.0f * 0.5f * info.hf_reference / static_cast<f32>(info.sample_rate));
+ float d = Pow10((b - a) / 40.0f);
+ float e = Pow10((b + a) / 40.0f) * 0.7071f;
+
+ state.lpf_coefficients[0][i] = e * ((d * c) + 1.0f) / (c + d);
+ state.lpf_coefficients[1][i] = e * (1.0f - (d * c)) / (c + d);
+ state.lpf_coefficients[2][i] = (c - d) / (c + d);
+
+ state.decay_delay_line0[i].SetCoefficient(state.last_reverb_echo);
+ state.decay_delay_line1[i].SetCoefficient(-0.9f * state.last_reverb_echo);
+ }
+
+ if (should_clear) {
+ for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT; i++) {
+ state.fdn_delay_line[i].Clear();
+ state.decay_delay_line0[i].Clear();
+ state.decay_delay_line1[i].Clear();
+ }
+ state.early_delay_line.Clear();
+ state.center_delay_line.Clear();
+ }
+
+ const auto max_early_delay = state.early_delay_line.GetMaxDelay();
+ const auto reflection_time = 1000.0f * (0.0098f * info.reverb_delay + 0.02f);
+ for (std::size_t tap = 0; tap < AudioCommon::I3DL2REVERB_TAPS; tap++) {
+ const auto length = AudioCommon::CalculateDelaySamples(
+ sample_rate, 1000.0f * info.reflection_delay + reflection_time * EARLY_TAP_TIMES[tap]);
+ state.early_tap_steps[tap] = std::min(length, max_early_delay);
+ }
+}
+
void CommandGenerator::GenerateVolumeRampCommand(float last_volume, float current_volume,
s32 channel, s32 node_id) {
const auto last = static_cast<s32>(last_volume * 32768.0f);
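
A note on the DSP code added above: ToFloat()/ToS32() treat the s32 mix samples as fixed point with 16 fractional bits and clamp results back into the 24-bit range [-8388608, 8388607], while Pow10() is a decibel-style gain map clamped to [0, 1]. The four "Mixing matrix" lines in ApplyReverbGeneric() appear to be the feedback matrix of a feedback delay network: each row has squared norm 2 and the rows are mutually orthogonal, so M / sqrt(2) is a rotation and the feedback path is energy-preserving on its own; the tail decays only through the decay-line coefficients. A self-contained check of that claim (illustrative only, not part of the diff):

    #include <array>

    // The matrix applied by: mixed[0] = f[1] + f[2]; mixed[1] = -f[0] - f[3];
    //                        mixed[2] = f[0] - f[3]; mixed[3] = f[1] - f[2];
    constexpr std::array<std::array<int, 4>, 4> M{{
        {0, 1, 1, 0}, {-1, 0, 0, -1}, {1, 0, 0, -1}, {0, 1, -1, 0}}};

    constexpr int Dot(int r0, int r1) {
        int sum = 0;
        for (int i = 0; i < 4; ++i) {
            sum += M[r0][i] * M[r1][i];
        }
        return sum;
    }

    // M * M^T == 2 * I, i.e. M / sqrt(2) is orthogonal.
    static_assert(Dot(0, 0) == 2 && Dot(1, 1) == 2 && Dot(2, 2) == 2 && Dot(3, 3) == 2);
    static_assert(Dot(0, 1) == 0 && Dot(0, 2) == 0 && Dot(0, 3) == 0 &&
                  Dot(1, 2) == 0 && Dot(1, 3) == 0 && Dot(2, 3) == 0);
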
diff --git a/src/audio_core/command_generator.h b/src/audio_core/command_generator.h
index b937350b1..2ebb755b0 100644
--- a/src/audio_core/command_generator.h
+++ b/src/audio_core/command_generator.h
@@ -21,6 +21,8 @@ class ServerMixInfo;
class EffectContext;
class EffectBase;
struct AuxInfoDSP;
+struct I3dl2ReverbParams;
+struct I3dl2ReverbState;
using MixVolumeBuffer = std::array<float, AudioCommon::MAX_MIX_BUFFERS>;
class CommandGenerator {
@@ -80,6 +82,9 @@ private:
s32 ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u32 max_samples, s32* out_data,
u32 sample_count, u32 read_offset, u32 read_count);
+ void InitializeI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state,
+ std::vector<u8>& work_buffer);
+ void UpdateI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state, bool should_clear);
// DSP Code
s32 DecodePcm16(ServerVoiceInfo& voice_info, VoiceState& dsp_state, s32 sample_count,
s32 channel, std::size_t mix_offset);
diff --git a/src/audio_core/common.h b/src/audio_core/common.h
index ec59a3ba9..fe546c55d 100644
--- a/src/audio_core/common.h
+++ b/src/audio_core/common.h
@@ -33,6 +33,29 @@ constexpr std::size_t TEMP_MIX_BASE_SIZE = 0x3f00; // TODO(ogniK): Work out this
// and our const ends up being 0x3f04, the 4 bytes are most
// likely the sample history
constexpr std::size_t TOTAL_TEMP_MIX_SIZE = TEMP_MIX_BASE_SIZE + AudioCommon::MAX_SAMPLE_HISTORY;
+constexpr f32 I3DL2REVERB_MAX_LEVEL = 5000.0f;
+constexpr f32 I3DL2REVERB_MIN_REFLECTION_DURATION = 0.02f;
+constexpr std::size_t I3DL2REVERB_TAPS = 20;
+constexpr std::size_t I3DL2REVERB_DELAY_LINE_COUNT = 4;
+using Fractional = s32;
+
+template <typename T>
+constexpr Fractional ToFractional(T x) {
+ return static_cast<Fractional>(x * static_cast<T>(0x4000));
+}
+
+constexpr Fractional MultiplyFractional(Fractional lhs, Fractional rhs) {
+ return static_cast<Fractional>(static_cast<s64>(lhs) * rhs >> 14);
+}
+
+constexpr s32 FractionalToFixed(Fractional x) {
+ const auto s = (x >> 13) & 1; // round to nearest by adding the highest fractional bit
+ return static_cast<s32>(x >> 14) + s;
+}
+
+constexpr s32 CalculateDelaySamples(s32 sample_rate_khz, float time) {
+ return FractionalToFixed(MultiplyFractional(ToFractional(sample_rate_khz), ToFractional(time)));
+}
static constexpr u32 VersionFromRevision(u32_le rev) {
// "REV7" -> 7
diff --git a/src/audio_core/delay_line.cpp b/src/audio_core/delay_line.cpp
new file mode 100644
index 000000000..f4e4dd8d2
--- /dev/null
+++ b/src/audio_core/delay_line.cpp
@@ -0,0 +1,104 @@
+#include <cstring>
+#include "audio_core/delay_line.h"
+
+namespace AudioCore {
+DelayLineBase::DelayLineBase() = default;
+DelayLineBase::~DelayLineBase() = default;
+
+void DelayLineBase::Initialize(s32 max_delay_, float* src_buffer) {
+ buffer = src_buffer;
+ buffer_end = buffer + max_delay_;
+ max_delay = max_delay_;
+ output = buffer;
+ SetDelay(max_delay_);
+ Clear();
+}
+
+void DelayLineBase::SetDelay(s32 new_delay) {
+ if (max_delay < new_delay) {
+ return;
+ }
+ delay = new_delay;
+ input = (buffer + ((output - buffer) + new_delay) % (max_delay + 1));
+}
+
+s32 DelayLineBase::GetDelay() const {
+ return delay;
+}
+
+s32 DelayLineBase::GetMaxDelay() const {
+ return max_delay;
+}
+
+f32 DelayLineBase::TapOut(s32 last_sample) {
+ const float* ptr = input - (last_sample + 1);
+ if (ptr < buffer) {
+ ptr += (max_delay + 1);
+ }
+
+ return *ptr;
+}
+
+f32 DelayLineBase::Tick(f32 sample) {
+ *(input++) = sample;
+ const auto out_sample = *(output++);
+
+ if (buffer_end < input) {
+ input = buffer;
+ }
+
+ if (buffer_end < output) {
+ output = buffer;
+ }
+
+ return out_sample;
+}
+
+float* DelayLineBase::GetInput() {
+ return input;
+}
+
+const float* DelayLineBase::GetInput() const {
+ return input;
+}
+
+f32 DelayLineBase::GetOutputSample() const {
+ return *output;
+}
+
+void DelayLineBase::Clear() {
+ std::memset(buffer, 0, sizeof(float) * max_delay);
+}
+
+void DelayLineBase::Reset() {
+ buffer = nullptr;
+ buffer_end = nullptr;
+ max_delay = 0;
+ input = nullptr;
+ output = nullptr;
+ delay = 0;
+}
+
+DelayLineAllPass::DelayLineAllPass() = default;
+DelayLineAllPass::~DelayLineAllPass() = default;
+
+void DelayLineAllPass::Initialize(u32 delay_, float coefficient_, f32* src_buffer) {
+ DelayLineBase::Initialize(delay_, src_buffer);
+ SetCoefficient(coefficient_);
+}
+
+void DelayLineAllPass::SetCoefficient(float coefficient_) {
+ coefficient = coefficient_;
+}
+
+f32 DelayLineAllPass::Tick(f32 sample) {
+ const auto temp = sample - coefficient * *output;
+ return coefficient * temp + DelayLineBase::Tick(temp);
+}
+
+void DelayLineAllPass::Reset() {
+ coefficient = 0.0f;
+ DelayLineBase::Reset();
+}
+
+} // namespace AudioCore
diff --git a/src/audio_core/delay_line.h b/src/audio_core/delay_line.h
new file mode 100644
index 000000000..cafddd432
--- /dev/null
+++ b/src/audio_core/delay_line.h
@@ -0,0 +1,46 @@
+#pragma once
+
+#include "common/common_types.h"
+
+namespace AudioCore {
+
+class DelayLineBase {
+public:
+ DelayLineBase();
+ ~DelayLineBase();
+
+ void Initialize(s32 max_delay_, float* src_buffer);
+ void SetDelay(s32 new_delay);
+ s32 GetDelay() const;
+ s32 GetMaxDelay() const;
+ f32 TapOut(s32 last_sample);
+ f32 Tick(f32 sample);
+ float* GetInput();
+ const float* GetInput() const;
+ f32 GetOutputSample() const;
+ void Clear();
+ void Reset();
+
+protected:
+ float* buffer{nullptr};
+ float* buffer_end{nullptr};
+ s32 max_delay{};
+ float* input{nullptr};
+ float* output{nullptr};
+ s32 delay{};
+};
+
+class DelayLineAllPass final : public DelayLineBase {
+public:
+ DelayLineAllPass();
+ ~DelayLineAllPass();
+
+    void Initialize(u32 delay_, float coefficient_, f32* src_buffer);
+    void SetCoefficient(float coefficient_);
+ f32 Tick(f32 sample);
+ void Reset();
+
+private:
+ float coefficient{};
+};
+} // namespace AudioCore
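A minimal usage sketch for the new delay line (buffer length and delay values are illustrative). The class owns no storage: the caller provides max_delay + 1 floats, which is how the reverb state further down feeds it slices of the effect's work buffer.

    #include <vector>
    #include "audio_core/delay_line.h"

    void DelayLineSketch() {
        std::vector<float> backing(481); // max_delay + 1 slots; ~10 ms at 48 kHz
        AudioCore::DelayLineBase line;
        line.Initialize(480, backing.data());
        line.SetDelay(240); // may be shortened at runtime, up to max_delay

        // Tick() pushes one sample in and returns the sample from GetDelay() ticks ago.
        for (int i = 0; i < 480; ++i) {
            [[maybe_unused]] const f32 out = line.Tick(i == 0 ? 1.0f : 0.0f);
            // out becomes 1.0f on the iteration where i == 240
        }
    }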
diff --git a/src/audio_core/effect_context.cpp b/src/audio_core/effect_context.cpp
index f770b9608..89e4573c7 100644
--- a/src/audio_core/effect_context.cpp
+++ b/src/audio_core/effect_context.cpp
@@ -90,6 +90,14 @@ s32 EffectBase::GetProcessingOrder() const {
return processing_order;
}
+std::vector<u8>& EffectBase::GetWorkBuffer() {
+ return work_buffer;
+}
+
+const std::vector<u8>& EffectBase::GetWorkBuffer() const {
+ return work_buffer;
+}
+
EffectI3dl2Reverb::EffectI3dl2Reverb() : EffectGeneric(EffectType::I3dl2Reverb) {}
EffectI3dl2Reverb::~EffectI3dl2Reverb() = default;
@@ -117,6 +125,12 @@ void EffectI3dl2Reverb::Update(EffectInfo::InParams& in_params) {
usage = UsageState::Initialized;
params.status = ParameterStatus::Initialized;
skipped = in_params.buffer_address == 0 || in_params.buffer_size == 0;
+ if (!skipped) {
+ auto& cur_work_buffer = GetWorkBuffer();
+        // The effect uses two buffers internally, hence the doubled size
+ cur_work_buffer.resize(in_params.buffer_size * 2);
+ std::fill(cur_work_buffer.begin(), cur_work_buffer.end(), 0);
+ }
}
}
@@ -129,6 +143,14 @@ void EffectI3dl2Reverb::UpdateForCommandGeneration() {
GetParams().status = ParameterStatus::Updated;
}
+I3dl2ReverbState& EffectI3dl2Reverb::GetState() {
+ return state;
+}
+
+const I3dl2ReverbState& EffectI3dl2Reverb::GetState() const {
+ return state;
+}
+
EffectBiquadFilter::EffectBiquadFilter() : EffectGeneric(EffectType::BiquadFilter) {}
EffectBiquadFilter::~EffectBiquadFilter() = default;
diff --git a/src/audio_core/effect_context.h b/src/audio_core/effect_context.h
index c5e0b398c..5e0655dd7 100644
--- a/src/audio_core/effect_context.h
+++ b/src/audio_core/effect_context.h
@@ -8,6 +8,7 @@
#include <memory>
#include <vector>
#include "audio_core/common.h"
+#include "audio_core/delay_line.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/swap.h"
@@ -194,6 +195,8 @@ public:
[[nodiscard]] bool IsEnabled() const;
[[nodiscard]] s32 GetMixID() const;
[[nodiscard]] s32 GetProcessingOrder() const;
+ [[nodiscard]] std::vector<u8>& GetWorkBuffer();
+ [[nodiscard]] const std::vector<u8>& GetWorkBuffer() const;
protected:
UsageState usage{UsageState::Invalid};
@@ -201,6 +204,7 @@ protected:
s32 mix_id{};
s32 processing_order{};
bool enabled = false;
+ std::vector<u8> work_buffer{};
};
template <typename T>
@@ -212,7 +216,7 @@ public:
return internal_params;
}
- const I3dl2ReverbParams& GetParams() const {
+ const T& GetParams() const {
return internal_params;
}
@@ -229,6 +233,27 @@ public:
void UpdateForCommandGeneration() override;
};
+struct I3dl2ReverbState {
+ f32 lowpass_0{};
+ f32 lowpass_1{};
+ f32 lowpass_2{};
+
+ DelayLineBase early_delay_line{};
+ std::array<u32, AudioCommon::I3DL2REVERB_TAPS> early_tap_steps{};
+ f32 early_gain{};
+ f32 late_gain{};
+
+ u32 early_to_late_taps{};
+ std::array<DelayLineBase, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> fdn_delay_line{};
+ std::array<DelayLineAllPass, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> decay_delay_line0{};
+ std::array<DelayLineAllPass, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> decay_delay_line1{};
+ f32 last_reverb_echo{};
+ DelayLineBase center_delay_line{};
+ std::array<std::array<f32, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT>, 3> lpf_coefficients{};
+ std::array<f32, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> shelf_filter{};
+ f32 dry_gain{};
+};
+
class EffectI3dl2Reverb : public EffectGeneric<I3dl2ReverbParams> {
public:
explicit EffectI3dl2Reverb();
@@ -237,8 +262,12 @@ public:
void Update(EffectInfo::InParams& in_params) override;
void UpdateForCommandGeneration() override;
+ I3dl2ReverbState& GetState();
+ const I3dl2ReverbState& GetState() const;
+
private:
bool skipped = false;
+ I3dl2ReverbState state{};
};
class EffectBiquadFilter : public EffectGeneric<BiquadFilterParams> {
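How these pieces fit together, as a hedged sketch (the helper below is hypothetical; only GetWorkBuffer(), GetState(), and DelayLineBase::Initialize() come from the hunks above). The delay lines take caller-owned float storage, and the doubled, zero-filled work buffer is the natural backing for them, assuming it is large enough for (max_delay_samples + 1) floats:

    // Hypothetical helper, not part of this diff: point one delay line at the
    // start of the effect's work buffer.
    void SetUpEarlyDelay(AudioCore::EffectI3dl2Reverb& effect, s32 max_delay_samples) {
        auto& work = effect.GetWorkBuffer();
        auto* storage = reinterpret_cast<float*>(work.data());
        effect.GetState().early_delay_line.Initialize(max_delay_samples, storage);
    }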
diff --git a/src/audio_core/stream.cpp b/src/audio_core/stream.cpp
index 5b0b285cd..b0f6f0c34 100644
--- a/src/audio_core/stream.cpp
+++ b/src/audio_core/stream.cpp
@@ -111,7 +111,14 @@ void Stream::PlayNextBuffer(std::chrono::nanoseconds ns_late) {
sink_stream.EnqueueSamples(GetNumChannels(), active_buffer->GetSamples());
- core_timing.ScheduleEvent(GetBufferReleaseNS(*active_buffer) - ns_late, release_event, {});
+ const auto buffer_release_ns = GetBufferReleaseNS(*active_buffer);
+
+    // If ns_late exceeds the time until the buffer would be released, ignore the delay
+ if (ns_late > buffer_release_ns) {
+ ns_late = {};
+ }
+
+ core_timing.ScheduleEvent(buffer_release_ns - ns_late, release_event, {});
}
void Stream::ReleaseActiveBuffer(std::chrono::nanoseconds ns_late) {
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index bfd11e76d..788516ded 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -167,8 +167,8 @@ add_library(common STATIC
threadsafe_queue.h
time_zone.cpp
time_zone.h
+ tiny_mt.h
tree.h
- uint128.cpp
uint128.h
uuid.cpp
uuid.h
@@ -206,6 +206,8 @@ if (MSVC)
else()
target_compile_options(common PRIVATE
-Werror
+
+ $<$<CXX_COMPILER_ID:Clang>:-fsized-deallocation>
)
endif()
diff --git a/src/common/alignment.h b/src/common/alignment.h
index fb81f10d8..32d796ffa 100644
--- a/src/common/alignment.h
+++ b/src/common/alignment.h
@@ -42,6 +42,11 @@ requires std::is_integral_v<T>[[nodiscard]] constexpr bool IsAligned(T value, si
return (value & mask) == 0;
}
+template <typename T, typename U>
+requires std::is_integral_v<T>[[nodiscard]] constexpr T DivideUp(T x, U y) {
+ return (x + (y - 1)) / y;
+}
+
template <typename T, size_t Align = 16>
class AlignmentAllocator {
public:
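DivideUp is ordinary round-up integer division (y must be nonzero, and x + y - 1 must not overflow T); for example:

    #include "common/alignment.h"

    static_assert(Common::DivideUp(10, 4) == 3); // ceil(10 / 4)
    static_assert(Common::DivideUp(8, 4) == 2);  // exact divisions are unchanged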
diff --git a/src/common/cityhash.cpp b/src/common/cityhash.cpp
index 4e1d874b5..66218fc21 100644
--- a/src/common/cityhash.cpp
+++ b/src/common/cityhash.cpp
@@ -28,8 +28,10 @@
// compromising on hash quality.
#include <algorithm>
-#include <string.h> // for memcpy and memset
-#include "cityhash.h"
+#include <cstring>
+#include <utility>
+
+#include "common/cityhash.h"
#include "common/swap.h"
// #include "config.h"
@@ -42,21 +44,17 @@
using namespace std;
-typedef uint8_t uint8;
-typedef uint32_t uint32;
-typedef uint64_t uint64;
-
namespace Common {
-static uint64 UNALIGNED_LOAD64(const char* p) {
- uint64 result;
- memcpy(&result, p, sizeof(result));
+static u64 unaligned_load64(const char* p) {
+ u64 result;
+ std::memcpy(&result, p, sizeof(result));
return result;
}
-static uint32 UNALIGNED_LOAD32(const char* p) {
- uint32 result;
- memcpy(&result, p, sizeof(result));
+static u32 unaligned_load32(const char* p) {
+ u32 result;
+ std::memcpy(&result, p, sizeof(result));
return result;
}
@@ -76,64 +74,64 @@ static uint32 UNALIGNED_LOAD32(const char* p) {
#endif
#endif
-static uint64 Fetch64(const char* p) {
- return uint64_in_expected_order(UNALIGNED_LOAD64(p));
+static u64 Fetch64(const char* p) {
+ return uint64_in_expected_order(unaligned_load64(p));
}
-static uint32 Fetch32(const char* p) {
- return uint32_in_expected_order(UNALIGNED_LOAD32(p));
+static u32 Fetch32(const char* p) {
+ return uint32_in_expected_order(unaligned_load32(p));
}
// Some primes between 2^63 and 2^64 for various uses.
-static const uint64 k0 = 0xc3a5c85c97cb3127ULL;
-static const uint64 k1 = 0xb492b66fbe98f273ULL;
-static const uint64 k2 = 0x9ae16a3b2f90404fULL;
+static constexpr u64 k0 = 0xc3a5c85c97cb3127ULL;
+static constexpr u64 k1 = 0xb492b66fbe98f273ULL;
+static constexpr u64 k2 = 0x9ae16a3b2f90404fULL;
// Bitwise right rotate. Normally this will compile to a single
// instruction, especially if the shift is a manifest constant.
-static uint64 Rotate(uint64 val, int shift) {
+static u64 Rotate(u64 val, int shift) {
// Avoid shifting by 64: doing so yields an undefined result.
return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
}
-static uint64 ShiftMix(uint64 val) {
+static u64 ShiftMix(u64 val) {
return val ^ (val >> 47);
}
-static uint64 HashLen16(uint64 u, uint64 v) {
- return Hash128to64(uint128(u, v));
+static u64 HashLen16(u64 u, u64 v) {
+ return Hash128to64(u128{u, v});
}
-static uint64 HashLen16(uint64 u, uint64 v, uint64 mul) {
+static u64 HashLen16(u64 u, u64 v, u64 mul) {
// Murmur-inspired hashing.
- uint64 a = (u ^ v) * mul;
+ u64 a = (u ^ v) * mul;
a ^= (a >> 47);
- uint64 b = (v ^ a) * mul;
+ u64 b = (v ^ a) * mul;
b ^= (b >> 47);
b *= mul;
return b;
}
-static uint64 HashLen0to16(const char* s, std::size_t len) {
+static u64 HashLen0to16(const char* s, size_t len) {
if (len >= 8) {
- uint64 mul = k2 + len * 2;
- uint64 a = Fetch64(s) + k2;
- uint64 b = Fetch64(s + len - 8);
- uint64 c = Rotate(b, 37) * mul + a;
- uint64 d = (Rotate(a, 25) + b) * mul;
+ u64 mul = k2 + len * 2;
+ u64 a = Fetch64(s) + k2;
+ u64 b = Fetch64(s + len - 8);
+ u64 c = Rotate(b, 37) * mul + a;
+ u64 d = (Rotate(a, 25) + b) * mul;
return HashLen16(c, d, mul);
}
if (len >= 4) {
- uint64 mul = k2 + len * 2;
- uint64 a = Fetch32(s);
+ u64 mul = k2 + len * 2;
+ u64 a = Fetch32(s);
return HashLen16(len + (a << 3), Fetch32(s + len - 4), mul);
}
if (len > 0) {
- uint8 a = s[0];
- uint8 b = s[len >> 1];
- uint8 c = s[len - 1];
- uint32 y = static_cast<uint32>(a) + (static_cast<uint32>(b) << 8);
- uint32 z = static_cast<uint32>(len) + (static_cast<uint32>(c) << 2);
+ u8 a = s[0];
+ u8 b = s[len >> 1];
+ u8 c = s[len - 1];
+ u32 y = static_cast<u32>(a) + (static_cast<u32>(b) << 8);
+ u32 z = static_cast<u32>(len) + (static_cast<u32>(c) << 2);
return ShiftMix(y * k2 ^ z * k0) * k2;
}
return k2;
@@ -141,22 +139,21 @@ static uint64 HashLen0to16(const char* s, std::size_t len) {
// This probably works well for 16-byte strings as well, but it may be overkill
// in that case.
-static uint64 HashLen17to32(const char* s, std::size_t len) {
- uint64 mul = k2 + len * 2;
- uint64 a = Fetch64(s) * k1;
- uint64 b = Fetch64(s + 8);
- uint64 c = Fetch64(s + len - 8) * mul;
- uint64 d = Fetch64(s + len - 16) * k2;
+static u64 HashLen17to32(const char* s, size_t len) {
+ u64 mul = k2 + len * 2;
+ u64 a = Fetch64(s) * k1;
+ u64 b = Fetch64(s + 8);
+ u64 c = Fetch64(s + len - 8) * mul;
+ u64 d = Fetch64(s + len - 16) * k2;
return HashLen16(Rotate(a + b, 43) + Rotate(c, 30) + d, a + Rotate(b + k2, 18) + c, mul);
}
// Return a 16-byte hash for 48 bytes. Quick and dirty.
// Callers do best to use "random-looking" values for a and b.
-static pair<uint64, uint64> WeakHashLen32WithSeeds(uint64 w, uint64 x, uint64 y, uint64 z, uint64 a,
- uint64 b) {
+static pair<u64, u64> WeakHashLen32WithSeeds(u64 w, u64 x, u64 y, u64 z, u64 a, u64 b) {
a += w;
b = Rotate(b + a + z, 21);
- uint64 c = a;
+ u64 c = a;
a += x;
a += y;
b += Rotate(a, 44);
@@ -164,34 +161,34 @@ static pair<uint64, uint64> WeakHashLen32WithSeeds(uint64 w, uint64 x, uint64 y,
}
// Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty.
-static pair<uint64, uint64> WeakHashLen32WithSeeds(const char* s, uint64 a, uint64 b) {
+static pair<u64, u64> WeakHashLen32WithSeeds(const char* s, u64 a, u64 b) {
return WeakHashLen32WithSeeds(Fetch64(s), Fetch64(s + 8), Fetch64(s + 16), Fetch64(s + 24), a,
b);
}
// Return an 8-byte hash for 33 to 64 bytes.
-static uint64 HashLen33to64(const char* s, std::size_t len) {
- uint64 mul = k2 + len * 2;
- uint64 a = Fetch64(s) * k2;
- uint64 b = Fetch64(s + 8);
- uint64 c = Fetch64(s + len - 24);
- uint64 d = Fetch64(s + len - 32);
- uint64 e = Fetch64(s + 16) * k2;
- uint64 f = Fetch64(s + 24) * 9;
- uint64 g = Fetch64(s + len - 8);
- uint64 h = Fetch64(s + len - 16) * mul;
- uint64 u = Rotate(a + g, 43) + (Rotate(b, 30) + c) * 9;
- uint64 v = ((a + g) ^ d) + f + 1;
- uint64 w = swap64((u + v) * mul) + h;
- uint64 x = Rotate(e + f, 42) + c;
- uint64 y = (swap64((v + w) * mul) + g) * mul;
- uint64 z = e + f + c;
+static u64 HashLen33to64(const char* s, size_t len) {
+ u64 mul = k2 + len * 2;
+ u64 a = Fetch64(s) * k2;
+ u64 b = Fetch64(s + 8);
+ u64 c = Fetch64(s + len - 24);
+ u64 d = Fetch64(s + len - 32);
+ u64 e = Fetch64(s + 16) * k2;
+ u64 f = Fetch64(s + 24) * 9;
+ u64 g = Fetch64(s + len - 8);
+ u64 h = Fetch64(s + len - 16) * mul;
+ u64 u = Rotate(a + g, 43) + (Rotate(b, 30) + c) * 9;
+ u64 v = ((a + g) ^ d) + f + 1;
+ u64 w = swap64((u + v) * mul) + h;
+ u64 x = Rotate(e + f, 42) + c;
+ u64 y = (swap64((v + w) * mul) + g) * mul;
+ u64 z = e + f + c;
a = swap64((x + z) * mul + y) + b;
b = ShiftMix((z + a) * mul + d + h) * mul;
return b + x;
}
-uint64 CityHash64(const char* s, std::size_t len) {
+u64 CityHash64(const char* s, size_t len) {
if (len <= 32) {
if (len <= 16) {
return HashLen0to16(s, len);
@@ -204,15 +201,15 @@ uint64 CityHash64(const char* s, std::size_t len) {
// For strings over 64 bytes we hash the end first, and then as we
// loop we keep 56 bytes of state: v, w, x, y, and z.
- uint64 x = Fetch64(s + len - 40);
- uint64 y = Fetch64(s + len - 16) + Fetch64(s + len - 56);
- uint64 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24));
- pair<uint64, uint64> v = WeakHashLen32WithSeeds(s + len - 64, len, z);
- pair<uint64, uint64> w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x);
+ u64 x = Fetch64(s + len - 40);
+ u64 y = Fetch64(s + len - 16) + Fetch64(s + len - 56);
+ u64 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24));
+ pair<u64, u64> v = WeakHashLen32WithSeeds(s + len - 64, len, z);
+ pair<u64, u64> w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x);
x = x * k1 + Fetch64(s);
// Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
- len = (len - 1) & ~static_cast<std::size_t>(63);
+ len = (len - 1) & ~static_cast<size_t>(63);
do {
x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
@@ -229,21 +226,21 @@ uint64 CityHash64(const char* s, std::size_t len) {
HashLen16(v.second, w.second) + x);
}
-uint64 CityHash64WithSeed(const char* s, std::size_t len, uint64 seed) {
+u64 CityHash64WithSeed(const char* s, size_t len, u64 seed) {
return CityHash64WithSeeds(s, len, k2, seed);
}
-uint64 CityHash64WithSeeds(const char* s, std::size_t len, uint64 seed0, uint64 seed1) {
+u64 CityHash64WithSeeds(const char* s, size_t len, u64 seed0, u64 seed1) {
return HashLen16(CityHash64(s, len) - seed0, seed1);
}
// A subroutine for CityHash128(). Returns a decent 128-bit hash for strings
// of any length representable in signed long. Based on City and Murmur.
-static uint128 CityMurmur(const char* s, std::size_t len, uint128 seed) {
- uint64 a = Uint128Low64(seed);
- uint64 b = Uint128High64(seed);
- uint64 c = 0;
- uint64 d = 0;
+static u128 CityMurmur(const char* s, size_t len, u128 seed) {
+ u64 a = seed[0];
+ u64 b = seed[1];
+ u64 c = 0;
+ u64 d = 0;
signed long l = static_cast<long>(len) - 16;
if (l <= 0) { // len <= 16
a = ShiftMix(a * k1) * k1;
@@ -266,20 +263,20 @@ static uint128 CityMurmur(const char* s, std::size_t len, uint128 seed) {
}
a = HashLen16(a, c);
b = HashLen16(d, b);
- return uint128(a ^ b, HashLen16(b, a));
+ return u128{a ^ b, HashLen16(b, a)};
}
-uint128 CityHash128WithSeed(const char* s, std::size_t len, uint128 seed) {
+u128 CityHash128WithSeed(const char* s, size_t len, u128 seed) {
if (len < 128) {
return CityMurmur(s, len, seed);
}
// We expect len >= 128 to be the common case. Keep 56 bytes of state:
// v, w, x, y, and z.
- pair<uint64, uint64> v, w;
- uint64 x = Uint128Low64(seed);
- uint64 y = Uint128High64(seed);
- uint64 z = len * k1;
+ pair<u64, u64> v, w;
+ u64 x = seed[0];
+ u64 y = seed[1];
+ u64 z = len * k1;
v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s);
v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8);
w.first = Rotate(y + z, 35) * k1 + x;
@@ -313,7 +310,7 @@ uint128 CityHash128WithSeed(const char* s, std::size_t len, uint128 seed) {
w.first *= 9;
v.first *= k0;
// If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
- for (std::size_t tail_done = 0; tail_done < len;) {
+ for (size_t tail_done = 0; tail_done < len;) {
tail_done += 32;
y = Rotate(x + y, 42) * k0 + v.second;
w.first += Fetch64(s + len - tail_done + 16);
@@ -328,13 +325,12 @@ uint128 CityHash128WithSeed(const char* s, std::size_t len, uint128 seed) {
// different 56-byte-to-8-byte hashes to get a 16-byte final result.
x = HashLen16(x, v.first);
y = HashLen16(y + z, w.first);
- return uint128(HashLen16(x + v.second, w.second) + y, HashLen16(x + w.second, y + v.second));
+ return u128{HashLen16(x + v.second, w.second) + y, HashLen16(x + w.second, y + v.second)};
}
-uint128 CityHash128(const char* s, std::size_t len) {
- return len >= 16
- ? CityHash128WithSeed(s + 16, len - 16, uint128(Fetch64(s), Fetch64(s + 8) + k0))
- : CityHash128WithSeed(s, len, uint128(k0, k1));
+u128 CityHash128(const char* s, size_t len) {
+ return len >= 16 ? CityHash128WithSeed(s + 16, len - 16, u128{Fetch64(s), Fetch64(s + 8) + k0})
+ : CityHash128WithSeed(s, len, u128{k0, k1});
}
} // namespace Common
diff --git a/src/common/cityhash.h b/src/common/cityhash.h
index a00804e01..d74fc7639 100644
--- a/src/common/cityhash.h
+++ b/src/common/cityhash.h
@@ -62,49 +62,38 @@
#pragma once
#include <cstddef>
-#include <cstdint>
-#include <utility>
+#include "common/common_types.h"
namespace Common {
-using uint128 = std::pair<uint64_t, uint64_t>;
-
-[[nodiscard]] inline uint64_t Uint128Low64(const uint128& x) {
- return x.first;
-}
-[[nodiscard]] inline uint64_t Uint128High64(const uint128& x) {
- return x.second;
-}
-
// Hash function for a byte array.
-[[nodiscard]] uint64_t CityHash64(const char* buf, std::size_t len);
+[[nodiscard]] u64 CityHash64(const char* buf, size_t len);
// Hash function for a byte array. For convenience, a 64-bit seed is also
// hashed into the result.
-[[nodiscard]] uint64_t CityHash64WithSeed(const char* buf, std::size_t len, uint64_t seed);
+[[nodiscard]] u64 CityHash64WithSeed(const char* buf, size_t len, u64 seed);
// Hash function for a byte array. For convenience, two seeds are also
// hashed into the result.
-[[nodiscard]] uint64_t CityHash64WithSeeds(const char* buf, std::size_t len, uint64_t seed0,
- uint64_t seed1);
+[[nodiscard]] u64 CityHash64WithSeeds(const char* buf, size_t len, u64 seed0, u64 seed1);
// Hash function for a byte array.
-[[nodiscard]] uint128 CityHash128(const char* s, std::size_t len);
+[[nodiscard]] u128 CityHash128(const char* s, size_t len);
// Hash function for a byte array. For convenience, a 128-bit seed is also
// hashed into the result.
-[[nodiscard]] uint128 CityHash128WithSeed(const char* s, std::size_t len, uint128 seed);
+[[nodiscard]] u128 CityHash128WithSeed(const char* s, size_t len, u128 seed);
// Hash 128 input bits down to 64 bits of output.
// This is intended to be a reasonably good hash function.
-[[nodiscard]] inline uint64_t Hash128to64(const uint128& x) {
+[[nodiscard]] inline u64 Hash128to64(const u128& x) {
// Murmur-inspired hashing.
- const uint64_t kMul = 0x9ddfea08eb382d69ULL;
- uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
+ const u64 mul = 0x9ddfea08eb382d69ULL;
+ u64 a = (x[0] ^ x[1]) * mul;
a ^= (a >> 47);
- uint64_t b = (Uint128High64(x) ^ a) * kMul;
+ u64 b = (x[1] ^ a) * mul;
b ^= (b >> 47);
- b *= kMul;
+ b *= mul;
return b;
}
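Call sites only change spelling: u128 from common_types.h is an array of two u64 values, which is why seed[0] and seed[1] replace the old Uint128Low64/Uint128High64 accessors. A minimal usage sketch (path contents and seed illustrative):

    #include <string_view>
    #include "common/cityhash.h"
    #include "common/common_types.h"

    u64 HashPath(std::string_view path) {
        return Common::CityHash64(path.data(), path.size());
    }

    u128 HashPathSeeded(std::string_view path) {
        const u128 seed{0x1234, 0x5678}; // illustrative seed
        return Common::CityHash128WithSeed(path.data(), path.size(), seed);
    }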
diff --git a/src/common/string_util.cpp b/src/common/string_util.cpp
index 4cba2aaa4..7b614ad89 100644
--- a/src/common/string_util.cpp
+++ b/src/common/string_util.cpp
@@ -141,27 +141,13 @@ std::string ReplaceAll(std::string result, const std::string& src, const std::st
}
std::string UTF16ToUTF8(const std::u16string& input) {
-#ifdef _MSC_VER
- // Workaround for missing char16_t/char32_t instantiations in MSVC2017
- std::wstring_convert<std::codecvt_utf8_utf16<__int16>, __int16> convert;
- std::basic_string<__int16> tmp_buffer(input.cbegin(), input.cend());
- return convert.to_bytes(tmp_buffer);
-#else
std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert;
return convert.to_bytes(input);
-#endif
}
std::u16string UTF8ToUTF16(const std::string& input) {
-#ifdef _MSC_VER
- // Workaround for missing char16_t/char32_t instantiations in MSVC2017
- std::wstring_convert<std::codecvt_utf8_utf16<__int16>, __int16> convert;
- auto tmp_buffer = convert.from_bytes(input);
- return std::u16string(tmp_buffer.cbegin(), tmp_buffer.cend());
-#else
std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert;
return convert.from_bytes(input);
-#endif
}
#ifdef _WIN32
diff --git a/src/common/tiny_mt.h b/src/common/tiny_mt.h
new file mode 100644
index 000000000..19ae5b7d6
--- /dev/null
+++ b/src/common/tiny_mt.h
@@ -0,0 +1,252 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <array>
+#include <cstring>
+
+#include "common/alignment.h"
+#include "common/common_types.h"
+
+namespace Common {
+
+// Implementation of TinyMT (a Mersenne Twister RNG).
+// Like Nintendo, we will use the sample parameters.
+class TinyMT {
+public:
+ static constexpr std::size_t NumStateWords = 4;
+
+ struct State {
+ std::array<u32, NumStateWords> data{};
+ };
+
+private:
+ static constexpr u32 ParamMat1 = 0x8F7011EE;
+ static constexpr u32 ParamMat2 = 0xFC78FF1F;
+ static constexpr u32 ParamTmat = 0x3793FDFF;
+
+ static constexpr u32 ParamMult = 0x6C078965;
+ static constexpr u32 ParamPlus = 0x0019660D;
+ static constexpr u32 ParamXor = 0x5D588B65;
+
+ static constexpr u32 TopBitmask = 0x7FFFFFFF;
+
+ static constexpr int MinimumInitIterations = 8;
+ static constexpr int NumDiscardedInitOutputs = 8;
+
+ static constexpr u32 XorByShifted27(u32 value) {
+ return value ^ (value >> 27);
+ }
+
+ static constexpr u32 XorByShifted30(u32 value) {
+ return value ^ (value >> 30);
+ }
+
+private:
+ State state{};
+
+private:
+ // Internal API.
+ void FinalizeInitialization() {
+ const u32 state0 = this->state.data[0] & TopBitmask;
+ const u32 state1 = this->state.data[1];
+ const u32 state2 = this->state.data[2];
+ const u32 state3 = this->state.data[3];
+
+ if (state0 == 0 && state1 == 0 && state2 == 0 && state3 == 0) {
+ this->state.data[0] = 'T';
+ this->state.data[1] = 'I';
+ this->state.data[2] = 'N';
+ this->state.data[3] = 'Y';
+ }
+
+ for (int i = 0; i < NumDiscardedInitOutputs; i++) {
+ this->GenerateRandomU32();
+ }
+ }
+
+ u32 GenerateRandomU24() {
+ return (this->GenerateRandomU32() >> 8);
+ }
+
+ static void GenerateInitialValuePlus(TinyMT::State* state, int index, u32 value) {
+ u32& state0 = state->data[(index + 0) % NumStateWords];
+ u32& state1 = state->data[(index + 1) % NumStateWords];
+ u32& state2 = state->data[(index + 2) % NumStateWords];
+ u32& state3 = state->data[(index + 3) % NumStateWords];
+
+ const u32 x = XorByShifted27(state0 ^ state1 ^ state3) * ParamPlus;
+ const u32 y = x + index + value;
+
+ state0 = y;
+ state1 += x;
+ state2 += y;
+ }
+
+ static void GenerateInitialValueXor(TinyMT::State* state, int index) {
+ u32& state0 = state->data[(index + 0) % NumStateWords];
+ u32& state1 = state->data[(index + 1) % NumStateWords];
+ u32& state2 = state->data[(index + 2) % NumStateWords];
+ u32& state3 = state->data[(index + 3) % NumStateWords];
+
+ const u32 x = XorByShifted27(state0 + state1 + state3) * ParamXor;
+ const u32 y = x - index;
+
+ state0 = y;
+ state1 ^= x;
+ state2 ^= y;
+ }
+
+public:
+ constexpr TinyMT() = default;
+
+ // Public API.
+
+ // Initialization.
+ void Initialize(u32 seed) {
+ this->state.data[0] = seed;
+ this->state.data[1] = ParamMat1;
+ this->state.data[2] = ParamMat2;
+ this->state.data[3] = ParamTmat;
+
+ for (int i = 1; i < MinimumInitIterations; i++) {
+ const u32 mixed = XorByShifted30(this->state.data[(i - 1) % NumStateWords]);
+ this->state.data[i % NumStateWords] ^= mixed * ParamMult + i;
+ }
+
+ this->FinalizeInitialization();
+ }
+
+ void Initialize(const u32* seed, int seed_count) {
+ this->state.data[0] = 0;
+ this->state.data[1] = ParamMat1;
+ this->state.data[2] = ParamMat2;
+ this->state.data[3] = ParamTmat;
+
+ {
+ const int num_init_iterations = std::max(seed_count + 1, MinimumInitIterations) - 1;
+
+ GenerateInitialValuePlus(&this->state, 0, seed_count);
+
+ for (int i = 0; i < num_init_iterations; i++) {
+ GenerateInitialValuePlus(&this->state, (i + 1) % NumStateWords,
+ (i < seed_count) ? seed[i] : 0);
+ }
+
+ for (int i = 0; i < static_cast<int>(NumStateWords); i++) {
+ GenerateInitialValueXor(&this->state,
+ (i + 1 + num_init_iterations) % NumStateWords);
+ }
+ }
+
+ this->FinalizeInitialization();
+ }
+
+ // State management.
+ void GetState(TinyMT::State& out) const {
+ out.data = this->state.data;
+ }
+
+ void SetState(const TinyMT::State& state_) {
+ this->state.data = state_.data;
+ }
+
+ // Random generation.
+ void GenerateRandomBytes(void* dst, std::size_t size) {
+ const uintptr_t start = reinterpret_cast<uintptr_t>(dst);
+ const uintptr_t end = start + size;
+ const uintptr_t aligned_start = Common::AlignUp(start, 4);
+ const uintptr_t aligned_end = Common::AlignDown(end, 4);
+
+ // Make sure we're aligned.
+ if (start < aligned_start) {
+ const u32 rnd = this->GenerateRandomU32();
+ std::memcpy(dst, &rnd, aligned_start - start);
+ }
+
+ // Write as many aligned u32s as we can.
+ {
+ u32* cur_dst = reinterpret_cast<u32*>(aligned_start);
+ u32* const end_dst = reinterpret_cast<u32*>(aligned_end);
+
+ while (cur_dst < end_dst) {
+ *(cur_dst++) = this->GenerateRandomU32();
+ }
+ }
+
+ // Handle any leftover unaligned data.
+ if (aligned_end < end) {
+ const u32 rnd = this->GenerateRandomU32();
+ std::memcpy(reinterpret_cast<void*>(aligned_end), &rnd, end - aligned_end);
+ }
+ }
+
+ u32 GenerateRandomU32() {
+ // Advance state.
+ const u32 x0 =
+ (this->state.data[0] & TopBitmask) ^ this->state.data[1] ^ this->state.data[2];
+ const u32 y0 = this->state.data[3];
+ const u32 x1 = x0 ^ (x0 << 1);
+ const u32 y1 = y0 ^ (y0 >> 1) ^ x1;
+
+ const u32 state0 = this->state.data[1];
+ u32 state1 = this->state.data[2];
+ u32 state2 = x1 ^ (y1 << 10);
+ const u32 state3 = y1;
+
+ if ((y1 & 1) != 0) {
+ state1 ^= ParamMat1;
+ state2 ^= ParamMat2;
+ }
+
+ this->state.data[0] = state0;
+ this->state.data[1] = state1;
+ this->state.data[2] = state2;
+ this->state.data[3] = state3;
+
+ // Temper.
+ const u32 t1 = state0 + (state2 >> 8);
+ u32 t0 = state3 ^ t1;
+
+ if ((t1 & 1) != 0) {
+ t0 ^= ParamTmat;
+ }
+
+ return t0;
+ }
+
+ u64 GenerateRandomU64() {
+ const u32 lo = this->GenerateRandomU32();
+ const u32 hi = this->GenerateRandomU32();
+ return (u64{hi} << 32) | u64{lo};
+ }
+
+ float GenerateRandomF32() {
+ // Floats have 24 bits of mantissa.
+ constexpr u32 MantissaBits = 24;
+ return static_cast<float>(GenerateRandomU24()) * (1.0f / (1U << MantissaBits));
+ }
+
+ double GenerateRandomF64() {
+ // Doubles have 53 bits of mantissa.
+        // The straightforward way to generate 53 random bits would be to take
+        // 32 bits from the first rnd32() call and 21 from the second.
+        // Nintendo instead uses (32 - 5) = 27 bits from the first rnd32() call
+        // and (32 - 6) = 26 bits from the second. We match their behavior,
+        // though there's no clear reason for the split.
+ constexpr u32 MantissaBits = 53;
+ constexpr u32 Shift1st = (64 - MantissaBits) / 2;
+ constexpr u32 Shift2nd = (64 - MantissaBits) - Shift1st;
+
+ const u32 first = (this->GenerateRandomU32() >> Shift1st);
+ const u32 second = (this->GenerateRandomU32() >> Shift2nd);
+
+ return (1.0 * first * (u64{1} << (32 - Shift2nd)) + second) *
+ (1.0 / (u64{1} << MantissaBits));
+ }
+};
+
+} // namespace Common
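A minimal usage sketch (seed and buffer size illustrative). For the double-precision path above, Shift1st = (64 - 53) / 2 = 5 and Shift2nd = 11 - 5 = 6, so the two draws contribute 27 + 26 = 53 mantissa bits:

    #include "common/tiny_mt.h"

    void TinyMtSketch() {
        Common::TinyMT rng;
        rng.Initialize(0xDEADBEEF); // illustrative seed

        [[maybe_unused]] const u32 word = rng.GenerateRandomU32();
        [[maybe_unused]] const double unit = rng.GenerateRandomF64(); // in [0.0, 1.0)

        // The state is a plain array, so generators can be snapshotted and restored.
        Common::TinyMT::State snapshot{};
        rng.GetState(snapshot);
        rng.SetState(snapshot);

        u8 raw[13];
        rng.GenerateRandomBytes(raw, sizeof(raw)); // handles the unaligned head/tail
    }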
diff --git a/src/common/uint128.cpp b/src/common/uint128.cpp
deleted file mode 100644
index 16bf7c828..000000000
--- a/src/common/uint128.cpp
+++ /dev/null
@@ -1,71 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#ifdef _MSC_VER
-#include <intrin.h>
-
-#pragma intrinsic(_umul128)
-#pragma intrinsic(_udiv128)
-#endif
-#include <cstring>
-#include "common/uint128.h"
-
-namespace Common {
-
-#ifdef _MSC_VER
-
-u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) {
- u128 r{};
- r[0] = _umul128(a, b, &r[1]);
- u64 remainder;
-#if _MSC_VER < 1923
- return udiv128(r[1], r[0], d, &remainder);
-#else
- return _udiv128(r[1], r[0], d, &remainder);
-#endif
-}
-
-#else
-
-u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) {
- const u64 diva = a / d;
- const u64 moda = a % d;
- const u64 divb = b / d;
- const u64 modb = b % d;
- return diva * b + moda * divb + moda * modb / d;
-}
-
-#endif
-
-u128 Multiply64Into128(u64 a, u64 b) {
- u128 result;
-#ifdef _MSC_VER
- result[0] = _umul128(a, b, &result[1]);
-#else
- unsigned __int128 tmp = a;
- tmp *= b;
- std::memcpy(&result, &tmp, sizeof(u128));
-#endif
- return result;
-}
-
-std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor) {
- u64 remainder = dividend[0] % divisor;
- u64 accum = dividend[0] / divisor;
- if (dividend[1] == 0)
- return {accum, remainder};
- // We ignore dividend[1] / divisor as that overflows
- const u64 first_segment = (dividend[1] % divisor) << 32;
- accum += (first_segment / divisor) << 32;
- const u64 second_segment = (first_segment % divisor) << 32;
- accum += (second_segment / divisor);
- remainder += second_segment % divisor;
- if (remainder >= divisor) {
- accum++;
- remainder -= divisor;
- }
- return {accum, remainder};
-}
-
-} // namespace Common
diff --git a/src/common/uint128.h b/src/common/uint128.h
index 969259ab6..4780b2f9d 100644
--- a/src/common/uint128.h
+++ b/src/common/uint128.h
@@ -4,19 +4,120 @@
#pragma once
+#include <array>
+#include <cstring>
+#include <limits>
#include <utility>
+
+#ifdef _MSC_VER
+#include <intrin.h>
+#pragma intrinsic(__umulh)
+#pragma intrinsic(_umul128)
+#pragma intrinsic(_udiv128)
+#else
+#include <x86intrin.h>
+#endif
+
#include "common/common_types.h"
namespace Common {
// This function multiplies 2 u64 values and divides it by a u64 value.
-[[nodiscard]] u64 MultiplyAndDivide64(u64 a, u64 b, u64 d);
+[[nodiscard]] static inline u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) {
+#ifdef _MSC_VER
+ u128 r{};
+ r[0] = _umul128(a, b, &r[1]);
+ u64 remainder;
+#if _MSC_VER < 1923
+ return udiv128(r[1], r[0], d, &remainder);
+#else
+ return _udiv128(r[1], r[0], d, &remainder);
+#endif
+#else
+ const u64 diva = a / d;
+ const u64 moda = a % d;
+ const u64 divb = b / d;
+ const u64 modb = b % d;
+ return diva * b + moda * divb + moda * modb / d;
+#endif
+}
// This function multiplies 2 u64 values and produces a u128 value;
-[[nodiscard]] u128 Multiply64Into128(u64 a, u64 b);
+[[nodiscard]] static inline u128 Multiply64Into128(u64 a, u64 b) {
+ u128 result;
+#ifdef _MSC_VER
+ result[0] = _umul128(a, b, &result[1]);
+#else
+ unsigned __int128 tmp = a;
+ tmp *= b;
+ std::memcpy(&result, &tmp, sizeof(u128));
+#endif
+ return result;
+}
+
+[[nodiscard]] static inline u64 GetFixedPoint64Factor(u64 numerator, u64 divisor) {
+#ifdef __SIZEOF_INT128__
+ const auto base = static_cast<unsigned __int128>(numerator) << 64ULL;
+ return static_cast<u64>(base / divisor);
+#elif defined(_M_X64) || defined(_M_ARM64)
+ std::array<u64, 2> r = {0, numerator};
+ u64 remainder;
+#if _MSC_VER < 1923
+ return udiv128(r[1], r[0], divisor, &remainder);
+#else
+ return _udiv128(r[1], r[0], divisor, &remainder);
+#endif
+#else
+    // This one is a bit more inaccurate.
+ return MultiplyAndDivide64(std::numeric_limits<u64>::max(), numerator, divisor);
+#endif
+}
+
+[[nodiscard]] static inline u64 MultiplyHigh(u64 a, u64 b) {
+#ifdef __SIZEOF_INT128__
+ return (static_cast<unsigned __int128>(a) * static_cast<unsigned __int128>(b)) >> 64;
+#elif defined(_M_X64) || defined(_M_ARM64)
+ return __umulh(a, b); // MSVC
+#else
+ // Generic fallback
+ const u64 a_lo = u32(a);
+ const u64 a_hi = a >> 32;
+ const u64 b_lo = u32(b);
+ const u64 b_hi = b >> 32;
+
+ const u64 a_x_b_hi = a_hi * b_hi;
+ const u64 a_x_b_mid = a_hi * b_lo;
+ const u64 b_x_a_mid = b_hi * a_lo;
+ const u64 a_x_b_lo = a_lo * b_lo;
+
+ const u64 carry_bit = (static_cast<u64>(static_cast<u32>(a_x_b_mid)) +
+ static_cast<u64>(static_cast<u32>(b_x_a_mid)) + (a_x_b_lo >> 32)) >>
+ 32;
+
+ const u64 multhi = a_x_b_hi + (a_x_b_mid >> 32) + (b_x_a_mid >> 32) + carry_bit;
+
+ return multhi;
+#endif
+}
// This function divides a u128 by a u32 value and produces two u64 values:
// the result of division and the remainder
-[[nodiscard]] std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor);
+[[nodiscard]] static inline std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor) {
+ u64 remainder = dividend[0] % divisor;
+ u64 accum = dividend[0] / divisor;
+ if (dividend[1] == 0)
+ return {accum, remainder};
+ // We ignore dividend[1] / divisor as that overflows
+ const u64 first_segment = (dividend[1] % divisor) << 32;
+ accum += (first_segment / divisor) << 32;
+ const u64 second_segment = (first_segment % divisor) << 32;
+ accum += (second_segment / divisor);
+ remainder += second_segment % divisor;
+ if (remainder >= divisor) {
+ accum++;
+ remainder -= divisor;
+ }
+ return {accum, remainder};
+}
} // namespace Common
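These helpers moved into the header so native_clock (whose local copies are deleted below) can inline them. The core pattern is the classic fixed-point ratio trick, sketched here with illustrative frequencies (19.2 MHz is the Switch counter rate; the host rate is made up):

    #include "common/uint128.h"

    // Precompute numerator/divisor as a 0.64 fixed-point factor once; each
    // conversion is then one high-half 64x64 multiply instead of a division.
    u64 ScaleTicks(u64 host_ticks) {
        constexpr u64 guest_freq = 19'200'000;   // CNTFREQ
        constexpr u64 host_freq = 3'000'000'000; // illustrative TSC frequency
        static const u64 factor = Common::GetFixedPoint64Factor(guest_freq, host_freq);
        return Common::MultiplyHigh(host_ticks, factor);
    }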
diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp
index a8c143f85..49830b8ab 100644
--- a/src/common/wall_clock.cpp
+++ b/src/common/wall_clock.cpp
@@ -2,6 +2,8 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <cstdint>
+
#include "common/uint128.h"
#include "common/wall_clock.h"
diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp
index a65f6b832..87de40624 100644
--- a/src/common/x64/native_clock.cpp
+++ b/src/common/x64/native_clock.cpp
@@ -8,68 +8,10 @@
#include <mutex>
#include <thread>
-#ifdef _MSC_VER
-#include <intrin.h>
-
-#pragma intrinsic(__umulh)
-#pragma intrinsic(_udiv128)
-#else
-#include <x86intrin.h>
-#endif
-
#include "common/atomic_ops.h"
#include "common/uint128.h"
#include "common/x64/native_clock.h"
-namespace {
-
-[[nodiscard]] u64 GetFixedPoint64Factor(u64 numerator, u64 divisor) {
-#ifdef __SIZEOF_INT128__
- const auto base = static_cast<unsigned __int128>(numerator) << 64ULL;
- return static_cast<u64>(base / divisor);
-#elif defined(_M_X64) || defined(_M_ARM64)
- std::array<u64, 2> r = {0, numerator};
- u64 remainder;
-#if _MSC_VER < 1923
- return udiv128(r[1], r[0], divisor, &remainder);
-#else
- return _udiv128(r[1], r[0], divisor, &remainder);
-#endif
-#else
- // This one is bit more inaccurate.
- return MultiplyAndDivide64(std::numeric_limits<u64>::max(), numerator, divisor);
-#endif
-}
-
-[[nodiscard]] u64 MultiplyHigh(u64 a, u64 b) {
-#ifdef __SIZEOF_INT128__
- return (static_cast<unsigned __int128>(a) * static_cast<unsigned __int128>(b)) >> 64;
-#elif defined(_M_X64) || defined(_M_ARM64)
- return __umulh(a, b); // MSVC
-#else
- // Generic fallback
- const u64 a_lo = u32(a);
- const u64 a_hi = a >> 32;
- const u64 b_lo = u32(b);
- const u64 b_hi = b >> 32;
-
- const u64 a_x_b_hi = a_hi * b_hi;
- const u64 a_x_b_mid = a_hi * b_lo;
- const u64 b_x_a_mid = b_hi * a_lo;
- const u64 a_x_b_lo = a_lo * b_lo;
-
- const u64 carry_bit = (static_cast<u64>(static_cast<u32>(a_x_b_mid)) +
- static_cast<u64>(static_cast<u32>(b_x_a_mid)) + (a_x_b_lo >> 32)) >>
- 32;
-
- const u64 multhi = a_x_b_hi + (a_x_b_mid >> 32) + (b_x_a_mid >> 32) + carry_bit;
-
- return multhi;
-#endif
-}
-
-} // namespace
-
namespace Common {
u64 EstimateRDTSCFrequency() {
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 386d7bddf..17f251c37 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -19,7 +19,6 @@ add_library(core STATIC
core.h
core_timing.cpp
core_timing.h
- core_timing_util.cpp
core_timing_util.h
cpu_manager.cpp
cpu_manager.h
@@ -148,7 +147,7 @@ add_library(core STATIC
hle/kernel/client_session.h
hle/kernel/code_set.cpp
hle/kernel/code_set.h
- hle/kernel/errors.h
+ hle/kernel/svc_results.h
hle/kernel/global_scheduler_context.cpp
hle/kernel/global_scheduler_context.h
hle/kernel/handle_table.cpp
@@ -157,6 +156,8 @@ add_library(core STATIC
hle/kernel/hle_ipc.h
hle/kernel/k_address_arbiter.cpp
hle/kernel/k_address_arbiter.h
+ hle/kernel/k_address_space_info.cpp
+ hle/kernel/k_address_space_info.h
hle/kernel/k_affinity_mask.h
hle/kernel/k_condition_variable.cpp
hle/kernel/k_condition_variable.h
@@ -165,6 +166,18 @@ add_library(core STATIC
hle/kernel/k_light_condition_variable.h
hle/kernel/k_light_lock.cpp
hle/kernel/k_light_lock.h
+ hle/kernel/k_memory_block.h
+ hle/kernel/k_memory_block_manager.cpp
+ hle/kernel/k_memory_block_manager.h
+ hle/kernel/k_memory_layout.h
+ hle/kernel/k_memory_manager.cpp
+ hle/kernel/k_memory_manager.h
+ hle/kernel/k_page_bitmap.h
+ hle/kernel/k_page_heap.cpp
+ hle/kernel/k_page_heap.h
+ hle/kernel/k_page_linked_list.h
+ hle/kernel/k_page_table.cpp
+ hle/kernel/k_page_table.h
hle/kernel/k_priority_queue.h
hle/kernel/k_readable_event.cpp
hle/kernel/k_readable_event.h
@@ -174,9 +187,17 @@ add_library(core STATIC
hle/kernel/k_scheduler.h
hle/kernel/k_scheduler_lock.h
hle/kernel/k_scoped_lock.h
+ hle/kernel/k_scoped_resource_reservation.h
hle/kernel/k_scoped_scheduler_lock_and_sleep.h
+ hle/kernel/k_shared_memory.cpp
+ hle/kernel/k_shared_memory.h
+ hle/kernel/k_slab_heap.h
+ hle/kernel/k_spin_lock.cpp
+ hle/kernel/k_spin_lock.h
hle/kernel/k_synchronization_object.cpp
hle/kernel/k_synchronization_object.h
+ hle/kernel/k_system_control.cpp
+ hle/kernel/k_system_control.h
hle/kernel/k_thread.cpp
hle/kernel/k_thread.h
hle/kernel/k_thread_queue.h
@@ -184,23 +205,7 @@ add_library(core STATIC
hle/kernel/k_writable_event.h
hle/kernel/kernel.cpp
hle/kernel/kernel.h
- hle/kernel/memory/address_space_info.cpp
- hle/kernel/memory/address_space_info.h
- hle/kernel/memory/memory_block.h
- hle/kernel/memory/memory_block_manager.cpp
- hle/kernel/memory/memory_block_manager.h
- hle/kernel/memory/memory_layout.h
- hle/kernel/memory/memory_manager.cpp
- hle/kernel/memory/memory_manager.h
- hle/kernel/memory/memory_types.h
- hle/kernel/memory/page_linked_list.h
- hle/kernel/memory/page_heap.cpp
- hle/kernel/memory/page_heap.h
- hle/kernel/memory/page_table.cpp
- hle/kernel/memory/page_table.h
- hle/kernel/memory/slab_heap.h
- hle/kernel/memory/system_control.cpp
- hle/kernel/memory/system_control.h
+ hle/kernel/memory_types.h
hle/kernel/object.cpp
hle/kernel/object.h
hle/kernel/physical_core.cpp
@@ -218,12 +223,9 @@ add_library(core STATIC
hle/kernel/service_thread.h
hle/kernel/session.cpp
hle/kernel/session.h
- hle/kernel/shared_memory.cpp
- hle/kernel/shared_memory.h
hle/kernel/svc.cpp
hle/kernel/svc.h
hle/kernel/svc_common.h
- hle/kernel/svc_results.h
hle/kernel/svc_types.h
hle/kernel/svc_wrap.h
hle/kernel/time_manager.cpp
@@ -266,6 +268,7 @@ add_library(core STATIC
hle/service/am/applets/software_keyboard.h
hle/service/am/applets/web_browser.cpp
hle/service/am/applets/web_browser.h
+ hle/service/am/applets/web_types.h
hle/service/am/idle.cpp
hle/service/am/idle.h
hle/service/am/omm.cpp
@@ -400,6 +403,7 @@ add_library(core STATIC
hle/service/hid/controllers/xpad.h
hle/service/lbl/lbl.cpp
hle/service/lbl/lbl.h
+ hle/service/ldn/errors.h
hle/service/ldn/ldn.cpp
hle/service/ldn/ldn.h
hle/service/ldr/ldr.cpp
@@ -653,6 +657,8 @@ else()
$<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter>
$<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable>
+ $<$<CXX_COMPILER_ID:Clang>:-fsized-deallocation>
+
-Wno-sign-conversion
)
endif()
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 30f5e1128..de6305e2a 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -308,6 +308,9 @@ struct System::Impl {
// Close all CPU/threading state
cpu_manager.Shutdown();
+ // Release the Time Manager's resources
+ time_manager.Shutdown();
+
// Shutdown kernel and core timing
core_timing.Shutdown();
kernel.Shutdown();
diff --git a/src/core/core_timing_util.cpp b/src/core/core_timing_util.cpp
deleted file mode 100644
index 8ce8e602e..000000000
--- a/src/core/core_timing_util.cpp
+++ /dev/null
@@ -1,84 +0,0 @@
-// Copyright 2008 Dolphin Emulator Project / 2017 Citra Emulator Project
-// Licensed under GPLv2+
-// Refer to the license.txt file included.
-
-#include "core/core_timing_util.h"
-
-#include <cinttypes>
-#include <limits>
-#include "common/logging/log.h"
-#include "common/uint128.h"
-#include "core/hardware_properties.h"
-
-namespace Core::Timing {
-
-constexpr u64 MAX_VALUE_TO_MULTIPLY = std::numeric_limits<s64>::max() / Hardware::BASE_CLOCK_RATE;
-
-s64 msToCycles(std::chrono::milliseconds ms) {
- if (static_cast<u64>(ms.count() / 1000) > MAX_VALUE_TO_MULTIPLY) {
- LOG_ERROR(Core_Timing, "Integer overflow, use max value");
- return std::numeric_limits<s64>::max();
- }
- if (static_cast<u64>(ms.count()) > MAX_VALUE_TO_MULTIPLY) {
- LOG_DEBUG(Core_Timing, "Time very big, do rounding");
- return Hardware::BASE_CLOCK_RATE * (ms.count() / 1000);
- }
- return (Hardware::BASE_CLOCK_RATE * ms.count()) / 1000;
-}
-
-s64 usToCycles(std::chrono::microseconds us) {
- if (static_cast<u64>(us.count() / 1000000) > MAX_VALUE_TO_MULTIPLY) {
- LOG_ERROR(Core_Timing, "Integer overflow, use max value");
- return std::numeric_limits<s64>::max();
- }
- if (static_cast<u64>(us.count()) > MAX_VALUE_TO_MULTIPLY) {
- LOG_DEBUG(Core_Timing, "Time very big, do rounding");
- return Hardware::BASE_CLOCK_RATE * (us.count() / 1000000);
- }
- return (Hardware::BASE_CLOCK_RATE * us.count()) / 1000000;
-}
-
-s64 nsToCycles(std::chrono::nanoseconds ns) {
- const u128 temporal = Common::Multiply64Into128(ns.count(), Hardware::BASE_CLOCK_RATE);
- return Common::Divide128On32(temporal, static_cast<u32>(1000000000)).first;
-}
-
-u64 msToClockCycles(std::chrono::milliseconds ns) {
- const u128 temp = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ);
- return Common::Divide128On32(temp, 1000).first;
-}
-
-u64 usToClockCycles(std::chrono::microseconds ns) {
- const u128 temp = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ);
- return Common::Divide128On32(temp, 1000000).first;
-}
-
-u64 nsToClockCycles(std::chrono::nanoseconds ns) {
- const u128 temp = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ);
- return Common::Divide128On32(temp, 1000000000).first;
-}
-
-u64 CpuCyclesToClockCycles(u64 ticks) {
- const u128 temporal = Common::Multiply64Into128(ticks, Hardware::CNTFREQ);
- return Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
-}
-
-std::chrono::milliseconds CyclesToMs(s64 cycles) {
- const u128 temporal = Common::Multiply64Into128(cycles, 1000);
- u64 ms = Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
- return std::chrono::milliseconds(ms);
-}
-
-std::chrono::nanoseconds CyclesToNs(s64 cycles) {
- const u128 temporal = Common::Multiply64Into128(cycles, 1000000000);
- u64 ns = Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
- return std::chrono::nanoseconds(ns);
-}
-
-std::chrono::microseconds CyclesToUs(s64 cycles) {
- const u128 temporal = Common::Multiply64Into128(cycles, 1000000);
- u64 us = Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
- return std::chrono::microseconds(us);
-}
-
-} // namespace Core::Timing
diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h
index e4a046bf9..14c36a485 100644
--- a/src/core/core_timing_util.h
+++ b/src/core/core_timing_util.h
@@ -1,24 +1,59 @@
-// Copyright 2008 Dolphin Emulator Project / 2017 Citra Emulator Project
-// Licensed under GPLv2+
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <chrono>
+
#include "common/common_types.h"
+#include "core/hardware_properties.h"
namespace Core::Timing {
-s64 msToCycles(std::chrono::milliseconds ms);
-s64 usToCycles(std::chrono::microseconds us);
-s64 nsToCycles(std::chrono::nanoseconds ns);
-u64 msToClockCycles(std::chrono::milliseconds ns);
-u64 usToClockCycles(std::chrono::microseconds ns);
-u64 nsToClockCycles(std::chrono::nanoseconds ns);
-std::chrono::milliseconds CyclesToMs(s64 cycles);
-std::chrono::nanoseconds CyclesToNs(s64 cycles);
-std::chrono::microseconds CyclesToUs(s64 cycles);
-
-u64 CpuCyclesToClockCycles(u64 ticks);
+namespace detail {
+constexpr u64 CNTFREQ_ADJUSTED = Hardware::CNTFREQ / 1000;
+constexpr u64 BASE_CLOCK_RATE_ADJUSTED = Hardware::BASE_CLOCK_RATE / 1000;
+} // namespace detail
+
+[[nodiscard]] constexpr s64 msToCycles(std::chrono::milliseconds ms) {
+ return ms.count() * detail::BASE_CLOCK_RATE_ADJUSTED;
+}
+
+[[nodiscard]] constexpr s64 usToCycles(std::chrono::microseconds us) {
+ return us.count() * detail::BASE_CLOCK_RATE_ADJUSTED / 1000;
+}
+
+[[nodiscard]] constexpr s64 nsToCycles(std::chrono::nanoseconds ns) {
+ return ns.count() * detail::BASE_CLOCK_RATE_ADJUSTED / 1000000;
+}
+
+[[nodiscard]] constexpr u64 msToClockCycles(std::chrono::milliseconds ms) {
+ return static_cast<u64>(ms.count()) * detail::CNTFREQ_ADJUSTED;
+}
+
+[[nodiscard]] constexpr u64 usToClockCycles(std::chrono::microseconds us) {
+ return us.count() * detail::CNTFREQ_ADJUSTED / 1000;
+}
+
+[[nodiscard]] constexpr u64 nsToClockCycles(std::chrono::nanoseconds ns) {
+ return ns.count() * detail::CNTFREQ_ADJUSTED / 1000000;
+}
+
+[[nodiscard]] constexpr u64 CpuCyclesToClockCycles(u64 ticks) {
+ return ticks * detail::CNTFREQ_ADJUSTED / detail::BASE_CLOCK_RATE_ADJUSTED;
+}
+
+[[nodiscard]] constexpr std::chrono::milliseconds CyclesToMs(s64 cycles) {
+ return std::chrono::milliseconds(cycles / detail::BASE_CLOCK_RATE_ADJUSTED);
+}
+
+[[nodiscard]] constexpr std::chrono::nanoseconds CyclesToNs(s64 cycles) {
+ return std::chrono::nanoseconds(cycles * 1000000 / detail::BASE_CLOCK_RATE_ADJUSTED);
+}
+
+[[nodiscard]] constexpr std::chrono::microseconds CyclesToUs(s64 cycles) {
+ return std::chrono::microseconds(cycles * 1000 / detail::BASE_CLOCK_RATE_ADJUSTED);
+}
} // namespace Core::Timing
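The rewrite trades the old 128-bit multiply/divide for plain 64-bit arithmetic by pre-dividing both rates by 1000. A worked example, assuming the documented CNTFREQ of 19,200,000 Hz:

    #include <chrono>
    #include "core/core_timing_util.h"

    // CNTFREQ_ADJUSTED = 19,200,000 / 1000 = 19,200 ticks per millisecond, so:
    //   msToClockCycles(10ms)  = 10 * 19,200          = 192,000
    //   usToClockCycles(500us) = 500 * 19,200 / 1,000 = 9,600
    constexpr u64 cycles = Core::Timing::msToClockCycles(std::chrono::milliseconds{10});
    static_assert(cycles == 192'000);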
diff --git a/src/core/frontend/applets/controller.h b/src/core/frontend/applets/controller.h
index dff71d8d9..b0626a0f9 100644
--- a/src/core/frontend/applets/controller.h
+++ b/src/core/frontend/applets/controller.h
@@ -31,6 +31,7 @@ struct ControllerParameters {
bool allow_dual_joycons{};
bool allow_left_joycon{};
bool allow_right_joycon{};
+ bool allow_gamecube_controller{};
};
class ControllerApplet {
diff --git a/src/core/hle/kernel/client_port.cpp b/src/core/hle/kernel/client_port.cpp
index f8f005f15..0b6957e31 100644
--- a/src/core/hle/kernel/client_port.cpp
+++ b/src/core/hle/kernel/client_port.cpp
@@ -4,11 +4,11 @@
#include "core/hle/kernel/client_port.h"
#include "core/hle/kernel/client_session.h"
-#include "core/hle/kernel/errors.h"
#include "core/hle/kernel/hle_ipc.h"
#include "core/hle/kernel/object.h"
#include "core/hle/kernel/server_port.h"
#include "core/hle/kernel/session.h"
+#include "core/hle/kernel/svc_results.h"
namespace Kernel {
@@ -21,7 +21,7 @@ std::shared_ptr<ServerPort> ClientPort::GetServerPort() const {
ResultVal<std::shared_ptr<ClientSession>> ClientPort::Connect() {
if (active_sessions >= max_sessions) {
- return ERR_MAX_CONNECTIONS_REACHED;
+ return ResultMaxConnectionsReached;
}
active_sessions++;
diff --git a/src/core/hle/kernel/client_session.cpp b/src/core/hle/kernel/client_session.cpp
index a2be1a8f6..e230f365a 100644
--- a/src/core/hle/kernel/client_session.cpp
+++ b/src/core/hle/kernel/client_session.cpp
@@ -3,11 +3,11 @@
// Refer to the license.txt file included.
#include "core/hle/kernel/client_session.h"
-#include "core/hle/kernel/errors.h"
#include "core/hle/kernel/hle_ipc.h"
#include "core/hle/kernel/k_thread.h"
#include "core/hle/kernel/server_session.h"
#include "core/hle/kernel/session.h"
+#include "core/hle/kernel/svc_results.h"
#include "core/hle/result.h"
namespace Kernel {
@@ -43,7 +43,7 @@ ResultCode ClientSession::SendSyncRequest(std::shared_ptr<KThread> thread,
Core::Timing::CoreTiming& core_timing) {
// Keep ServerSession alive until we're done working with it.
if (!parent->Server()) {
- return ERR_SESSION_CLOSED_BY_REMOTE;
+ return ResultSessionClosedByRemote;
}
// Signal the server session that new data is available
diff --git a/src/core/hle/kernel/errors.h b/src/core/hle/kernel/errors.h
deleted file mode 100644
index 7d32a39f0..000000000
--- a/src/core/hle/kernel/errors.h
+++ /dev/null
@@ -1,43 +0,0 @@
-// Copyright 2018 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "core/hle/result.h"
-
-namespace Kernel {
-
-// Confirmed Switch kernel error codes
-
-constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED{ErrorModule::Kernel, 7};
-constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14};
-constexpr ResultCode ERR_THREAD_TERMINATING{ErrorModule::Kernel, 59};
-constexpr ResultCode ERR_TERMINATION_REQUESTED{ErrorModule::Kernel, 59};
-constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101};
-constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102};
-constexpr ResultCode ERR_OUT_OF_RESOURCES{ErrorModule::Kernel, 103};
-constexpr ResultCode ERR_OUT_OF_MEMORY{ErrorModule::Kernel, 104};
-constexpr ResultCode ERR_HANDLE_TABLE_FULL{ErrorModule::Kernel, 105};
-constexpr ResultCode ERR_INVALID_ADDRESS_STATE{ErrorModule::Kernel, 106};
-constexpr ResultCode ERR_INVALID_CURRENT_MEMORY{ErrorModule::Kernel, 106};
-constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS{ErrorModule::Kernel, 108};
-constexpr ResultCode ERR_INVALID_MEMORY_RANGE{ErrorModule::Kernel, 110};
-constexpr ResultCode ERR_INVALID_PROCESSOR_ID{ErrorModule::Kernel, 113};
-constexpr ResultCode ERR_INVALID_THREAD_PRIORITY{ErrorModule::Kernel, 112};
-constexpr ResultCode ERR_INVALID_HANDLE{ErrorModule::Kernel, 114};
-constexpr ResultCode ERR_INVALID_POINTER{ErrorModule::Kernel, 115};
-constexpr ResultCode ERR_INVALID_COMBINATION{ErrorModule::Kernel, 116};
-constexpr ResultCode RESULT_TIMEOUT{ErrorModule::Kernel, 117};
-constexpr ResultCode ERR_SYNCHRONIZATION_CANCELED{ErrorModule::Kernel, 118};
-constexpr ResultCode ERR_CANCELLED{ErrorModule::Kernel, 118};
-constexpr ResultCode ERR_OUT_OF_RANGE{ErrorModule::Kernel, 119};
-constexpr ResultCode ERR_INVALID_ENUM_VALUE{ErrorModule::Kernel, 120};
-constexpr ResultCode ERR_NOT_FOUND{ErrorModule::Kernel, 121};
-constexpr ResultCode ERR_BUSY{ErrorModule::Kernel, 122};
-constexpr ResultCode ERR_SESSION_CLOSED_BY_REMOTE{ErrorModule::Kernel, 123};
-constexpr ResultCode ERR_INVALID_STATE{ErrorModule::Kernel, 125};
-constexpr ResultCode ERR_RESERVED_VALUE{ErrorModule::Kernel, 126};
-constexpr ResultCode ERR_RESOURCE_LIMIT_EXCEEDED{ErrorModule::Kernel, 132};
-
-} // namespace Kernel
diff --git a/src/core/hle/kernel/handle_table.cpp b/src/core/hle/kernel/handle_table.cpp
index 1a2fa9cd8..f96d34078 100644
--- a/src/core/hle/kernel/handle_table.cpp
+++ b/src/core/hle/kernel/handle_table.cpp
@@ -6,12 +6,12 @@
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
-#include "core/hle/kernel/errors.h"
#include "core/hle/kernel/handle_table.h"
#include "core/hle/kernel/k_scheduler.h"
#include "core/hle/kernel/k_thread.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/process.h"
+#include "core/hle/kernel/svc_results.h"
namespace Kernel {
namespace {
@@ -33,7 +33,7 @@ HandleTable::~HandleTable() = default;
ResultCode HandleTable::SetSize(s32 handle_table_size) {
if (static_cast<u32>(handle_table_size) > MAX_COUNT) {
LOG_ERROR(Kernel, "Handle table size {} is greater than {}", handle_table_size, MAX_COUNT);
- return ERR_OUT_OF_MEMORY;
+ return ResultOutOfMemory;
}
// Values less than or equal to zero indicate to use the maximum allowable
@@ -53,7 +53,7 @@ ResultVal<Handle> HandleTable::Create(std::shared_ptr<Object> obj) {
const u16 slot = next_free_slot;
if (slot >= table_size) {
LOG_ERROR(Kernel, "Unable to allocate Handle, too many slots in use.");
- return ERR_HANDLE_TABLE_FULL;
+ return ResultHandleTableFull;
}
next_free_slot = generations[slot];
@@ -76,7 +76,7 @@ ResultVal<Handle> HandleTable::Duplicate(Handle handle) {
std::shared_ptr<Object> object = GetGeneric(handle);
if (object == nullptr) {
LOG_ERROR(Kernel, "Tried to duplicate invalid handle: {:08X}", handle);
- return ERR_INVALID_HANDLE;
+ return ResultInvalidHandle;
}
return Create(std::move(object));
}
@@ -84,7 +84,7 @@ ResultVal<Handle> HandleTable::Duplicate(Handle handle) {
ResultCode HandleTable::Close(Handle handle) {
if (!IsValid(handle)) {
LOG_ERROR(Kernel, "Handle is not valid! handle={:08X}", handle);
- return ERR_INVALID_HANDLE;
+ return ResultInvalidHandle;
}
const u16 slot = GetSlot(handle);
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index 7ec62cf18..161d9f782 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -14,7 +14,6 @@
#include "common/common_types.h"
#include "common/logging/log.h"
#include "core/hle/ipc_helpers.h"
-#include "core/hle/kernel/errors.h"
#include "core/hle/kernel/handle_table.h"
#include "core/hle/kernel/hle_ipc.h"
#include "core/hle/kernel/k_readable_event.h"
@@ -26,6 +25,7 @@
#include "core/hle/kernel/object.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/server_session.h"
+#include "core/hle/kernel/svc_results.h"
#include "core/hle/kernel/time_manager.h"
#include "core/memory.h"
diff --git a/src/core/hle/kernel/k_address_arbiter.cpp b/src/core/hle/kernel/k_address_arbiter.cpp
index d0e90fd60..7018f56da 100644
--- a/src/core/hle/kernel/k_address_arbiter.cpp
+++ b/src/core/hle/kernel/k_address_arbiter.cpp
@@ -120,10 +120,10 @@ ResultCode KAddressArbiter::SignalAndIncrementIfEqual(VAddr addr, s32 value, s32
s32 user_value{};
if (!UpdateIfEqual(system, &user_value, addr, value, value + 1)) {
LOG_ERROR(Kernel, "Invalid current memory!");
- return Svc::ResultInvalidCurrentMemory;
+ return ResultInvalidCurrentMemory;
}
if (user_value != value) {
- return Svc::ResultInvalidState;
+ return ResultInvalidState;
}
auto it = thread_tree.nfind_light({addr, -1});
@@ -189,10 +189,10 @@ ResultCode KAddressArbiter::SignalAndModifyByWaitingCountIfEqual(VAddr addr, s32
if (!succeeded) {
LOG_ERROR(Kernel, "Invalid current memory!");
- return Svc::ResultInvalidCurrentMemory;
+ return ResultInvalidCurrentMemory;
}
if (user_value != value) {
- return Svc::ResultInvalidState;
+ return ResultInvalidState;
}
while ((it != thread_tree.end()) && (count <= 0 || num_waiters < count) &&
@@ -221,11 +221,11 @@ ResultCode KAddressArbiter::WaitIfLessThan(VAddr addr, s32 value, bool decrement
// Check that the thread isn't terminating.
if (cur_thread->IsTerminationRequested()) {
slp.CancelSleep();
- return Svc::ResultTerminationRequested;
+ return ResultTerminationRequested;
}
// Set the synced object.
- cur_thread->SetSyncedObject(nullptr, Svc::ResultTimedOut);
+ cur_thread->SetSyncedObject(nullptr, ResultTimedOut);
// Read the value from userspace.
s32 user_value{};
@@ -238,19 +238,19 @@ ResultCode KAddressArbiter::WaitIfLessThan(VAddr addr, s32 value, bool decrement
if (!succeeded) {
slp.CancelSleep();
- return Svc::ResultInvalidCurrentMemory;
+ return ResultInvalidCurrentMemory;
}
// Check that the value is less than the specified one.
if (user_value >= value) {
slp.CancelSleep();
- return Svc::ResultInvalidState;
+ return ResultInvalidState;
}
// Check that the timeout is non-zero.
if (timeout == 0) {
slp.CancelSleep();
- return Svc::ResultTimedOut;
+ return ResultTimedOut;
}
// Set the arbiter.
@@ -288,29 +288,29 @@ ResultCode KAddressArbiter::WaitIfEqual(VAddr addr, s32 value, s64 timeout) {
// Check that the thread isn't terminating.
if (cur_thread->IsTerminationRequested()) {
slp.CancelSleep();
- return Svc::ResultTerminationRequested;
+ return ResultTerminationRequested;
}
// Set the synced object.
- cur_thread->SetSyncedObject(nullptr, Svc::ResultTimedOut);
+ cur_thread->SetSyncedObject(nullptr, ResultTimedOut);
// Read the value from userspace.
s32 user_value{};
if (!ReadFromUser(system, &user_value, addr)) {
slp.CancelSleep();
- return Svc::ResultInvalidCurrentMemory;
+ return ResultInvalidCurrentMemory;
}
// Check that the value is equal.
if (value != user_value) {
slp.CancelSleep();
- return Svc::ResultInvalidState;
+ return ResultInvalidState;
}
// Check that the timeout is non-zero.
if (timeout == 0) {
slp.CancelSleep();
- return Svc::ResultTimedOut;
+ return ResultTimedOut;
}
// Set the arbiter.
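// A user-space model of the UpdateIfEqual() helper these hunks lean on:
// atomically replace *addr with new_value only when it currently equals
// 'value', reporting the observed value either way. A sketch under simplified
// types; the real helper targets guest memory and can also fail outright with
// ResultInvalidCurrentMemory when the address is unmapped.
#include <atomic>
#include <cstdint>

bool UpdateIfEqual(std::atomic<int32_t>& addr, int32_t* out_value,
                   int32_t value, int32_t new_value) {
    int32_t observed = value;
    if (addr.compare_exchange_strong(observed, new_value)) {
        *out_value = value;    // swapped; the observed value matched
    } else {
        *out_value = observed; // not swapped; report the mismatching value
    }
    return true;               // the access itself succeeds in this model
}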
diff --git a/src/core/hle/kernel/memory/address_space_info.cpp b/src/core/hle/kernel/k_address_space_info.cpp
index 6cf43ba24..24944d15b 100644
--- a/src/core/hle/kernel/memory/address_space_info.cpp
+++ b/src/core/hle/kernel/k_address_space_info.cpp
@@ -2,15 +2,12 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-// This file references various implementation details from Atmosphere, an open-source firmware for
-// the Nintendo Switch. Copyright 2018-2020 Atmosphere-NX.
-
#include <array>
#include "common/assert.h"
-#include "core/hle/kernel/memory/address_space_info.h"
+#include "core/hle/kernel/k_address_space_info.h"
-namespace Kernel::Memory {
+namespace Kernel {
namespace {
@@ -28,20 +25,20 @@ enum : u64 {
};
// clang-format off
-constexpr std::array<AddressSpaceInfo, 13> AddressSpaceInfos{{
- { .bit_width = 32, .address = Size_2_MB , .size = Size_1_GB - Size_2_MB , .type = AddressSpaceInfo::Type::Is32Bit, },
- { .bit_width = 32, .address = Size_1_GB , .size = Size_4_GB - Size_1_GB , .type = AddressSpaceInfo::Type::Small64Bit, },
- { .bit_width = 32, .address = Invalid , .size = Size_1_GB , .type = AddressSpaceInfo::Type::Heap, },
- { .bit_width = 32, .address = Invalid , .size = Size_1_GB , .type = AddressSpaceInfo::Type::Alias, },
- { .bit_width = 36, .address = Size_128_MB, .size = Size_2_GB - Size_128_MB, .type = AddressSpaceInfo::Type::Is32Bit, },
- { .bit_width = 36, .address = Size_2_GB , .size = Size_64_GB - Size_2_GB , .type = AddressSpaceInfo::Type::Small64Bit, },
- { .bit_width = 36, .address = Invalid , .size = Size_6_GB , .type = AddressSpaceInfo::Type::Heap, },
- { .bit_width = 36, .address = Invalid , .size = Size_6_GB , .type = AddressSpaceInfo::Type::Alias, },
- { .bit_width = 39, .address = Size_128_MB, .size = Size_512_GB - Size_128_MB, .type = AddressSpaceInfo::Type::Large64Bit, },
- { .bit_width = 39, .address = Invalid , .size = Size_64_GB , .type = AddressSpaceInfo::Type::Is32Bit },
- { .bit_width = 39, .address = Invalid , .size = Size_6_GB , .type = AddressSpaceInfo::Type::Heap, },
- { .bit_width = 39, .address = Invalid , .size = Size_64_GB , .type = AddressSpaceInfo::Type::Alias, },
- { .bit_width = 39, .address = Invalid , .size = Size_2_GB , .type = AddressSpaceInfo::Type::Stack, },
+constexpr std::array<KAddressSpaceInfo, 13> AddressSpaceInfos{{
+ { .bit_width = 32, .address = Size_2_MB , .size = Size_1_GB - Size_2_MB , .type = KAddressSpaceInfo::Type::MapSmall, },
+ { .bit_width = 32, .address = Size_1_GB , .size = Size_4_GB - Size_1_GB , .type = KAddressSpaceInfo::Type::MapLarge, },
+ { .bit_width = 32, .address = Invalid , .size = Size_1_GB , .type = KAddressSpaceInfo::Type::Heap, },
+ { .bit_width = 32, .address = Invalid , .size = Size_1_GB , .type = KAddressSpaceInfo::Type::Alias, },
+ { .bit_width = 36, .address = Size_128_MB, .size = Size_2_GB - Size_128_MB, .type = KAddressSpaceInfo::Type::MapSmall, },
+ { .bit_width = 36, .address = Size_2_GB , .size = Size_64_GB - Size_2_GB , .type = KAddressSpaceInfo::Type::MapLarge, },
+ { .bit_width = 36, .address = Invalid , .size = Size_6_GB , .type = KAddressSpaceInfo::Type::Heap, },
+ { .bit_width = 36, .address = Invalid , .size = Size_6_GB , .type = KAddressSpaceInfo::Type::Alias, },
+ { .bit_width = 39, .address = Size_128_MB, .size = Size_512_GB - Size_128_MB, .type = KAddressSpaceInfo::Type::Map39Bit, },
+ { .bit_width = 39, .address = Invalid , .size = Size_64_GB , .type = KAddressSpaceInfo::Type::MapSmall },
+ { .bit_width = 39, .address = Invalid , .size = Size_6_GB , .type = KAddressSpaceInfo::Type::Heap, },
+ { .bit_width = 39, .address = Invalid , .size = Size_64_GB , .type = KAddressSpaceInfo::Type::Alias, },
+ { .bit_width = 39, .address = Invalid , .size = Size_2_GB , .type = KAddressSpaceInfo::Type::Stack, },
}};
// clang-format on
@@ -49,7 +46,8 @@ constexpr bool IsAllowedIndexForAddress(std::size_t index) {
return index < AddressSpaceInfos.size() && AddressSpaceInfos[index].address != Invalid;
}
-using IndexArray = std::array<std::size_t, static_cast<std::size_t>(AddressSpaceInfo::Type::Count)>;
+using IndexArray =
+ std::array<std::size_t, static_cast<std::size_t>(KAddressSpaceInfo::Type::Count)>;
constexpr IndexArray AddressSpaceIndices32Bit{
0, 1, 0, 2, 0, 3,
@@ -63,23 +61,23 @@ constexpr IndexArray AddressSpaceIndices39Bit{
9, 8, 8, 10, 12, 11,
};
-constexpr bool IsAllowed32BitType(AddressSpaceInfo::Type type) {
- return type < AddressSpaceInfo::Type::Count && type != AddressSpaceInfo::Type::Large64Bit &&
- type != AddressSpaceInfo::Type::Stack;
+constexpr bool IsAllowed32BitType(KAddressSpaceInfo::Type type) {
+ return type < KAddressSpaceInfo::Type::Count && type != KAddressSpaceInfo::Type::Map39Bit &&
+ type != KAddressSpaceInfo::Type::Stack;
}
-constexpr bool IsAllowed36BitType(AddressSpaceInfo::Type type) {
- return type < AddressSpaceInfo::Type::Count && type != AddressSpaceInfo::Type::Large64Bit &&
- type != AddressSpaceInfo::Type::Stack;
+constexpr bool IsAllowed36BitType(KAddressSpaceInfo::Type type) {
+ return type < KAddressSpaceInfo::Type::Count && type != KAddressSpaceInfo::Type::Map39Bit &&
+ type != KAddressSpaceInfo::Type::Stack;
}
-constexpr bool IsAllowed39BitType(AddressSpaceInfo::Type type) {
- return type < AddressSpaceInfo::Type::Count && type != AddressSpaceInfo::Type::Small64Bit;
+constexpr bool IsAllowed39BitType(KAddressSpaceInfo::Type type) {
+ return type < KAddressSpaceInfo::Type::Count && type != KAddressSpaceInfo::Type::MapLarge;
}
} // namespace
-u64 AddressSpaceInfo::GetAddressSpaceStart(std::size_t width, Type type) {
+u64 KAddressSpaceInfo::GetAddressSpaceStart(std::size_t width, Type type) {
const std::size_t index{static_cast<std::size_t>(type)};
switch (width) {
case 32:
@@ -99,7 +97,7 @@ u64 AddressSpaceInfo::GetAddressSpaceStart(std::size_t width, Type type) {
return 0;
}
-std::size_t AddressSpaceInfo::GetAddressSpaceSize(std::size_t width, Type type) {
+std::size_t KAddressSpaceInfo::GetAddressSpaceSize(std::size_t width, Type type) {
const std::size_t index{static_cast<std::size_t>(type)};
switch (width) {
case 32:
@@ -116,4 +114,4 @@ std::size_t AddressSpaceInfo::GetAddressSpaceSize(std::size_t width, Type type)
return 0;
}
-} // namespace Kernel::Memory
+} // namespace Kernel
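// Worked lookup against the renamed table: for the 39-bit address space,
// AddressSpaceIndices39Bit maps Type::Map39Bit (value 2) to row index 8
// above, so:
//   KAddressSpaceInfo::GetAddressSpaceStart(39, KAddressSpaceInfo::Type::Map39Bit)
//     == Size_128_MB
//   KAddressSpaceInfo::GetAddressSpaceSize(39, KAddressSpaceInfo::Type::Map39Bit)
//     == Size_512_GB - Size_128_MB
// i.e. the large 39-bit map region starts at 128 MiB and spans just under 512 GiB.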
diff --git a/src/core/hle/kernel/memory/address_space_info.h b/src/core/hle/kernel/k_address_space_info.h
index a4e6e91e5..06f31c6d5 100644
--- a/src/core/hle/kernel/memory/address_space_info.h
+++ b/src/core/hle/kernel/k_address_space_info.h
@@ -2,20 +2,17 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-// This file references various implementation details from Atmosphere, an open-source firmware for
-// the Nintendo Switch. Copyright 2018-2020 Atmosphere-NX.
-
#pragma once
#include "common/common_types.h"
-namespace Kernel::Memory {
+namespace Kernel {
-struct AddressSpaceInfo final {
+struct KAddressSpaceInfo final {
enum class Type : u32 {
- Is32Bit = 0,
- Small64Bit = 1,
- Large64Bit = 2,
+ MapSmall = 0,
+ MapLarge = 1,
+ Map39Bit = 2,
Heap = 3,
Stack = 4,
Alias = 5,
@@ -31,4 +28,4 @@ struct AddressSpaceInfo final {
const Type type{};
};
-} // namespace Kernel::Memory
+} // namespace Kernel
diff --git a/src/core/hle/kernel/k_condition_variable.cpp b/src/core/hle/kernel/k_condition_variable.cpp
index f0ad8b390..170d8fa0d 100644
--- a/src/core/hle/kernel/k_condition_variable.cpp
+++ b/src/core/hle/kernel/k_condition_variable.cpp
@@ -92,10 +92,10 @@ ResultCode KConditionVariable::SignalToAddress(VAddr addr) {
// Write the value to userspace.
if (!WriteToUser(system, addr, std::addressof(next_value))) {
if (next_owner_thread) {
- next_owner_thread->SetSyncedObject(nullptr, Svc::ResultInvalidCurrentMemory);
+ next_owner_thread->SetSyncedObject(nullptr, ResultInvalidCurrentMemory);
}
- return Svc::ResultInvalidCurrentMemory;
+ return ResultInvalidCurrentMemory;
}
}
@@ -114,20 +114,20 @@ ResultCode KConditionVariable::WaitForAddress(Handle handle, VAddr addr, u32 val
cur_thread->SetSyncedObject(nullptr, RESULT_SUCCESS);
// Check if the thread should terminate.
- R_UNLESS(!cur_thread->IsTerminationRequested(), Svc::ResultTerminationRequested);
+ R_UNLESS(!cur_thread->IsTerminationRequested(), ResultTerminationRequested);
{
// Read the tag from userspace.
u32 test_tag{};
R_UNLESS(ReadFromUser(system, std::addressof(test_tag), addr),
- Svc::ResultInvalidCurrentMemory);
+ ResultInvalidCurrentMemory);
// If the tag isn't the handle (with wait mask), we're done.
R_UNLESS(test_tag == (handle | Svc::HandleWaitMask), RESULT_SUCCESS);
// Get the lock owner thread.
owner_thread = kernel.CurrentProcess()->GetHandleTable().Get<KThread>(handle);
- R_UNLESS(owner_thread, Svc::ResultInvalidHandle);
+ R_UNLESS(owner_thread, ResultInvalidHandle);
// Update the lock.
cur_thread->SetAddressKey(addr, value);
@@ -191,13 +191,13 @@ KThread* KConditionVariable::SignalImpl(KThread* thread) {
thread_to_close = owner_thread.get();
} else {
// The lock was tagged with a thread that doesn't exist.
- thread->SetSyncedObject(nullptr, Svc::ResultInvalidState);
+ thread->SetSyncedObject(nullptr, ResultInvalidState);
thread->Wakeup();
}
}
} else {
// If the address wasn't accessible, note so.
- thread->SetSyncedObject(nullptr, Svc::ResultInvalidCurrentMemory);
+ thread->SetSyncedObject(nullptr, ResultInvalidCurrentMemory);
thread->Wakeup();
}
@@ -263,12 +263,12 @@ ResultCode KConditionVariable::Wait(VAddr addr, u64 key, u32 value, s64 timeout)
KScopedSchedulerLockAndSleep slp{kernel, cur_thread, timeout};
// Set the synced object.
- cur_thread->SetSyncedObject(nullptr, Svc::ResultTimedOut);
+ cur_thread->SetSyncedObject(nullptr, ResultTimedOut);
// Check that the thread isn't terminating.
if (cur_thread->IsTerminationRequested()) {
slp.CancelSleep();
- return Svc::ResultTerminationRequested;
+ return ResultTerminationRequested;
}
// Update the value and process for the next owner.
@@ -302,7 +302,7 @@ ResultCode KConditionVariable::Wait(VAddr addr, u64 key, u32 value, s64 timeout)
// Write the value to userspace.
if (!WriteToUser(system, addr, std::addressof(next_value))) {
slp.CancelSleep();
- return Svc::ResultInvalidCurrentMemory;
+ return ResultInvalidCurrentMemory;
}
}
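// The lock-word convention WaitForAddress() tests with
// 'handle | Svc::HandleWaitMask', sketched. The mask value below is an
// assumption matching the usual Switch convention; the real constant lives
// in svc_types.h.
constexpr u32 HandleWaitMask = 0x40000000; // assumed value

constexpr bool HasWaiters(u32 tag) {
    return (tag & HandleWaitMask) != 0; // some thread is queued on the lock
}
constexpr u32 OwnerHandle(u32 tag) {
    return tag & ~HandleWaitMask;       // handle of the current owner
}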
diff --git a/src/core/hle/kernel/memory/memory_block.h b/src/core/hle/kernel/k_memory_block.h
index 83acece1e..c5b9c5e85 100644
--- a/src/core/hle/kernel/memory/memory_block.h
+++ b/src/core/hle/kernel/k_memory_block.h
@@ -2,20 +2,17 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-// This file references various implementation details from Atmosphere, an open-source firmware for
-// the Nintendo Switch. Copyright 2018-2020 Atmosphere-NX.
-
#pragma once
#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
-#include "core/hle/kernel/memory/memory_types.h"
+#include "core/hle/kernel/memory_types.h"
#include "core/hle/kernel/svc_types.h"
-namespace Kernel::Memory {
+namespace Kernel {
-enum class MemoryState : u32 {
+enum class KMemoryState : u32 {
None = 0,
Mask = 0xFF,
All = ~None,
@@ -97,31 +94,31 @@ enum class MemoryState : u32 {
FlagReferenceCounted | FlagCanDebug,
CodeOut = static_cast<u32>(Svc::MemoryState::CodeOut) | FlagMapped | FlagReferenceCounted,
};
-DECLARE_ENUM_FLAG_OPERATORS(MemoryState);
-
-static_assert(static_cast<u32>(MemoryState::Free) == 0x00000000);
-static_assert(static_cast<u32>(MemoryState::Io) == 0x00002001);
-static_assert(static_cast<u32>(MemoryState::Static) == 0x00042002);
-static_assert(static_cast<u32>(MemoryState::Code) == 0x00DC7E03);
-static_assert(static_cast<u32>(MemoryState::CodeData) == 0x03FEBD04);
-static_assert(static_cast<u32>(MemoryState::Normal) == 0x037EBD05);
-static_assert(static_cast<u32>(MemoryState::Shared) == 0x00402006);
-static_assert(static_cast<u32>(MemoryState::AliasCode) == 0x00DD7E08);
-static_assert(static_cast<u32>(MemoryState::AliasCodeData) == 0x03FFBD09);
-static_assert(static_cast<u32>(MemoryState::Ipc) == 0x005C3C0A);
-static_assert(static_cast<u32>(MemoryState::Stack) == 0x005C3C0B);
-static_assert(static_cast<u32>(MemoryState::ThreadLocal) == 0x0040200C);
-static_assert(static_cast<u32>(MemoryState::Transferred) == 0x015C3C0D);
-static_assert(static_cast<u32>(MemoryState::SharedTransferred) == 0x005C380E);
-static_assert(static_cast<u32>(MemoryState::SharedCode) == 0x0040380F);
-static_assert(static_cast<u32>(MemoryState::Inaccessible) == 0x00000010);
-static_assert(static_cast<u32>(MemoryState::NonSecureIpc) == 0x005C3811);
-static_assert(static_cast<u32>(MemoryState::NonDeviceIpc) == 0x004C2812);
-static_assert(static_cast<u32>(MemoryState::Kernel) == 0x00002013);
-static_assert(static_cast<u32>(MemoryState::GeneratedCode) == 0x00402214);
-static_assert(static_cast<u32>(MemoryState::CodeOut) == 0x00402015);
-
-enum class MemoryPermission : u8 {
+DECLARE_ENUM_FLAG_OPERATORS(KMemoryState);
+
+static_assert(static_cast<u32>(KMemoryState::Free) == 0x00000000);
+static_assert(static_cast<u32>(KMemoryState::Io) == 0x00002001);
+static_assert(static_cast<u32>(KMemoryState::Static) == 0x00042002);
+static_assert(static_cast<u32>(KMemoryState::Code) == 0x00DC7E03);
+static_assert(static_cast<u32>(KMemoryState::CodeData) == 0x03FEBD04);
+static_assert(static_cast<u32>(KMemoryState::Normal) == 0x037EBD05);
+static_assert(static_cast<u32>(KMemoryState::Shared) == 0x00402006);
+static_assert(static_cast<u32>(KMemoryState::AliasCode) == 0x00DD7E08);
+static_assert(static_cast<u32>(KMemoryState::AliasCodeData) == 0x03FFBD09);
+static_assert(static_cast<u32>(KMemoryState::Ipc) == 0x005C3C0A);
+static_assert(static_cast<u32>(KMemoryState::Stack) == 0x005C3C0B);
+static_assert(static_cast<u32>(KMemoryState::ThreadLocal) == 0x0040200C);
+static_assert(static_cast<u32>(KMemoryState::Transferred) == 0x015C3C0D);
+static_assert(static_cast<u32>(KMemoryState::SharedTransferred) == 0x005C380E);
+static_assert(static_cast<u32>(KMemoryState::SharedCode) == 0x0040380F);
+static_assert(static_cast<u32>(KMemoryState::Inaccessible) == 0x00000010);
+static_assert(static_cast<u32>(KMemoryState::NonSecureIpc) == 0x005C3811);
+static_assert(static_cast<u32>(KMemoryState::NonDeviceIpc) == 0x004C2812);
+static_assert(static_cast<u32>(KMemoryState::Kernel) == 0x00002013);
+static_assert(static_cast<u32>(KMemoryState::GeneratedCode) == 0x00402214);
+static_assert(static_cast<u32>(KMemoryState::CodeOut) == 0x00402015);
+
+enum class KMemoryPermission : u8 {
None = 0,
Mask = static_cast<u8>(~None),
@@ -135,9 +132,9 @@ enum class MemoryPermission : u8 {
UserMask = static_cast<u8>(Svc::MemoryPermission::Read | Svc::MemoryPermission::Write |
Svc::MemoryPermission::Execute),
};
-DECLARE_ENUM_FLAG_OPERATORS(MemoryPermission);
+DECLARE_ENUM_FLAG_OPERATORS(KMemoryPermission);
-enum class MemoryAttribute : u8 {
+enum class KMemoryAttribute : u8 {
None = 0x00,
Mask = 0x7F,
All = Mask,
@@ -152,18 +149,18 @@ enum class MemoryAttribute : u8 {
LockedAndIpcLocked = Locked | IpcLocked,
DeviceSharedAndUncached = DeviceShared | Uncached
};
-DECLARE_ENUM_FLAG_OPERATORS(MemoryAttribute);
+DECLARE_ENUM_FLAG_OPERATORS(KMemoryAttribute);
-static_assert((static_cast<u8>(MemoryAttribute::Mask) &
- static_cast<u8>(MemoryAttribute::DontCareMask)) == 0);
+static_assert((static_cast<u8>(KMemoryAttribute::Mask) &
+ static_cast<u8>(KMemoryAttribute::DontCareMask)) == 0);
-struct MemoryInfo {
+struct KMemoryInfo {
VAddr addr{};
std::size_t size{};
- MemoryState state{};
- MemoryPermission perm{};
- MemoryAttribute attribute{};
- MemoryPermission original_perm{};
+ KMemoryState state{};
+ KMemoryPermission perm{};
+ KMemoryAttribute attribute{};
+ KMemoryPermission original_perm{};
u16 ipc_lock_count{};
u16 device_use_count{};
@@ -171,9 +168,9 @@ struct MemoryInfo {
return {
addr,
size,
- static_cast<Svc::MemoryState>(state & MemoryState::Mask),
- static_cast<Svc::MemoryAttribute>(attribute & MemoryAttribute::Mask),
- static_cast<Svc::MemoryPermission>(perm & MemoryPermission::UserMask),
+ static_cast<Svc::MemoryState>(state & KMemoryState::Mask),
+ static_cast<Svc::MemoryAttribute>(attribute & KMemoryAttribute::Mask),
+ static_cast<Svc::MemoryPermission>(perm & KMemoryPermission::UserMask),
ipc_lock_count,
device_use_count,
};
@@ -196,21 +193,21 @@ struct MemoryInfo {
}
};
-class MemoryBlock final {
- friend class MemoryBlockManager;
+class KMemoryBlock final {
+ friend class KMemoryBlockManager;
private:
VAddr addr{};
std::size_t num_pages{};
- MemoryState state{MemoryState::None};
+ KMemoryState state{KMemoryState::None};
u16 ipc_lock_count{};
u16 device_use_count{};
- MemoryPermission perm{MemoryPermission::None};
- MemoryPermission original_perm{MemoryPermission::None};
- MemoryAttribute attribute{MemoryAttribute::None};
+ KMemoryPermission perm{KMemoryPermission::None};
+ KMemoryPermission original_perm{KMemoryPermission::None};
+ KMemoryAttribute attribute{KMemoryAttribute::None};
public:
- static constexpr int Compare(const MemoryBlock& lhs, const MemoryBlock& rhs) {
+ static constexpr int Compare(const KMemoryBlock& lhs, const KMemoryBlock& rhs) {
if (lhs.GetAddress() < rhs.GetAddress()) {
return -1;
} else if (lhs.GetAddress() <= rhs.GetLastAddress()) {
@@ -221,9 +218,9 @@ public:
}
public:
- constexpr MemoryBlock() = default;
- constexpr MemoryBlock(VAddr addr_, std::size_t num_pages_, MemoryState state_,
- MemoryPermission perm_, MemoryAttribute attribute_)
+ constexpr KMemoryBlock() = default;
+ constexpr KMemoryBlock(VAddr addr_, std::size_t num_pages_, KMemoryState state_,
+ KMemoryPermission perm_, KMemoryAttribute attribute_)
: addr{addr_}, num_pages(num_pages_), state{state_}, perm{perm_}, attribute{attribute_} {}
constexpr VAddr GetAddress() const {
@@ -246,40 +243,40 @@ public:
return GetEndAddress() - 1;
}
- constexpr MemoryInfo GetMemoryInfo() const {
+ constexpr KMemoryInfo GetMemoryInfo() const {
return {
GetAddress(), GetSize(), state, perm,
attribute, original_perm, ipc_lock_count, device_use_count,
};
}
- void ShareToDevice(MemoryPermission /*new_perm*/) {
- ASSERT((attribute & MemoryAttribute::DeviceShared) == MemoryAttribute::DeviceShared ||
+ void ShareToDevice(KMemoryPermission /*new_perm*/) {
+ ASSERT((attribute & KMemoryAttribute::DeviceShared) == KMemoryAttribute::DeviceShared ||
device_use_count == 0);
- attribute |= MemoryAttribute::DeviceShared;
+ attribute |= KMemoryAttribute::DeviceShared;
const u16 new_use_count{++device_use_count};
ASSERT(new_use_count > 0);
}
- void UnshareToDevice(MemoryPermission /*new_perm*/) {
- ASSERT((attribute & MemoryAttribute::DeviceShared) == MemoryAttribute::DeviceShared);
+ void UnshareToDevice(KMemoryPermission /*new_perm*/) {
+ ASSERT((attribute & KMemoryAttribute::DeviceShared) == KMemoryAttribute::DeviceShared);
const u16 prev_use_count{device_use_count--};
ASSERT(prev_use_count > 0);
if (prev_use_count == 1) {
- attribute &= ~MemoryAttribute::DeviceShared;
+ attribute &= ~KMemoryAttribute::DeviceShared;
}
}
private:
- constexpr bool HasProperties(MemoryState s, MemoryPermission p, MemoryAttribute a) const {
- constexpr MemoryAttribute AttributeIgnoreMask{MemoryAttribute::DontCareMask |
- MemoryAttribute::IpcLocked |
- MemoryAttribute::DeviceShared};
+ constexpr bool HasProperties(KMemoryState s, KMemoryPermission p, KMemoryAttribute a) const {
+ constexpr KMemoryAttribute AttributeIgnoreMask{KMemoryAttribute::DontCareMask |
+ KMemoryAttribute::IpcLocked |
+ KMemoryAttribute::DeviceShared};
return state == s && perm == p &&
(attribute | AttributeIgnoreMask) == (a | AttributeIgnoreMask);
}
- constexpr bool HasSameProperties(const MemoryBlock& rhs) const {
+ constexpr bool HasSameProperties(const KMemoryBlock& rhs) const {
return state == rhs.state && perm == rhs.perm && original_perm == rhs.original_perm &&
attribute == rhs.attribute && ipc_lock_count == rhs.ipc_lock_count &&
device_use_count == rhs.device_use_count;
@@ -296,25 +293,25 @@ private:
num_pages += count;
}
- constexpr void Update(MemoryState new_state, MemoryPermission new_perm,
- MemoryAttribute new_attribute) {
- ASSERT(original_perm == MemoryPermission::None);
- ASSERT((attribute & MemoryAttribute::IpcLocked) == MemoryAttribute::None);
+ constexpr void Update(KMemoryState new_state, KMemoryPermission new_perm,
+ KMemoryAttribute new_attribute) {
+ ASSERT(original_perm == KMemoryPermission::None);
+ ASSERT((attribute & KMemoryAttribute::IpcLocked) == KMemoryAttribute::None);
state = new_state;
perm = new_perm;
- attribute = static_cast<MemoryAttribute>(
+ attribute = static_cast<KMemoryAttribute>(
new_attribute |
- (attribute & (MemoryAttribute::IpcLocked | MemoryAttribute::DeviceShared)));
+ (attribute & (KMemoryAttribute::IpcLocked | KMemoryAttribute::DeviceShared)));
}
- constexpr MemoryBlock Split(VAddr split_addr) {
+ constexpr KMemoryBlock Split(VAddr split_addr) {
ASSERT(GetAddress() < split_addr);
ASSERT(Contains(split_addr));
ASSERT(Common::IsAligned(split_addr, PageSize));
- MemoryBlock block;
+ KMemoryBlock block;
block.addr = addr;
block.num_pages = (split_addr - GetAddress()) / PageSize;
block.state = state;
@@ -330,6 +327,6 @@ private:
return block;
}
};
-static_assert(std::is_trivially_destructible<MemoryBlock>::value);
+static_assert(std::is_trivially_destructible<KMemoryBlock>::value);
-} // namespace Kernel::Memory
+} // namespace Kernel
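// The static_asserts above pin the encoding down: the low byte of a
// KMemoryState value is its Svc::MemoryState id, the upper bits are the
// Flag* attributes, and the Svc conversion above recovers the id with
// KMemoryState::Mask (0xFF). Two worked checks using values from this header:
static_assert((static_cast<u32>(Kernel::KMemoryState::Stack) & 0xFF) == 0x0B); // svc id of Stack
static_assert((static_cast<u32>(Kernel::KMemoryState::Inaccessible) & ~0xFFU) == 0); // no flag bits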
diff --git a/src/core/hle/kernel/memory/memory_block_manager.cpp b/src/core/hle/kernel/k_memory_block_manager.cpp
index 0732fa5a1..4a2d88008 100644
--- a/src/core/hle/kernel/memory/memory_block_manager.cpp
+++ b/src/core/hle/kernel/k_memory_block_manager.cpp
@@ -2,19 +2,19 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include "core/hle/kernel/memory/memory_block_manager.h"
-#include "core/hle/kernel/memory/memory_types.h"
+#include "core/hle/kernel/k_memory_block_manager.h"
+#include "core/hle/kernel/memory_types.h"
-namespace Kernel::Memory {
+namespace Kernel {
-MemoryBlockManager::MemoryBlockManager(VAddr start_addr, VAddr end_addr)
+KMemoryBlockManager::KMemoryBlockManager(VAddr start_addr, VAddr end_addr)
: start_addr{start_addr}, end_addr{end_addr} {
const u64 num_pages{(end_addr - start_addr) / PageSize};
- memory_block_tree.emplace_back(start_addr, num_pages, MemoryState::Free, MemoryPermission::None,
- MemoryAttribute::None);
+ memory_block_tree.emplace_back(start_addr, num_pages, KMemoryState::Free,
+ KMemoryPermission::None, KMemoryAttribute::None);
}
-MemoryBlockManager::iterator MemoryBlockManager::FindIterator(VAddr addr) {
+KMemoryBlockManager::iterator KMemoryBlockManager::FindIterator(VAddr addr) {
auto node{memory_block_tree.begin()};
while (node != end()) {
const VAddr end_addr{node->GetNumPages() * PageSize + node->GetAddress()};
@@ -26,9 +26,9 @@ MemoryBlockManager::iterator MemoryBlockManager::FindIterator(VAddr addr) {
return end();
}
-VAddr MemoryBlockManager::FindFreeArea(VAddr region_start, std::size_t region_num_pages,
- std::size_t num_pages, std::size_t align, std::size_t offset,
- std::size_t guard_pages) {
+VAddr KMemoryBlockManager::FindFreeArea(VAddr region_start, std::size_t region_num_pages,
+ std::size_t num_pages, std::size_t align,
+ std::size_t offset, std::size_t guard_pages) {
if (num_pages == 0) {
return {};
}
@@ -41,7 +41,7 @@ VAddr MemoryBlockManager::FindFreeArea(VAddr region_start, std::size_t region_nu
break;
}
- if (info.state != MemoryState::Free) {
+ if (info.state != KMemoryState::Free) {
continue;
}
@@ -63,17 +63,17 @@ VAddr MemoryBlockManager::FindFreeArea(VAddr region_start, std::size_t region_nu
return {};
}
-void MemoryBlockManager::Update(VAddr addr, std::size_t num_pages, MemoryState prev_state,
- MemoryPermission prev_perm, MemoryAttribute prev_attribute,
- MemoryState state, MemoryPermission perm,
- MemoryAttribute attribute) {
+void KMemoryBlockManager::Update(VAddr addr, std::size_t num_pages, KMemoryState prev_state,
+ KMemoryPermission prev_perm, KMemoryAttribute prev_attribute,
+ KMemoryState state, KMemoryPermission perm,
+ KMemoryAttribute attribute) {
const VAddr end_addr{addr + num_pages * PageSize};
iterator node{memory_block_tree.begin()};
- prev_attribute |= MemoryAttribute::IpcAndDeviceMapped;
+ prev_attribute |= KMemoryAttribute::IpcAndDeviceMapped;
while (node != memory_block_tree.end()) {
- MemoryBlock* block{&(*node)};
+ KMemoryBlock* block{&(*node)};
iterator next_node{std::next(node)};
const VAddr cur_addr{block->GetAddress()};
const VAddr cur_end_addr{block->GetNumPages() * PageSize + cur_addr};
@@ -106,13 +106,13 @@ void MemoryBlockManager::Update(VAddr addr, std::size_t num_pages, MemoryState p
}
}
-void MemoryBlockManager::Update(VAddr addr, std::size_t num_pages, MemoryState state,
- MemoryPermission perm, MemoryAttribute attribute) {
+void KMemoryBlockManager::Update(VAddr addr, std::size_t num_pages, KMemoryState state,
+ KMemoryPermission perm, KMemoryAttribute attribute) {
const VAddr end_addr{addr + num_pages * PageSize};
iterator node{memory_block_tree.begin()};
while (node != memory_block_tree.end()) {
- MemoryBlock* block{&(*node)};
+ KMemoryBlock* block{&(*node)};
iterator next_node{std::next(node)};
const VAddr cur_addr{block->GetAddress()};
const VAddr cur_end_addr{block->GetNumPages() * PageSize + cur_addr};
@@ -141,13 +141,13 @@ void MemoryBlockManager::Update(VAddr addr, std::size_t num_pages, MemoryState s
}
}
-void MemoryBlockManager::UpdateLock(VAddr addr, std::size_t num_pages, LockFunc&& lock_func,
- MemoryPermission perm) {
+void KMemoryBlockManager::UpdateLock(VAddr addr, std::size_t num_pages, LockFunc&& lock_func,
+ KMemoryPermission perm) {
const VAddr end_addr{addr + num_pages * PageSize};
iterator node{memory_block_tree.begin()};
while (node != memory_block_tree.end()) {
- MemoryBlock* block{&(*node)};
+ KMemoryBlock* block{&(*node)};
iterator next_node{std::next(node)};
const VAddr cur_addr{block->GetAddress()};
const VAddr cur_end_addr{block->GetNumPages() * PageSize + cur_addr};
@@ -176,9 +176,9 @@ void MemoryBlockManager::UpdateLock(VAddr addr, std::size_t num_pages, LockFunc&
}
}
-void MemoryBlockManager::IterateForRange(VAddr start, VAddr end, IterateFunc&& func) {
+void KMemoryBlockManager::IterateForRange(VAddr start, VAddr end, IterateFunc&& func) {
const_iterator it{FindIterator(start)};
- MemoryInfo info{};
+ KMemoryInfo info{};
do {
info = it->GetMemoryInfo();
func(info);
@@ -186,8 +186,8 @@ void MemoryBlockManager::IterateForRange(VAddr start, VAddr end, IterateFunc&& f
} while (info.addr + info.size - 1 < end - 1 && it != cend());
}
-void MemoryBlockManager::MergeAdjacent(iterator it, iterator& next_it) {
- MemoryBlock* block{&(*it)};
+void KMemoryBlockManager::MergeAdjacent(iterator it, iterator& next_it) {
+ KMemoryBlock* block{&(*it)};
auto EraseIt = [&](const iterator it_to_erase) {
if (next_it == it_to_erase) {
@@ -197,7 +197,7 @@ void MemoryBlockManager::MergeAdjacent(iterator it, iterator& next_it) {
};
if (it != memory_block_tree.begin()) {
- MemoryBlock* prev{&(*std::prev(it))};
+ KMemoryBlock* prev{&(*std::prev(it))};
if (block->HasSameProperties(*prev)) {
const iterator prev_it{std::prev(it)};
@@ -211,7 +211,7 @@ void MemoryBlockManager::MergeAdjacent(iterator it, iterator& next_it) {
}
if (it != cend()) {
- const MemoryBlock* const next{&(*std::next(it))};
+ const KMemoryBlock* const next{&(*std::next(it))};
if (block->HasSameProperties(*next)) {
block->Add(next->GetNumPages());
@@ -220,4 +220,4 @@ void MemoryBlockManager::MergeAdjacent(iterator it, iterator& next_it) {
}
}
-} // namespace Kernel::Memory
+} // namespace Kernel
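// A toy model of MergeAdjacent() above, assuming only that neighbours merge
// when HasSameProperties() holds: after an Update(), a node absorbs identical
// neighbours so the list keeps one node per maximal run of properties.
#include <list>

struct Run {
    int start{}, len{}, prop{};
};

void MergeAdjacentRuns(std::list<Run>& runs, std::list<Run>::iterator it) {
    if (it != runs.begin()) {
        auto prev = std::prev(it);
        if (prev->prop == it->prop) { // same properties: absorb the left neighbour
            it->start = prev->start;
            it->len += prev->len;
            runs.erase(prev);
        }
    }
    if (auto next = std::next(it); next != runs.end() && next->prop == it->prop) {
        it->len += next->len;         // absorb the right neighbour
        runs.erase(next);
    }
}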
diff --git a/src/core/hle/kernel/memory/memory_block_manager.h b/src/core/hle/kernel/k_memory_block_manager.h
index f57d1bbcc..e11cc70c8 100644
--- a/src/core/hle/kernel/memory/memory_block_manager.h
+++ b/src/core/hle/kernel/k_memory_block_manager.h
@@ -8,18 +8,18 @@
#include <list>
#include "common/common_types.h"
-#include "core/hle/kernel/memory/memory_block.h"
+#include "core/hle/kernel/k_memory_block.h"
-namespace Kernel::Memory {
+namespace Kernel {
-class MemoryBlockManager final {
+class KMemoryBlockManager final {
public:
- using MemoryBlockTree = std::list<MemoryBlock>;
+ using MemoryBlockTree = std::list<KMemoryBlock>;
using iterator = MemoryBlockTree::iterator;
using const_iterator = MemoryBlockTree::const_iterator;
public:
- MemoryBlockManager(VAddr start_addr, VAddr end_addr);
+ KMemoryBlockManager(VAddr start_addr, VAddr end_addr);
iterator end() {
return memory_block_tree.end();
@@ -36,21 +36,22 @@ public:
VAddr FindFreeArea(VAddr region_start, std::size_t region_num_pages, std::size_t num_pages,
std::size_t align, std::size_t offset, std::size_t guard_pages);
- void Update(VAddr addr, std::size_t num_pages, MemoryState prev_state,
- MemoryPermission prev_perm, MemoryAttribute prev_attribute, MemoryState state,
- MemoryPermission perm, MemoryAttribute attribute);
+ void Update(VAddr addr, std::size_t num_pages, KMemoryState prev_state,
+ KMemoryPermission prev_perm, KMemoryAttribute prev_attribute, KMemoryState state,
+ KMemoryPermission perm, KMemoryAttribute attribute);
- void Update(VAddr addr, std::size_t num_pages, MemoryState state,
- MemoryPermission perm = MemoryPermission::None,
- MemoryAttribute attribute = MemoryAttribute::None);
+ void Update(VAddr addr, std::size_t num_pages, KMemoryState state,
+ KMemoryPermission perm = KMemoryPermission::None,
+ KMemoryAttribute attribute = KMemoryAttribute::None);
- using LockFunc = std::function<void(iterator, MemoryPermission)>;
- void UpdateLock(VAddr addr, std::size_t num_pages, LockFunc&& lock_func, MemoryPermission perm);
+ using LockFunc = std::function<void(iterator, KMemoryPermission)>;
+ void UpdateLock(VAddr addr, std::size_t num_pages, LockFunc&& lock_func,
+ KMemoryPermission perm);
- using IterateFunc = std::function<void(const MemoryInfo&)>;
+ using IterateFunc = std::function<void(const KMemoryInfo&)>;
void IterateForRange(VAddr start, VAddr end, IterateFunc&& func);
- MemoryBlock& FindBlock(VAddr addr) {
+ KMemoryBlock& FindBlock(VAddr addr) {
return *FindIterator(addr);
}
@@ -63,4 +64,4 @@ private:
MemoryBlockTree memory_block_tree;
};
-} // namespace Kernel::Memory
+} // namespace Kernel
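// A hypothetical caller of UpdateLock(), to show the shape of LockFunc: the
// callable is applied per block overlapping the range (ShareToDevice() is
// public on KMemoryBlock in this commit). The invocation details here are an
// assumption, not quoted from the diff.
block_manager.UpdateLock(addr, num_pages,
                         [](Kernel::KMemoryBlockManager::iterator it,
                            Kernel::KMemoryPermission perm) { it->ShareToDevice(perm); },
                         Kernel::KMemoryPermission::None);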
diff --git a/src/core/hle/kernel/memory/memory_layout.h b/src/core/hle/kernel/k_memory_layout.h
index c7c0b2f49..0821d2d8c 100644
--- a/src/core/hle/kernel/memory/memory_layout.h
+++ b/src/core/hle/kernel/k_memory_layout.h
@@ -7,7 +7,7 @@
#include "common/common_types.h"
#include "core/device_memory.h"
-namespace Kernel::Memory {
+namespace Kernel {
constexpr std::size_t KernelAslrAlignment = 2 * 1024 * 1024;
constexpr std::size_t KernelVirtualAddressSpaceWidth = 1ULL << 39;
@@ -27,8 +27,8 @@ constexpr bool IsKernelAddress(VAddr address) {
return KernelVirtualAddressSpaceBase <= address && address < KernelVirtualAddressSpaceEnd;
}
-class MemoryRegion final {
- friend class MemoryLayout;
+class KMemoryRegion final {
+ friend class KMemoryLayout;
public:
constexpr PAddr StartAddress() const {
@@ -40,29 +40,29 @@ public:
}
private:
- constexpr MemoryRegion() = default;
- constexpr MemoryRegion(PAddr start_address, PAddr end_address)
+ constexpr KMemoryRegion() = default;
+ constexpr KMemoryRegion(PAddr start_address, PAddr end_address)
: start_address{start_address}, end_address{end_address} {}
const PAddr start_address{};
const PAddr end_address{};
};
-class MemoryLayout final {
+class KMemoryLayout final {
public:
- constexpr const MemoryRegion& Application() const {
+ constexpr const KMemoryRegion& Application() const {
return application;
}
- constexpr const MemoryRegion& Applet() const {
+ constexpr const KMemoryRegion& Applet() const {
return applet;
}
- constexpr const MemoryRegion& System() const {
+ constexpr const KMemoryRegion& System() const {
return system;
}
- static constexpr MemoryLayout GetDefaultLayout() {
+ static constexpr KMemoryLayout GetDefaultLayout() {
constexpr std::size_t application_size{0xcd500000};
constexpr std::size_t applet_size{0x1fb00000};
constexpr PAddr application_start_address{Core::DramMemoryMap::End - application_size};
@@ -76,15 +76,15 @@ public:
}
private:
- constexpr MemoryLayout(PAddr application_start_address, std::size_t application_size,
- PAddr applet_start_address, std::size_t applet_size,
- PAddr system_start_address, std::size_t system_size)
+ constexpr KMemoryLayout(PAddr application_start_address, std::size_t application_size,
+ PAddr applet_start_address, std::size_t applet_size,
+ PAddr system_start_address, std::size_t system_size)
: application{application_start_address, application_size},
applet{applet_start_address, applet_size}, system{system_start_address, system_size} {}
- const MemoryRegion application;
- const MemoryRegion applet;
- const MemoryRegion system;
+ const KMemoryRegion application;
+ const KMemoryRegion applet;
+ const KMemoryRegion system;
};
-} // namespace Kernel::Memory
+} // namespace Kernel
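// GetDefaultLayout() carves DRAM downward from Core::DramMemoryMap::End with
// the sizes above, so the regions pack as:
//   application_start = DramMemoryMap::End - 0xcd500000   (ends at DRAM end)
//   applet_start      = application_start - 0x1fb00000    (directly below it)
// with the system region below that; its derivation falls outside the
// visible hunk.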
diff --git a/src/core/hle/kernel/memory/memory_manager.cpp b/src/core/hle/kernel/k_memory_manager.cpp
index acf13585c..9027602bf 100644
--- a/src/core/hle/kernel/memory/memory_manager.cpp
+++ b/src/core/hle/kernel/k_memory_manager.cpp
@@ -8,20 +8,20 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "common/scope_exit.h"
-#include "core/hle/kernel/errors.h"
-#include "core/hle/kernel/memory/memory_manager.h"
-#include "core/hle/kernel/memory/page_linked_list.h"
+#include "core/hle/kernel/k_memory_manager.h"
+#include "core/hle/kernel/k_page_linked_list.h"
+#include "core/hle/kernel/svc_results.h"
-namespace Kernel::Memory {
+namespace Kernel {
-std::size_t MemoryManager::Impl::Initialize(Pool new_pool, u64 start_address, u64 end_address) {
+std::size_t KMemoryManager::Impl::Initialize(Pool new_pool, u64 start_address, u64 end_address) {
const auto size{end_address - start_address};
// Calculate metadata sizes
const auto ref_count_size{(size / PageSize) * sizeof(u16)};
const auto optimize_map_size{(Common::AlignUp((size / PageSize), 64) / 64) * sizeof(u64)};
const auto manager_size{Common::AlignUp(optimize_map_size + ref_count_size, PageSize)};
- const auto page_heap_size{PageHeap::CalculateMetadataOverheadSize(size)};
+ const auto page_heap_size{KPageHeap::CalculateManagementOverheadSize(size)};
const auto total_metadata_size{manager_size + page_heap_size};
ASSERT(manager_size <= total_metadata_size);
ASSERT(Common::IsAligned(total_metadata_size, PageSize));
@@ -41,29 +41,30 @@ std::size_t MemoryManager::Impl::Initialize(Pool new_pool, u64 start_address, u6
return total_metadata_size;
}
-void MemoryManager::InitializeManager(Pool pool, u64 start_address, u64 end_address) {
+void KMemoryManager::InitializeManager(Pool pool, u64 start_address, u64 end_address) {
ASSERT(pool < Pool::Count);
managers[static_cast<std::size_t>(pool)].Initialize(pool, start_address, end_address);
}
-VAddr MemoryManager::AllocateContinuous(std::size_t num_pages, std::size_t align_pages, Pool pool,
- Direction dir) {
+VAddr KMemoryManager::AllocateAndOpenContinuous(std::size_t num_pages, std::size_t align_pages,
+ u32 option) {
// Early return if we're allocating no pages
if (num_pages == 0) {
return {};
}
// Lock the pool that we're allocating from
+ const auto [pool, dir] = DecodeOption(option);
const auto pool_index{static_cast<std::size_t>(pool)};
std::lock_guard lock{pool_locks[pool_index]};
// Choose a heap based on our page size request
- const s32 heap_index{PageHeap::GetAlignedBlockIndex(num_pages, align_pages)};
+ const s32 heap_index{KPageHeap::GetAlignedBlockIndex(num_pages, align_pages)};
// Loop, trying to iterate from each block
// TODO (bunnei): Support multiple managers
Impl& chosen_manager{managers[pool_index]};
- VAddr allocated_block{chosen_manager.AllocateBlock(heap_index)};
+ VAddr allocated_block{chosen_manager.AllocateBlock(heap_index, false)};
// If we failed to allocate, quit now
if (!allocated_block) {
@@ -71,7 +72,7 @@ VAddr MemoryManager::AllocateContinuous(std::size_t num_pages, std::size_t align
}
// If we allocated more than we need, free some
- const auto allocated_pages{PageHeap::GetBlockNumPages(heap_index)};
+ const auto allocated_pages{KPageHeap::GetBlockNumPages(heap_index)};
if (allocated_pages > num_pages) {
chosen_manager.Free(allocated_block + num_pages * PageSize, allocated_pages - num_pages);
}
@@ -79,8 +80,8 @@ VAddr MemoryManager::AllocateContinuous(std::size_t num_pages, std::size_t align
return allocated_block;
}
-ResultCode MemoryManager::Allocate(PageLinkedList& page_list, std::size_t num_pages, Pool pool,
- Direction dir) {
+ResultCode KMemoryManager::Allocate(KPageLinkedList& page_list, std::size_t num_pages, Pool pool,
+ Direction dir) {
ASSERT(page_list.GetNumPages() == 0);
// Early return if we're allocating no pages
@@ -93,9 +94,9 @@ ResultCode MemoryManager::Allocate(PageLinkedList& page_list, std::size_t num_pa
std::lock_guard lock{pool_locks[pool_index]};
// Choose a heap based on our page size request
- const s32 heap_index{PageHeap::GetBlockIndex(num_pages)};
+ const s32 heap_index{KPageHeap::GetBlockIndex(num_pages)};
if (heap_index < 0) {
- return ERR_OUT_OF_MEMORY;
+ return ResultOutOfMemory;
}
// TODO (bunnei): Support multiple managers
@@ -112,11 +113,11 @@ ResultCode MemoryManager::Allocate(PageLinkedList& page_list, std::size_t num_pa
// Keep allocating until we've allocated all our pages
for (s32 index{heap_index}; index >= 0 && num_pages > 0; index--) {
- const auto pages_per_alloc{PageHeap::GetBlockNumPages(index)};
+ const auto pages_per_alloc{KPageHeap::GetBlockNumPages(index)};
while (num_pages >= pages_per_alloc) {
// Allocate a block
- VAddr allocated_block{chosen_manager.AllocateBlock(index)};
+ VAddr allocated_block{chosen_manager.AllocateBlock(index, false)};
if (!allocated_block) {
break;
}
@@ -140,7 +141,7 @@ ResultCode MemoryManager::Allocate(PageLinkedList& page_list, std::size_t num_pa
// Only succeed if we allocated as many pages as we wanted
if (num_pages) {
- return ERR_OUT_OF_MEMORY;
+ return ResultOutOfMemory;
}
// We succeeded!
@@ -148,8 +149,8 @@ ResultCode MemoryManager::Allocate(PageLinkedList& page_list, std::size_t num_pa
return RESULT_SUCCESS;
}
-ResultCode MemoryManager::Free(PageLinkedList& page_list, std::size_t num_pages, Pool pool,
- Direction dir) {
+ResultCode KMemoryManager::Free(KPageLinkedList& page_list, std::size_t num_pages, Pool pool,
+ Direction dir) {
// Early return if we're freeing no pages
if (!num_pages) {
return RESULT_SUCCESS;
@@ -172,4 +173,4 @@ ResultCode MemoryManager::Free(PageLinkedList& page_list, std::size_t num_pages,
return RESULT_SUCCESS;
}
-} // namespace Kernel::Memory
+} // namespace Kernel
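// A worked pass through the Allocate() loop above, assuming 4 KiB pages and
// the standard block classes (index 0 = 1 page, index 1 = 16 pages):
// requesting 17 pages gives heap_index = GetBlockIndex(17) = 1; one 16-page
// block is carved at index 1, num_pages drops to 1, and the loop falls
// through to index 0 for the final page, appending two nodes to the
// KPageLinkedList.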
diff --git a/src/core/hle/kernel/memory/memory_manager.h b/src/core/hle/kernel/k_memory_manager.h
index 3cf444857..ae9f683b8 100644
--- a/src/core/hle/kernel/memory/memory_manager.h
+++ b/src/core/hle/kernel/k_memory_manager.h
@@ -6,16 +6,18 @@
#include <array>
#include <mutex>
+#include <tuple>
+#include "common/common_funcs.h"
#include "common/common_types.h"
-#include "core/hle/kernel/memory/page_heap.h"
+#include "core/hle/kernel/k_page_heap.h"
#include "core/hle/result.h"
-namespace Kernel::Memory {
+namespace Kernel {
-class PageLinkedList;
+class KPageLinkedList;
-class MemoryManager final : NonCopyable {
+class KMemoryManager final : NonCopyable {
public:
enum class Pool : u32 {
Application = 0,
@@ -37,29 +39,50 @@ public:
Mask = (0xF << Shift),
};
- MemoryManager() = default;
+ KMemoryManager() = default;
constexpr std::size_t GetSize(Pool pool) const {
return managers[static_cast<std::size_t>(pool)].GetSize();
}
void InitializeManager(Pool pool, u64 start_address, u64 end_address);
- VAddr AllocateContinuous(std::size_t num_pages, std::size_t align_pages, Pool pool,
- Direction dir = Direction::FromFront);
- ResultCode Allocate(PageLinkedList& page_list, std::size_t num_pages, Pool pool,
+
+ VAddr AllocateAndOpenContinuous(size_t num_pages, size_t align_pages, u32 option);
+ ResultCode Allocate(KPageLinkedList& page_list, std::size_t num_pages, Pool pool,
Direction dir = Direction::FromFront);
- ResultCode Free(PageLinkedList& page_list, std::size_t num_pages, Pool pool,
+ ResultCode Free(KPageLinkedList& page_list, std::size_t num_pages, Pool pool,
Direction dir = Direction::FromFront);
static constexpr std::size_t MaxManagerCount = 10;
+public:
+ static constexpr u32 EncodeOption(Pool pool, Direction dir) {
+ return (static_cast<u32>(pool) << static_cast<u32>(Pool::Shift)) |
+ (static_cast<u32>(dir) << static_cast<u32>(Direction::Shift));
+ }
+
+ static constexpr Pool GetPool(u32 option) {
+ return static_cast<Pool>((static_cast<u32>(option) & static_cast<u32>(Pool::Mask)) >>
+ static_cast<u32>(Pool::Shift));
+ }
+
+ static constexpr Direction GetDirection(u32 option) {
+ return static_cast<Direction>(
+ (static_cast<u32>(option) & static_cast<u32>(Direction::Mask)) >>
+ static_cast<u32>(Direction::Shift));
+ }
+
+ static constexpr std::tuple<Pool, Direction> DecodeOption(u32 option) {
+ return std::make_tuple(GetPool(option), GetDirection(option));
+ }
+
private:
class Impl final : NonCopyable {
private:
using RefCount = u16;
private:
- PageHeap heap;
+ KPageHeap heap;
Pool pool{};
public:
@@ -67,8 +90,8 @@ private:
std::size_t Initialize(Pool new_pool, u64 start_address, u64 end_address);
- VAddr AllocateBlock(s32 index) {
- return heap.AllocateBlock(index);
+ VAddr AllocateBlock(s32 index, bool random) {
+ return heap.AllocateBlock(index, random);
}
void Free(VAddr addr, std::size_t num_pages) {
@@ -93,4 +116,4 @@ private:
std::array<Impl, MaxManagerCount> managers;
};
-} // namespace Kernel::Memory
+} // namespace Kernel
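// The round-trip property of the new option word, which holds for any
// non-overlapping Pool/Direction bit fields:
constexpr u32 opt = Kernel::KMemoryManager::EncodeOption(
    Kernel::KMemoryManager::Pool::Application,
    Kernel::KMemoryManager::Direction::FromFront);
static_assert(Kernel::KMemoryManager::GetPool(opt) ==
              Kernel::KMemoryManager::Pool::Application);
static_assert(Kernel::KMemoryManager::GetDirection(opt) ==
              Kernel::KMemoryManager::Direction::FromFront);
// This is the 'option' that AllocateAndOpenContinuous() unpacks via DecodeOption().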
diff --git a/src/core/hle/kernel/k_page_bitmap.h b/src/core/hle/kernel/k_page_bitmap.h
new file mode 100644
index 000000000..c75d667c9
--- /dev/null
+++ b/src/core/hle/kernel/k_page_bitmap.h
@@ -0,0 +1,279 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <bit>
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/bit_util.h"
+#include "common/common_types.h"
+#include "common/tiny_mt.h"
+#include "core/hle/kernel/k_system_control.h"
+
+namespace Kernel {
+
+class KPageBitmap {
+private:
+ class RandomBitGenerator {
+ private:
+ Common::TinyMT rng{};
+ u32 entropy{};
+ u32 bits_available{};
+
+ private:
+ void RefreshEntropy() {
+ entropy = rng.GenerateRandomU32();
+ bits_available = static_cast<u32>(Common::BitSize<decltype(entropy)>());
+ }
+
+ bool GenerateRandomBit() {
+ if (bits_available == 0) {
+ this->RefreshEntropy();
+ }
+
+ const bool rnd_bit = (entropy & 1) != 0;
+ entropy >>= 1;
+ --bits_available;
+ return rnd_bit;
+ }
+
+ public:
+ RandomBitGenerator() {
+ rng.Initialize(static_cast<u32>(KSystemControl::GenerateRandomU64()));
+ }
+
+ std::size_t SelectRandomBit(u64 bitmap) {
+ u64 selected = 0;
+
+ u64 cur_num_bits = Common::BitSize<decltype(bitmap)>() / 2;
+ u64 cur_mask = (1ULL << cur_num_bits) - 1;
+
+ while (cur_num_bits) {
+ const u64 low = (bitmap >> 0) & cur_mask;
+ const u64 high = (bitmap >> cur_num_bits) & cur_mask;
+
+ bool choose_low;
+ if (high == 0) {
+ // If only low val is set, choose low.
+ choose_low = true;
+ } else if (low == 0) {
+ // If only high val is set, choose high.
+ choose_low = false;
+ } else {
+ // If both are set, choose random.
+ choose_low = this->GenerateRandomBit();
+ }
+
+ // If we chose low, proceed with low.
+ if (choose_low) {
+ bitmap = low;
+ selected += 0;
+ } else {
+ bitmap = high;
+ selected += cur_num_bits;
+ }
+
+ // Proceed.
+ cur_num_bits /= 2;
+ cur_mask >>= cur_num_bits;
+ }
+
+ return selected;
+ }
+ };
+
+public:
+ static constexpr std::size_t MaxDepth = 4;
+
+private:
+ std::array<u64*, MaxDepth> bit_storages{};
+ RandomBitGenerator rng{};
+ std::size_t num_bits{};
+ std::size_t used_depths{};
+
+public:
+ KPageBitmap() = default;
+
+ constexpr std::size_t GetNumBits() const {
+ return num_bits;
+ }
+ constexpr s32 GetHighestDepthIndex() const {
+ return static_cast<s32>(used_depths) - 1;
+ }
+
+ u64* Initialize(u64* storage, std::size_t size) {
+ // Initially, everything is un-set.
+ num_bits = 0;
+
+ // Calculate the needed bitmap depth.
+ used_depths = static_cast<std::size_t>(GetRequiredDepth(size));
+ ASSERT(used_depths <= MaxDepth);
+
+ // Set the bitmap pointers.
+ for (s32 depth = this->GetHighestDepthIndex(); depth >= 0; depth--) {
+ bit_storages[depth] = storage;
+ size = Common::AlignUp(size, Common::BitSize<u64>()) / Common::BitSize<u64>();
+ storage += size;
+ }
+
+ return storage;
+ }
+
+ s64 FindFreeBlock(bool random) {
+ uintptr_t offset = 0;
+ s32 depth = 0;
+
+ if (random) {
+ do {
+ const u64 v = bit_storages[depth][offset];
+ if (v == 0) {
+ // If depth is bigger than zero, then a previous level indicated a block was
+ // free.
+ ASSERT(depth == 0);
+ return -1;
+ }
+ offset = offset * Common::BitSize<u64>() + rng.SelectRandomBit(v);
+ ++depth;
+ } while (depth < static_cast<s32>(used_depths));
+ } else {
+ do {
+ const u64 v = bit_storages[depth][offset];
+ if (v == 0) {
+ // If depth is bigger than zero, then a previous level indicated a block was
+ // free.
+ ASSERT(depth == 0);
+ return -1;
+ }
+ offset = offset * Common::BitSize<u64>() + std::countr_zero(v);
+ ++depth;
+ } while (depth < static_cast<s32>(used_depths));
+ }
+
+ return static_cast<s64>(offset);
+ }
+
+ void SetBit(std::size_t offset) {
+ this->SetBit(this->GetHighestDepthIndex(), offset);
+ num_bits++;
+ }
+
+ void ClearBit(std::size_t offset) {
+ this->ClearBit(this->GetHighestDepthIndex(), offset);
+ num_bits--;
+ }
+
+ bool ClearRange(std::size_t offset, std::size_t count) {
+ s32 depth = this->GetHighestDepthIndex();
+ u64* bits = bit_storages[depth];
+ std::size_t bit_ind = offset / Common::BitSize<u64>();
+ if (count < Common::BitSize<u64>()) {
+ const std::size_t shift = offset % Common::BitSize<u64>();
+ ASSERT(shift + count <= Common::BitSize<u64>());
+ // Check that all the bits are set.
+ const u64 mask = ((u64(1) << count) - 1) << shift;
+ u64 v = bits[bit_ind];
+ if ((v & mask) != mask) {
+ return false;
+ }
+
+ // Clear the bits.
+ v &= ~mask;
+ bits[bit_ind] = v;
+ if (v == 0) {
+ this->ClearBit(depth - 1, bit_ind);
+ }
+ } else {
+ ASSERT(offset % Common::BitSize<u64>() == 0);
+ ASSERT(count % Common::BitSize<u64>() == 0);
+ // Check that all the bits are set.
+ std::size_t remaining = count;
+ std::size_t i = 0;
+ do {
+ if (bits[bit_ind + i++] != ~u64(0)) {
+ return false;
+ }
+ remaining -= Common::BitSize<u64>();
+ } while (remaining > 0);
+
+ // Clear the bits.
+ remaining = count;
+ i = 0;
+ do {
+ bits[bit_ind + i] = 0;
+ this->ClearBit(depth - 1, bit_ind + i);
+ i++;
+ remaining -= Common::BitSize<u64>();
+ } while (remaining > 0);
+ }
+
+ num_bits -= count;
+ return true;
+ }
+
+private:
+ void SetBit(s32 depth, std::size_t offset) {
+ while (depth >= 0) {
+ std::size_t ind = offset / Common::BitSize<u64>();
+ std::size_t which = offset % Common::BitSize<u64>();
+ const u64 mask = u64(1) << which;
+
+ u64* bit = std::addressof(bit_storages[depth][ind]);
+ u64 v = *bit;
+ ASSERT((v & mask) == 0);
+ *bit = v | mask;
+ if (v) {
+ break;
+ }
+ offset = ind;
+ depth--;
+ }
+ }
+
+ void ClearBit(s32 depth, std::size_t offset) {
+ while (depth >= 0) {
+ std::size_t ind = offset / Common::BitSize<u64>();
+ std::size_t which = offset % Common::BitSize<u64>();
+ const u64 mask = u64(1) << which;
+
+ u64* bit = std::addressof(bit_storages[depth][ind]);
+ u64 v = *bit;
+ ASSERT((v & mask) != 0);
+ v &= ~mask;
+ *bit = v;
+ if (v) {
+ break;
+ }
+ offset = ind;
+ depth--;
+ }
+ }
+
+private:
+ static constexpr s32 GetRequiredDepth(std::size_t region_size) {
+ s32 depth = 0;
+ while (true) {
+ region_size /= Common::BitSize<u64>();
+ depth++;
+ if (region_size == 0) {
+ return depth;
+ }
+ }
+ }
+
+public:
+ static constexpr std::size_t CalculateManagementOverheadSize(std::size_t region_size) {
+ std::size_t overhead_bits = 0;
+ for (s32 depth = GetRequiredDepth(region_size) - 1; depth >= 0; depth--) {
+ region_size =
+ Common::AlignUp(region_size, Common::BitSize<u64>()) / Common::BitSize<u64>();
+ overhead_bits += region_size;
+ }
+ return overhead_bits * sizeof(u64);
+ }
+};
+
+} // namespace Kernel
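// Shape of the new structure, for reference: KPageBitmap is a radix-64 tree
// in which each set bit at depth d marks a non-zero u64 word at depth d + 1,
// so FindFreeBlock() resolves in at most MaxDepth word reads, taking either
// the lowest set bit or, with 'random', one picked by RandomBitGenerator
// (presumably for allocation randomization). Worked size check against the
// function above: a region of 4096 blocks needs levels of 64, 1 and 1 u64
// words (depth 3), i.e. 66 words:
static_assert(Kernel::KPageBitmap::CalculateManagementOverheadSize(4096) ==
              66 * sizeof(u64)); // 528 bytes of management storage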
diff --git a/src/core/hle/kernel/memory/page_heap.cpp b/src/core/hle/kernel/k_page_heap.cpp
index 0ab1f7205..07e062922 100644
--- a/src/core/hle/kernel/memory/page_heap.cpp
+++ b/src/core/hle/kernel/k_page_heap.cpp
@@ -2,16 +2,13 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-// This file references various implementation details from Atmosphere, an open-source firmware for
-// the Nintendo Switch. Copyright 2018-2020 Atmosphere-NX.
-
#include "core/core.h"
-#include "core/hle/kernel/memory/page_heap.h"
+#include "core/hle/kernel/k_page_heap.h"
#include "core/memory.h"
-namespace Kernel::Memory {
+namespace Kernel {
-void PageHeap::Initialize(VAddr address, std::size_t size, std::size_t metadata_size) {
+void KPageHeap::Initialize(VAddr address, std::size_t size, std::size_t metadata_size) {
// Check our assumptions
ASSERT(Common::IsAligned((address), PageSize));
ASSERT(Common::IsAligned(size, PageSize));
@@ -32,11 +29,11 @@ void PageHeap::Initialize(VAddr address, std::size_t size, std::size_t metadata_
}
}
-VAddr PageHeap::AllocateBlock(s32 index) {
+VAddr KPageHeap::AllocateBlock(s32 index, bool random) {
const std::size_t needed_size{blocks[index].GetSize()};
for (s32 i{index}; i < static_cast<s32>(MemoryBlockPageShifts.size()); i++) {
- if (const VAddr addr{blocks[i].PopBlock()}; addr) {
+ if (const VAddr addr{blocks[i].PopBlock(random)}; addr) {
if (const std::size_t allocated_size{blocks[i].GetSize()};
allocated_size > needed_size) {
Free(addr + needed_size, (allocated_size - needed_size) / PageSize);
@@ -48,13 +45,13 @@ VAddr PageHeap::AllocateBlock(s32 index) {
return 0;
}
-void PageHeap::FreeBlock(VAddr block, s32 index) {
+void KPageHeap::FreeBlock(VAddr block, s32 index) {
do {
block = blocks[index++].PushBlock(block);
} while (block != 0);
}
-void PageHeap::Free(VAddr addr, std::size_t num_pages) {
+void KPageHeap::Free(VAddr addr, std::size_t num_pages) {
// Freeing no pages is a no-op
if (num_pages == 0) {
return;
@@ -104,16 +101,16 @@ void PageHeap::Free(VAddr addr, std::size_t num_pages) {
}
}
-std::size_t PageHeap::CalculateMetadataOverheadSize(std::size_t region_size) {
+std::size_t KPageHeap::CalculateManagementOverheadSize(std::size_t region_size) {
std::size_t overhead_size = 0;
for (std::size_t i = 0; i < MemoryBlockPageShifts.size(); i++) {
const std::size_t cur_block_shift{MemoryBlockPageShifts[i]};
const std::size_t next_block_shift{
(i != MemoryBlockPageShifts.size() - 1) ? MemoryBlockPageShifts[i + 1] : 0};
- overhead_size += PageHeap::Block::CalculateMetadataOverheadSize(
+ overhead_size += KPageHeap::Block::CalculateManagementOverheadSize(
region_size, cur_block_shift, next_block_shift);
}
return Common::AlignUp(overhead_size, PageSize);
}
-} // namespace Kernel::Memory
+} // namespace Kernel
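// A coalescing walk-through for Free()/FreeBlock() above, using the shift
// table from the new header below (4 KiB blocks at index 0, 64 KiB at
// index 1): PushBlock() at index 0 sets one bit, then tries ClearRange()
// over 1 << (0x10 - 0xC) = 16 bits. Once all 16 pages of an aligned 64 KiB
// window are free, the range clears, PushBlock() returns the window's base,
// and the FreeBlock() loop re-pushes it at index 1 - buddy-style merging
// that can cascade up to the 1 GiB class, where next_block_shift is 0 and
// the loop stops.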
diff --git a/src/core/hle/kernel/k_page_heap.h b/src/core/hle/kernel/k_page_heap.h
new file mode 100644
index 000000000..de5d6a189
--- /dev/null
+++ b/src/core/hle/kernel/k_page_heap.h
@@ -0,0 +1,193 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <bit>
+#include <vector>
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "core/hle/kernel/k_page_bitmap.h"
+#include "core/hle/kernel/memory_types.h"
+
+namespace Kernel {
+
+class KPageHeap final : NonCopyable {
+public:
+ static constexpr s32 GetAlignedBlockIndex(std::size_t num_pages, std::size_t align_pages) {
+ const auto target_pages{std::max(num_pages, align_pages)};
+ for (std::size_t i = 0; i < NumMemoryBlockPageShifts; i++) {
+ if (target_pages <=
+ (static_cast<std::size_t>(1) << MemoryBlockPageShifts[i]) / PageSize) {
+ return static_cast<s32>(i);
+ }
+ }
+ return -1;
+ }
+
+ static constexpr s32 GetBlockIndex(std::size_t num_pages) {
+ for (s32 i{static_cast<s32>(NumMemoryBlockPageShifts) - 1}; i >= 0; i--) {
+ if (num_pages >= (static_cast<std::size_t>(1) << MemoryBlockPageShifts[i]) / PageSize) {
+ return i;
+ }
+ }
+ return -1;
+ }
+
+ static constexpr std::size_t GetBlockSize(std::size_t index) {
+ return static_cast<std::size_t>(1) << MemoryBlockPageShifts[index];
+ }
+
+ static constexpr std::size_t GetBlockNumPages(std::size_t index) {
+ return GetBlockSize(index) / PageSize;
+ }
+
+private:
+ static constexpr std::size_t NumMemoryBlockPageShifts{7};
+ static constexpr std::array<std::size_t, NumMemoryBlockPageShifts> MemoryBlockPageShifts{
+ 0xC, 0x10, 0x15, 0x16, 0x19, 0x1D, 0x1E,
+ };
+
+ class Block final : NonCopyable {
+ private:
+ KPageBitmap bitmap;
+ VAddr heap_address{};
+ uintptr_t end_offset{};
+ std::size_t block_shift{};
+ std::size_t next_block_shift{};
+
+ public:
+ Block() = default;
+
+ constexpr std::size_t GetShift() const {
+ return block_shift;
+ }
+ constexpr std::size_t GetNextShift() const {
+ return next_block_shift;
+ }
+ constexpr std::size_t GetSize() const {
+ return static_cast<std::size_t>(1) << GetShift();
+ }
+ constexpr std::size_t GetNumPages() const {
+ return GetSize() / PageSize;
+ }
+ constexpr std::size_t GetNumFreeBlocks() const {
+ return bitmap.GetNumBits();
+ }
+ constexpr std::size_t GetNumFreePages() const {
+ return GetNumFreeBlocks() * GetNumPages();
+ }
+
+ u64* Initialize(VAddr addr, std::size_t size, std::size_t bs, std::size_t nbs,
+ u64* bit_storage) {
+ // Set shifts
+ block_shift = bs;
+ next_block_shift = nbs;
+
+ // Align up the address
+ VAddr end{addr + size};
+ const auto align{(next_block_shift != 0) ? (1ULL << next_block_shift)
+ : (1ULL << block_shift)};
+ addr = Common::AlignDown((addr), align);
+ end = Common::AlignUp((end), align);
+
+ heap_address = addr;
+ end_offset = (end - addr) / (1ULL << block_shift);
+ return bitmap.Initialize(bit_storage, end_offset);
+ }
+
+ VAddr PushBlock(VAddr address) {
+ // Set the bit for the free block
+ std::size_t offset{(address - heap_address) >> GetShift()};
+ bitmap.SetBit(offset);
+
+ // If we have a next shift, try to clear the blocks below and return the address
+ if (GetNextShift()) {
+ const auto diff{1ULL << (GetNextShift() - GetShift())};
+ offset = Common::AlignDown(offset, diff);
+ if (bitmap.ClearRange(offset, diff)) {
+ return heap_address + (offset << GetShift());
+ }
+ }
+
+ // We couldn't coalesce, or we're already as big as possible
+ return 0;
+ }
+
+ VAddr PopBlock(bool random) {
+ // Find a free block
+ const s64 soffset{bitmap.FindFreeBlock(random)};
+ if (soffset < 0) {
+ return 0;
+ }
+ const auto offset{static_cast<std::size_t>(soffset)};
+
+ // Update our tracking and return it
+ bitmap.ClearBit(offset);
+ return heap_address + (offset << GetShift());
+ }
+
+ public:
+ static constexpr std::size_t CalculateManagementOverheadSize(std::size_t region_size,
+ std::size_t cur_block_shift,
+ std::size_t next_block_shift) {
+ const auto cur_block_size{(1ULL << cur_block_shift)};
+ const auto next_block_size{(1ULL << next_block_shift)};
+ const auto align{(next_block_shift != 0) ? next_block_size : cur_block_size};
+ return KPageBitmap::CalculateManagementOverheadSize(
+ (align * 2 + Common::AlignUp(region_size, align)) / cur_block_size);
+ }
+ };
+
+public:
+ KPageHeap() = default;
+
+ constexpr VAddr GetAddress() const {
+ return heap_address;
+ }
+ constexpr std::size_t GetSize() const {
+ return heap_size;
+ }
+ constexpr VAddr GetEndAddress() const {
+ return GetAddress() + GetSize();
+ }
+ constexpr std::size_t GetPageOffset(VAddr block) const {
+ return (block - GetAddress()) / PageSize;
+ }
+
+ void Initialize(VAddr heap_address, std::size_t heap_size, std::size_t metadata_size);
+ VAddr AllocateBlock(s32 index, bool random);
+ void Free(VAddr addr, std::size_t num_pages);
+
+ void UpdateUsedSize() {
+ used_size = heap_size - (GetNumFreePages() * PageSize);
+ }
+
+ static std::size_t CalculateManagementOverheadSize(std::size_t region_size);
+
+private:
+ constexpr std::size_t GetNumFreePages() const {
+ std::size_t num_free{};
+
+ for (const auto& block : blocks) {
+ num_free += block.GetNumFreePages();
+ }
+
+ return num_free;
+ }
+
+ void FreeBlock(VAddr block, s32 index);
+
+ VAddr heap_address{};
+ std::size_t heap_size{};
+ std::size_t used_size{};
+ std::array<Block, NumMemoryBlockPageShifts> blocks{};
+ std::vector<u64> metadata;
+};
+
+} // namespace Kernel
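Block is one level of a buddy-style heap: PushBlock marks a freed block in the bitmap and, once every buddy making up the next-larger block is free, clears them so the merged block can be pushed to the level above; PopBlock removes a free block, optionally at a random position. A standalone sketch of the coalescing step, with toy 4 KiB/16 KiB shifts and a plain std::bitset standing in for KPageBitmap (illustrative only, not the kernel code):

    #include <bitset>
    #include <cstddef>
    #include <cstdint>
    #include <optional>

    constexpr std::size_t kBlockShift = 12;    // 4 KiB blocks at this level
    constexpr std::size_t kNextShift = 14;     // 16 KiB blocks one level up
    constexpr std::size_t kBlocksPerNext = 1ULL << (kNextShift - kBlockShift);

    std::bitset<64> free_map;                  // one bit per 4 KiB block

    // Returns the merged block's address if all buddies became free, else nothing.
    std::optional<std::uintptr_t> Push(std::uintptr_t heap_base, std::uintptr_t addr) {
        std::size_t offset = (addr - heap_base) >> kBlockShift;
        free_map.set(offset);

        // Align down to the start of the would-be 16 KiB block and test the buddies.
        offset -= offset % kBlocksPerNext;
        for (std::size_t i = 0; i < kBlocksPerNext; ++i) {
            if (!free_map.test(offset + i)) {
                return std::nullopt;           // cannot coalesce yet
            }
        }
        for (std::size_t i = 0; i < kBlocksPerNext; ++i) {
            free_map.reset(offset + i);        // the blocks move up to the next level
        }
        return heap_base + (offset << kBlockShift);
    }

This mirrors why PushBlock returns an address: the caller feeds it back into the next Block, so frees percolate upward until no further merge is possible.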
diff --git a/src/core/hle/kernel/memory/page_linked_list.h b/src/core/hle/kernel/k_page_linked_list.h
index 45dc13eaf..64024d01f 100644
--- a/src/core/hle/kernel/memory/page_linked_list.h
+++ b/src/core/hle/kernel/k_page_linked_list.h
@@ -8,12 +8,12 @@
#include "common/assert.h"
#include "common/common_types.h"
-#include "core/hle/kernel/memory/memory_types.h"
+#include "core/hle/kernel/memory_types.h"
#include "core/hle/result.h"
-namespace Kernel::Memory {
+namespace Kernel {
-class PageLinkedList final {
+class KPageLinkedList final {
public:
class Node final {
public:
@@ -33,8 +33,8 @@ public:
};
public:
- PageLinkedList() = default;
- PageLinkedList(u64 address, u64 num_pages) {
+ KPageLinkedList() = default;
+ KPageLinkedList(u64 address, u64 num_pages) {
ASSERT(AddBlock(address, num_pages).IsSuccess());
}
@@ -54,7 +54,7 @@ public:
return num_pages;
}
- bool IsEqual(PageLinkedList& other) const {
+ bool IsEqual(KPageLinkedList& other) const {
auto this_node = nodes.begin();
auto other_node = other.nodes.begin();
while (this_node != nodes.end() && other_node != other.nodes.end()) {
@@ -89,4 +89,4 @@ private:
std::list<Node> nodes;
};
-} // namespace Kernel::Memory
+} // namespace Kernel
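KPageLinkedList is a page group: an ordered list of (address, page count) runs describing possibly discontiguous physical memory. A minimal sketch of the same shape, assuming a 4 KiB page and a merge of adjacent runs like the one AddBlock performs (simplified API, not the kernel class):

    #include <cstdint>
    #include <list>

    struct Node {
        std::uint64_t addr;
        std::uint64_t num_pages;
    };

    class PageGroup {
    public:
        void AddBlock(std::uint64_t addr, std::uint64_t num_pages) {
            // Extend the last run when the new range is physically contiguous.
            if (!nodes.empty()) {
                Node& back = nodes.back();
                if (back.addr + back.num_pages * 0x1000 == addr) {
                    back.num_pages += num_pages;
                    return;
                }
            }
            nodes.push_back({addr, num_pages});
        }
        std::uint64_t GetNumPages() const {
            std::uint64_t total = 0;
            for (const Node& n : nodes) {
                total += n.num_pages;
            }
            return total;
        }
    private:
        std::list<Node> nodes;
    };

Keeping runs instead of individual pages is what lets Operate map a whole contiguous span per node rather than one page at a time.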
diff --git a/src/core/hle/kernel/memory/page_table.cpp b/src/core/hle/kernel/k_page_table.cpp
index 7de91c768..d09d5ce48 100644
--- a/src/core/hle/kernel/memory/page_table.cpp
+++ b/src/core/hle/kernel/k_page_table.cpp
@@ -6,19 +6,20 @@
#include "common/assert.h"
#include "common/scope_exit.h"
#include "core/core.h"
-#include "core/hle/kernel/errors.h"
+#include "core/hle/kernel/k_address_space_info.h"
+#include "core/hle/kernel/k_memory_block.h"
+#include "core/hle/kernel/k_memory_block_manager.h"
+#include "core/hle/kernel/k_page_linked_list.h"
+#include "core/hle/kernel/k_page_table.h"
#include "core/hle/kernel/k_resource_limit.h"
+#include "core/hle/kernel/k_scoped_resource_reservation.h"
+#include "core/hle/kernel/k_system_control.h"
#include "core/hle/kernel/kernel.h"
-#include "core/hle/kernel/memory/address_space_info.h"
-#include "core/hle/kernel/memory/memory_block.h"
-#include "core/hle/kernel/memory/memory_block_manager.h"
-#include "core/hle/kernel/memory/page_linked_list.h"
-#include "core/hle/kernel/memory/page_table.h"
-#include "core/hle/kernel/memory/system_control.h"
#include "core/hle/kernel/process.h"
+#include "core/hle/kernel/svc_results.h"
#include "core/memory.h"
-namespace Kernel::Memory {
+namespace Kernel {
namespace {
@@ -37,14 +38,14 @@ constexpr std::size_t GetAddressSpaceWidthFromType(FileSys::ProgramAddressSpaceT
}
}
-constexpr u64 GetAddressInRange(const MemoryInfo& info, VAddr addr) {
+constexpr u64 GetAddressInRange(const KMemoryInfo& info, VAddr addr) {
if (info.GetAddress() < addr) {
return addr;
}
return info.GetAddress();
}
-constexpr std::size_t GetSizeInRange(const MemoryInfo& info, VAddr start, VAddr end) {
+constexpr std::size_t GetSizeInRange(const KMemoryInfo& info, VAddr start, VAddr end) {
std::size_t size{info.GetSize()};
if (info.GetAddress() < start) {
size -= start - info.GetAddress();
@@ -57,25 +58,25 @@ constexpr std::size_t GetSizeInRange(const MemoryInfo& info, VAddr start, VAddr
} // namespace
-PageTable::PageTable(Core::System& system) : system{system} {}
+KPageTable::KPageTable(Core::System& system) : system{system} {}
-ResultCode PageTable::InitializeForProcess(FileSys::ProgramAddressSpaceType as_type,
- bool enable_aslr, VAddr code_addr, std::size_t code_size,
- Memory::MemoryManager::Pool pool) {
+ResultCode KPageTable::InitializeForProcess(FileSys::ProgramAddressSpaceType as_type,
+ bool enable_aslr, VAddr code_addr,
+ std::size_t code_size, KMemoryManager::Pool pool) {
- const auto GetSpaceStart = [this](AddressSpaceInfo::Type type) {
- return AddressSpaceInfo::GetAddressSpaceStart(address_space_width, type);
+ const auto GetSpaceStart = [this](KAddressSpaceInfo::Type type) {
+ return KAddressSpaceInfo::GetAddressSpaceStart(address_space_width, type);
};
- const auto GetSpaceSize = [this](AddressSpaceInfo::Type type) {
- return AddressSpaceInfo::GetAddressSpaceSize(address_space_width, type);
+ const auto GetSpaceSize = [this](KAddressSpaceInfo::Type type) {
+ return KAddressSpaceInfo::GetAddressSpaceSize(address_space_width, type);
};
// Set our width and heap/alias sizes
address_space_width = GetAddressSpaceWidthFromType(as_type);
const VAddr start = 0;
const VAddr end{1ULL << address_space_width};
- std::size_t alias_region_size{GetSpaceSize(AddressSpaceInfo::Type::Alias)};
- std::size_t heap_region_size{GetSpaceSize(AddressSpaceInfo::Type::Heap)};
+ std::size_t alias_region_size{GetSpaceSize(KAddressSpaceInfo::Type::Alias)};
+ std::size_t heap_region_size{GetSpaceSize(KAddressSpaceInfo::Type::Heap)};
ASSERT(start <= code_addr);
ASSERT(code_addr < code_addr + code_size);
@@ -95,12 +96,12 @@ ResultCode PageTable::InitializeForProcess(FileSys::ProgramAddressSpaceType as_t
std::size_t kernel_map_region_size{};
if (address_space_width == 39) {
- alias_region_size = GetSpaceSize(AddressSpaceInfo::Type::Alias);
- heap_region_size = GetSpaceSize(AddressSpaceInfo::Type::Heap);
- stack_region_size = GetSpaceSize(AddressSpaceInfo::Type::Stack);
- kernel_map_region_size = GetSpaceSize(AddressSpaceInfo::Type::Is32Bit);
- code_region_start = GetSpaceStart(AddressSpaceInfo::Type::Large64Bit);
- code_region_end = code_region_start + GetSpaceSize(AddressSpaceInfo::Type::Large64Bit);
+ alias_region_size = GetSpaceSize(KAddressSpaceInfo::Type::Alias);
+ heap_region_size = GetSpaceSize(KAddressSpaceInfo::Type::Heap);
+ stack_region_size = GetSpaceSize(KAddressSpaceInfo::Type::Stack);
+ kernel_map_region_size = GetSpaceSize(KAddressSpaceInfo::Type::MapSmall);
+ code_region_start = GetSpaceStart(KAddressSpaceInfo::Type::Map39Bit);
+ code_region_end = code_region_start + GetSpaceSize(KAddressSpaceInfo::Type::Map39Bit);
alias_code_region_start = code_region_start;
alias_code_region_end = code_region_end;
process_code_start = Common::AlignDown(code_addr, RegionAlignment);
@@ -108,12 +109,12 @@ ResultCode PageTable::InitializeForProcess(FileSys::ProgramAddressSpaceType as_t
} else {
stack_region_size = 0;
kernel_map_region_size = 0;
- code_region_start = GetSpaceStart(AddressSpaceInfo::Type::Is32Bit);
- code_region_end = code_region_start + GetSpaceSize(AddressSpaceInfo::Type::Is32Bit);
+ code_region_start = GetSpaceStart(KAddressSpaceInfo::Type::MapSmall);
+ code_region_end = code_region_start + GetSpaceSize(KAddressSpaceInfo::Type::MapSmall);
stack_region_start = code_region_start;
alias_code_region_start = code_region_start;
- alias_code_region_end = GetSpaceStart(AddressSpaceInfo::Type::Small64Bit) +
- GetSpaceSize(AddressSpaceInfo::Type::Small64Bit);
+ alias_code_region_end = GetSpaceStart(KAddressSpaceInfo::Type::MapLarge) +
+ GetSpaceSize(KAddressSpaceInfo::Type::MapLarge);
stack_region_end = code_region_end;
kernel_map_region_start = code_region_start;
kernel_map_region_end = code_region_end;
@@ -141,7 +142,7 @@ ResultCode PageTable::InitializeForProcess(FileSys::ProgramAddressSpaceType as_t
(alias_region_size + heap_region_size + stack_region_size + kernel_map_region_size)};
if (alloc_size < needed_size) {
UNREACHABLE();
- return ERR_OUT_OF_MEMORY;
+ return ResultOutOfMemory;
}
const std::size_t remaining_size{alloc_size - needed_size};
@@ -149,13 +150,13 @@ ResultCode PageTable::InitializeForProcess(FileSys::ProgramAddressSpaceType as_t
// Determine random placements for each region
std::size_t alias_rnd{}, heap_rnd{}, stack_rnd{}, kmap_rnd{};
if (enable_aslr) {
- alias_rnd = SystemControl::GenerateRandomRange(0, remaining_size / RegionAlignment) *
+ alias_rnd = KSystemControl::GenerateRandomRange(0, remaining_size / RegionAlignment) *
RegionAlignment;
- heap_rnd = SystemControl::GenerateRandomRange(0, remaining_size / RegionAlignment) *
+ heap_rnd = KSystemControl::GenerateRandomRange(0, remaining_size / RegionAlignment) *
RegionAlignment;
- stack_rnd = SystemControl::GenerateRandomRange(0, remaining_size / RegionAlignment) *
+ stack_rnd = KSystemControl::GenerateRandomRange(0, remaining_size / RegionAlignment) *
RegionAlignment;
- kmap_rnd = SystemControl::GenerateRandomRange(0, remaining_size / RegionAlignment) *
+ kmap_rnd = KSystemControl::GenerateRandomRange(0, remaining_size / RegionAlignment) *
RegionAlignment;
}
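Each ASLR slide above is drawn as a whole number of RegionAlignment units and then scaled back up, so every randomized region stays properly aligned while still spreading across the spare address space. A standalone sketch of that computation (std::mt19937_64 is a stand-in here; the real code uses KSystemControl::GenerateRandomRange):

    #include <cstddef>
    #include <random>

    std::size_t RandomSlide(std::size_t remaining_size, std::size_t region_alignment) {
        static std::mt19937_64 rng{std::random_device{}()};
        std::uniform_int_distribution<std::size_t> dist(0, remaining_size / region_alignment);
        // Picking a multiple of the alignment keeps the region aligned for any draw.
        return dist(rng) * region_alignment;
    }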
@@ -270,21 +271,21 @@ ResultCode PageTable::InitializeForProcess(FileSys::ProgramAddressSpaceType as_t
return InitializeMemoryLayout(start, end);
}
-ResultCode PageTable::MapProcessCode(VAddr addr, std::size_t num_pages, MemoryState state,
- MemoryPermission perm) {
+ResultCode KPageTable::MapProcessCode(VAddr addr, std::size_t num_pages, KMemoryState state,
+ KMemoryPermission perm) {
std::lock_guard lock{page_table_lock};
const u64 size{num_pages * PageSize};
if (!CanContain(addr, size, state)) {
- return ERR_INVALID_ADDRESS_STATE;
+ return ResultInvalidCurrentMemory;
}
if (IsRegionMapped(addr, size)) {
- return ERR_INVALID_ADDRESS_STATE;
+ return ResultInvalidCurrentMemory;
}
- PageLinkedList page_linked_list;
+ KPageLinkedList page_linked_list;
CASCADE_CODE(
system.Kernel().MemoryManager().Allocate(page_linked_list, num_pages, memory_pool));
CASCADE_CODE(Operate(addr, num_pages, page_linked_list, OperationType::MapGroup));
@@ -294,44 +295,44 @@ ResultCode PageTable::MapProcessCode(VAddr addr, std::size_t num_pages, MemorySt
return RESULT_SUCCESS;
}
-ResultCode PageTable::MapProcessCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) {
+ResultCode KPageTable::MapProcessCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) {
std::lock_guard lock{page_table_lock};
const std::size_t num_pages{size / PageSize};
- MemoryState state{};
- MemoryPermission perm{};
- CASCADE_CODE(CheckMemoryState(&state, &perm, nullptr, src_addr, size, MemoryState::All,
- MemoryState::Normal, MemoryPermission::Mask,
- MemoryPermission::ReadAndWrite, MemoryAttribute::Mask,
- MemoryAttribute::None, MemoryAttribute::IpcAndDeviceMapped));
+ KMemoryState state{};
+ KMemoryPermission perm{};
+ CASCADE_CODE(CheckMemoryState(&state, &perm, nullptr, src_addr, size, KMemoryState::All,
+ KMemoryState::Normal, KMemoryPermission::Mask,
+ KMemoryPermission::ReadAndWrite, KMemoryAttribute::Mask,
+ KMemoryAttribute::None, KMemoryAttribute::IpcAndDeviceMapped));
if (IsRegionMapped(dst_addr, size)) {
- return ERR_INVALID_ADDRESS_STATE;
+ return ResultInvalidCurrentMemory;
}
- PageLinkedList page_linked_list;
+ KPageLinkedList page_linked_list;
AddRegionToPages(src_addr, num_pages, page_linked_list);
{
auto block_guard = detail::ScopeExit(
[&] { Operate(src_addr, num_pages, perm, OperationType::ChangePermissions); });
- CASCADE_CODE(
- Operate(src_addr, num_pages, MemoryPermission::None, OperationType::ChangePermissions));
- CASCADE_CODE(MapPages(dst_addr, page_linked_list, MemoryPermission::None));
+ CASCADE_CODE(Operate(src_addr, num_pages, KMemoryPermission::None,
+ OperationType::ChangePermissions));
+ CASCADE_CODE(MapPages(dst_addr, page_linked_list, KMemoryPermission::None));
block_guard.Cancel();
}
- block_manager->Update(src_addr, num_pages, state, MemoryPermission::None,
- MemoryAttribute::Locked);
- block_manager->Update(dst_addr, num_pages, MemoryState::AliasCode);
+ block_manager->Update(src_addr, num_pages, state, KMemoryPermission::None,
+ KMemoryAttribute::Locked);
+ block_manager->Update(dst_addr, num_pages, KMemoryState::AliasCode);
return RESULT_SUCCESS;
}
-ResultCode PageTable::UnmapProcessCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) {
+ResultCode KPageTable::UnmapProcessCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) {
std::lock_guard lock{page_table_lock};
if (!size) {
@@ -340,34 +341,35 @@ ResultCode PageTable::UnmapProcessCodeMemory(VAddr dst_addr, VAddr src_addr, std
const std::size_t num_pages{size / PageSize};
- CASCADE_CODE(CheckMemoryState(nullptr, nullptr, nullptr, src_addr, size, MemoryState::All,
- MemoryState::Normal, MemoryPermission::None,
- MemoryPermission::None, MemoryAttribute::Mask,
- MemoryAttribute::Locked, MemoryAttribute::IpcAndDeviceMapped));
+ CASCADE_CODE(CheckMemoryState(nullptr, nullptr, nullptr, src_addr, size, KMemoryState::All,
+ KMemoryState::Normal, KMemoryPermission::None,
+ KMemoryPermission::None, KMemoryAttribute::Mask,
+ KMemoryAttribute::Locked, KMemoryAttribute::IpcAndDeviceMapped));
- MemoryState state{};
+ KMemoryState state{};
CASCADE_CODE(CheckMemoryState(
- &state, nullptr, nullptr, dst_addr, PageSize, MemoryState::FlagCanCodeAlias,
- MemoryState::FlagCanCodeAlias, MemoryPermission::None, MemoryPermission::None,
- MemoryAttribute::Mask, MemoryAttribute::None, MemoryAttribute::IpcAndDeviceMapped));
- CASCADE_CODE(CheckMemoryState(dst_addr, size, MemoryState::All, state, MemoryPermission::None,
- MemoryPermission::None, MemoryAttribute::Mask,
- MemoryAttribute::None));
- CASCADE_CODE(Operate(dst_addr, num_pages, MemoryPermission::None, OperationType::Unmap));
-
- block_manager->Update(dst_addr, num_pages, MemoryState::Free);
- block_manager->Update(src_addr, num_pages, MemoryState::Normal, MemoryPermission::ReadAndWrite);
+ &state, nullptr, nullptr, dst_addr, PageSize, KMemoryState::FlagCanCodeAlias,
+ KMemoryState::FlagCanCodeAlias, KMemoryPermission::None, KMemoryPermission::None,
+ KMemoryAttribute::Mask, KMemoryAttribute::None, KMemoryAttribute::IpcAndDeviceMapped));
+ CASCADE_CODE(CheckMemoryState(dst_addr, size, KMemoryState::All, state, KMemoryPermission::None,
+ KMemoryPermission::None, KMemoryAttribute::Mask,
+ KMemoryAttribute::None));
+ CASCADE_CODE(Operate(dst_addr, num_pages, KMemoryPermission::None, OperationType::Unmap));
+
+ block_manager->Update(dst_addr, num_pages, KMemoryState::Free);
+ block_manager->Update(src_addr, num_pages, KMemoryState::Normal,
+ KMemoryPermission::ReadAndWrite);
return RESULT_SUCCESS;
}
-void PageTable::MapPhysicalMemory(PageLinkedList& page_linked_list, VAddr start, VAddr end) {
+void KPageTable::MapPhysicalMemory(KPageLinkedList& page_linked_list, VAddr start, VAddr end) {
auto node{page_linked_list.Nodes().begin()};
PAddr map_addr{node->GetAddress()};
std::size_t src_num_pages{node->GetNumPages()};
- block_manager->IterateForRange(start, end, [&](const MemoryInfo& info) {
- if (info.state != MemoryState::Free) {
+ block_manager->IterateForRange(start, end, [&](const KMemoryInfo& info) {
+ if (info.state != KMemoryState::Free) {
return;
}
@@ -382,7 +384,7 @@ void PageTable::MapPhysicalMemory(PageLinkedList& page_linked_list, VAddr start,
}
const std::size_t num_pages{std::min(src_num_pages, dst_num_pages)};
- Operate(dst_addr, num_pages, MemoryPermission::ReadAndWrite, OperationType::Map,
+ Operate(dst_addr, num_pages, KMemoryPermission::ReadAndWrite, OperationType::Map,
map_addr);
dst_addr += num_pages * PageSize;
@@ -393,14 +395,14 @@ void PageTable::MapPhysicalMemory(PageLinkedList& page_linked_list, VAddr start,
});
}
-ResultCode PageTable::MapPhysicalMemory(VAddr addr, std::size_t size) {
+ResultCode KPageTable::MapPhysicalMemory(VAddr addr, std::size_t size) {
std::lock_guard lock{page_table_lock};
std::size_t mapped_size{};
const VAddr end_addr{addr + size};
- block_manager->IterateForRange(addr, end_addr, [&](const MemoryInfo& info) {
- if (info.state != MemoryState::Free) {
+ block_manager->IterateForRange(addr, end_addr, [&](const KMemoryInfo& info) {
+ if (info.state != KMemoryState::Free) {
mapped_size += GetSizeInRange(info, addr, end_addr);
}
});
@@ -409,41 +411,39 @@ ResultCode PageTable::MapPhysicalMemory(VAddr addr, std::size_t size) {
return RESULT_SUCCESS;
}
- auto process{system.Kernel().CurrentProcess()};
const std::size_t remaining_size{size - mapped_size};
const std::size_t remaining_pages{remaining_size / PageSize};
- if (process->GetResourceLimit() &&
- !process->GetResourceLimit()->Reserve(LimitableResource::PhysicalMemory, remaining_size)) {
- return ERR_RESOURCE_LIMIT_EXCEEDED;
+ // Reserve the memory from the process resource limit.
+ KScopedResourceReservation memory_reservation(
+ system.Kernel().CurrentProcess()->GetResourceLimit(), LimitableResource::PhysicalMemory,
+ remaining_size);
+ if (!memory_reservation.Succeeded()) {
+ LOG_ERROR(Kernel, "Could not reserve remaining {:X} bytes", remaining_size);
+ return ResultResourceLimitedExceeded;
}
- PageLinkedList page_linked_list;
- {
- auto block_guard = detail::ScopeExit([&] {
- system.Kernel().MemoryManager().Free(page_linked_list, remaining_pages, memory_pool);
- process->GetResourceLimit()->Release(LimitableResource::PhysicalMemory, remaining_size);
- });
+ KPageLinkedList page_linked_list;
- CASCADE_CODE(system.Kernel().MemoryManager().Allocate(page_linked_list, remaining_pages,
- memory_pool));
+ CASCADE_CODE(
+ system.Kernel().MemoryManager().Allocate(page_linked_list, remaining_pages, memory_pool));
- block_guard.Cancel();
- }
+ // We succeeded, so commit the memory reservation.
+ memory_reservation.Commit();
MapPhysicalMemory(page_linked_list, addr, end_addr);
physical_memory_usage += remaining_size;
const std::size_t num_pages{size / PageSize};
- block_manager->Update(addr, num_pages, MemoryState::Free, MemoryPermission::None,
- MemoryAttribute::None, MemoryState::Normal,
- MemoryPermission::ReadAndWrite, MemoryAttribute::None);
+ block_manager->Update(addr, num_pages, KMemoryState::Free, KMemoryPermission::None,
+ KMemoryAttribute::None, KMemoryState::Normal,
+ KMemoryPermission::ReadAndWrite, KMemoryAttribute::None);
return RESULT_SUCCESS;
}
-ResultCode PageTable::UnmapPhysicalMemory(VAddr addr, std::size_t size) {
+ResultCode KPageTable::UnmapPhysicalMemory(VAddr addr, std::size_t size) {
std::lock_guard lock{page_table_lock};
const VAddr end_addr{addr + size};
@@ -451,15 +451,15 @@ ResultCode PageTable::UnmapPhysicalMemory(VAddr addr, std::size_t size) {
std::size_t mapped_size{};
// Verify that the region can be unmapped
- block_manager->IterateForRange(addr, end_addr, [&](const MemoryInfo& info) {
- if (info.state == MemoryState::Normal) {
- if (info.attribute != MemoryAttribute::None) {
- result = ERR_INVALID_ADDRESS_STATE;
+ block_manager->IterateForRange(addr, end_addr, [&](const KMemoryInfo& info) {
+ if (info.state == KMemoryState::Normal) {
+ if (info.attribute != KMemoryAttribute::None) {
+ result = ResultInvalidCurrentMemory;
return;
}
mapped_size += GetSizeInRange(info, addr, end_addr);
- } else if (info.state != MemoryState::Free) {
- result = ERR_INVALID_ADDRESS_STATE;
+ } else if (info.state != KMemoryState::Free) {
+ result = ResultInvalidCurrentMemory;
}
});
@@ -480,23 +480,23 @@ ResultCode PageTable::UnmapPhysicalMemory(VAddr addr, std::size_t size) {
return RESULT_SUCCESS;
}
-ResultCode PageTable::UnmapMemory(VAddr addr, std::size_t size) {
+ResultCode KPageTable::UnmapMemory(VAddr addr, std::size_t size) {
std::lock_guard lock{page_table_lock};
const VAddr end_addr{addr + size};
ResultCode result{RESULT_SUCCESS};
- PageLinkedList page_linked_list;
+ KPageLinkedList page_linked_list;
// Unmap each region within the range
- block_manager->IterateForRange(addr, end_addr, [&](const MemoryInfo& info) {
- if (info.state == MemoryState::Normal) {
+ block_manager->IterateForRange(addr, end_addr, [&](const KMemoryInfo& info) {
+ if (info.state == KMemoryState::Normal) {
const std::size_t block_size{GetSizeInRange(info, addr, end_addr)};
const std::size_t block_num_pages{block_size / PageSize};
const VAddr block_addr{GetAddressInRange(info, addr)};
AddRegionToPages(block_addr, block_size / PageSize, page_linked_list);
- if (result = Operate(block_addr, block_num_pages, MemoryPermission::None,
+ if (result = Operate(block_addr, block_num_pages, KMemoryPermission::None,
OperationType::Unmap);
result.IsError()) {
return;
@@ -511,93 +511,94 @@ ResultCode PageTable::UnmapMemory(VAddr addr, std::size_t size) {
const std::size_t num_pages{size / PageSize};
system.Kernel().MemoryManager().Free(page_linked_list, num_pages, memory_pool);
- block_manager->Update(addr, num_pages, MemoryState::Free);
+ block_manager->Update(addr, num_pages, KMemoryState::Free);
return RESULT_SUCCESS;
}
-ResultCode PageTable::Map(VAddr dst_addr, VAddr src_addr, std::size_t size) {
+ResultCode KPageTable::Map(VAddr dst_addr, VAddr src_addr, std::size_t size) {
std::lock_guard lock{page_table_lock};
- MemoryState src_state{};
+ KMemoryState src_state{};
CASCADE_CODE(CheckMemoryState(
- &src_state, nullptr, nullptr, src_addr, size, MemoryState::FlagCanAlias,
- MemoryState::FlagCanAlias, MemoryPermission::Mask, MemoryPermission::ReadAndWrite,
- MemoryAttribute::Mask, MemoryAttribute::None, MemoryAttribute::IpcAndDeviceMapped));
+ &src_state, nullptr, nullptr, src_addr, size, KMemoryState::FlagCanAlias,
+ KMemoryState::FlagCanAlias, KMemoryPermission::Mask, KMemoryPermission::ReadAndWrite,
+ KMemoryAttribute::Mask, KMemoryAttribute::None, KMemoryAttribute::IpcAndDeviceMapped));
if (IsRegionMapped(dst_addr, size)) {
- return ERR_INVALID_ADDRESS_STATE;
+ return ResultInvalidCurrentMemory;
}
- PageLinkedList page_linked_list;
+ KPageLinkedList page_linked_list;
const std::size_t num_pages{size / PageSize};
AddRegionToPages(src_addr, num_pages, page_linked_list);
{
auto block_guard = detail::ScopeExit([&] {
- Operate(src_addr, num_pages, MemoryPermission::ReadAndWrite,
+ Operate(src_addr, num_pages, KMemoryPermission::ReadAndWrite,
OperationType::ChangePermissions);
});
- CASCADE_CODE(
- Operate(src_addr, num_pages, MemoryPermission::None, OperationType::ChangePermissions));
- CASCADE_CODE(MapPages(dst_addr, page_linked_list, MemoryPermission::ReadAndWrite));
+ CASCADE_CODE(Operate(src_addr, num_pages, KMemoryPermission::None,
+ OperationType::ChangePermissions));
+ CASCADE_CODE(MapPages(dst_addr, page_linked_list, KMemoryPermission::ReadAndWrite));
block_guard.Cancel();
}
- block_manager->Update(src_addr, num_pages, src_state, MemoryPermission::None,
- MemoryAttribute::Locked);
- block_manager->Update(dst_addr, num_pages, MemoryState::Stack, MemoryPermission::ReadAndWrite);
+ block_manager->Update(src_addr, num_pages, src_state, KMemoryPermission::None,
+ KMemoryAttribute::Locked);
+ block_manager->Update(dst_addr, num_pages, KMemoryState::Stack,
+ KMemoryPermission::ReadAndWrite);
return RESULT_SUCCESS;
}
-ResultCode PageTable::Unmap(VAddr dst_addr, VAddr src_addr, std::size_t size) {
+ResultCode KPageTable::Unmap(VAddr dst_addr, VAddr src_addr, std::size_t size) {
std::lock_guard lock{page_table_lock};
- MemoryState src_state{};
+ KMemoryState src_state{};
CASCADE_CODE(CheckMemoryState(
- &src_state, nullptr, nullptr, src_addr, size, MemoryState::FlagCanAlias,
- MemoryState::FlagCanAlias, MemoryPermission::Mask, MemoryPermission::None,
- MemoryAttribute::Mask, MemoryAttribute::Locked, MemoryAttribute::IpcAndDeviceMapped));
-
- MemoryPermission dst_perm{};
- CASCADE_CODE(CheckMemoryState(nullptr, &dst_perm, nullptr, dst_addr, size, MemoryState::All,
- MemoryState::Stack, MemoryPermission::None,
- MemoryPermission::None, MemoryAttribute::Mask,
- MemoryAttribute::None, MemoryAttribute::IpcAndDeviceMapped));
-
- PageLinkedList src_pages;
- PageLinkedList dst_pages;
+ &src_state, nullptr, nullptr, src_addr, size, KMemoryState::FlagCanAlias,
+ KMemoryState::FlagCanAlias, KMemoryPermission::Mask, KMemoryPermission::None,
+ KMemoryAttribute::Mask, KMemoryAttribute::Locked, KMemoryAttribute::IpcAndDeviceMapped));
+
+ KMemoryPermission dst_perm{};
+ CASCADE_CODE(CheckMemoryState(nullptr, &dst_perm, nullptr, dst_addr, size, KMemoryState::All,
+ KMemoryState::Stack, KMemoryPermission::None,
+ KMemoryPermission::None, KMemoryAttribute::Mask,
+ KMemoryAttribute::None, KMemoryAttribute::IpcAndDeviceMapped));
+
+ KPageLinkedList src_pages;
+ KPageLinkedList dst_pages;
const std::size_t num_pages{size / PageSize};
AddRegionToPages(src_addr, num_pages, src_pages);
AddRegionToPages(dst_addr, num_pages, dst_pages);
if (!dst_pages.IsEqual(src_pages)) {
- return ERR_INVALID_MEMORY_RANGE;
+ return ResultInvalidMemoryRange;
}
{
auto block_guard = detail::ScopeExit([&] { MapPages(dst_addr, dst_pages, dst_perm); });
- CASCADE_CODE(Operate(dst_addr, num_pages, MemoryPermission::None, OperationType::Unmap));
- CASCADE_CODE(Operate(src_addr, num_pages, MemoryPermission::ReadAndWrite,
+ CASCADE_CODE(Operate(dst_addr, num_pages, KMemoryPermission::None, OperationType::Unmap));
+ CASCADE_CODE(Operate(src_addr, num_pages, KMemoryPermission::ReadAndWrite,
OperationType::ChangePermissions));
block_guard.Cancel();
}
- block_manager->Update(src_addr, num_pages, src_state, MemoryPermission::ReadAndWrite);
- block_manager->Update(dst_addr, num_pages, MemoryState::Free);
+ block_manager->Update(src_addr, num_pages, src_state, KMemoryPermission::ReadAndWrite);
+ block_manager->Update(dst_addr, num_pages, KMemoryState::Free);
return RESULT_SUCCESS;
}
-ResultCode PageTable::MapPages(VAddr addr, const PageLinkedList& page_linked_list,
- MemoryPermission perm) {
+ResultCode KPageTable::MapPages(VAddr addr, const KPageLinkedList& page_linked_list,
+ KMemoryPermission perm) {
VAddr cur_addr{addr};
for (const auto& node : page_linked_list.Nodes()) {
@@ -606,8 +607,8 @@ ResultCode PageTable::MapPages(VAddr addr, const PageLinkedList& page_linked_lis
result.IsError()) {
const std::size_t num_pages{(addr - cur_addr) / PageSize};
- ASSERT(
- Operate(addr, num_pages, MemoryPermission::None, OperationType::Unmap).IsSuccess());
+ ASSERT(Operate(addr, num_pages, KMemoryPermission::None, OperationType::Unmap)
+ .IsSuccess());
return result;
}
@@ -618,19 +619,19 @@ ResultCode PageTable::MapPages(VAddr addr, const PageLinkedList& page_linked_lis
return RESULT_SUCCESS;
}
-ResultCode PageTable::MapPages(VAddr addr, PageLinkedList& page_linked_list, MemoryState state,
- MemoryPermission perm) {
+ResultCode KPageTable::MapPages(VAddr addr, KPageLinkedList& page_linked_list, KMemoryState state,
+ KMemoryPermission perm) {
std::lock_guard lock{page_table_lock};
const std::size_t num_pages{page_linked_list.GetNumPages()};
const std::size_t size{num_pages * PageSize};
if (!CanContain(addr, size, state)) {
- return ERR_INVALID_ADDRESS_STATE;
+ return ResultInvalidCurrentMemory;
}
if (IsRegionMapped(addr, num_pages * PageSize)) {
- return ERR_INVALID_ADDRESS_STATE;
+ return ResultInvalidCurrentMemory;
}
CASCADE_CODE(MapPages(addr, page_linked_list, perm));
@@ -640,26 +641,27 @@ ResultCode PageTable::MapPages(VAddr addr, PageLinkedList& page_linked_list, Mem
return RESULT_SUCCESS;
}
-ResultCode PageTable::SetCodeMemoryPermission(VAddr addr, std::size_t size, MemoryPermission perm) {
+ResultCode KPageTable::SetCodeMemoryPermission(VAddr addr, std::size_t size,
+ KMemoryPermission perm) {
std::lock_guard lock{page_table_lock};
- MemoryState prev_state{};
- MemoryPermission prev_perm{};
+ KMemoryState prev_state{};
+ KMemoryPermission prev_perm{};
CASCADE_CODE(CheckMemoryState(
- &prev_state, &prev_perm, nullptr, addr, size, MemoryState::FlagCode, MemoryState::FlagCode,
- MemoryPermission::None, MemoryPermission::None, MemoryAttribute::Mask,
- MemoryAttribute::None, MemoryAttribute::IpcAndDeviceMapped));
+ &prev_state, &prev_perm, nullptr, addr, size, KMemoryState::FlagCode,
+ KMemoryState::FlagCode, KMemoryPermission::None, KMemoryPermission::None,
+ KMemoryAttribute::Mask, KMemoryAttribute::None, KMemoryAttribute::IpcAndDeviceMapped));
- MemoryState state{prev_state};
+ KMemoryState state{prev_state};
// Ensure state is mutable if permission allows write
- if ((perm & MemoryPermission::Write) != MemoryPermission::None) {
- if (prev_state == MemoryState::Code) {
- state = MemoryState::CodeData;
- } else if (prev_state == MemoryState::AliasCode) {
- state = MemoryState::AliasCodeData;
+ if ((perm & KMemoryPermission::Write) != KMemoryPermission::None) {
+ if (prev_state == KMemoryState::Code) {
+ state = KMemoryState::CodeData;
+ } else if (prev_state == KMemoryState::AliasCode) {
+ state = KMemoryState::AliasCodeData;
} else {
UNREACHABLE();
}
@@ -670,13 +672,13 @@ ResultCode PageTable::SetCodeMemoryPermission(VAddr addr, std::size_t size, Memo
return RESULT_SUCCESS;
}
- if ((prev_perm & MemoryPermission::Execute) != (perm & MemoryPermission::Execute)) {
+ if ((prev_perm & KMemoryPermission::Execute) != (perm & KMemoryPermission::Execute)) {
// Memory execution state is changing, invalidate CPU cache range
system.InvalidateCpuInstructionCacheRange(addr, size);
}
const std::size_t num_pages{size / PageSize};
- const OperationType operation{(perm & MemoryPermission::Execute) != MemoryPermission::None
+ const OperationType operation{(perm & KMemoryPermission::Execute) != KMemoryPermission::None
? OperationType::ChangePermissionsAndRefresh
: OperationType::ChangePermissions};
@@ -687,69 +689,69 @@ ResultCode PageTable::SetCodeMemoryPermission(VAddr addr, std::size_t size, Memo
return RESULT_SUCCESS;
}
-MemoryInfo PageTable::QueryInfoImpl(VAddr addr) {
+KMemoryInfo KPageTable::QueryInfoImpl(VAddr addr) {
std::lock_guard lock{page_table_lock};
return block_manager->FindBlock(addr).GetMemoryInfo();
}
-MemoryInfo PageTable::QueryInfo(VAddr addr) {
+KMemoryInfo KPageTable::QueryInfo(VAddr addr) {
if (!Contains(addr, 1)) {
- return {address_space_end, 0 - address_space_end, MemoryState::Inaccessible,
- MemoryPermission::None, MemoryAttribute::None, MemoryPermission::None};
+ return {address_space_end, 0 - address_space_end, KMemoryState::Inaccessible,
+ KMemoryPermission::None, KMemoryAttribute::None, KMemoryPermission::None};
}
return QueryInfoImpl(addr);
}
-ResultCode PageTable::ReserveTransferMemory(VAddr addr, std::size_t size, MemoryPermission perm) {
+ResultCode KPageTable::ReserveTransferMemory(VAddr addr, std::size_t size, KMemoryPermission perm) {
std::lock_guard lock{page_table_lock};
- MemoryState state{};
- MemoryAttribute attribute{};
+ KMemoryState state{};
+ KMemoryAttribute attribute{};
- CASCADE_CODE(CheckMemoryState(&state, nullptr, &attribute, addr, size,
- MemoryState::FlagCanTransfer | MemoryState::FlagReferenceCounted,
- MemoryState::FlagCanTransfer | MemoryState::FlagReferenceCounted,
- MemoryPermission::Mask, MemoryPermission::ReadAndWrite,
- MemoryAttribute::Mask, MemoryAttribute::None,
- MemoryAttribute::IpcAndDeviceMapped));
+ CASCADE_CODE(CheckMemoryState(
+ &state, nullptr, &attribute, addr, size,
+ KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted,
+ KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted, KMemoryPermission::Mask,
+ KMemoryPermission::ReadAndWrite, KMemoryAttribute::Mask, KMemoryAttribute::None,
+ KMemoryAttribute::IpcAndDeviceMapped));
- block_manager->Update(addr, size / PageSize, state, perm, attribute | MemoryAttribute::Locked);
+ block_manager->Update(addr, size / PageSize, state, perm, attribute | KMemoryAttribute::Locked);
return RESULT_SUCCESS;
}
-ResultCode PageTable::ResetTransferMemory(VAddr addr, std::size_t size) {
+ResultCode KPageTable::ResetTransferMemory(VAddr addr, std::size_t size) {
std::lock_guard lock{page_table_lock};
- MemoryState state{};
+ KMemoryState state{};
- CASCADE_CODE(CheckMemoryState(&state, nullptr, nullptr, addr, size,
- MemoryState::FlagCanTransfer | MemoryState::FlagReferenceCounted,
- MemoryState::FlagCanTransfer | MemoryState::FlagReferenceCounted,
- MemoryPermission::None, MemoryPermission::None,
- MemoryAttribute::Mask, MemoryAttribute::Locked,
- MemoryAttribute::IpcAndDeviceMapped));
+ CASCADE_CODE(
+ CheckMemoryState(&state, nullptr, nullptr, addr, size,
+ KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted,
+ KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted,
+ KMemoryPermission::None, KMemoryPermission::None, KMemoryAttribute::Mask,
+ KMemoryAttribute::Locked, KMemoryAttribute::IpcAndDeviceMapped));
- block_manager->Update(addr, size / PageSize, state, MemoryPermission::ReadAndWrite);
+ block_manager->Update(addr, size / PageSize, state, KMemoryPermission::ReadAndWrite);
return RESULT_SUCCESS;
}
-ResultCode PageTable::SetMemoryAttribute(VAddr addr, std::size_t size, MemoryAttribute mask,
- MemoryAttribute value) {
+ResultCode KPageTable::SetMemoryAttribute(VAddr addr, std::size_t size, KMemoryAttribute mask,
+ KMemoryAttribute value) {
std::lock_guard lock{page_table_lock};
- MemoryState state{};
- MemoryPermission perm{};
- MemoryAttribute attribute{};
+ KMemoryState state{};
+ KMemoryPermission perm{};
+ KMemoryAttribute attribute{};
- CASCADE_CODE(CheckMemoryState(&state, &perm, &attribute, addr, size,
- MemoryState::FlagCanChangeAttribute,
- MemoryState::FlagCanChangeAttribute, MemoryPermission::None,
- MemoryPermission::None, MemoryAttribute::LockedAndIpcLocked,
- MemoryAttribute::None, MemoryAttribute::DeviceSharedAndUncached));
+ CASCADE_CODE(CheckMemoryState(
+ &state, &perm, &attribute, addr, size, KMemoryState::FlagCanChangeAttribute,
+ KMemoryState::FlagCanChangeAttribute, KMemoryPermission::None, KMemoryPermission::None,
+ KMemoryAttribute::LockedAndIpcLocked, KMemoryAttribute::None,
+ KMemoryAttribute::DeviceSharedAndUncached));
attribute = attribute & ~mask;
attribute = attribute | (mask & value);
@@ -759,16 +761,16 @@ ResultCode PageTable::SetMemoryAttribute(VAddr addr, std::size_t size, MemoryAtt
return RESULT_SUCCESS;
}
-ResultCode PageTable::SetHeapCapacity(std::size_t new_heap_capacity) {
+ResultCode KPageTable::SetHeapCapacity(std::size_t new_heap_capacity) {
std::lock_guard lock{page_table_lock};
heap_capacity = new_heap_capacity;
return RESULT_SUCCESS;
}
-ResultVal<VAddr> PageTable::SetHeapSize(std::size_t size) {
+ResultVal<VAddr> KPageTable::SetHeapSize(std::size_t size) {
if (size > heap_region_end - heap_region_start) {
- return ERR_OUT_OF_MEMORY;
+ return ResultOutOfMemory;
}
const u64 previous_heap_size{GetHeapSize()};
@@ -781,27 +783,34 @@ ResultVal<VAddr> PageTable::SetHeapSize(std::size_t size) {
const u64 delta{size - previous_heap_size};
- auto process{system.Kernel().CurrentProcess()};
- if (process->GetResourceLimit() && delta != 0 &&
- !process->GetResourceLimit()->Reserve(LimitableResource::PhysicalMemory, delta)) {
- return ERR_RESOURCE_LIMIT_EXCEEDED;
+ // Reserve memory for the heap extension.
+ KScopedResourceReservation memory_reservation(
+ system.Kernel().CurrentProcess()->GetResourceLimit(), LimitableResource::PhysicalMemory,
+ delta);
+
+ if (!memory_reservation.Succeeded()) {
+ LOG_ERROR(Kernel, "Could not reserve heap extension of size {:X} bytes", delta);
+ return ResultResourceLimitedExceeded;
}
- PageLinkedList page_linked_list;
+ KPageLinkedList page_linked_list;
const std::size_t num_pages{delta / PageSize};
CASCADE_CODE(
system.Kernel().MemoryManager().Allocate(page_linked_list, num_pages, memory_pool));
if (IsRegionMapped(current_heap_addr, delta)) {
- return ERR_INVALID_ADDRESS_STATE;
+ return ResultInvalidCurrentMemory;
}
CASCADE_CODE(
Operate(current_heap_addr, num_pages, page_linked_list, OperationType::MapGroup));
- block_manager->Update(current_heap_addr, num_pages, MemoryState::Normal,
- MemoryPermission::ReadAndWrite);
+ // Allocation succeeded, so commit the resource reservation.
+ memory_reservation.Commit();
+
+ block_manager->Update(current_heap_addr, num_pages, KMemoryState::Normal,
+ KMemoryPermission::ReadAndWrite);
current_heap_addr = heap_region_start + size;
}
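Both MapPhysicalMemory and SetHeapSize now follow the same reserve/commit shape: reserve against the process resource limit up front, commit only once allocation and mapping have succeeded, and let the destructor release the reservation on any early-error return. A toy sketch of that RAII pattern, with a plain counter standing in for KResourceLimit (ScopedReservation here is hypothetical, not the kernel class):

    #include <cstddef>

    class ScopedReservation {
    public:
        ScopedReservation(std::size_t& pool, std::size_t amount)
            : pool_{pool}, amount_{amount}, succeeded_{pool >= amount} {
            if (succeeded_) {
                pool_ -= amount_;
            }
        }
        ~ScopedReservation() {
            // Roll the reservation back unless the caller committed it.
            if (succeeded_ && !committed_) {
                pool_ += amount_;
            }
        }
        bool Succeeded() const { return succeeded_; }
        void Commit() { committed_ = true; }
    private:
        std::size_t& pool_;
        std::size_t amount_;
        bool succeeded_;
        bool committed_{false};
    };

This is what replaces the manual ScopeExit blocks that previously had to free the allocated pages and call Release by hand on every failure path.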
@@ -809,30 +818,30 @@ ResultVal<VAddr> PageTable::SetHeapSize(std::size_t size) {
return MakeResult<VAddr>(heap_region_start);
}
-ResultVal<VAddr> PageTable::AllocateAndMapMemory(std::size_t needed_num_pages, std::size_t align,
- bool is_map_only, VAddr region_start,
- std::size_t region_num_pages, MemoryState state,
- MemoryPermission perm, PAddr map_addr) {
+ResultVal<VAddr> KPageTable::AllocateAndMapMemory(std::size_t needed_num_pages, std::size_t align,
+ bool is_map_only, VAddr region_start,
+ std::size_t region_num_pages, KMemoryState state,
+ KMemoryPermission perm, PAddr map_addr) {
std::lock_guard lock{page_table_lock};
if (!CanContain(region_start, region_num_pages * PageSize, state)) {
- return ERR_INVALID_ADDRESS_STATE;
+ return ResultInvalidCurrentMemory;
}
if (region_num_pages <= needed_num_pages) {
- return ERR_OUT_OF_MEMORY;
+ return ResultOutOfMemory;
}
const VAddr addr{
AllocateVirtualMemory(region_start, region_num_pages, needed_num_pages, align)};
if (!addr) {
- return ERR_OUT_OF_MEMORY;
+ return ResultOutOfMemory;
}
if (is_map_only) {
CASCADE_CODE(Operate(addr, needed_num_pages, perm, OperationType::Map, map_addr));
} else {
- PageLinkedList page_group;
+ KPageLinkedList page_group;
CASCADE_CODE(
system.Kernel().MemoryManager().Allocate(page_group, needed_num_pages, memory_pool));
CASCADE_CODE(Operate(addr, needed_num_pages, page_group, OperationType::MapGroup));
@@ -843,22 +852,22 @@ ResultVal<VAddr> PageTable::AllocateAndMapMemory(std::size_t needed_num_pages, s
return MakeResult<VAddr>(addr);
}
-ResultCode PageTable::LockForDeviceAddressSpace(VAddr addr, std::size_t size) {
+ResultCode KPageTable::LockForDeviceAddressSpace(VAddr addr, std::size_t size) {
std::lock_guard lock{page_table_lock};
- MemoryPermission perm{};
+ KMemoryPermission perm{};
if (const ResultCode result{CheckMemoryState(
- nullptr, &perm, nullptr, addr, size, MemoryState::FlagCanChangeAttribute,
- MemoryState::FlagCanChangeAttribute, MemoryPermission::None, MemoryPermission::None,
- MemoryAttribute::LockedAndIpcLocked, MemoryAttribute::None,
- MemoryAttribute::DeviceSharedAndUncached)};
+ nullptr, &perm, nullptr, addr, size, KMemoryState::FlagCanChangeAttribute,
+ KMemoryState::FlagCanChangeAttribute, KMemoryPermission::None, KMemoryPermission::None,
+ KMemoryAttribute::LockedAndIpcLocked, KMemoryAttribute::None,
+ KMemoryAttribute::DeviceSharedAndUncached)};
result.IsError()) {
return result;
}
block_manager->UpdateLock(
addr, size / PageSize,
- [](MemoryBlockManager::iterator block, MemoryPermission perm) {
+ [](KMemoryBlockManager::iterator block, KMemoryPermission perm) {
block->ShareToDevice(perm);
},
perm);
@@ -866,22 +875,22 @@ ResultCode PageTable::LockForDeviceAddressSpace(VAddr addr, std::size_t size) {
return RESULT_SUCCESS;
}
-ResultCode PageTable::UnlockForDeviceAddressSpace(VAddr addr, std::size_t size) {
+ResultCode KPageTable::UnlockForDeviceAddressSpace(VAddr addr, std::size_t size) {
std::lock_guard lock{page_table_lock};
- MemoryPermission perm{};
+ KMemoryPermission perm{};
if (const ResultCode result{CheckMemoryState(
- nullptr, &perm, nullptr, addr, size, MemoryState::FlagCanChangeAttribute,
- MemoryState::FlagCanChangeAttribute, MemoryPermission::None, MemoryPermission::None,
- MemoryAttribute::LockedAndIpcLocked, MemoryAttribute::None,
- MemoryAttribute::DeviceSharedAndUncached)};
+ nullptr, &perm, nullptr, addr, size, KMemoryState::FlagCanChangeAttribute,
+ KMemoryState::FlagCanChangeAttribute, KMemoryPermission::None, KMemoryPermission::None,
+ KMemoryAttribute::LockedAndIpcLocked, KMemoryAttribute::None,
+ KMemoryAttribute::DeviceSharedAndUncached)};
result.IsError()) {
return result;
}
block_manager->UpdateLock(
addr, size / PageSize,
- [](MemoryBlockManager::iterator block, MemoryPermission perm) {
+ [](KMemoryBlockManager::iterator block, KMemoryPermission perm) {
block->UnshareToDevice(perm);
},
perm);
@@ -889,20 +898,21 @@ ResultCode PageTable::UnlockForDeviceAddressSpace(VAddr addr, std::size_t size)
return RESULT_SUCCESS;
}
-ResultCode PageTable::InitializeMemoryLayout(VAddr start, VAddr end) {
- block_manager = std::make_unique<MemoryBlockManager>(start, end);
+ResultCode KPageTable::InitializeMemoryLayout(VAddr start, VAddr end) {
+ block_manager = std::make_unique<KMemoryBlockManager>(start, end);
return RESULT_SUCCESS;
}
-bool PageTable::IsRegionMapped(VAddr address, u64 size) {
- return CheckMemoryState(address, size, MemoryState::All, MemoryState::Free,
- MemoryPermission::Mask, MemoryPermission::None, MemoryAttribute::Mask,
- MemoryAttribute::None, MemoryAttribute::IpcAndDeviceMapped)
+bool KPageTable::IsRegionMapped(VAddr address, u64 size) {
+ return CheckMemoryState(address, size, KMemoryState::All, KMemoryState::Free,
+ KMemoryPermission::Mask, KMemoryPermission::None,
+ KMemoryAttribute::Mask, KMemoryAttribute::None,
+ KMemoryAttribute::IpcAndDeviceMapped)
.IsError();
}
-bool PageTable::IsRegionContiguous(VAddr addr, u64 size) const {
+bool KPageTable::IsRegionContiguous(VAddr addr, u64 size) const {
auto start_ptr = system.Memory().GetPointer(addr);
for (u64 offset{}; offset < size; offset += PageSize) {
if (start_ptr != system.Memory().GetPointer(addr + offset)) {
@@ -913,8 +923,8 @@ bool PageTable::IsRegionContiguous(VAddr addr, u64 size) const {
return true;
}
-void PageTable::AddRegionToPages(VAddr start, std::size_t num_pages,
- PageLinkedList& page_linked_list) {
+void KPageTable::AddRegionToPages(VAddr start, std::size_t num_pages,
+ KPageLinkedList& page_linked_list) {
VAddr addr{start};
while (addr < start + (num_pages * PageSize)) {
const PAddr paddr{GetPhysicalAddr(addr)};
@@ -926,8 +936,8 @@ void PageTable::AddRegionToPages(VAddr start, std::size_t num_pages,
}
}
-VAddr PageTable::AllocateVirtualMemory(VAddr start, std::size_t region_num_pages,
- u64 needed_num_pages, std::size_t align) {
+VAddr KPageTable::AllocateVirtualMemory(VAddr start, std::size_t region_num_pages,
+ u64 needed_num_pages, std::size_t align) {
if (is_aslr_enabled) {
UNIMPLEMENTED();
}
@@ -935,8 +945,8 @@ VAddr PageTable::AllocateVirtualMemory(VAddr start, std::size_t region_num_pages
IsKernel() ? 1 : 4);
}
-ResultCode PageTable::Operate(VAddr addr, std::size_t num_pages, const PageLinkedList& page_group,
- OperationType operation) {
+ResultCode KPageTable::Operate(VAddr addr, std::size_t num_pages, const KPageLinkedList& page_group,
+ OperationType operation) {
std::lock_guard lock{page_table_lock};
ASSERT(Common::IsAligned(addr, PageSize));
@@ -960,8 +970,8 @@ ResultCode PageTable::Operate(VAddr addr, std::size_t num_pages, const PageLinke
return RESULT_SUCCESS;
}
-ResultCode PageTable::Operate(VAddr addr, std::size_t num_pages, MemoryPermission perm,
- OperationType operation, PAddr map_addr) {
+ResultCode KPageTable::Operate(VAddr addr, std::size_t num_pages, KMemoryPermission perm,
+ OperationType operation, PAddr map_addr) {
std::lock_guard lock{page_table_lock};
ASSERT(num_pages > 0);
@@ -987,34 +997,34 @@ ResultCode PageTable::Operate(VAddr addr, std::size_t num_pages, MemoryPermissio
return RESULT_SUCCESS;
}
-constexpr VAddr PageTable::GetRegionAddress(MemoryState state) const {
+constexpr VAddr KPageTable::GetRegionAddress(KMemoryState state) const {
switch (state) {
- case MemoryState::Free:
- case MemoryState::Kernel:
+ case KMemoryState::Free:
+ case KMemoryState::Kernel:
return address_space_start;
- case MemoryState::Normal:
+ case KMemoryState::Normal:
return heap_region_start;
- case MemoryState::Ipc:
- case MemoryState::NonSecureIpc:
- case MemoryState::NonDeviceIpc:
+ case KMemoryState::Ipc:
+ case KMemoryState::NonSecureIpc:
+ case KMemoryState::NonDeviceIpc:
return alias_region_start;
- case MemoryState::Stack:
+ case KMemoryState::Stack:
return stack_region_start;
- case MemoryState::Io:
- case MemoryState::Static:
- case MemoryState::ThreadLocal:
+ case KMemoryState::Io:
+ case KMemoryState::Static:
+ case KMemoryState::ThreadLocal:
return kernel_map_region_start;
- case MemoryState::Shared:
- case MemoryState::AliasCode:
- case MemoryState::AliasCodeData:
- case MemoryState::Transferred:
- case MemoryState::SharedTransferred:
- case MemoryState::SharedCode:
- case MemoryState::GeneratedCode:
- case MemoryState::CodeOut:
+ case KMemoryState::Shared:
+ case KMemoryState::AliasCode:
+ case KMemoryState::AliasCodeData:
+ case KMemoryState::Transferred:
+ case KMemoryState::SharedTransferred:
+ case KMemoryState::SharedCode:
+ case KMemoryState::GeneratedCode:
+ case KMemoryState::CodeOut:
return alias_code_region_start;
- case MemoryState::Code:
- case MemoryState::CodeData:
+ case KMemoryState::Code:
+ case KMemoryState::CodeData:
return code_region_start;
default:
UNREACHABLE();
@@ -1022,34 +1032,34 @@ constexpr VAddr PageTable::GetRegionAddress(MemoryState state) const {
}
}
-constexpr std::size_t PageTable::GetRegionSize(MemoryState state) const {
+constexpr std::size_t KPageTable::GetRegionSize(KMemoryState state) const {
switch (state) {
- case MemoryState::Free:
- case MemoryState::Kernel:
+ case KMemoryState::Free:
+ case KMemoryState::Kernel:
return address_space_end - address_space_start;
- case MemoryState::Normal:
+ case KMemoryState::Normal:
return heap_region_end - heap_region_start;
- case MemoryState::Ipc:
- case MemoryState::NonSecureIpc:
- case MemoryState::NonDeviceIpc:
+ case KMemoryState::Ipc:
+ case KMemoryState::NonSecureIpc:
+ case KMemoryState::NonDeviceIpc:
return alias_region_end - alias_region_start;
- case MemoryState::Stack:
+ case KMemoryState::Stack:
return stack_region_end - stack_region_start;
- case MemoryState::Io:
- case MemoryState::Static:
- case MemoryState::ThreadLocal:
+ case KMemoryState::Io:
+ case KMemoryState::Static:
+ case KMemoryState::ThreadLocal:
return kernel_map_region_end - kernel_map_region_start;
- case MemoryState::Shared:
- case MemoryState::AliasCode:
- case MemoryState::AliasCodeData:
- case MemoryState::Transferred:
- case MemoryState::SharedTransferred:
- case MemoryState::SharedCode:
- case MemoryState::GeneratedCode:
- case MemoryState::CodeOut:
+ case KMemoryState::Shared:
+ case KMemoryState::AliasCode:
+ case KMemoryState::AliasCodeData:
+ case KMemoryState::Transferred:
+ case KMemoryState::SharedTransferred:
+ case KMemoryState::SharedCode:
+ case KMemoryState::GeneratedCode:
+ case KMemoryState::CodeOut:
return alias_code_region_end - alias_code_region_start;
- case MemoryState::Code:
- case MemoryState::CodeData:
+ case KMemoryState::Code:
+ case KMemoryState::CodeData:
return code_region_end - code_region_start;
default:
UNREACHABLE();
@@ -1057,7 +1067,7 @@ constexpr std::size_t PageTable::GetRegionSize(MemoryState state) const {
}
}
-constexpr bool PageTable::CanContain(VAddr addr, std::size_t size, MemoryState state) const {
+constexpr bool KPageTable::CanContain(VAddr addr, std::size_t size, KMemoryState state) const {
const VAddr end{addr + size};
const VAddr last{end - 1};
const VAddr region_start{GetRegionAddress(state)};
@@ -1068,30 +1078,30 @@ constexpr bool PageTable::CanContain(VAddr addr, std::size_t size, MemoryState s
const bool is_in_alias{!(end <= alias_region_start || alias_region_end <= addr)};
switch (state) {
- case MemoryState::Free:
- case MemoryState::Kernel:
+ case KMemoryState::Free:
+ case KMemoryState::Kernel:
return is_in_region;
- case MemoryState::Io:
- case MemoryState::Static:
- case MemoryState::Code:
- case MemoryState::CodeData:
- case MemoryState::Shared:
- case MemoryState::AliasCode:
- case MemoryState::AliasCodeData:
- case MemoryState::Stack:
- case MemoryState::ThreadLocal:
- case MemoryState::Transferred:
- case MemoryState::SharedTransferred:
- case MemoryState::SharedCode:
- case MemoryState::GeneratedCode:
- case MemoryState::CodeOut:
+ case KMemoryState::Io:
+ case KMemoryState::Static:
+ case KMemoryState::Code:
+ case KMemoryState::CodeData:
+ case KMemoryState::Shared:
+ case KMemoryState::AliasCode:
+ case KMemoryState::AliasCodeData:
+ case KMemoryState::Stack:
+ case KMemoryState::ThreadLocal:
+ case KMemoryState::Transferred:
+ case KMemoryState::SharedTransferred:
+ case KMemoryState::SharedCode:
+ case KMemoryState::GeneratedCode:
+ case KMemoryState::CodeOut:
return is_in_region && !is_in_heap && !is_in_alias;
- case MemoryState::Normal:
+ case KMemoryState::Normal:
ASSERT(is_in_heap);
return is_in_region && !is_in_alias;
- case MemoryState::Ipc:
- case MemoryState::NonSecureIpc:
- case MemoryState::NonDeviceIpc:
+ case KMemoryState::Ipc:
+ case KMemoryState::NonSecureIpc:
+ case KMemoryState::NonDeviceIpc:
ASSERT(is_in_alias);
return is_in_region && !is_in_heap;
default:
@@ -1099,53 +1109,54 @@ constexpr bool PageTable::CanContain(VAddr addr, std::size_t size, MemoryState s
}
}
-constexpr ResultCode PageTable::CheckMemoryState(const MemoryInfo& info, MemoryState state_mask,
- MemoryState state, MemoryPermission perm_mask,
- MemoryPermission perm, MemoryAttribute attr_mask,
- MemoryAttribute attr) const {
+constexpr ResultCode KPageTable::CheckMemoryState(const KMemoryInfo& info, KMemoryState state_mask,
+ KMemoryState state, KMemoryPermission perm_mask,
+ KMemoryPermission perm,
+ KMemoryAttribute attr_mask,
+ KMemoryAttribute attr) const {
// Validate the states match expectation
if ((info.state & state_mask) != state) {
- return ERR_INVALID_ADDRESS_STATE;
+ return ResultInvalidCurrentMemory;
}
if ((info.perm & perm_mask) != perm) {
- return ERR_INVALID_ADDRESS_STATE;
+ return ResultInvalidCurrentMemory;
}
if ((info.attribute & attr_mask) != attr) {
- return ERR_INVALID_ADDRESS_STATE;
+ return ResultInvalidCurrentMemory;
}
return RESULT_SUCCESS;
}
-ResultCode PageTable::CheckMemoryState(MemoryState* out_state, MemoryPermission* out_perm,
- MemoryAttribute* out_attr, VAddr addr, std::size_t size,
- MemoryState state_mask, MemoryState state,
- MemoryPermission perm_mask, MemoryPermission perm,
- MemoryAttribute attr_mask, MemoryAttribute attr,
- MemoryAttribute ignore_attr) {
+ResultCode KPageTable::CheckMemoryState(KMemoryState* out_state, KMemoryPermission* out_perm,
+ KMemoryAttribute* out_attr, VAddr addr, std::size_t size,
+ KMemoryState state_mask, KMemoryState state,
+ KMemoryPermission perm_mask, KMemoryPermission perm,
+ KMemoryAttribute attr_mask, KMemoryAttribute attr,
+ KMemoryAttribute ignore_attr) {
std::lock_guard lock{page_table_lock};
// Get information about the first block
const VAddr last_addr{addr + size - 1};
- MemoryBlockManager::const_iterator it{block_manager->FindIterator(addr)};
- MemoryInfo info{it->GetMemoryInfo()};
+ KMemoryBlockManager::const_iterator it{block_manager->FindIterator(addr)};
+ KMemoryInfo info{it->GetMemoryInfo()};
// Validate all blocks in the range have correct state
- const MemoryState first_state{info.state};
- const MemoryPermission first_perm{info.perm};
- const MemoryAttribute first_attr{info.attribute};
+ const KMemoryState first_state{info.state};
+ const KMemoryPermission first_perm{info.perm};
+ const KMemoryAttribute first_attr{info.attribute};
while (true) {
// Validate the current block
if (!(info.state == first_state)) {
- return ERR_INVALID_ADDRESS_STATE;
+ return ResultInvalidCurrentMemory;
}
if (!(info.perm == first_perm)) {
- return ERR_INVALID_ADDRESS_STATE;
+ return ResultInvalidCurrentMemory;
}
- if (!((info.attribute | static_cast<MemoryAttribute>(ignore_attr)) ==
- (first_attr | static_cast<MemoryAttribute>(ignore_attr)))) {
- return ERR_INVALID_ADDRESS_STATE;
+ if (!((info.attribute | static_cast<KMemoryAttribute>(ignore_attr)) ==
+ (first_attr | static_cast<KMemoryAttribute>(ignore_attr)))) {
+ return ResultInvalidCurrentMemory;
}
// Validate against the provided masks
@@ -1170,10 +1181,10 @@ ResultCode PageTable::CheckMemoryState(MemoryState* out_state, MemoryPermission*
*out_perm = first_perm;
}
if (out_attr) {
- *out_attr = first_attr & static_cast<MemoryAttribute>(~ignore_attr);
+ *out_attr = first_attr & static_cast<KMemoryAttribute>(~ignore_attr);
}
return RESULT_SUCCESS;
}
-} // namespace Kernel::Memory
+} // namespace Kernel
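CanContain's is_in_heap/is_in_alias tests above are half-open interval overlap checks: a range misses a window only when it ends at or before the window's start, or begins at or after its end. In miniature (illustrative helper, not kernel code):

    #include <cstdint>

    constexpr bool Overlaps(std::uint64_t a_begin, std::uint64_t a_end,
                            std::uint64_t b_begin, std::uint64_t b_end) {
        return !(a_end <= b_begin || b_end <= a_begin);
    }

    static_assert(Overlaps(0x1000, 0x2000, 0x1800, 0x3000));
    static_assert(!Overlaps(0x1000, 0x2000, 0x2000, 0x3000)); // touching is not overlapping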
diff --git a/src/core/hle/kernel/memory/page_table.h b/src/core/hle/kernel/k_page_table.h
index ce0d38849..49b824379 100644
--- a/src/core/hle/kernel/memory/page_table.h
+++ b/src/core/hle/kernel/k_page_table.h
@@ -10,27 +10,27 @@
#include "common/common_types.h"
#include "common/page_table.h"
#include "core/file_sys/program_metadata.h"
-#include "core/hle/kernel/memory/memory_block.h"
-#include "core/hle/kernel/memory/memory_manager.h"
+#include "core/hle/kernel/k_memory_block.h"
+#include "core/hle/kernel/k_memory_manager.h"
#include "core/hle/result.h"
namespace Core {
class System;
}
-namespace Kernel::Memory {
+namespace Kernel {
-class MemoryBlockManager;
+class KMemoryBlockManager;
-class PageTable final : NonCopyable {
+class KPageTable final : NonCopyable {
public:
- explicit PageTable(Core::System& system);
+ explicit KPageTable(Core::System& system);
ResultCode InitializeForProcess(FileSys::ProgramAddressSpaceType as_type, bool enable_aslr,
VAddr code_addr, std::size_t code_size,
- Memory::MemoryManager::Pool pool);
- ResultCode MapProcessCode(VAddr addr, std::size_t pages_count, MemoryState state,
- MemoryPermission perm);
+ KMemoryManager::Pool pool);
+ ResultCode MapProcessCode(VAddr addr, std::size_t pages_count, KMemoryState state,
+ KMemoryPermission perm);
ResultCode MapProcessCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size);
ResultCode UnmapProcessCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size);
ResultCode MapPhysicalMemory(VAddr addr, std::size_t size);
@@ -38,20 +38,20 @@ public:
ResultCode UnmapMemory(VAddr addr, std::size_t size);
ResultCode Map(VAddr dst_addr, VAddr src_addr, std::size_t size);
ResultCode Unmap(VAddr dst_addr, VAddr src_addr, std::size_t size);
- ResultCode MapPages(VAddr addr, PageLinkedList& page_linked_list, MemoryState state,
- MemoryPermission perm);
- ResultCode SetCodeMemoryPermission(VAddr addr, std::size_t size, MemoryPermission perm);
- MemoryInfo QueryInfo(VAddr addr);
- ResultCode ReserveTransferMemory(VAddr addr, std::size_t size, MemoryPermission perm);
+ ResultCode MapPages(VAddr addr, KPageLinkedList& page_linked_list, KMemoryState state,
+ KMemoryPermission perm);
+ ResultCode SetCodeMemoryPermission(VAddr addr, std::size_t size, KMemoryPermission perm);
+ KMemoryInfo QueryInfo(VAddr addr);
+ ResultCode ReserveTransferMemory(VAddr addr, std::size_t size, KMemoryPermission perm);
ResultCode ResetTransferMemory(VAddr addr, std::size_t size);
- ResultCode SetMemoryAttribute(VAddr addr, std::size_t size, MemoryAttribute mask,
- MemoryAttribute value);
+ ResultCode SetMemoryAttribute(VAddr addr, std::size_t size, KMemoryAttribute mask,
+ KMemoryAttribute value);
ResultCode SetHeapCapacity(std::size_t new_heap_capacity);
ResultVal<VAddr> SetHeapSize(std::size_t size);
ResultVal<VAddr> AllocateAndMapMemory(std::size_t needed_num_pages, std::size_t align,
bool is_map_only, VAddr region_start,
- std::size_t region_num_pages, MemoryState state,
- MemoryPermission perm, PAddr map_addr = 0);
+ std::size_t region_num_pages, KMemoryState state,
+ KMemoryPermission perm, PAddr map_addr = 0);
ResultCode LockForDeviceAddressSpace(VAddr addr, std::size_t size);
ResultCode UnlockForDeviceAddressSpace(VAddr addr, std::size_t size);
@@ -72,47 +72,49 @@ private:
ChangePermissionsAndRefresh,
};
- static constexpr MemoryAttribute DefaultMemoryIgnoreAttr =
- MemoryAttribute::DontCareMask | MemoryAttribute::IpcLocked | MemoryAttribute::DeviceShared;
+ static constexpr KMemoryAttribute DefaultMemoryIgnoreAttr = KMemoryAttribute::DontCareMask |
+ KMemoryAttribute::IpcLocked |
+ KMemoryAttribute::DeviceShared;
ResultCode InitializeMemoryLayout(VAddr start, VAddr end);
- ResultCode MapPages(VAddr addr, const PageLinkedList& page_linked_list, MemoryPermission perm);
- void MapPhysicalMemory(PageLinkedList& page_linked_list, VAddr start, VAddr end);
+ ResultCode MapPages(VAddr addr, const KPageLinkedList& page_linked_list,
+ KMemoryPermission perm);
+ void MapPhysicalMemory(KPageLinkedList& page_linked_list, VAddr start, VAddr end);
bool IsRegionMapped(VAddr address, u64 size);
bool IsRegionContiguous(VAddr addr, u64 size) const;
- void AddRegionToPages(VAddr start, std::size_t num_pages, PageLinkedList& page_linked_list);
- MemoryInfo QueryInfoImpl(VAddr addr);
+ void AddRegionToPages(VAddr start, std::size_t num_pages, KPageLinkedList& page_linked_list);
+ KMemoryInfo QueryInfoImpl(VAddr addr);
VAddr AllocateVirtualMemory(VAddr start, std::size_t region_num_pages, u64 needed_num_pages,
std::size_t align);
- ResultCode Operate(VAddr addr, std::size_t num_pages, const PageLinkedList& page_group,
+ ResultCode Operate(VAddr addr, std::size_t num_pages, const KPageLinkedList& page_group,
OperationType operation);
- ResultCode Operate(VAddr addr, std::size_t num_pages, MemoryPermission perm,
+ ResultCode Operate(VAddr addr, std::size_t num_pages, KMemoryPermission perm,
OperationType operation, PAddr map_addr = 0);
- constexpr VAddr GetRegionAddress(MemoryState state) const;
- constexpr std::size_t GetRegionSize(MemoryState state) const;
- constexpr bool CanContain(VAddr addr, std::size_t size, MemoryState state) const;
+ constexpr VAddr GetRegionAddress(KMemoryState state) const;
+ constexpr std::size_t GetRegionSize(KMemoryState state) const;
+ constexpr bool CanContain(VAddr addr, std::size_t size, KMemoryState state) const;
- constexpr ResultCode CheckMemoryState(const MemoryInfo& info, MemoryState state_mask,
- MemoryState state, MemoryPermission perm_mask,
- MemoryPermission perm, MemoryAttribute attr_mask,
- MemoryAttribute attr) const;
- ResultCode CheckMemoryState(MemoryState* out_state, MemoryPermission* out_perm,
- MemoryAttribute* out_attr, VAddr addr, std::size_t size,
- MemoryState state_mask, MemoryState state,
- MemoryPermission perm_mask, MemoryPermission perm,
- MemoryAttribute attr_mask, MemoryAttribute attr,
- MemoryAttribute ignore_attr = DefaultMemoryIgnoreAttr);
- ResultCode CheckMemoryState(VAddr addr, std::size_t size, MemoryState state_mask,
- MemoryState state, MemoryPermission perm_mask,
- MemoryPermission perm, MemoryAttribute attr_mask,
- MemoryAttribute attr,
- MemoryAttribute ignore_attr = DefaultMemoryIgnoreAttr) {
+ constexpr ResultCode CheckMemoryState(const KMemoryInfo& info, KMemoryState state_mask,
+ KMemoryState state, KMemoryPermission perm_mask,
+ KMemoryPermission perm, KMemoryAttribute attr_mask,
+ KMemoryAttribute attr) const;
+ ResultCode CheckMemoryState(KMemoryState* out_state, KMemoryPermission* out_perm,
+ KMemoryAttribute* out_attr, VAddr addr, std::size_t size,
+ KMemoryState state_mask, KMemoryState state,
+ KMemoryPermission perm_mask, KMemoryPermission perm,
+ KMemoryAttribute attr_mask, KMemoryAttribute attr,
+ KMemoryAttribute ignore_attr = DefaultMemoryIgnoreAttr);
+ ResultCode CheckMemoryState(VAddr addr, std::size_t size, KMemoryState state_mask,
+ KMemoryState state, KMemoryPermission perm_mask,
+ KMemoryPermission perm, KMemoryAttribute attr_mask,
+ KMemoryAttribute attr,
+ KMemoryAttribute ignore_attr = DefaultMemoryIgnoreAttr) {
return CheckMemoryState(nullptr, nullptr, nullptr, addr, size, state_mask, state, perm_mask,
perm, attr_mask, attr, ignore_attr);
}
std::recursive_mutex page_table_lock;
- std::unique_ptr<MemoryBlockManager> block_manager;
+ std::unique_ptr<KMemoryBlockManager> block_manager;
public:
constexpr VAddr GetAddressSpaceStart() const {
@@ -212,7 +214,7 @@ public:
return !IsOutsideASLRRegion(address, size);
}
constexpr PAddr GetPhysicalAddr(VAddr addr) {
- return page_table_impl.backing_addr[addr >> Memory::PageBits] + addr;
+ return page_table_impl.backing_addr[addr >> PageBits] + addr;
}
private:
@@ -267,11 +269,11 @@ private:
bool is_kernel{};
bool is_aslr_enabled{};
- MemoryManager::Pool memory_pool{MemoryManager::Pool::Application};
+ KMemoryManager::Pool memory_pool{KMemoryManager::Pool::Application};
Common::PageTable page_table_impl;
Core::System& system;
};
-} // namespace Kernel::Memory
+} // namespace Kernel
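Each (mask, value) pair in the CheckMemoryState overloads above expresses an exact-match test on the masked bits of the queried field: the state, permission, and attribute of a range must each satisfy (field & mask) == value. A minimal sketch of that idiom, independent of this class (the helper name is illustrative):

    #include <type_traits>

    // A field passes when its masked bits exactly match the expected value.
    template <typename Enum>
    constexpr bool MatchesMasked(Enum field, Enum mask, Enum value) {
        using U = std::underlying_type_t<Enum>;
        return (static_cast<U>(field) & static_cast<U>(mask)) == static_cast<U>(value);
    }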
diff --git a/src/core/hle/kernel/k_readable_event.cpp b/src/core/hle/kernel/k_readable_event.cpp
index d8a42dbaf..4b4d34857 100644
--- a/src/core/hle/kernel/k_readable_event.cpp
+++ b/src/core/hle/kernel/k_readable_event.cpp
@@ -6,7 +6,6 @@
#include "common/assert.h"
#include "common/common_funcs.h"
#include "common/logging/log.h"
-#include "core/hle/kernel/errors.h"
#include "core/hle/kernel/k_readable_event.h"
#include "core/hle/kernel/k_scheduler.h"
#include "core/hle/kernel/k_thread.h"
@@ -47,7 +46,7 @@ ResultCode KReadableEvent::Reset() {
KScopedSchedulerLock lk{kernel};
if (!is_signaled) {
- return Svc::ResultInvalidState;
+ return ResultInvalidState;
}
is_signaled = false;
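The recurring change in this commit replaces Svc::ResultX with plain Kernel::ResultX constants provided by svc_results.h. A plausible shape for those definitions, sketched with description values that are assumptions rather than taken from this diff:

    // Hypothetical sketch of svc_results.h-style constants.
    constexpr ResultCode ResultTerminationRequested{ErrorModule::Kernel, 59};
    constexpr ResultCode ResultTimedOut{ErrorModule::Kernel, 117};
    constexpr ResultCode ResultCancelled{ErrorModule::Kernel, 118};
    constexpr ResultCode ResultInvalidState{ErrorModule::Kernel, 125};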
diff --git a/src/core/hle/kernel/k_resource_limit.cpp b/src/core/hle/kernel/k_resource_limit.cpp
index ab2ab683f..d7a4a38e6 100644
--- a/src/core/hle/kernel/k_resource_limit.cpp
+++ b/src/core/hle/kernel/k_resource_limit.cpp
@@ -75,7 +75,7 @@ s64 KResourceLimit::GetFreeValue(LimitableResource which) const {
ResultCode KResourceLimit::SetLimitValue(LimitableResource which, s64 value) {
const auto index = static_cast<std::size_t>(which);
KScopedLightLock lk(lock);
- R_UNLESS(current_values[index] <= value, Svc::ResultInvalidState);
+ R_UNLESS(current_values[index] <= value, ResultInvalidState);
limit_values[index] = value;
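R_UNLESS, used throughout these hunks, returns the given ResultCode from the enclosing function when its condition does not hold. A minimal sketch of such a macro (yuzu's real definition lives in its result headers and may differ):

    // Sketch: early-return on a failed precondition.
    #define R_UNLESS(expr, res)                                                \
        {                                                                      \
            if (!(expr)) {                                                     \
                return res;                                                    \
            }                                                                  \
        }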
diff --git a/src/core/hle/kernel/k_scoped_resource_reservation.h b/src/core/hle/kernel/k_scoped_resource_reservation.h
new file mode 100644
index 000000000..c5deca00b
--- /dev/null
+++ b/src/core/hle/kernel/k_scoped_resource_reservation.h
@@ -0,0 +1,67 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+// This file references various implementation details from Atmosphere, an open-source firmware for
+// the Nintendo Switch. Copyright 2018-2020 Atmosphere-NX.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "core/hle/kernel/k_resource_limit.h"
+#include "core/hle/kernel/process.h"
+
+namespace Kernel {
+
+class KScopedResourceReservation {
+public:
+ explicit KScopedResourceReservation(std::shared_ptr<KResourceLimit> l, LimitableResource r,
+ s64 v, s64 timeout)
+ : resource_limit(std::move(l)), value(v), resource(r) {
+ if (resource_limit && value) {
+ success = resource_limit->Reserve(resource, value, timeout);
+ } else {
+ success = true;
+ }
+ }
+
+ explicit KScopedResourceReservation(std::shared_ptr<KResourceLimit> l, LimitableResource r,
+ s64 v = 1)
+ : resource_limit(std::move(l)), value(v), resource(r) {
+ if (resource_limit && value) {
+ success = resource_limit->Reserve(resource, value);
+ } else {
+ success = true;
+ }
+ }
+
+ explicit KScopedResourceReservation(const Process* p, LimitableResource r, s64 v, s64 t)
+ : KScopedResourceReservation(p->GetResourceLimit(), r, v, t) {}
+
+ explicit KScopedResourceReservation(const Process* p, LimitableResource r, s64 v = 1)
+ : KScopedResourceReservation(p->GetResourceLimit(), r, v) {}
+
+ ~KScopedResourceReservation() noexcept {
+ if (resource_limit && value && success) {
+ // The resource was not committed; release the reservation.
+ resource_limit->Release(resource, value);
+ }
+ }
+
+ /// Commits the resource reservation; once committed, destruction of this object no longer releases the resource.
+ void Commit() {
+ resource_limit = nullptr;
+ }
+
+ [[nodiscard]] bool Succeeded() const {
+ return success;
+ }
+
+private:
+ std::shared_ptr<KResourceLimit> resource_limit;
+ s64 value;
+ LimitableResource resource;
+ bool success;
+};
+
+} // namespace Kernel
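Typical use of this RAII helper mirrors KSharedMemory::Create below: reserve up front, bail out if the reservation failed, and Commit() only once the resource has actually been handed out. A sketch (the surrounding names are illustrative):

    KScopedResourceReservation reservation(resource_limit,
                                           LimitableResource::PhysicalMemory, size);
    if (!reservation.Succeeded()) {
        return ResultResourceLimitedExceeded; // nothing was reserved; nothing to release
    }
    // ... allocate and wire up the resource ...
    reservation.Commit(); // success: the destructor will no longer release the reservation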
diff --git a/src/core/hle/kernel/k_shared_memory.cpp b/src/core/hle/kernel/k_shared_memory.cpp
new file mode 100644
index 000000000..9b14f42b5
--- /dev/null
+++ b/src/core/hle/kernel/k_shared_memory.cpp
@@ -0,0 +1,65 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "core/core.h"
+#include "core/hle/kernel/k_page_table.h"
+#include "core/hle/kernel/k_scoped_resource_reservation.h"
+#include "core/hle/kernel/k_shared_memory.h"
+#include "core/hle/kernel/kernel.h"
+
+namespace Kernel {
+
+KSharedMemory::KSharedMemory(KernelCore& kernel, Core::DeviceMemory& device_memory)
+ : Object{kernel}, device_memory{device_memory} {}
+
+KSharedMemory::~KSharedMemory() {
+ kernel.GetSystemResourceLimit()->Release(LimitableResource::PhysicalMemory, size);
+}
+
+std::shared_ptr<KSharedMemory> KSharedMemory::Create(
+ KernelCore& kernel, Core::DeviceMemory& device_memory, Process* owner_process,
+ KPageLinkedList&& page_list, KMemoryPermission owner_permission,
+ KMemoryPermission user_permission, PAddr physical_address, std::size_t size, std::string name) {
+
+ const auto resource_limit = kernel.GetSystemResourceLimit();
+ KScopedResourceReservation memory_reservation(resource_limit, LimitableResource::PhysicalMemory,
+ size);
+ ASSERT(memory_reservation.Succeeded());
+
+ std::shared_ptr<KSharedMemory> shared_memory{
+ std::make_shared<KSharedMemory>(kernel, device_memory)};
+
+ shared_memory->owner_process = owner_process;
+ shared_memory->page_list = std::move(page_list);
+ shared_memory->owner_permission = owner_permission;
+ shared_memory->user_permission = user_permission;
+ shared_memory->physical_address = physical_address;
+ shared_memory->size = size;
+ shared_memory->name = name;
+
+ memory_reservation.Commit();
+ return shared_memory;
+}
+
+ResultCode KSharedMemory::Map(Process& target_process, VAddr address, std::size_t size,
+ KMemoryPermission permissions) {
+ const u64 page_count{(size + PageSize - 1) / PageSize};
+
+ if (page_list.GetNumPages() != page_count) {
+ UNIMPLEMENTED_MSG("Page count does not match");
+ }
+
+ const KMemoryPermission expected =
+ &target_process == owner_process ? owner_permission : user_permission;
+
+ if (permissions != expected) {
+ UNIMPLEMENTED_MSG("Permission does not match");
+ }
+
+ return target_process.PageTable().MapPages(address, page_list, KMemoryState::Shared,
+ permissions);
+}
+
+} // namespace Kernel
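A representative call site matching the Create/Map signatures above (the addresses, sizes, and target process are illustrative):

    // Sketch: create a shared memory block and map it into a process.
    auto shared_mem = Kernel::KSharedMemory::Create(
        kernel, device_memory, /*owner_process=*/nullptr, {phys_addr, size / PageSize},
        KMemoryPermission::None, KMemoryPermission::Read, phys_addr, size,
        "Example:SharedMemory");
    const ResultCode map_result =
        shared_mem->Map(target_process, map_address, size, KMemoryPermission::Read);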
diff --git a/src/core/hle/kernel/shared_memory.h b/src/core/hle/kernel/k_shared_memory.h
index 623bd8b11..016e34be5 100644
--- a/src/core/hle/kernel/shared_memory.h
+++ b/src/core/hle/kernel/k_shared_memory.h
@@ -9,8 +9,8 @@
#include "common/common_types.h"
#include "core/device_memory.h"
-#include "core/hle/kernel/memory/memory_block.h"
-#include "core/hle/kernel/memory/page_linked_list.h"
+#include "core/hle/kernel/k_memory_block.h"
+#include "core/hle/kernel/k_page_linked_list.h"
#include "core/hle/kernel/object.h"
#include "core/hle/kernel/process.h"
#include "core/hle/result.h"
@@ -19,15 +19,15 @@ namespace Kernel {
class KernelCore;
-class SharedMemory final : public Object {
+class KSharedMemory final : public Object {
public:
- explicit SharedMemory(KernelCore& kernel, Core::DeviceMemory& device_memory);
- ~SharedMemory() override;
+ explicit KSharedMemory(KernelCore& kernel, Core::DeviceMemory& device_memory);
+ ~KSharedMemory() override;
- static std::shared_ptr<SharedMemory> Create(
+ static std::shared_ptr<KSharedMemory> Create(
KernelCore& kernel, Core::DeviceMemory& device_memory, Process* owner_process,
- Memory::PageLinkedList&& page_list, Memory::MemoryPermission owner_permission,
- Memory::MemoryPermission user_permission, PAddr physical_address, std::size_t size,
+ KPageLinkedList&& page_list, KMemoryPermission owner_permission,
+ KMemoryPermission user_permission, PAddr physical_address, std::size_t size,
std::string name);
std::string GetTypeName() const override {
@@ -51,7 +51,7 @@ public:
* @param permissions Memory block map permissions (specified by SVC field)
*/
ResultCode Map(Process& target_process, VAddr address, std::size_t size,
- Memory::MemoryPermission permissions);
+ KMemoryPermission permissions);
/**
* Gets a pointer to the shared memory block
@@ -76,9 +76,9 @@ public:
private:
Core::DeviceMemory& device_memory;
Process* owner_process{};
- Memory::PageLinkedList page_list;
- Memory::MemoryPermission owner_permission{};
- Memory::MemoryPermission user_permission{};
+ KPageLinkedList page_list;
+ KMemoryPermission owner_permission{};
+ KMemoryPermission user_permission{};
PAddr physical_address{};
std::size_t size{};
std::string name;
diff --git a/src/core/hle/kernel/memory/slab_heap.h b/src/core/hle/kernel/k_slab_heap.h
index 465eaddb3..aa4471d2f 100644
--- a/src/core/hle/kernel/memory/slab_heap.h
+++ b/src/core/hle/kernel/k_slab_heap.h
@@ -2,9 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-// This file references various implementation details from Atmosphere, an open-source firmware for
-// the Nintendo Switch. Copyright 2018-2020 Atmosphere-NX.
-
#pragma once
#include <atomic>
@@ -12,17 +9,17 @@
#include "common/assert.h"
#include "common/common_types.h"
-namespace Kernel::Memory {
+namespace Kernel {
namespace impl {
-class SlabHeapImpl final : NonCopyable {
+class KSlabHeapImpl final : NonCopyable {
public:
struct Node {
Node* next{};
};
- constexpr SlabHeapImpl() = default;
+ constexpr KSlabHeapImpl() = default;
void Initialize(std::size_t size) {
ASSERT(head == nullptr);
@@ -65,9 +62,9 @@ private:
} // namespace impl
-class SlabHeapBase : NonCopyable {
+class KSlabHeapBase : NonCopyable {
public:
- constexpr SlabHeapBase() = default;
+ constexpr KSlabHeapBase() = default;
constexpr bool Contains(uintptr_t addr) const {
return start <= addr && addr < end;
@@ -126,7 +123,7 @@ public:
}
private:
- using Impl = impl::SlabHeapImpl;
+ using Impl = impl::KSlabHeapImpl;
Impl impl;
uintptr_t peak{};
@@ -135,9 +132,9 @@ private:
};
template <typename T>
-class SlabHeap final : public SlabHeapBase {
+class KSlabHeap final : public KSlabHeapBase {
public:
- constexpr SlabHeap() : SlabHeapBase() {}
+ constexpr KSlabHeap() : KSlabHeapBase() {}
void Initialize(void* memory, std::size_t memory_size) {
InitializeImpl(sizeof(T), memory, memory_size);
@@ -160,4 +157,4 @@ public:
}
};
-} // namespace Kernel::Memory
+} // namespace Kernel
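Usage follows what kernel.cpp does below for user pages: back the heap with a raw memory region, then pop and push fixed-size objects in O(1). A sketch (the backing pointer and size are illustrative; Free is assumed to mirror Allocate, as in the pre-rename header):

    KSlabHeap<Page> page_heap;
    page_heap.Initialize(backing_memory, backing_size); // carve into sizeof(Page) slabs
    Page* const page = page_heap.Allocate();            // pop from the intrusive free list
    // ... use the page ...
    page_heap.Free(page);                               // push back onto the free list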
diff --git a/src/core/hle/kernel/k_spin_lock.cpp b/src/core/hle/kernel/k_spin_lock.cpp
new file mode 100644
index 000000000..4412aa4bb
--- /dev/null
+++ b/src/core/hle/kernel/k_spin_lock.cpp
@@ -0,0 +1,54 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "core/hle/kernel/k_spin_lock.h"
+
+#if _MSC_VER
+#include <intrin.h>
+#if _M_AMD64
+#define __x86_64__ 1
+#endif
+#if _M_ARM64
+#define __aarch64__ 1
+#endif
+#else
+#if __x86_64__
+#include <xmmintrin.h>
+#endif
+#endif
+
+namespace {
+
+void ThreadPause() {
+#if __x86_64__
+ _mm_pause();
+#elif __aarch64__ && _MSC_VER
+ __yield();
+#elif __aarch64__
+ asm("yield");
+#endif
+}
+
+} // namespace
+
+namespace Kernel {
+
+void KSpinLock::Lock() {
+ while (lck.test_and_set(std::memory_order_acquire)) {
+ ThreadPause();
+ }
+}
+
+void KSpinLock::Unlock() {
+ lck.clear(std::memory_order_release);
+}
+
+bool KSpinLock::TryLock() {
+ if (lck.test_and_set(std::memory_order_acquire)) {
+ return false;
+ }
+ return true;
+}
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/k_spin_lock.h b/src/core/hle/kernel/k_spin_lock.h
new file mode 100644
index 000000000..12c4b2e88
--- /dev/null
+++ b/src/core/hle/kernel/k_spin_lock.h
@@ -0,0 +1,33 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <atomic>
+
+#include "core/hle/kernel/k_scoped_lock.h"
+
+namespace Kernel {
+
+class KSpinLock {
+public:
+ KSpinLock() = default;
+
+ KSpinLock(const KSpinLock&) = delete;
+ KSpinLock& operator=(const KSpinLock&) = delete;
+
+ KSpinLock(KSpinLock&&) = delete;
+ KSpinLock& operator=(KSpinLock&&) = delete;
+
+ void Lock();
+ void Unlock();
+ [[nodiscard]] bool TryLock();
+
+private:
+ std::atomic_flag lck = ATOMIC_FLAG_INIT;
+};
+
+using KScopedSpinLock = KScopedLock<KSpinLock>;
+
+} // namespace Kernel
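KScopedLock is the kernel's generic RAII lock wrapper, so the alias gives scoped acquisition of the spin lock. A sketch, assuming KScopedLock locks in its constructor and unlocks in its destructor:

    KSpinLock spin_lock;

    void Example() {
        KScopedSpinLock lk{spin_lock}; // calls spin_lock.Lock()
        // ... critical section ...
    }                                  // destructor calls spin_lock.Unlock()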
diff --git a/src/core/hle/kernel/k_synchronization_object.cpp b/src/core/hle/kernel/k_synchronization_object.cpp
index 140cc46a7..82f72a0fe 100644
--- a/src/core/hle/kernel/k_synchronization_object.cpp
+++ b/src/core/hle/kernel/k_synchronization_object.cpp
@@ -40,20 +40,20 @@ ResultCode KSynchronizationObject::Wait(KernelCore& kernel, s32* out_index,
// Check if the timeout is zero.
if (timeout == 0) {
slp.CancelSleep();
- return Svc::ResultTimedOut;
+ return ResultTimedOut;
}
// Check if the thread should terminate.
if (thread->IsTerminationRequested()) {
slp.CancelSleep();
- return Svc::ResultTerminationRequested;
+ return ResultTerminationRequested;
}
// Check if waiting was canceled.
if (thread->IsWaitCancelled()) {
slp.CancelSleep();
thread->ClearWaitCancelled();
- return Svc::ResultCancelled;
+ return ResultCancelled;
}
// Add the waiters.
@@ -75,7 +75,7 @@ ResultCode KSynchronizationObject::Wait(KernelCore& kernel, s32* out_index,
// Mark the thread as waiting.
thread->SetCancellable();
- thread->SetSyncedObject(nullptr, Svc::ResultTimedOut);
+ thread->SetSyncedObject(nullptr, ResultTimedOut);
thread->SetState(ThreadState::Waiting);
thread->SetWaitReasonForDebugging(ThreadWaitReasonForDebugging::Synchronization);
}
diff --git a/src/core/hle/kernel/memory/system_control.cpp b/src/core/hle/kernel/k_system_control.cpp
index 11d204bc2..aa1682f69 100644
--- a/src/core/hle/kernel/memory/system_control.cpp
+++ b/src/core/hle/kernel/k_system_control.cpp
@@ -1,12 +1,13 @@
-// Copyright 2020 yuzu Emulator Project
+// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <random>
-#include "core/hle/kernel/memory/system_control.h"
+#include "core/hle/kernel/k_system_control.h"
+
+namespace Kernel {
-namespace Kernel::Memory::SystemControl {
namespace {
template <typename F>
u64 GenerateUniformRange(u64 min, u64 max, F f) {
@@ -25,16 +26,17 @@ u64 GenerateUniformRange(u64 min, u64 max, F f) {
}
}
-u64 GenerateRandomU64ForInit() {
+} // Anonymous namespace
+
+u64 KSystemControl::GenerateRandomU64() {
static std::random_device device;
static std::mt19937 gen(device());
static std::uniform_int_distribution<u64> distribution(1, std::numeric_limits<u64>::max());
return distribution(gen);
}
-} // Anonymous namespace
-u64 GenerateRandomRange(u64 min, u64 max) {
- return GenerateUniformRange(min, max, GenerateRandomU64ForInit);
+u64 KSystemControl::GenerateRandomRange(u64 min, u64 max) {
+ return GenerateUniformRange(min, max, GenerateRandomU64);
}
-} // namespace Kernel::Memory::SystemControl
+} // namespace Kernel
diff --git a/src/core/hle/kernel/k_system_control.h b/src/core/hle/kernel/k_system_control.h
new file mode 100644
index 000000000..1d5b64ffa
--- /dev/null
+++ b/src/core/hle/kernel/k_system_control.h
@@ -0,0 +1,19 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Kernel {
+
+class KSystemControl {
+public:
+ KSystemControl() = default;
+
+ static u64 GenerateRandomRange(u64 min, u64 max);
+ static u64 GenerateRandomU64();
+};
+
+} // namespace Kernel
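A representative caller; the ASLR-style offset computation is hypothetical, not taken from this diff:

    // Sketch: pick a random page-aligned start inside a candidate region.
    const u64 page_index = KSystemControl::GenerateRandomRange(0, region_num_pages - 1);
    const VAddr candidate = region_start + page_index * PageSize;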
diff --git a/src/core/hle/kernel/k_thread.cpp b/src/core/hle/kernel/k_thread.cpp
index b59259c4f..1661afbd9 100644
--- a/src/core/hle/kernel/k_thread.cpp
+++ b/src/core/hle/kernel/k_thread.cpp
@@ -18,16 +18,15 @@
#include "core/core.h"
#include "core/cpu_manager.h"
#include "core/hardware_properties.h"
-#include "core/hle/kernel/errors.h"
#include "core/hle/kernel/handle_table.h"
#include "core/hle/kernel/k_condition_variable.h"
+#include "core/hle/kernel/k_memory_layout.h"
#include "core/hle/kernel/k_resource_limit.h"
#include "core/hle/kernel/k_scheduler.h"
#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h"
#include "core/hle/kernel/k_thread.h"
#include "core/hle/kernel/k_thread_queue.h"
#include "core/hle/kernel/kernel.h"
-#include "core/hle/kernel/memory/memory_layout.h"
#include "core/hle/kernel/object.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/svc_results.h"
@@ -127,7 +126,7 @@ ResultCode KThread::Initialize(KThreadFunction func, uintptr_t arg, VAddr user_s
// Set core ID and wait result.
core_id = phys_core;
- wait_result = Svc::ResultNoSynchronizationObject;
+ wait_result = ResultNoSynchronizationObject;
// Set priorities.
priority = prio;
@@ -238,7 +237,7 @@ void KThread::Finalize() {
while (it != waiter_list.end()) {
// The thread shouldn't be a kernel waiter.
it->SetLockOwner(nullptr);
- it->SetSyncedObject(nullptr, Svc::ResultInvalidState);
+ it->SetSyncedObject(nullptr, ResultInvalidState);
it->Wakeup();
it = waiter_list.erase(it);
}
@@ -447,7 +446,7 @@ ResultCode KThread::SetCoreMask(s32 core_id, u64 v_affinity_mask) {
// If the core id is no-update magic, preserve the ideal core id.
if (core_id == Svc::IdealCoreNoUpdate) {
core_id = virtual_ideal_core_id;
- R_UNLESS(((1ULL << core_id) & v_affinity_mask) != 0, Svc::ResultInvalidCombination);
+ R_UNLESS(((1ULL << core_id) & v_affinity_mask) != 0, ResultInvalidCombination);
}
// Set the virtual core/affinity mask.
@@ -526,7 +525,7 @@ ResultCode KThread::SetCoreMask(s32 core_id, u64 v_affinity_mask) {
if (GetStackParameters().is_pinned) {
// Verify that the current thread isn't terminating.
R_UNLESS(!GetCurrentThread(kernel).IsTerminationRequested(),
- Svc::ResultTerminationRequested);
+ ResultTerminationRequested);
// Note that the thread was pinned.
thread_is_pinned = true;
@@ -604,7 +603,7 @@ void KThread::WaitCancel() {
sleeping_queue->WakeupThread(this);
wait_cancelled = true;
} else {
- SetSyncedObject(nullptr, Svc::ResultCancelled);
+ SetSyncedObject(nullptr, ResultCancelled);
SetState(ThreadState::Runnable);
wait_cancelled = false;
}
@@ -663,12 +662,12 @@ ResultCode KThread::SetActivity(Svc::ThreadActivity activity) {
// Verify our state.
const auto cur_state = GetState();
R_UNLESS((cur_state == ThreadState::Waiting || cur_state == ThreadState::Runnable),
- Svc::ResultInvalidState);
+ ResultInvalidState);
// Either pause or resume.
if (activity == Svc::ThreadActivity::Paused) {
// Verify that we're not suspended.
- R_UNLESS(!IsSuspendRequested(SuspendType::Thread), Svc::ResultInvalidState);
+ R_UNLESS(!IsSuspendRequested(SuspendType::Thread), ResultInvalidState);
// Suspend.
RequestSuspend(SuspendType::Thread);
@@ -676,7 +675,7 @@ ResultCode KThread::SetActivity(Svc::ThreadActivity activity) {
ASSERT(activity == Svc::ThreadActivity::Runnable);
// Verify that we're suspended.
- R_UNLESS(IsSuspendRequested(SuspendType::Thread), Svc::ResultInvalidState);
+ R_UNLESS(IsSuspendRequested(SuspendType::Thread), ResultInvalidState);
// Resume.
Resume(SuspendType::Thread);
@@ -698,7 +697,7 @@ ResultCode KThread::SetActivity(Svc::ThreadActivity activity) {
if (GetStackParameters().is_pinned) {
// Verify that the current thread isn't terminating.
R_UNLESS(!GetCurrentThread(kernel).IsTerminationRequested(),
- Svc::ResultTerminationRequested);
+ ResultTerminationRequested);
// Note that the thread was pinned and not current.
thread_is_pinned = true;
@@ -745,7 +744,7 @@ ResultCode KThread::GetThreadContext3(std::vector<u8>& out) {
KScopedSchedulerLock sl{kernel};
// Verify that we're suspended.
- R_UNLESS(IsSuspendRequested(SuspendType::Thread), Svc::ResultInvalidState);
+ R_UNLESS(IsSuspendRequested(SuspendType::Thread), ResultInvalidState);
// If we're not terminating, get the thread's user context.
if (!IsTerminationRequested()) {
@@ -783,7 +782,7 @@ void KThread::AddWaiterImpl(KThread* thread) {
}
// Keep track of how many kernel waiters we have.
- if (Memory::IsKernelAddressKey(thread->GetAddressKey())) {
+ if (IsKernelAddressKey(thread->GetAddressKey())) {
ASSERT((num_kernel_waiters++) >= 0);
}
@@ -796,7 +795,7 @@ void KThread::RemoveWaiterImpl(KThread* thread) {
ASSERT(kernel.GlobalSchedulerContext().IsLocked());
// Keep track of how many kernel waiters we have.
- if (Memory::IsKernelAddressKey(thread->GetAddressKey())) {
+ if (IsKernelAddressKey(thread->GetAddressKey())) {
ASSERT((num_kernel_waiters--) > 0);
}
@@ -871,7 +870,7 @@ KThread* KThread::RemoveWaiterByKey(s32* out_num_waiters, VAddr key) {
KThread* thread = std::addressof(*it);
// Keep track of how many kernel waiters we have.
- if (Memory::IsKernelAddressKey(thread->GetAddressKey())) {
+ if (IsKernelAddressKey(thread->GetAddressKey())) {
ASSERT((num_kernel_waiters--) > 0);
}
it = waiter_list.erase(it);
@@ -905,12 +904,11 @@ ResultCode KThread::Run() {
KScopedSchedulerLock lk{kernel};
// If either this thread or the current thread are requesting termination, note it.
- R_UNLESS(!IsTerminationRequested(), Svc::ResultTerminationRequested);
- R_UNLESS(!GetCurrentThread(kernel).IsTerminationRequested(),
- Svc::ResultTerminationRequested);
+ R_UNLESS(!IsTerminationRequested(), ResultTerminationRequested);
+ R_UNLESS(!GetCurrentThread(kernel).IsTerminationRequested(), ResultTerminationRequested);
// Ensure our thread state is correct.
- R_UNLESS(GetState() == ThreadState::Initialized, Svc::ResultInvalidState);
+ R_UNLESS(GetState() == ThreadState::Initialized, ResultInvalidState);
// If the current thread has been asked to suspend, suspend it and retry.
if (GetCurrentThread(kernel).IsSuspended()) {
@@ -962,7 +960,7 @@ ResultCode KThread::Sleep(s64 timeout) {
// Check if the thread should terminate.
if (IsTerminationRequested()) {
slp.CancelSleep();
- return Svc::ResultTerminationRequested;
+ return ResultTerminationRequested;
}
// Mark the thread as waiting.
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index b20c2d13a..331cf3a60 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -26,19 +26,19 @@
#include "core/device_memory.h"
#include "core/hardware_properties.h"
#include "core/hle/kernel/client_port.h"
-#include "core/hle/kernel/errors.h"
#include "core/hle/kernel/handle_table.h"
+#include "core/hle/kernel/k_memory_layout.h"
+#include "core/hle/kernel/k_memory_manager.h"
#include "core/hle/kernel/k_resource_limit.h"
#include "core/hle/kernel/k_scheduler.h"
+#include "core/hle/kernel/k_shared_memory.h"
+#include "core/hle/kernel/k_slab_heap.h"
#include "core/hle/kernel/k_thread.h"
#include "core/hle/kernel/kernel.h"
-#include "core/hle/kernel/memory/memory_layout.h"
-#include "core/hle/kernel/memory/memory_manager.h"
-#include "core/hle/kernel/memory/slab_heap.h"
#include "core/hle/kernel/physical_core.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/service_thread.h"
-#include "core/hle/kernel/shared_memory.h"
+#include "core/hle/kernel/svc_results.h"
#include "core/hle/kernel/time_manager.h"
#include "core/hle/lock.h"
#include "core/hle/result.h"
@@ -101,8 +101,6 @@ struct KernelCore::Impl {
current_process = nullptr;
- system_resource_limit = nullptr;
-
global_handle_table.Clear();
preemption_event = nullptr;
@@ -111,6 +109,13 @@ struct KernelCore::Impl {
exclusive_monitor.reset();
+ hid_shared_mem = nullptr;
+ font_shared_mem = nullptr;
+ irs_shared_mem = nullptr;
+ time_shared_mem = nullptr;
+
+ system_resource_limit = nullptr;
+
// Next host thread ID to use; IDs 0-3 represent core threads, >3 represent others
next_host_thread_id = Core::Hardware::NUM_CPU_CORES;
}
@@ -141,11 +146,17 @@ struct KernelCore::Impl {
ASSERT(system_resource_limit->SetLimitValue(LimitableResource::Events, 700).IsSuccess());
ASSERT(system_resource_limit->SetLimitValue(LimitableResource::TransferMemory, 200)
.IsSuccess());
- ASSERT(system_resource_limit->SetLimitValue(LimitableResource::Sessions, 900).IsSuccess());
+ ASSERT(system_resource_limit->SetLimitValue(LimitableResource::Sessions, 933).IsSuccess());
- if (!system_resource_limit->Reserve(LimitableResource::PhysicalMemory, 0x60000)) {
+ // Derived from recent software updates. The kernel reserves 27MB of physical memory.
+ constexpr u64 kernel_size{0x1b00000};
+ if (!system_resource_limit->Reserve(LimitableResource::PhysicalMemory, kernel_size)) {
UNREACHABLE();
}
+ // Reserve secure applet memory, introduced in firmware 5.0.0
+ constexpr u64 secure_applet_memory_size{0x400000};
+ ASSERT(system_resource_limit->Reserve(LimitableResource::PhysicalMemory,
+ secure_applet_memory_size));
}
void InitializePreemption(KernelCore& kernel) {
@@ -260,7 +271,7 @@ struct KernelCore::Impl {
void InitializeMemoryLayout() {
// Initialize memory layout
- constexpr Memory::MemoryLayout layout{Memory::MemoryLayout::GetDefaultLayout()};
+ constexpr KMemoryLayout layout{KMemoryLayout::GetDefaultLayout()};
constexpr std::size_t hid_size{0x40000};
constexpr std::size_t font_size{0x1100000};
constexpr std::size_t irs_size{0x8000};
@@ -271,39 +282,42 @@ struct KernelCore::Impl {
constexpr PAddr time_addr{layout.System().StartAddress() + hid_size + font_size + irs_size};
// Initialize memory manager
- memory_manager = std::make_unique<Memory::MemoryManager>();
- memory_manager->InitializeManager(Memory::MemoryManager::Pool::Application,
+ memory_manager = std::make_unique<KMemoryManager>();
+ memory_manager->InitializeManager(KMemoryManager::Pool::Application,
layout.Application().StartAddress(),
layout.Application().EndAddress());
- memory_manager->InitializeManager(Memory::MemoryManager::Pool::Applet,
+ memory_manager->InitializeManager(KMemoryManager::Pool::Applet,
layout.Applet().StartAddress(),
layout.Applet().EndAddress());
- memory_manager->InitializeManager(Memory::MemoryManager::Pool::System,
+ memory_manager->InitializeManager(KMemoryManager::Pool::System,
layout.System().StartAddress(),
layout.System().EndAddress());
- hid_shared_mem = Kernel::SharedMemory::Create(
- system.Kernel(), system.DeviceMemory(), nullptr,
- {hid_addr, hid_size / Memory::PageSize}, Memory::MemoryPermission::None,
- Memory::MemoryPermission::Read, hid_addr, hid_size, "HID:SharedMemory");
- font_shared_mem = Kernel::SharedMemory::Create(
- system.Kernel(), system.DeviceMemory(), nullptr,
- {font_pa, font_size / Memory::PageSize}, Memory::MemoryPermission::None,
- Memory::MemoryPermission::Read, font_pa, font_size, "Font:SharedMemory");
- irs_shared_mem = Kernel::SharedMemory::Create(
- system.Kernel(), system.DeviceMemory(), nullptr,
- {irs_addr, irs_size / Memory::PageSize}, Memory::MemoryPermission::None,
- Memory::MemoryPermission::Read, irs_addr, irs_size, "IRS:SharedMemory");
- time_shared_mem = Kernel::SharedMemory::Create(
- system.Kernel(), system.DeviceMemory(), nullptr,
- {time_addr, time_size / Memory::PageSize}, Memory::MemoryPermission::None,
- Memory::MemoryPermission::Read, time_addr, time_size, "Time:SharedMemory");
+ hid_shared_mem = Kernel::KSharedMemory::Create(
+ system.Kernel(), system.DeviceMemory(), nullptr, {hid_addr, hid_size / PageSize},
+ KMemoryPermission::None, KMemoryPermission::Read, hid_addr, hid_size,
+ "HID:SharedMemory");
+ font_shared_mem = Kernel::KSharedMemory::Create(
+ system.Kernel(), system.DeviceMemory(), nullptr, {font_pa, font_size / PageSize},
+ KMemoryPermission::None, KMemoryPermission::Read, font_pa, font_size,
+ "Font:SharedMemory");
+ irs_shared_mem = Kernel::KSharedMemory::Create(
+ system.Kernel(), system.DeviceMemory(), nullptr, {irs_addr, irs_size / PageSize},
+ KMemoryPermission::None, KMemoryPermission::Read, irs_addr, irs_size,
+ "IRS:SharedMemory");
+ time_shared_mem = Kernel::KSharedMemory::Create(
+ system.Kernel(), system.DeviceMemory(), nullptr, {time_addr, time_size / PageSize},
+ KMemoryPermission::None, KMemoryPermission::Read, time_addr, time_size,
+ "Time:SharedMemory");
// Allocate slab heaps
- user_slab_heap_pages = std::make_unique<Memory::SlabHeap<Memory::Page>>();
+ user_slab_heap_pages = std::make_unique<KSlabHeap<Page>>();
+ constexpr u64 user_slab_heap_size{0x1ef000};
+ // Reserve slab heaps
+ ASSERT(
+ system_resource_limit->Reserve(LimitableResource::PhysicalMemory, user_slab_heap_size));
// Initialize slab heaps
- constexpr u64 user_slab_heap_size{0x3de000};
user_slab_heap_pages->Initialize(
system.DeviceMemory().GetPointer(Core::DramMemoryMap::SlabHeapBase),
user_slab_heap_size);
@@ -339,14 +353,14 @@ struct KernelCore::Impl {
std::atomic<u32> next_host_thread_id{Core::Hardware::NUM_CPU_CORES};
// Kernel memory management
- std::unique_ptr<Memory::MemoryManager> memory_manager;
- std::unique_ptr<Memory::SlabHeap<Memory::Page>> user_slab_heap_pages;
+ std::unique_ptr<KMemoryManager> memory_manager;
+ std::unique_ptr<KSlabHeap<Page>> user_slab_heap_pages;
// Shared memory for services
- std::shared_ptr<Kernel::SharedMemory> hid_shared_mem;
- std::shared_ptr<Kernel::SharedMemory> font_shared_mem;
- std::shared_ptr<Kernel::SharedMemory> irs_shared_mem;
- std::shared_ptr<Kernel::SharedMemory> time_shared_mem;
+ std::shared_ptr<Kernel::KSharedMemory> hid_shared_mem;
+ std::shared_ptr<Kernel::KSharedMemory> font_shared_mem;
+ std::shared_ptr<Kernel::KSharedMemory> irs_shared_mem;
+ std::shared_ptr<Kernel::KSharedMemory> time_shared_mem;
// Threads used for services
std::unordered_set<std::shared_ptr<Kernel::ServiceThread>> service_threads;
@@ -564,51 +578,51 @@ KThread* KernelCore::GetCurrentEmuThread() const {
return impl->GetCurrentEmuThread();
}
-Memory::MemoryManager& KernelCore::MemoryManager() {
+KMemoryManager& KernelCore::MemoryManager() {
return *impl->memory_manager;
}
-const Memory::MemoryManager& KernelCore::MemoryManager() const {
+const KMemoryManager& KernelCore::MemoryManager() const {
return *impl->memory_manager;
}
-Memory::SlabHeap<Memory::Page>& KernelCore::GetUserSlabHeapPages() {
+KSlabHeap<Page>& KernelCore::GetUserSlabHeapPages() {
return *impl->user_slab_heap_pages;
}
-const Memory::SlabHeap<Memory::Page>& KernelCore::GetUserSlabHeapPages() const {
+const KSlabHeap<Page>& KernelCore::GetUserSlabHeapPages() const {
return *impl->user_slab_heap_pages;
}
-Kernel::SharedMemory& KernelCore::GetHidSharedMem() {
+Kernel::KSharedMemory& KernelCore::GetHidSharedMem() {
return *impl->hid_shared_mem;
}
-const Kernel::SharedMemory& KernelCore::GetHidSharedMem() const {
+const Kernel::KSharedMemory& KernelCore::GetHidSharedMem() const {
return *impl->hid_shared_mem;
}
-Kernel::SharedMemory& KernelCore::GetFontSharedMem() {
+Kernel::KSharedMemory& KernelCore::GetFontSharedMem() {
return *impl->font_shared_mem;
}
-const Kernel::SharedMemory& KernelCore::GetFontSharedMem() const {
+const Kernel::KSharedMemory& KernelCore::GetFontSharedMem() const {
return *impl->font_shared_mem;
}
-Kernel::SharedMemory& KernelCore::GetIrsSharedMem() {
+Kernel::KSharedMemory& KernelCore::GetIrsSharedMem() {
return *impl->irs_shared_mem;
}
-const Kernel::SharedMemory& KernelCore::GetIrsSharedMem() const {
+const Kernel::KSharedMemory& KernelCore::GetIrsSharedMem() const {
return *impl->irs_shared_mem;
}
-Kernel::SharedMemory& KernelCore::GetTimeSharedMem() {
+Kernel::KSharedMemory& KernelCore::GetTimeSharedMem() {
return *impl->time_shared_mem;
}
-const Kernel::SharedMemory& KernelCore::GetTimeSharedMem() const {
+const Kernel::KSharedMemory& KernelCore::GetTimeSharedMem() const {
return *impl->time_shared_mem;
}
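For orientation, the service regions created in InitializeMemoryLayout above are packed back to back at the start of the System pool. Restating the arithmetic implied by the time_addr line (hid_addr, font_pa, and irs_addr are presumably defined analogously in context not shown by this diff):

    const PAddr base      = layout.System().StartAddress();
    const PAddr hid_addr  = base;                                   // hid_size  = 0x40000
    const PAddr font_pa   = base + hid_size;                        // font_size = 0x1100000
    const PAddr irs_addr  = base + hid_size + font_size;            // irs_size  = 0x8000
    const PAddr time_addr = base + hid_size + font_size + irs_size; // as shown in the hunk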
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index 806a0d986..56906f2da 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -11,7 +11,7 @@
#include <vector>
#include "core/arm/cpu_interrupt_handler.h"
#include "core/hardware_properties.h"
-#include "core/hle/kernel/memory/memory_types.h"
+#include "core/hle/kernel/memory_types.h"
#include "core/hle/kernel/object.h"
namespace Core {
@@ -27,25 +27,23 @@ struct EventType;
namespace Kernel {
-namespace Memory {
-class MemoryManager;
-template <typename T>
-class SlabHeap;
-} // namespace Memory
-
class ClientPort;
class GlobalSchedulerContext;
class HandleTable;
-class PhysicalCore;
-class Process;
+class KMemoryManager;
class KResourceLimit;
class KScheduler;
-class SharedMemory;
+class KSharedMemory;
+class KThread;
+class PhysicalCore;
+class Process;
class ServiceThread;
class Synchronization;
-class KThread;
class TimeManager;
+template <typename T>
+class KSlabHeap;
+
using EmuThreadHandle = uintptr_t;
constexpr EmuThreadHandle EmuThreadHandleInvalid{};
constexpr EmuThreadHandle EmuThreadHandleReserved{1ULL << 63};
@@ -178,40 +176,40 @@ public:
void RegisterHostThread();
/// Gets the virtual memory manager for the kernel.
- Memory::MemoryManager& MemoryManager();
+ KMemoryManager& MemoryManager();
/// Gets the virtual memory manager for the kernel.
- const Memory::MemoryManager& MemoryManager() const;
+ const KMemoryManager& MemoryManager() const;
/// Gets the slab heap allocated for user space pages.
- Memory::SlabHeap<Memory::Page>& GetUserSlabHeapPages();
+ KSlabHeap<Page>& GetUserSlabHeapPages();
/// Gets the slab heap allocated for user space pages.
- const Memory::SlabHeap<Memory::Page>& GetUserSlabHeapPages() const;
+ const KSlabHeap<Page>& GetUserSlabHeapPages() const;
/// Gets the shared memory object for HID services.
- Kernel::SharedMemory& GetHidSharedMem();
+ Kernel::KSharedMemory& GetHidSharedMem();
/// Gets the shared memory object for HID services.
- const Kernel::SharedMemory& GetHidSharedMem() const;
+ const Kernel::KSharedMemory& GetHidSharedMem() const;
/// Gets the shared memory object for font services.
- Kernel::SharedMemory& GetFontSharedMem();
+ Kernel::KSharedMemory& GetFontSharedMem();
/// Gets the shared memory object for font services.
- const Kernel::SharedMemory& GetFontSharedMem() const;
+ const Kernel::KSharedMemory& GetFontSharedMem() const;
/// Gets the shared memory object for IRS services.
- Kernel::SharedMemory& GetIrsSharedMem();
+ Kernel::KSharedMemory& GetIrsSharedMem();
/// Gets the shared memory object for IRS services.
- const Kernel::SharedMemory& GetIrsSharedMem() const;
+ const Kernel::KSharedMemory& GetIrsSharedMem() const;
/// Gets the shared memory object for Time services.
- Kernel::SharedMemory& GetTimeSharedMem();
+ Kernel::KSharedMemory& GetTimeSharedMem();
/// Gets the shared memory object for Time services.
- const Kernel::SharedMemory& GetTimeSharedMem() const;
+ const Kernel::KSharedMemory& GetTimeSharedMem() const;
/// Suspend/unsuspend the OS.
void Suspend(bool in_suspention);
diff --git a/src/core/hle/kernel/memory/page_heap.h b/src/core/hle/kernel/memory/page_heap.h
deleted file mode 100644
index 131093284..000000000
--- a/src/core/hle/kernel/memory/page_heap.h
+++ /dev/null
@@ -1,370 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-// This file references various implementation details from Atmosphere, an open-source firmware for
-// the Nintendo Switch. Copyright 2018-2020 Atmosphere-NX.
-
-#pragma once
-
-#include <array>
-#include <bit>
-#include <vector>
-
-#include "common/alignment.h"
-#include "common/assert.h"
-#include "common/common_funcs.h"
-#include "common/common_types.h"
-#include "core/hle/kernel/memory/memory_types.h"
-
-namespace Kernel::Memory {
-
-class PageHeap final : NonCopyable {
-public:
- static constexpr s32 GetAlignedBlockIndex(std::size_t num_pages, std::size_t align_pages) {
- const auto target_pages{std::max(num_pages, align_pages)};
- for (std::size_t i = 0; i < NumMemoryBlockPageShifts; i++) {
- if (target_pages <=
- (static_cast<std::size_t>(1) << MemoryBlockPageShifts[i]) / PageSize) {
- return static_cast<s32>(i);
- }
- }
- return -1;
- }
-
- static constexpr s32 GetBlockIndex(std::size_t num_pages) {
- for (s32 i{static_cast<s32>(NumMemoryBlockPageShifts) - 1}; i >= 0; i--) {
- if (num_pages >= (static_cast<std::size_t>(1) << MemoryBlockPageShifts[i]) / PageSize) {
- return i;
- }
- }
- return -1;
- }
-
- static constexpr std::size_t GetBlockSize(std::size_t index) {
- return static_cast<std::size_t>(1) << MemoryBlockPageShifts[index];
- }
-
- static constexpr std::size_t GetBlockNumPages(std::size_t index) {
- return GetBlockSize(index) / PageSize;
- }
-
-private:
- static constexpr std::size_t NumMemoryBlockPageShifts{7};
- static constexpr std::array<std::size_t, NumMemoryBlockPageShifts> MemoryBlockPageShifts{
- 0xC, 0x10, 0x15, 0x16, 0x19, 0x1D, 0x1E,
- };
-
- class Block final : NonCopyable {
- private:
- class Bitmap final : NonCopyable {
- public:
- static constexpr std::size_t MaxDepth{4};
-
- private:
- std::array<u64*, MaxDepth> bit_storages{};
- std::size_t num_bits{};
- std::size_t used_depths{};
-
- public:
- constexpr Bitmap() = default;
-
- constexpr std::size_t GetNumBits() const {
- return num_bits;
- }
- constexpr s32 GetHighestDepthIndex() const {
- return static_cast<s32>(used_depths) - 1;
- }
-
- constexpr u64* Initialize(u64* storage, std::size_t size) {
- //* Initially, everything is un-set
- num_bits = 0;
-
- // Calculate the needed bitmap depth
- used_depths = static_cast<std::size_t>(GetRequiredDepth(size));
- ASSERT(used_depths <= MaxDepth);
-
- // Set the bitmap pointers
- for (s32 depth{GetHighestDepthIndex()}; depth >= 0; depth--) {
- bit_storages[depth] = storage;
- size = Common::AlignUp(size, 64) / 64;
- storage += size;
- }
-
- return storage;
- }
-
- s64 FindFreeBlock() const {
- uintptr_t offset{};
- s32 depth{};
-
- do {
- const u64 v{bit_storages[depth][offset]};
- if (v == 0) {
- // Non-zero depth indicates that a previous level had a free block
- ASSERT(depth == 0);
- return -1;
- }
- offset = offset * 64 + static_cast<u32>(std::countr_zero(v));
- ++depth;
- } while (depth < static_cast<s32>(used_depths));
-
- return static_cast<s64>(offset);
- }
-
- constexpr void SetBit(std::size_t offset) {
- SetBit(GetHighestDepthIndex(), offset);
- num_bits++;
- }
-
- constexpr void ClearBit(std::size_t offset) {
- ClearBit(GetHighestDepthIndex(), offset);
- num_bits--;
- }
-
- constexpr bool ClearRange(std::size_t offset, std::size_t count) {
- const s32 depth{GetHighestDepthIndex()};
- const auto bit_ind{offset / 64};
- u64* bits{bit_storages[depth]};
- if (count < 64) {
- const auto shift{offset % 64};
- ASSERT(shift + count <= 64);
- // Check that all the bits are set
- const u64 mask{((1ULL << count) - 1) << shift};
- u64 v{bits[bit_ind]};
- if ((v & mask) != mask) {
- return false;
- }
-
- // Clear the bits
- v &= ~mask;
- bits[bit_ind] = v;
- if (v == 0) {
- ClearBit(depth - 1, bit_ind);
- }
- } else {
- ASSERT(offset % 64 == 0);
- ASSERT(count % 64 == 0);
- // Check that all the bits are set
- std::size_t remaining{count};
- std::size_t i = 0;
- do {
- if (bits[bit_ind + i++] != ~u64(0)) {
- return false;
- }
- remaining -= 64;
- } while (remaining > 0);
-
- // Clear the bits
- remaining = count;
- i = 0;
- do {
- bits[bit_ind + i] = 0;
- ClearBit(depth - 1, bit_ind + i);
- i++;
- remaining -= 64;
- } while (remaining > 0);
- }
-
- num_bits -= count;
- return true;
- }
-
- private:
- constexpr void SetBit(s32 depth, std::size_t offset) {
- while (depth >= 0) {
- const auto ind{offset / 64};
- const auto which{offset % 64};
- const u64 mask{1ULL << which};
-
- u64* bit{std::addressof(bit_storages[depth][ind])};
- const u64 v{*bit};
- ASSERT((v & mask) == 0);
- *bit = v | mask;
- if (v) {
- break;
- }
- offset = ind;
- depth--;
- }
- }
-
- constexpr void ClearBit(s32 depth, std::size_t offset) {
- while (depth >= 0) {
- const auto ind{offset / 64};
- const auto which{offset % 64};
- const u64 mask{1ULL << which};
-
- u64* bit{std::addressof(bit_storages[depth][ind])};
- u64 v{*bit};
- ASSERT((v & mask) != 0);
- v &= ~mask;
- *bit = v;
- if (v) {
- break;
- }
- offset = ind;
- depth--;
- }
- }
-
- private:
- static constexpr s32 GetRequiredDepth(std::size_t region_size) {
- s32 depth = 0;
- while (true) {
- region_size /= 64;
- depth++;
- if (region_size == 0) {
- return depth;
- }
- }
- }
-
- public:
- static constexpr std::size_t CalculateMetadataOverheadSize(std::size_t region_size) {
- std::size_t overhead_bits = 0;
- for (s32 depth{GetRequiredDepth(region_size) - 1}; depth >= 0; depth--) {
- region_size = Common::AlignUp(region_size, 64) / 64;
- overhead_bits += region_size;
- }
- return overhead_bits * sizeof(u64);
- }
- };
-
- private:
- Bitmap bitmap;
- VAddr heap_address{};
- uintptr_t end_offset{};
- std::size_t block_shift{};
- std::size_t next_block_shift{};
-
- public:
- constexpr Block() = default;
-
- constexpr std::size_t GetShift() const {
- return block_shift;
- }
- constexpr std::size_t GetNextShift() const {
- return next_block_shift;
- }
- constexpr std::size_t GetSize() const {
- return static_cast<std::size_t>(1) << GetShift();
- }
- constexpr std::size_t GetNumPages() const {
- return GetSize() / PageSize;
- }
- constexpr std::size_t GetNumFreeBlocks() const {
- return bitmap.GetNumBits();
- }
- constexpr std::size_t GetNumFreePages() const {
- return GetNumFreeBlocks() * GetNumPages();
- }
-
- constexpr u64* Initialize(VAddr addr, std::size_t size, std::size_t bs, std::size_t nbs,
- u64* bit_storage) {
- // Set shifts
- block_shift = bs;
- next_block_shift = nbs;
-
- // Align up the address
- VAddr end{addr + size};
- const auto align{(next_block_shift != 0) ? (1ULL << next_block_shift)
- : (1ULL << block_shift)};
- addr = Common::AlignDown((addr), align);
- end = Common::AlignUp((end), align);
-
- heap_address = addr;
- end_offset = (end - addr) / (1ULL << block_shift);
- return bitmap.Initialize(bit_storage, end_offset);
- }
-
- constexpr VAddr PushBlock(VAddr address) {
- // Set the bit for the free block
- std::size_t offset{(address - heap_address) >> GetShift()};
- bitmap.SetBit(offset);
-
- // If we have a next shift, try to clear the blocks below and return the address
- if (GetNextShift()) {
- const auto diff{1ULL << (GetNextShift() - GetShift())};
- offset = Common::AlignDown(offset, diff);
- if (bitmap.ClearRange(offset, diff)) {
- return heap_address + (offset << GetShift());
- }
- }
-
- // We couldn't coalesce, or we're already as big as possible
- return 0;
- }
-
- VAddr PopBlock() {
- // Find a free block
- const s64 soffset{bitmap.FindFreeBlock()};
- if (soffset < 0) {
- return 0;
- }
- const auto offset{static_cast<std::size_t>(soffset)};
-
- // Update our tracking and return it
- bitmap.ClearBit(offset);
- return heap_address + (offset << GetShift());
- }
-
- public:
- static constexpr std::size_t CalculateMetadataOverheadSize(std::size_t region_size,
- std::size_t cur_block_shift,
- std::size_t next_block_shift) {
- const auto cur_block_size{(1ULL << cur_block_shift)};
- const auto next_block_size{(1ULL << next_block_shift)};
- const auto align{(next_block_shift != 0) ? next_block_size : cur_block_size};
- return Bitmap::CalculateMetadataOverheadSize(
- (align * 2 + Common::AlignUp(region_size, align)) / cur_block_size);
- }
- };
-
-public:
- PageHeap() = default;
-
- constexpr VAddr GetAddress() const {
- return heap_address;
- }
- constexpr std::size_t GetSize() const {
- return heap_size;
- }
- constexpr VAddr GetEndAddress() const {
- return GetAddress() + GetSize();
- }
- constexpr std::size_t GetPageOffset(VAddr block) const {
- return (block - GetAddress()) / PageSize;
- }
-
- void Initialize(VAddr heap_address, std::size_t heap_size, std::size_t metadata_size);
- VAddr AllocateBlock(s32 index);
- void Free(VAddr addr, std::size_t num_pages);
-
- void UpdateUsedSize() {
- used_size = heap_size - (GetNumFreePages() * PageSize);
- }
-
- static std::size_t CalculateMetadataOverheadSize(std::size_t region_size);
-
-private:
- constexpr std::size_t GetNumFreePages() const {
- std::size_t num_free{};
-
- for (const auto& block : blocks) {
- num_free += block.GetNumFreePages();
- }
-
- return num_free;
- }
-
- void FreeBlock(VAddr block, s32 index);
-
- VAddr heap_address{};
- std::size_t heap_size{};
- std::size_t used_size{};
- std::array<Block, NumMemoryBlockPageShifts> blocks{};
- std::vector<u64> metadata;
-};
-
-} // namespace Kernel::Memory
diff --git a/src/core/hle/kernel/memory/system_control.h b/src/core/hle/kernel/memory/system_control.h
deleted file mode 100644
index 19cab8cbc..000000000
--- a/src/core/hle/kernel/memory/system_control.h
+++ /dev/null
@@ -1,13 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "common/common_types.h"
-
-namespace Kernel::Memory::SystemControl {
-
-u64 GenerateRandomRange(u64 min, u64 max);
-
-} // namespace Kernel::Memory::SystemControl
diff --git a/src/core/hle/kernel/memory/memory_types.h b/src/core/hle/kernel/memory_types.h
index a75bf77c0..d458f0eca 100644
--- a/src/core/hle/kernel/memory/memory_types.h
+++ b/src/core/hle/kernel/memory_types.h
@@ -8,11 +8,11 @@
#include "common/common_types.h"
-namespace Kernel::Memory {
+namespace Kernel {
constexpr std::size_t PageBits{12};
constexpr std::size_t PageSize{1 << PageBits};
using Page = std::array<u8, PageSize>;
-} // namespace Kernel::Memory
+} // namespace Kernel
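These constants encode the standard 4 KiB page geometry. The derived quantities used throughout this commit look like this (the helper name is illustrative):

    // Sketch: common page arithmetic built on PageBits/PageSize.
    constexpr std::size_t BytesToPages(std::size_t size) {
        return (size + PageSize - 1) / PageSize; // round up, as in KSharedMemory::Map
    }
    static_assert(PageSize == 4096);
    static_assert(BytesToPages(PageSize + 1) == 2);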
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 2286b292d..73b85d6f9 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -14,14 +14,14 @@
#include "core/device_memory.h"
#include "core/file_sys/program_metadata.h"
#include "core/hle/kernel/code_set.h"
-#include "core/hle/kernel/errors.h"
+#include "core/hle/kernel/k_memory_block_manager.h"
+#include "core/hle/kernel/k_page_table.h"
#include "core/hle/kernel/k_resource_limit.h"
#include "core/hle/kernel/k_scheduler.h"
+#include "core/hle/kernel/k_scoped_resource_reservation.h"
+#include "core/hle/kernel/k_slab_heap.h"
#include "core/hle/kernel/k_thread.h"
#include "core/hle/kernel/kernel.h"
-#include "core/hle/kernel/memory/memory_block_manager.h"
-#include "core/hle/kernel/memory/page_table.h"
-#include "core/hle/kernel/memory/slab_heap.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/svc_results.h"
#include "core/hle/lock.h"
@@ -39,6 +39,7 @@ namespace {
*/
void SetupMainThread(Core::System& system, Process& owner_process, u32 priority, VAddr stack_top) {
const VAddr entry_point = owner_process.PageTable().GetCodeRegionStart();
+ ASSERT(owner_process.GetResourceLimit()->Reserve(LimitableResource::Threads, 1));
auto thread_res = KThread::Create(system, ThreadType::User, "main", entry_point, priority, 0,
owner_process.GetIdealCoreId(), stack_top, &owner_process);
@@ -117,6 +118,9 @@ std::shared_ptr<Process> Process::Create(Core::System& system, std::string name,
std::shared_ptr<Process> process = std::make_shared<Process>(system);
process->name = std::move(name);
+
+ // TODO: This is inaccurate; the process should hold a reference to the kernel-wide
+ // resource limit instead of creating its own.
process->resource_limit = std::make_shared<KResourceLimit>(kernel, system);
process->status = ProcessStatus::Created;
process->program_id = 0;
@@ -155,6 +159,9 @@ void Process::DecrementThreadCount() {
}
u64 Process::GetTotalPhysicalMemoryAvailable() const {
+ // TODO: This is expected to always return the application memory pool size once kernel
+ // resources are accurately reserved. The current workaround uses a process-local resource
+ // limit equal to the application memory pool size, which is inaccurate.
const u64 capacity{resource_limit->GetFreeValue(LimitableResource::PhysicalMemory) +
page_table->GetTotalHeapSize() + GetSystemResourceSize() + image_size +
main_thread_stack_size};
@@ -248,8 +255,8 @@ ResultCode Process::Reset() {
KScopedSchedulerLock sl{kernel};
// Validate that we're in a state that we can reset.
- R_UNLESS(status != ProcessStatus::Exited, Svc::ResultInvalidState);
- R_UNLESS(is_signaled, Svc::ResultInvalidState);
+ R_UNLESS(status != ProcessStatus::Exited, ResultInvalidState);
+ R_UNLESS(is_signaled, ResultInvalidState);
// Clear signaled.
is_signaled = false;
@@ -264,18 +271,29 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata,
system_resource_size = metadata.GetSystemResourceSize();
image_size = code_size;
+ // Set initial resource limits
+ resource_limit->SetLimitValue(
+ LimitableResource::PhysicalMemory,
+ kernel.MemoryManager().GetSize(KMemoryManager::Pool::Application));
+ KScopedResourceReservation memory_reservation(resource_limit, LimitableResource::PhysicalMemory,
+ code_size + system_resource_size);
+ if (!memory_reservation.Succeeded()) {
+ LOG_ERROR(Kernel, "Could not reserve process memory requirements of size {:X} bytes",
+ code_size + system_resource_size);
+ return ResultResourceLimitedExceeded;
+ }
// Initialize process address space
if (const ResultCode result{
page_table->InitializeForProcess(metadata.GetAddressSpaceType(), false, 0x8000000,
- code_size, Memory::MemoryManager::Pool::Application)};
+ code_size, KMemoryManager::Pool::Application)};
result.IsError()) {
return result;
}
// Map process code region
- if (const ResultCode result{page_table->MapProcessCode(
- page_table->GetCodeRegionStart(), code_size / Memory::PageSize,
- Memory::MemoryState::Code, Memory::MemoryPermission::None)};
+ if (const ResultCode result{page_table->MapProcessCode(page_table->GetCodeRegionStart(),
+ code_size / PageSize, KMemoryState::Code,
+ KMemoryPermission::None)};
result.IsError()) {
return result;
}
@@ -308,21 +326,24 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata,
// Set initial resource limits
resource_limit->SetLimitValue(
LimitableResource::PhysicalMemory,
- kernel.MemoryManager().GetSize(Memory::MemoryManager::Pool::Application));
+ kernel.MemoryManager().GetSize(KMemoryManager::Pool::Application));
+
resource_limit->SetLimitValue(LimitableResource::Threads, 608);
resource_limit->SetLimitValue(LimitableResource::Events, 700);
resource_limit->SetLimitValue(LimitableResource::TransferMemory, 128);
resource_limit->SetLimitValue(LimitableResource::Sessions, 894);
- ASSERT(resource_limit->Reserve(LimitableResource::PhysicalMemory, code_size));
// Create TLS region
tls_region_address = CreateTLSRegion();
+ memory_reservation.Commit();
return handle_table.SetSize(capabilities.GetHandleTableSize());
}
void Process::Run(s32 main_thread_priority, u64 stack_size) {
AllocateMainThreadStack(stack_size);
+ resource_limit->Reserve(LimitableResource::Threads, 1);
+ resource_limit->Reserve(LimitableResource::PhysicalMemory, main_thread_stack_size);
const std::size_t heap_capacity{memory_usage_capacity - main_thread_stack_size - image_size};
ASSERT(!page_table->SetHeapCapacity(heap_capacity).IsError());
@@ -330,8 +351,6 @@ void Process::Run(s32 main_thread_priority, u64 stack_size) {
ChangeStatus(ProcessStatus::Running);
SetupMainThread(system, *this, main_thread_priority, main_thread_stack_top);
- resource_limit->Reserve(LimitableResource::Threads, 1);
- resource_limit->Reserve(LimitableResource::PhysicalMemory, main_thread_stack_size);
}
void Process::PrepareForTermination() {
@@ -358,6 +377,11 @@ void Process::PrepareForTermination() {
FreeTLSRegion(tls_region_address);
tls_region_address = 0;
+ if (resource_limit) {
+ resource_limit->Release(LimitableResource::PhysicalMemory,
+ main_thread_stack_size + image_size);
+ }
+
ChangeStatus(ProcessStatus::Exited);
}
@@ -381,22 +405,22 @@ VAddr Process::CreateTLSRegion() {
return *tls_page_iter->ReserveSlot();
}
- Memory::Page* const tls_page_ptr{kernel.GetUserSlabHeapPages().Allocate()};
+ Page* const tls_page_ptr{kernel.GetUserSlabHeapPages().Allocate()};
ASSERT(tls_page_ptr);
const VAddr start{page_table->GetKernelMapRegionStart()};
const VAddr size{page_table->GetKernelMapRegionEnd() - start};
const PAddr tls_map_addr{system.DeviceMemory().GetPhysicalAddr(tls_page_ptr)};
- const VAddr tls_page_addr{
- page_table
- ->AllocateAndMapMemory(1, Memory::PageSize, true, start, size / Memory::PageSize,
- Memory::MemoryState::ThreadLocal,
- Memory::MemoryPermission::ReadAndWrite, tls_map_addr)
- .ValueOr(0)};
+ const VAddr tls_page_addr{page_table
+ ->AllocateAndMapMemory(1, PageSize, true, start, size / PageSize,
+ KMemoryState::ThreadLocal,
+ KMemoryPermission::ReadAndWrite,
+ tls_map_addr)
+ .ValueOr(0)};
ASSERT(tls_page_addr);
- std::memset(tls_page_ptr, 0, Memory::PageSize);
+ std::memset(tls_page_ptr, 0, PageSize);
tls_pages.emplace_back(tls_page_addr);
const auto reserve_result{tls_pages.back().ReserveSlot()};
@@ -423,15 +447,15 @@ void Process::FreeTLSRegion(VAddr tls_address) {
void Process::LoadModule(CodeSet code_set, VAddr base_addr) {
std::lock_guard lock{HLE::g_hle_lock};
const auto ReprotectSegment = [&](const CodeSet::Segment& segment,
- Memory::MemoryPermission permission) {
+ KMemoryPermission permission) {
page_table->SetCodeMemoryPermission(segment.addr + base_addr, segment.size, permission);
};
system.Memory().WriteBlock(*this, base_addr, code_set.memory.data(), code_set.memory.size());
- ReprotectSegment(code_set.CodeSegment(), Memory::MemoryPermission::ReadAndExecute);
- ReprotectSegment(code_set.RODataSegment(), Memory::MemoryPermission::Read);
- ReprotectSegment(code_set.DataSegment(), Memory::MemoryPermission::ReadAndWrite);
+ ReprotectSegment(code_set.CodeSegment(), KMemoryPermission::ReadAndExecute);
+ ReprotectSegment(code_set.RODataSegment(), KMemoryPermission::Read);
+ ReprotectSegment(code_set.DataSegment(), KMemoryPermission::ReadAndWrite);
}
bool Process::IsSignaled() const {
@@ -440,9 +464,9 @@ bool Process::IsSignaled() const {
}
Process::Process(Core::System& system)
- : KSynchronizationObject{system.Kernel()},
- page_table{std::make_unique<Memory::PageTable>(system)}, handle_table{system.Kernel()},
- address_arbiter{system}, condition_var{system}, state_lock{system.Kernel()}, system{system} {}
+ : KSynchronizationObject{system.Kernel()}, page_table{std::make_unique<KPageTable>(system)},
+ handle_table{system.Kernel()}, address_arbiter{system}, condition_var{system},
+ state_lock{system.Kernel()}, system{system} {}
Process::~Process() = default;
@@ -460,16 +484,15 @@ ResultCode Process::AllocateMainThreadStack(std::size_t stack_size) {
ASSERT(stack_size);
// The kernel always ensures that the given stack size is page aligned.
- main_thread_stack_size = Common::AlignUp(stack_size, Memory::PageSize);
+ main_thread_stack_size = Common::AlignUp(stack_size, PageSize);
const VAddr start{page_table->GetStackRegionStart()};
const std::size_t size{page_table->GetStackRegionEnd() - start};
CASCADE_RESULT(main_thread_stack_top,
page_table->AllocateAndMapMemory(
- main_thread_stack_size / Memory::PageSize, Memory::PageSize, false, start,
- size / Memory::PageSize, Memory::MemoryState::Stack,
- Memory::MemoryPermission::ReadAndWrite));
+ main_thread_stack_size / PageSize, PageSize, false, start, size / PageSize,
+ KMemoryState::Stack, KMemoryPermission::ReadAndWrite));
main_thread_stack_top += main_thread_stack_size;
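AllocateMainThreadStack rounds the requested size up to a page boundary before mapping. Common::AlignUp's contract, as relied on here, is sketched below (not yuzu's actual implementation):

    // Sketch: smallest multiple of `alignment` that is >= `value`;
    // alignment must be a power of two (PageSize is).
    template <typename T>
    constexpr T AlignUp(T value, std::size_t alignment) {
        return static_cast<T>((value + alignment - 1) & ~static_cast<T>(alignment - 1));
    }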
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index 320b0f347..45eefb90e 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -29,16 +29,13 @@ class ProgramMetadata;
namespace Kernel {
class KernelCore;
+class KPageTable;
class KResourceLimit;
class KThread;
class TLSPage;
struct CodeSet;
-namespace Memory {
-class PageTable;
-}
-
enum class MemoryRegion : u16 {
APPLICATION = 1,
SYSTEM = 2,
@@ -104,12 +101,12 @@ public:
}
/// Gets a reference to the process' page table.
- Memory::PageTable& PageTable() {
+ KPageTable& PageTable() {
return *page_table;
}
/// Gets a const reference to the process' page table.
- const Memory::PageTable& PageTable() const {
+ const KPageTable& PageTable() const {
return *page_table;
}
@@ -385,7 +382,7 @@ private:
ResultCode AllocateMainThreadStack(std::size_t stack_size);
/// Memory manager for this process
- std::unique_ptr<Memory::PageTable> page_table;
+ std::unique_ptr<KPageTable> page_table;
/// Current status of the process
ProcessStatus status{};
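The header above holds a std::unique_ptr to the forward-declared KPageTable. That only compiles because Process's destructor is defined out of line (the `Process::~Process() = default;` in the .cpp earlier in this diff), where KPageTable is complete. A sketch of the same pattern with hypothetical names (Widget/Impl are illustrative, not from the tree):

#include <memory>

class Impl; // forward declaration only; the definition lives "in the .cpp"

class Widget {
public:
    Widget();
    ~Widget(); // declared here, defined where Impl is a complete type
private:
    std::unique_ptr<Impl> impl;
};

// "widget.cpp" side of the sketch:
class Impl {}; // full definition

Widget::Widget() : impl{std::make_unique<Impl>()} {}
Widget::~Widget() = default; // defaulting here lets unique_ptr delete a complete type

int main() {
    Widget w; // destroys cleanly; defaulting the dtor in the header would not compile
}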
diff --git a/src/core/hle/kernel/process_capability.cpp b/src/core/hle/kernel/process_capability.cpp
index 0566311b6..3fc326eab 100644
--- a/src/core/hle/kernel/process_capability.cpp
+++ b/src/core/hle/kernel/process_capability.cpp
@@ -6,10 +6,10 @@
#include "common/bit_util.h"
#include "common/logging/log.h"
-#include "core/hle/kernel/errors.h"
#include "core/hle/kernel/handle_table.h"
-#include "core/hle/kernel/memory/page_table.h"
+#include "core/hle/kernel/k_page_table.h"
#include "core/hle/kernel/process_capability.h"
+#include "core/hle/kernel/svc_results.h"
namespace Kernel {
namespace {
@@ -69,7 +69,7 @@ u32 GetFlagBitOffset(CapabilityType type) {
ResultCode ProcessCapabilities::InitializeForKernelProcess(const u32* capabilities,
std::size_t num_capabilities,
- Memory::PageTable& page_table) {
+ KPageTable& page_table) {
Clear();
// Allow all cores and priorities.
@@ -82,7 +82,7 @@ ResultCode ProcessCapabilities::InitializeForKernelProcess(const u32* capabiliti
ResultCode ProcessCapabilities::InitializeForUserProcess(const u32* capabilities,
std::size_t num_capabilities,
- Memory::PageTable& page_table) {
+ KPageTable& page_table) {
Clear();
return ParseCapabilities(capabilities, num_capabilities, page_table);
@@ -108,7 +108,7 @@ void ProcessCapabilities::InitializeForMetadatalessProcess() {
ResultCode ProcessCapabilities::ParseCapabilities(const u32* capabilities,
std::size_t num_capabilities,
- Memory::PageTable& page_table) {
+ KPageTable& page_table) {
u32 set_flags = 0;
u32 set_svc_bits = 0;
@@ -123,13 +123,13 @@ ResultCode ProcessCapabilities::ParseCapabilities(const u32* capabilities,
// If there's only one, then there's a problem.
if (i >= num_capabilities) {
LOG_ERROR(Kernel, "Invalid combination! i={}", i);
- return ERR_INVALID_COMBINATION;
+ return ResultInvalidCombination;
}
const auto size_flags = capabilities[i];
if (GetCapabilityType(size_flags) != CapabilityType::MapPhysical) {
LOG_ERROR(Kernel, "Invalid capability type! size_flags={}", size_flags);
- return ERR_INVALID_COMBINATION;
+ return ResultInvalidCombination;
}
const auto result = HandleMapPhysicalFlags(descriptor, size_flags, page_table);
@@ -155,11 +155,11 @@ ResultCode ProcessCapabilities::ParseCapabilities(const u32* capabilities,
}
ResultCode ProcessCapabilities::ParseSingleFlagCapability(u32& set_flags, u32& set_svc_bits,
- u32 flag, Memory::PageTable& page_table) {
+ u32 flag, KPageTable& page_table) {
const auto type = GetCapabilityType(flag);
if (type == CapabilityType::Unset) {
- return ERR_INVALID_CAPABILITY_DESCRIPTOR;
+ return ResultInvalidCapabilityDescriptor;
}
// Bail early on ignorable entries, as one would expect,
@@ -176,7 +176,7 @@ ResultCode ProcessCapabilities::ParseSingleFlagCapability(u32& set_flags, u32& s
LOG_ERROR(Kernel,
"Attempted to initialize flags that may only be initialized once. set_flags={}",
set_flags);
- return ERR_INVALID_COMBINATION;
+ return ResultInvalidCombination;
}
set_flags |= set_flag;
@@ -202,7 +202,7 @@ ResultCode ProcessCapabilities::ParseSingleFlagCapability(u32& set_flags, u32& s
}
LOG_ERROR(Kernel, "Invalid capability type! type={}", type);
- return ERR_INVALID_CAPABILITY_DESCRIPTOR;
+ return ResultInvalidCapabilityDescriptor;
}
void ProcessCapabilities::Clear() {
@@ -225,7 +225,7 @@ ResultCode ProcessCapabilities::HandlePriorityCoreNumFlags(u32 flags) {
if (priority_mask != 0 || core_mask != 0) {
LOG_ERROR(Kernel, "Core or priority mask are not zero! priority_mask={}, core_mask={}",
priority_mask, core_mask);
- return ERR_INVALID_CAPABILITY_DESCRIPTOR;
+ return ResultInvalidCapabilityDescriptor;
}
const u32 core_num_min = (flags >> 16) & 0xFF;
@@ -233,7 +233,7 @@ ResultCode ProcessCapabilities::HandlePriorityCoreNumFlags(u32 flags) {
if (core_num_min > core_num_max) {
LOG_ERROR(Kernel, "Core min is greater than core max! core_num_min={}, core_num_max={}",
core_num_min, core_num_max);
- return ERR_INVALID_COMBINATION;
+ return ResultInvalidCombination;
}
const u32 priority_min = (flags >> 10) & 0x3F;
@@ -242,13 +242,13 @@ ResultCode ProcessCapabilities::HandlePriorityCoreNumFlags(u32 flags) {
LOG_ERROR(Kernel,
"Priority min is greater than priority max! priority_min={}, priority_max={}",
priority_min, priority_max);
- return ERR_INVALID_COMBINATION;
+ return ResultInvalidCombination;
}
// The switch only has 4 usable cores.
if (core_num_max >= 4) {
LOG_ERROR(Kernel, "Invalid max cores specified! core_num_max={}", core_num_max);
- return ERR_INVALID_PROCESSOR_ID;
+ return ResultInvalidCoreId;
}
const auto make_mask = [](u64 min, u64 max) {
@@ -269,7 +269,7 @@ ResultCode ProcessCapabilities::HandleSyscallFlags(u32& set_svc_bits, u32 flags)
// If we've already set this svc before, bail.
if ((set_svc_bits & svc_bit) != 0) {
- return ERR_INVALID_COMBINATION;
+ return ResultInvalidCombination;
}
set_svc_bits |= svc_bit;
@@ -283,7 +283,7 @@ ResultCode ProcessCapabilities::HandleSyscallFlags(u32& set_svc_bits, u32 flags)
if (svc_number >= svc_capabilities.size()) {
LOG_ERROR(Kernel, "Process svc capability is out of range! svc_number={}", svc_number);
- return ERR_OUT_OF_RANGE;
+ return ResultOutOfRange;
}
svc_capabilities[svc_number] = true;
@@ -293,12 +293,12 @@ ResultCode ProcessCapabilities::HandleSyscallFlags(u32& set_svc_bits, u32 flags)
}
ResultCode ProcessCapabilities::HandleMapPhysicalFlags(u32 flags, u32 size_flags,
- Memory::PageTable& page_table) {
+ KPageTable& page_table) {
// TODO(Lioncache): Implement once the memory manager can handle this.
return RESULT_SUCCESS;
}
-ResultCode ProcessCapabilities::HandleMapIOFlags(u32 flags, Memory::PageTable& page_table) {
+ResultCode ProcessCapabilities::HandleMapIOFlags(u32 flags, KPageTable& page_table) {
// TODO(Lioncache): Implement once the memory manager can handle this.
return RESULT_SUCCESS;
}
@@ -321,7 +321,7 @@ ResultCode ProcessCapabilities::HandleInterruptFlags(u32 flags) {
if (interrupt >= interrupt_capabilities.size()) {
LOG_ERROR(Kernel, "Process interrupt capability is out of range! svc_number={}",
interrupt);
- return ERR_OUT_OF_RANGE;
+ return ResultOutOfRange;
}
interrupt_capabilities[interrupt] = true;
@@ -334,7 +334,7 @@ ResultCode ProcessCapabilities::HandleProgramTypeFlags(u32 flags) {
const u32 reserved = flags >> 17;
if (reserved != 0) {
LOG_ERROR(Kernel, "Reserved value is non-zero! reserved={}", reserved);
- return ERR_RESERVED_VALUE;
+ return ResultReservedValue;
}
program_type = static_cast<ProgramType>((flags >> 14) & 0b111);
@@ -354,7 +354,7 @@ ResultCode ProcessCapabilities::HandleKernelVersionFlags(u32 flags) {
LOG_ERROR(Kernel,
"Kernel version is non zero or flags are too small! major_version={}, flags={}",
major_version, flags);
- return ERR_INVALID_CAPABILITY_DESCRIPTOR;
+ return ResultInvalidCapabilityDescriptor;
}
kernel_version = flags;
@@ -365,7 +365,7 @@ ResultCode ProcessCapabilities::HandleHandleTableFlags(u32 flags) {
const u32 reserved = flags >> 26;
if (reserved != 0) {
LOG_ERROR(Kernel, "Reserved value is non-zero! reserved={}", reserved);
- return ERR_RESERVED_VALUE;
+ return ResultReservedValue;
}
handle_table_size = static_cast<s32>((flags >> 16) & 0x3FF);
@@ -376,7 +376,7 @@ ResultCode ProcessCapabilities::HandleDebugFlags(u32 flags) {
const u32 reserved = flags >> 19;
if (reserved != 0) {
LOG_ERROR(Kernel, "Reserved value is non-zero! reserved={}", reserved);
- return ERR_RESERVED_VALUE;
+ return ResultReservedValue;
}
is_debuggable = (flags & 0x20000) != 0;
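The capability handlers above all slice fixed-width fields out of a 32-bit descriptor with shift-and-mask arithmetic. A minimal sketch of that extraction, using only the shifts and masks visible in this hunk (the example descriptor value is arbitrary):

#include <cstdint>
#include <cstdio>

// Extract bits [shift, shift + width) from a 32-bit capability descriptor.
constexpr std::uint32_t Field(std::uint32_t flags, unsigned shift, unsigned width) {
    return (flags >> shift) & ((1u << width) - 1);
}

int main() {
    const std::uint32_t flags = 0x00F3D407; // arbitrary example descriptor
    const auto core_num_min = Field(flags, 16, 8); // matches (flags >> 16) & 0xFF above
    const auto priority_min = Field(flags, 10, 6); // matches (flags >> 10) & 0x3F above
    std::printf("core_num_min=%u priority_min=%u\n",
                static_cast<unsigned>(core_num_min), static_cast<unsigned>(priority_min));
}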
diff --git a/src/core/hle/kernel/process_capability.h b/src/core/hle/kernel/process_capability.h
index ea9d12c16..73ad197fa 100644
--- a/src/core/hle/kernel/process_capability.h
+++ b/src/core/hle/kernel/process_capability.h
@@ -12,9 +12,7 @@ union ResultCode;
namespace Kernel {
-namespace Memory {
-class PageTable;
-}
+class KPageTable;
/// The possible types of programs that may be indicated
/// by the program type capability descriptor.
@@ -90,7 +88,7 @@ public:
/// otherwise, an error code upon failure.
///
ResultCode InitializeForKernelProcess(const u32* capabilities, std::size_t num_capabilities,
- Memory::PageTable& page_table);
+ KPageTable& page_table);
/// Initializes this process capabilities instance for a userland process.
///
@@ -103,7 +101,7 @@ public:
/// otherwise, an error code upon failure.
///
ResultCode InitializeForUserProcess(const u32* capabilities, std::size_t num_capabilities,
- Memory::PageTable& page_table);
+ KPageTable& page_table);
/// Initializes this process capabilities instance for a process that does not
/// have any metadata to parse.
@@ -189,7 +187,7 @@ private:
/// @return RESULT_SUCCESS if no errors occur, otherwise an error code.
///
ResultCode ParseCapabilities(const u32* capabilities, std::size_t num_capabilities,
- Memory::PageTable& page_table);
+ KPageTable& page_table);
/// Attempts to parse a capability descriptor that is only represented by a
/// single flag set.
@@ -204,7 +202,7 @@ private:
/// @return RESULT_SUCCESS if no errors occurred, otherwise an error code.
///
ResultCode ParseSingleFlagCapability(u32& set_flags, u32& set_svc_bits, u32 flag,
- Memory::PageTable& page_table);
+ KPageTable& page_table);
/// Clears the internal state of this process capability instance. Necessary
/// to have a sane starting point due to us allowing running executables without
@@ -228,10 +226,10 @@ private:
ResultCode HandleSyscallFlags(u32& set_svc_bits, u32 flags);
/// Handles flags related to mapping physical memory pages.
- ResultCode HandleMapPhysicalFlags(u32 flags, u32 size_flags, Memory::PageTable& page_table);
+ ResultCode HandleMapPhysicalFlags(u32 flags, u32 size_flags, KPageTable& page_table);
/// Handles flags related to mapping IO pages.
- ResultCode HandleMapIOFlags(u32 flags, Memory::PageTable& page_table);
+ ResultCode HandleMapIOFlags(u32 flags, KPageTable& page_table);
/// Handles flags related to the interrupt capability flags.
ResultCode HandleInterruptFlags(u32 flags);
diff --git a/src/core/hle/kernel/server_port.cpp b/src/core/hle/kernel/server_port.cpp
index fe7a483c4..5d17346ad 100644
--- a/src/core/hle/kernel/server_port.cpp
+++ b/src/core/hle/kernel/server_port.cpp
@@ -5,11 +5,11 @@
#include <tuple>
#include "common/assert.h"
#include "core/hle/kernel/client_port.h"
-#include "core/hle/kernel/errors.h"
#include "core/hle/kernel/k_thread.h"
#include "core/hle/kernel/object.h"
#include "core/hle/kernel/server_port.h"
#include "core/hle/kernel/server_session.h"
+#include "core/hle/kernel/svc_results.h"
namespace Kernel {
@@ -18,7 +18,7 @@ ServerPort::~ServerPort() = default;
ResultVal<std::shared_ptr<ServerSession>> ServerPort::Accept() {
if (pending_sessions.empty()) {
- return ERR_NOT_FOUND;
+ return ResultNotFound;
}
auto session = std::move(pending_sessions.back());
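Accept() above returns a ResultVal, which carries either an error code (as in the `return ResultNotFound;` path) or a value. A rough stand-in for that shape, purely illustrative — the real template in core/hle/result.h is considerably more elaborate, and the enum values here are for the sketch only:

#include <cassert>
#include <cstdint>
#include <optional>

enum class ResultCode : std::uint32_t { Success = 0, NotFound = 121 }; // illustrative

// Very rough stand-in for the project's ResultVal<T>: an error code or a payload.
template <typename T>
struct ResultVal {
    ResultCode code;
    std::optional<T> value;
    ResultVal(ResultCode c) : code{c} {}                               // error path
    ResultVal(T v) : code{ResultCode::Success}, value{std::move(v)} {} // success path
    bool Succeeded() const { return code == ResultCode::Success; }
};

ResultVal<int> Accept(bool has_pending) {
    if (!has_pending) {
        return ResultCode::NotFound; // converts implicitly, like `return ResultNotFound;`
    }
    return 42;
}

int main() {
    assert(!Accept(false).Succeeded());
    assert(*Accept(true).value == 42);
}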
diff --git a/src/core/hle/kernel/session.cpp b/src/core/hle/kernel/session.cpp
index 75304b961..8830d4e91 100644
--- a/src/core/hle/kernel/session.cpp
+++ b/src/core/hle/kernel/session.cpp
@@ -4,15 +4,23 @@
#include "common/assert.h"
#include "core/hle/kernel/client_session.h"
+#include "core/hle/kernel/k_scoped_resource_reservation.h"
#include "core/hle/kernel/server_session.h"
#include "core/hle/kernel/session.h"
namespace Kernel {
Session::Session(KernelCore& kernel) : KSynchronizationObject{kernel} {}
-Session::~Session() = default;
+Session::~Session() {
+ // Release the session resource that was reserved when the Session pair was created.
+ kernel.GetSystemResourceLimit()->Release(LimitableResource::Sessions, 1);
+}
Session::SessionPair Session::Create(KernelCore& kernel, std::string name) {
+ // Reserve a new session from the resource limit.
+ KScopedResourceReservation session_reservation(kernel.GetSystemResourceLimit(),
+ LimitableResource::Sessions);
+ ASSERT(session_reservation.Succeeded());
auto session{std::make_shared<Session>(kernel)};
auto client_session{Kernel::ClientSession::Create(kernel, session, name + "_Client").Unwrap()};
auto server_session{Kernel::ServerSession::Create(kernel, session, name + "_Server").Unwrap()};
@@ -21,6 +29,7 @@ Session::SessionPair Session::Create(KernelCore& kernel, std::string name) {
session->client = client_session;
session->server = server_session;
+ session_reservation.Commit();
return std::make_pair(std::move(client_session), std::move(server_session));
}
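The reserve-then-commit sequence introduced above is an RAII guard: the reservation releases itself on scope exit unless Commit() is called. A minimal sketch of that pattern with hypothetical names (the real class lives in k_scoped_resource_reservation.h and takes a limit category and count):

#include <cassert>

struct Limit {
    int used = 0, max = 2;
    bool Reserve() { return used < max ? (++used, true) : false; }
    void Release() { --used; }
};

// Scoped reservation: automatically rolls back unless committed.
class ScopedReservation {
public:
    explicit ScopedReservation(Limit& l) : limit{l}, succeeded{l.Reserve()} {}
    ~ScopedReservation() {
        if (succeeded && !committed) {
            limit.Release(); // error path: give the slot back
        }
    }
    bool Succeeded() const { return succeeded; }
    void Commit() { committed = true; } // success path: keep the slot
private:
    Limit& limit;
    bool succeeded;
    bool committed = false;
};

int main() {
    Limit sessions;
    {
        ScopedReservation r{sessions};
        assert(r.Succeeded());
        // returning with an error before Commit() would release automatically here
    }
    assert(sessions.used == 0); // rolled back, as no Commit() was issued
}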
diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp
deleted file mode 100644
index 0cd467110..000000000
--- a/src/core/hle/kernel/shared_memory.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "core/core.h"
-#include "core/hle/kernel/kernel.h"
-#include "core/hle/kernel/memory/page_table.h"
-#include "core/hle/kernel/shared_memory.h"
-
-namespace Kernel {
-
-SharedMemory::SharedMemory(KernelCore& kernel, Core::DeviceMemory& device_memory)
- : Object{kernel}, device_memory{device_memory} {}
-
-SharedMemory::~SharedMemory() = default;
-
-std::shared_ptr<SharedMemory> SharedMemory::Create(
- KernelCore& kernel, Core::DeviceMemory& device_memory, Process* owner_process,
- Memory::PageLinkedList&& page_list, Memory::MemoryPermission owner_permission,
- Memory::MemoryPermission user_permission, PAddr physical_address, std::size_t size,
- std::string name) {
-
- std::shared_ptr<SharedMemory> shared_memory{
- std::make_shared<SharedMemory>(kernel, device_memory)};
-
- shared_memory->owner_process = owner_process;
- shared_memory->page_list = std::move(page_list);
- shared_memory->owner_permission = owner_permission;
- shared_memory->user_permission = user_permission;
- shared_memory->physical_address = physical_address;
- shared_memory->size = size;
- shared_memory->name = name;
-
- return shared_memory;
-}
-
-ResultCode SharedMemory::Map(Process& target_process, VAddr address, std::size_t size,
- Memory::MemoryPermission permissions) {
- const u64 page_count{(size + Memory::PageSize - 1) / Memory::PageSize};
-
- if (page_list.GetNumPages() != page_count) {
- UNIMPLEMENTED_MSG("Page count does not match");
- }
-
- const Memory::MemoryPermission expected =
- &target_process == owner_process ? owner_permission : user_permission;
-
- if (permissions != expected) {
- UNIMPLEMENTED_MSG("Permission does not match");
- }
-
- return target_process.PageTable().MapPages(address, page_list, Memory::MemoryState::Shared,
- permissions);
-}
-
-} // namespace Kernel
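For reference, the deleted Map() above computed its page count with the usual round-up division, (size + PageSize - 1) / PageSize. A minimal sketch of that idiom in isolation:

#include <cstdint>

constexpr std::uint64_t PageSize = 0x1000;

// Ceiling division: number of pages needed to cover `size` bytes.
constexpr std::uint64_t PageCount(std::uint64_t size) {
    return (size + PageSize - 1) / PageSize;
}

static_assert(PageCount(0x1000) == 1, "an exact page needs one page");
static_assert(PageCount(0x1001) == 2, "one byte over needs a second page");

int main() {}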
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 26650a513..cc8fa6576 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -23,25 +23,25 @@
#include "core/cpu_manager.h"
#include "core/hle/kernel/client_port.h"
#include "core/hle/kernel/client_session.h"
-#include "core/hle/kernel/errors.h"
#include "core/hle/kernel/handle_table.h"
#include "core/hle/kernel/k_address_arbiter.h"
#include "core/hle/kernel/k_condition_variable.h"
#include "core/hle/kernel/k_event.h"
+#include "core/hle/kernel/k_memory_block.h"
+#include "core/hle/kernel/k_memory_layout.h"
+#include "core/hle/kernel/k_page_table.h"
#include "core/hle/kernel/k_readable_event.h"
#include "core/hle/kernel/k_resource_limit.h"
#include "core/hle/kernel/k_scheduler.h"
+#include "core/hle/kernel/k_scoped_resource_reservation.h"
#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h"
+#include "core/hle/kernel/k_shared_memory.h"
#include "core/hle/kernel/k_synchronization_object.h"
#include "core/hle/kernel/k_thread.h"
#include "core/hle/kernel/k_writable_event.h"
#include "core/hle/kernel/kernel.h"
-#include "core/hle/kernel/memory/memory_block.h"
-#include "core/hle/kernel/memory/memory_layout.h"
-#include "core/hle/kernel/memory/page_table.h"
#include "core/hle/kernel/physical_core.h"
#include "core/hle/kernel/process.h"
-#include "core/hle/kernel/shared_memory.h"
#include "core/hle/kernel/svc.h"
#include "core/hle/kernel/svc_results.h"
#include "core/hle/kernel/svc_types.h"
@@ -67,53 +67,53 @@ constexpr bool IsValidAddressRange(VAddr address, u64 size) {
// Helper function that performs the common sanity checks for svcMapMemory
// and svcUnmapMemory. This is doable, as both functions perform their sanity checks
// in the same order.
-ResultCode MapUnmapMemorySanityChecks(const Memory::PageTable& manager, VAddr dst_addr,
- VAddr src_addr, u64 size) {
+ResultCode MapUnmapMemorySanityChecks(const KPageTable& manager, VAddr dst_addr, VAddr src_addr,
+ u64 size) {
if (!Common::Is4KBAligned(dst_addr)) {
LOG_ERROR(Kernel_SVC, "Destination address is not aligned to 4KB, 0x{:016X}", dst_addr);
- return ERR_INVALID_ADDRESS;
+ return ResultInvalidAddress;
}
if (!Common::Is4KBAligned(src_addr)) {
LOG_ERROR(Kernel_SVC, "Source address is not aligned to 4KB, 0x{:016X}", src_addr);
- return ERR_INVALID_SIZE;
+ return ResultInvalidSize;
}
if (size == 0) {
LOG_ERROR(Kernel_SVC, "Size is 0");
- return ERR_INVALID_SIZE;
+ return ResultInvalidSize;
}
if (!Common::Is4KBAligned(size)) {
LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:016X}", size);
- return ERR_INVALID_SIZE;
+ return ResultInvalidSize;
}
if (!IsValidAddressRange(dst_addr, size)) {
LOG_ERROR(Kernel_SVC,
"Destination is not a valid address range, addr=0x{:016X}, size=0x{:016X}",
dst_addr, size);
- return ERR_INVALID_ADDRESS_STATE;
+ return ResultInvalidCurrentMemory;
}
if (!IsValidAddressRange(src_addr, size)) {
LOG_ERROR(Kernel_SVC, "Source is not a valid address range, addr=0x{:016X}, size=0x{:016X}",
src_addr, size);
- return ERR_INVALID_ADDRESS_STATE;
+ return ResultInvalidCurrentMemory;
}
if (!manager.IsInsideAddressSpace(src_addr, size)) {
LOG_ERROR(Kernel_SVC,
"Source is not within the address space, addr=0x{:016X}, size=0x{:016X}",
src_addr, size);
- return ERR_INVALID_ADDRESS_STATE;
+ return ResultInvalidCurrentMemory;
}
if (manager.IsOutsideStackRegion(dst_addr, size)) {
LOG_ERROR(Kernel_SVC,
"Destination is not within the stack region, addr=0x{:016X}, size=0x{:016X}",
dst_addr, size);
- return ERR_INVALID_MEMORY_RANGE;
+ return ResultInvalidMemoryRange;
}
if (manager.IsInsideHeapRegion(dst_addr, size)) {
@@ -121,7 +121,7 @@ ResultCode MapUnmapMemorySanityChecks(const Memory::PageTable& manager, VAddr ds
"Destination does not fit within the heap region, addr=0x{:016X}, "
"size=0x{:016X}",
dst_addr, size);
- return ERR_INVALID_MEMORY_RANGE;
+ return ResultInvalidMemoryRange;
}
if (manager.IsInsideAliasRegion(dst_addr, size)) {
@@ -129,7 +129,7 @@ ResultCode MapUnmapMemorySanityChecks(const Memory::PageTable& manager, VAddr ds
"Destination does not fit within the map region, addr=0x{:016X}, "
"size=0x{:016X}",
dst_addr, size);
- return ERR_INVALID_MEMORY_RANGE;
+ return ResultInvalidMemoryRange;
}
return RESULT_SUCCESS;
@@ -138,6 +138,7 @@ ResultCode MapUnmapMemorySanityChecks(const Memory::PageTable& manager, VAddr ds
enum class ResourceLimitValueType {
CurrentValue,
LimitValue,
+ PeakValue,
};
ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_limit,
@@ -146,7 +147,7 @@ ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_
const auto type = static_cast<LimitableResource>(resource_type);
if (!IsValidResourceType(type)) {
LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type);
- return ERR_INVALID_ENUM_VALUE;
+ return ResultInvalidEnumValue;
}
const auto* const current_process = system.Kernel().CurrentProcess();
@@ -157,14 +158,20 @@ ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_
if (!resource_limit_object) {
LOG_ERROR(Kernel_SVC, "Handle to non-existent resource limit instance used. Handle={:08X}",
resource_limit);
- return ERR_INVALID_HANDLE;
+ return ResultInvalidHandle;
}
- if (value_type == ResourceLimitValueType::CurrentValue) {
+ switch (value_type) {
+ case ResourceLimitValueType::CurrentValue:
return MakeResult(resource_limit_object->GetCurrentValue(type));
+ case ResourceLimitValueType::LimitValue:
+ return MakeResult(resource_limit_object->GetLimitValue(type));
+ case ResourceLimitValueType::PeakValue:
+ return MakeResult(resource_limit_object->GetPeakValue(type));
+ default:
+ LOG_ERROR(Kernel_SVC, "Invalid resource value_type: '{}'", value_type);
+ return ResultInvalidEnumValue;
}
-
- return MakeResult(resource_limit_object->GetLimitValue(type));
}
} // Anonymous namespace
@@ -177,12 +184,12 @@ static ResultCode SetHeapSize(Core::System& system, VAddr* heap_addr, u64 heap_s
if ((heap_size % 0x200000) != 0) {
LOG_ERROR(Kernel_SVC, "The heap size is not a multiple of 2MB, heap_size=0x{:016X}",
heap_size);
- return ERR_INVALID_SIZE;
+ return ResultInvalidSize;
}
if (heap_size >= 0x200000000) {
LOG_ERROR(Kernel_SVC, "The heap size is not less than 8GB, heap_size=0x{:016X}", heap_size);
- return ERR_INVALID_SIZE;
+ return ResultInvalidSize;
}
auto& page_table{system.Kernel().CurrentProcess()->PageTable()};
@@ -208,34 +215,34 @@ static ResultCode SetMemoryAttribute(Core::System& system, VAddr address, u64 si
if (!Common::Is4KBAligned(address)) {
LOG_ERROR(Kernel_SVC, "Address not page aligned (0x{:016X})", address);
- return ERR_INVALID_ADDRESS;
+ return ResultInvalidAddress;
}
if (size == 0 || !Common::Is4KBAligned(size)) {
LOG_ERROR(Kernel_SVC, "Invalid size (0x{:X}). Size must be non-zero and page aligned.",
size);
- return ERR_INVALID_ADDRESS;
+ return ResultInvalidAddress;
}
if (!IsValidAddressRange(address, size)) {
LOG_ERROR(Kernel_SVC, "Address range overflowed (Address: 0x{:016X}, Size: 0x{:016X})",
address, size);
- return ERR_INVALID_ADDRESS_STATE;
+ return ResultInvalidCurrentMemory;
}
- const auto attributes{static_cast<Memory::MemoryAttribute>(mask | attribute)};
- if (attributes != static_cast<Memory::MemoryAttribute>(mask) ||
- (attributes | Memory::MemoryAttribute::Uncached) != Memory::MemoryAttribute::Uncached) {
+ const auto attributes{static_cast<MemoryAttribute>(mask | attribute)};
+ if (attributes != static_cast<MemoryAttribute>(mask) ||
+ (attributes | MemoryAttribute::Uncached) != MemoryAttribute::Uncached) {
LOG_ERROR(Kernel_SVC,
"Memory attribute doesn't match the given mask (Attribute: 0x{:X}, Mask: {:X}",
attribute, mask);
- return ERR_INVALID_COMBINATION;
+ return ResultInvalidCombination;
}
auto& page_table{system.Kernel().CurrentProcess()->PageTable()};
- return page_table.SetMemoryAttribute(address, size, static_cast<Memory::MemoryAttribute>(mask),
- static_cast<Memory::MemoryAttribute>(attribute));
+ return page_table.SetMemoryAttribute(address, size, static_cast<KMemoryAttribute>(mask),
+ static_cast<KMemoryAttribute>(attribute));
}
static ResultCode SetMemoryAttribute32(Core::System& system, u32 address, u32 size, u32 mask,
@@ -293,7 +300,7 @@ static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle,
LOG_ERROR(Kernel_SVC,
"Port Name Address is not a valid virtual address, port_name_address=0x{:016X}",
port_name_address);
- return ERR_NOT_FOUND;
+ return ResultNotFound;
}
static constexpr std::size_t PortNameMaxLength = 11;
@@ -302,7 +309,7 @@ static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle,
if (port_name.size() > PortNameMaxLength) {
LOG_ERROR(Kernel_SVC, "Port name is too long, expected {} but got {}", PortNameMaxLength,
port_name.size());
- return ERR_OUT_OF_RANGE;
+ return ResultOutOfRange;
}
LOG_TRACE(Kernel_SVC, "called port_name={}", port_name);
@@ -311,11 +318,9 @@ static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle,
const auto it = kernel.FindNamedPort(port_name);
if (!kernel.IsValidNamedPort(it)) {
LOG_WARNING(Kernel_SVC, "tried to connect to unknown port: {}", port_name);
- return ERR_NOT_FOUND;
+ return ResultNotFound;
}
- ASSERT(kernel.CurrentProcess()->GetResourceLimit()->Reserve(LimitableResource::Sessions, 1));
-
auto client_port = it->second;
std::shared_ptr<ClientSession> client_session;
@@ -340,7 +345,7 @@ static ResultCode SendSyncRequest(Core::System& system, Handle handle) {
std::shared_ptr<ClientSession> session = handle_table.Get<ClientSession>(handle);
if (!session) {
LOG_ERROR(Kernel_SVC, "called with invalid handle=0x{:08X}", handle);
- return ERR_INVALID_HANDLE;
+ return ResultInvalidHandle;
}
LOG_TRACE(Kernel_SVC, "called handle=0x{:08X}({})", handle, session->GetName());
@@ -405,7 +410,7 @@ static ResultCode GetProcessId(Core::System& system, u64* process_id, Handle han
const Process* const owner_process = thread->GetOwnerProcess();
if (!owner_process) {
LOG_ERROR(Kernel_SVC, "Non-existent owning process encountered.");
- return ERR_INVALID_HANDLE;
+ return ResultInvalidHandle;
}
*process_id = owner_process->GetProcessID();
@@ -415,7 +420,7 @@ static ResultCode GetProcessId(Core::System& system, u64* process_id, Handle han
// NOTE: This should also handle debug objects before returning.
LOG_ERROR(Kernel_SVC, "Handle does not exist, handle=0x{:08X}", handle);
- return ERR_INVALID_HANDLE;
+ return ResultInvalidHandle;
}
static ResultCode GetProcessId32(Core::System& system, u32* process_id_low, u32* process_id_high,
@@ -438,7 +443,7 @@ static ResultCode WaitSynchronization(Core::System& system, s32* index, VAddr ha
LOG_ERROR(Kernel_SVC,
"Handle address is not a valid virtual address, handle_address=0x{:016X}",
handles_address);
- return ERR_INVALID_POINTER;
+ return ResultInvalidPointer;
}
static constexpr u64 MaxHandles = 0x40;
@@ -446,7 +451,7 @@ static ResultCode WaitSynchronization(Core::System& system, s32* index, VAddr ha
if (handle_count > MaxHandles) {
LOG_ERROR(Kernel_SVC, "Handle count specified is too large, expected {} but got {}",
MaxHandles, handle_count);
- return ERR_OUT_OF_RANGE;
+ return ResultOutOfRange;
}
auto& kernel = system.Kernel();
@@ -459,7 +464,7 @@ static ResultCode WaitSynchronization(Core::System& system, s32* index, VAddr ha
if (object == nullptr) {
LOG_ERROR(Kernel_SVC, "Object is a nullptr");
- return ERR_INVALID_HANDLE;
+ return ResultInvalidHandle;
}
objects[i] = object.get();
@@ -481,6 +486,7 @@ static ResultCode CancelSynchronization(Core::System& system, Handle thread_hand
// Get the thread from its handle.
const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
std::shared_ptr<KThread> thread = handle_table.Get<KThread>(thread_handle);
+
if (!thread) {
LOG_ERROR(Kernel_SVC, "Invalid thread handle provided (handle={:08X})", thread_handle);
return ResultInvalidHandle;
@@ -502,7 +508,7 @@ static ResultCode ArbitrateLock(Core::System& system, Handle thread_handle, VAdd
thread_handle, address, tag);
// Validate the input address.
- if (Memory::IsKernelAddress(address)) {
+ if (IsKernelAddress(address)) {
LOG_ERROR(Kernel_SVC, "Attempting to arbitrate a lock on a kernel address (address={:08X})",
address);
return ResultInvalidCurrentMemory;
@@ -525,7 +531,7 @@ static ResultCode ArbitrateUnlock(Core::System& system, VAddr address) {
LOG_TRACE(Kernel_SVC, "called address=0x{:X}", address);
// Validate the input address.
- if (Memory::IsKernelAddress(address)) {
+ if (IsKernelAddress(address)) {
LOG_ERROR(Kernel_SVC,
"Attempting to arbitrate an unlock on a kernel address (address={:08X})",
address);
@@ -735,7 +741,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
if (info_sub_id != 0) {
LOG_ERROR(Kernel_SVC, "Info sub id is non zero! info_id={}, info_sub_id={}", info_id,
info_sub_id);
- return ERR_INVALID_ENUM_VALUE;
+ return ResultInvalidEnumValue;
}
const auto& current_process_handle_table =
@@ -744,7 +750,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
if (!process) {
LOG_ERROR(Kernel_SVC, "Process is not valid! info_id={}, info_sub_id={}, handle={:08X}",
info_id, info_sub_id, handle);
- return ERR_INVALID_HANDLE;
+ return ResultInvalidHandle;
}
switch (info_id_type) {
@@ -826,7 +832,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
}
LOG_ERROR(Kernel_SVC, "Unimplemented svcGetInfo id=0x{:016X}", info_id);
- return ERR_INVALID_ENUM_VALUE;
+ return ResultInvalidEnumValue;
}
case GetInfoType::IsCurrentProcessBeingDebugged:
@@ -836,13 +842,13 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
case GetInfoType::RegisterResourceLimit: {
if (handle != 0) {
LOG_ERROR(Kernel, "Handle is non zero! handle={:08X}", handle);
- return ERR_INVALID_HANDLE;
+ return ResultInvalidHandle;
}
if (info_sub_id != 0) {
LOG_ERROR(Kernel, "Info sub id is non zero! info_id={}, info_sub_id={}", info_id,
info_sub_id);
- return ERR_INVALID_COMBINATION;
+ return ResultInvalidCombination;
}
Process* const current_process = system.Kernel().CurrentProcess();
@@ -867,13 +873,13 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
if (handle != 0) {
LOG_ERROR(Kernel_SVC, "Process Handle is non zero, expected 0 result but got {:016X}",
handle);
- return ERR_INVALID_HANDLE;
+ return ResultInvalidHandle;
}
if (info_sub_id >= Process::RANDOM_ENTROPY_SIZE) {
LOG_ERROR(Kernel_SVC, "Entropy size is out of range, expected {} but got {}",
Process::RANDOM_ENTROPY_SIZE, info_sub_id);
- return ERR_INVALID_COMBINATION;
+ return ResultInvalidCombination;
}
*result = system.Kernel().CurrentProcess()->GetRandomEntropy(info_sub_id);
@@ -890,7 +896,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
if (info_sub_id != 0xFFFFFFFFFFFFFFFF && info_sub_id >= num_cpus) {
LOG_ERROR(Kernel_SVC, "Core count is out of range, expected {} but got {}", num_cpus,
info_sub_id);
- return ERR_INVALID_COMBINATION;
+ return ResultInvalidCombination;
}
const auto thread = system.Kernel().CurrentProcess()->GetHandleTable().Get<KThread>(
@@ -898,7 +904,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
if (!thread) {
LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}",
static_cast<Handle>(handle));
- return ERR_INVALID_HANDLE;
+ return ResultInvalidHandle;
}
const auto& core_timing = system.CoreTiming();
@@ -922,7 +928,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
default:
LOG_ERROR(Kernel_SVC, "Unimplemented svcGetInfo id=0x{:016X}", info_id);
- return ERR_INVALID_ENUM_VALUE;
+ return ResultInvalidEnumValue;
}
}
@@ -945,22 +951,22 @@ static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size)
if (!Common::Is4KBAligned(addr)) {
LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr);
- return ERR_INVALID_ADDRESS;
+ return ResultInvalidAddress;
}
if (!Common::Is4KBAligned(size)) {
LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size);
- return ERR_INVALID_SIZE;
+ return ResultInvalidSize;
}
if (size == 0) {
LOG_ERROR(Kernel_SVC, "Size is zero");
- return ERR_INVALID_SIZE;
+ return ResultInvalidSize;
}
if (!(addr < addr + size)) {
LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address");
- return ERR_INVALID_MEMORY_RANGE;
+ return ResultInvalidMemoryRange;
}
Process* const current_process{system.Kernel().CurrentProcess()};
@@ -968,21 +974,21 @@ static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size)
if (current_process->GetSystemResourceSize() == 0) {
LOG_ERROR(Kernel_SVC, "System Resource Size is zero");
- return ERR_INVALID_STATE;
+ return ResultInvalidState;
}
if (!page_table.IsInsideAddressSpace(addr, size)) {
LOG_ERROR(Kernel_SVC,
"Address is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr,
size);
- return ERR_INVALID_MEMORY_RANGE;
+ return ResultInvalidMemoryRange;
}
if (page_table.IsOutsideAliasRegion(addr, size)) {
LOG_ERROR(Kernel_SVC,
"Address is not within the alias region, addr=0x{:016X}, size=0x{:016X}", addr,
size);
- return ERR_INVALID_MEMORY_RANGE;
+ return ResultInvalidMemoryRange;
}
return page_table.MapPhysicalMemory(addr, size);
@@ -999,22 +1005,22 @@ static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size
if (!Common::Is4KBAligned(addr)) {
LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr);
- return ERR_INVALID_ADDRESS;
+ return ResultInvalidAddress;
}
if (!Common::Is4KBAligned(size)) {
LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size);
- return ERR_INVALID_SIZE;
+ return ResultInvalidSize;
}
if (size == 0) {
LOG_ERROR(Kernel_SVC, "Size is zero");
- return ERR_INVALID_SIZE;
+ return ResultInvalidSize;
}
if (!(addr < addr + size)) {
LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address");
- return ERR_INVALID_MEMORY_RANGE;
+ return ResultInvalidMemoryRange;
}
Process* const current_process{system.Kernel().CurrentProcess()};
@@ -1022,21 +1028,21 @@ static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size
if (current_process->GetSystemResourceSize() == 0) {
LOG_ERROR(Kernel_SVC, "System Resource Size is zero");
- return ERR_INVALID_STATE;
+ return ResultInvalidState;
}
if (!page_table.IsInsideAddressSpace(addr, size)) {
LOG_ERROR(Kernel_SVC,
"Address is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr,
size);
- return ERR_INVALID_MEMORY_RANGE;
+ return ResultInvalidMemoryRange;
}
if (page_table.IsOutsideAliasRegion(addr, size)) {
LOG_ERROR(Kernel_SVC,
"Address is not within the alias region, addr=0x{:016X}, size=0x{:016X}", addr,
size);
- return ERR_INVALID_MEMORY_RANGE;
+ return ResultInvalidMemoryRange;
}
return page_table.UnmapPhysicalMemory(addr, size);
@@ -1206,31 +1212,30 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han
if (!Common::Is4KBAligned(addr)) {
LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, addr=0x{:016X}", addr);
- return ERR_INVALID_ADDRESS;
+ return ResultInvalidAddress;
}
if (size == 0) {
LOG_ERROR(Kernel_SVC, "Size is 0");
- return ERR_INVALID_SIZE;
+ return ResultInvalidSize;
}
if (!Common::Is4KBAligned(size)) {
LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, size=0x{:016X}", size);
- return ERR_INVALID_SIZE;
+ return ResultInvalidSize;
}
if (!IsValidAddressRange(addr, size)) {
LOG_ERROR(Kernel_SVC, "Region is not a valid address range, addr=0x{:016X}, size=0x{:016X}",
addr, size);
- return ERR_INVALID_ADDRESS_STATE;
+ return ResultInvalidCurrentMemory;
}
- const auto permission_type = static_cast<Memory::MemoryPermission>(permissions);
- if ((permission_type | Memory::MemoryPermission::Write) !=
- Memory::MemoryPermission::ReadAndWrite) {
+ const auto permission_type = static_cast<MemoryPermission>(permissions);
+ if ((permission_type | MemoryPermission::Write) != MemoryPermission::ReadWrite) {
LOG_ERROR(Kernel_SVC, "Expected Read or ReadWrite permission but got permissions=0x{:08X}",
permissions);
- return ERR_INVALID_MEMORY_PERMISSIONS;
+ return ResultInvalidMemoryPermissions;
}
auto* const current_process{system.Kernel().CurrentProcess()};
@@ -1241,7 +1246,7 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han
"Addr does not fit within the valid region, addr=0x{:016X}, "
"size=0x{:016X}",
addr, size);
- return ERR_INVALID_MEMORY_RANGE;
+ return ResultInvalidMemoryRange;
}
if (page_table.IsInsideHeapRegion(addr, size)) {
@@ -1249,7 +1254,7 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han
"Addr does not fit within the heap region, addr=0x{:016X}, "
"size=0x{:016X}",
addr, size);
- return ERR_INVALID_MEMORY_RANGE;
+ return ResultInvalidMemoryRange;
}
if (page_table.IsInsideAliasRegion(addr, size)) {
@@ -1257,17 +1262,18 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han
"Address does not fit within the map region, addr=0x{:016X}, "
"size=0x{:016X}",
addr, size);
- return ERR_INVALID_MEMORY_RANGE;
+ return ResultInvalidMemoryRange;
}
- auto shared_memory{current_process->GetHandleTable().Get<SharedMemory>(shared_memory_handle)};
+ auto shared_memory{current_process->GetHandleTable().Get<KSharedMemory>(shared_memory_handle)};
if (!shared_memory) {
LOG_ERROR(Kernel_SVC, "Shared memory does not exist, shared_memory_handle=0x{:08X}",
shared_memory_handle);
- return ERR_INVALID_HANDLE;
+ return ResultInvalidHandle;
}
- return shared_memory->Map(*current_process, addr, size, permission_type);
+ return shared_memory->Map(*current_process, addr, size,
+ static_cast<KMemoryPermission>(permission_type));
}
static ResultCode MapSharedMemory32(Core::System& system, Handle shared_memory_handle, u32 addr,
@@ -1285,7 +1291,7 @@ static ResultCode QueryProcessMemory(Core::System& system, VAddr memory_info_add
if (!process) {
LOG_ERROR(Kernel_SVC, "Process handle does not exist, process_handle=0x{:08X}",
process_handle);
- return ERR_INVALID_HANDLE;
+ return ResultInvalidHandle;
}
auto& memory{system.Memory()};
@@ -1332,18 +1338,18 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand
if (!Common::Is4KBAligned(src_address)) {
LOG_ERROR(Kernel_SVC, "src_address is not page-aligned (src_address=0x{:016X}).",
src_address);
- return ERR_INVALID_ADDRESS;
+ return ResultInvalidAddress;
}
if (!Common::Is4KBAligned(dst_address)) {
LOG_ERROR(Kernel_SVC, "dst_address is not page-aligned (dst_address=0x{:016X}).",
dst_address);
- return ERR_INVALID_ADDRESS;
+ return ResultInvalidAddress;
}
if (size == 0 || !Common::Is4KBAligned(size)) {
LOG_ERROR(Kernel_SVC, "Size is zero or not page-aligned (size=0x{:016X})", size);
- return ERR_INVALID_SIZE;
+ return ResultInvalidSize;
}
if (!IsValidAddressRange(dst_address, size)) {
@@ -1351,7 +1357,7 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand
"Destination address range overflows the address space (dst_address=0x{:016X}, "
"size=0x{:016X}).",
dst_address, size);
- return ERR_INVALID_ADDRESS_STATE;
+ return ResultInvalidCurrentMemory;
}
if (!IsValidAddressRange(src_address, size)) {
@@ -1359,7 +1365,7 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand
"Source address range overflows the address space (src_address=0x{:016X}, "
"size=0x{:016X}).",
src_address, size);
- return ERR_INVALID_ADDRESS_STATE;
+ return ResultInvalidCurrentMemory;
}
const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
@@ -1367,7 +1373,7 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand
if (!process) {
LOG_ERROR(Kernel_SVC, "Invalid process handle specified (handle=0x{:08X}).",
process_handle);
- return ERR_INVALID_HANDLE;
+ return ResultInvalidHandle;
}
auto& page_table = process->PageTable();
@@ -1376,7 +1382,7 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand
"Source address range is not within the address space (src_address=0x{:016X}, "
"size=0x{:016X}).",
src_address, size);
- return ERR_INVALID_ADDRESS_STATE;
+ return ResultInvalidCurrentMemory;
}
if (!page_table.IsInsideASLRRegion(dst_address, size)) {
@@ -1384,7 +1390,7 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand
"Destination address range is not within the ASLR region (dst_address=0x{:016X}, "
"size=0x{:016X}).",
dst_address, size);
- return ERR_INVALID_MEMORY_RANGE;
+ return ResultInvalidMemoryRange;
}
return page_table.MapProcessCodeMemory(dst_address, src_address, size);
@@ -1400,18 +1406,18 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha
if (!Common::Is4KBAligned(dst_address)) {
LOG_ERROR(Kernel_SVC, "dst_address is not page-aligned (dst_address=0x{:016X}).",
dst_address);
- return ERR_INVALID_ADDRESS;
+ return ResultInvalidAddress;
}
if (!Common::Is4KBAligned(src_address)) {
LOG_ERROR(Kernel_SVC, "src_address is not page-aligned (src_address=0x{:016X}).",
src_address);
- return ERR_INVALID_ADDRESS;
+ return ResultInvalidAddress;
}
if (size == 0 || !Common::Is4KBAligned(size)) {
LOG_ERROR(Kernel_SVC, "Size is zero or not page-aligned (size=0x{:016X}).", size);
- return ERR_INVALID_SIZE;
+ return ResultInvalidSize;
}
if (!IsValidAddressRange(dst_address, size)) {
@@ -1419,7 +1425,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha
"Destination address range overflows the address space (dst_address=0x{:016X}, "
"size=0x{:016X}).",
dst_address, size);
- return ERR_INVALID_ADDRESS_STATE;
+ return ResultInvalidCurrentMemory;
}
if (!IsValidAddressRange(src_address, size)) {
@@ -1427,7 +1433,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha
"Source address range overflows the address space (src_address=0x{:016X}, "
"size=0x{:016X}).",
src_address, size);
- return ERR_INVALID_ADDRESS_STATE;
+ return ResultInvalidCurrentMemory;
}
const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
@@ -1435,7 +1441,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha
if (!process) {
LOG_ERROR(Kernel_SVC, "Invalid process handle specified (handle=0x{:08X}).",
process_handle);
- return ERR_INVALID_HANDLE;
+ return ResultInvalidHandle;
}
auto& page_table = process->PageTable();
@@ -1444,7 +1450,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha
"Source address range is not within the address space (src_address=0x{:016X}, "
"size=0x{:016X}).",
src_address, size);
- return ERR_INVALID_ADDRESS_STATE;
+ return ResultInvalidCurrentMemory;
}
if (!page_table.IsInsideASLRRegion(dst_address, size)) {
@@ -1452,7 +1458,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha
"Destination address range is not within the ASLR region (dst_address=0x{:016X}, "
"size=0x{:016X}).",
dst_address, size);
- return ERR_INVALID_MEMORY_RANGE;
+ return ResultInvalidMemoryRange;
}
return page_table.UnmapProcessCodeMemory(dst_address, src_address, size);
@@ -1515,8 +1521,13 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e
return ResultInvalidPriority;
}
- ASSERT(process.GetResourceLimit()->Reserve(
- LimitableResource::Threads, 1, system.CoreTiming().GetGlobalTimeNs().count() + 100000000));
+ KScopedResourceReservation thread_reservation(
+ kernel.CurrentProcess(), LimitableResource::Threads, 1,
+ system.CoreTiming().GetGlobalTimeNs().count() + 100000000);
+ if (!thread_reservation.Succeeded()) {
+ LOG_ERROR(Kernel_SVC, "Could not reserve a new thread");
+ return ResultResourceLimitedExceeded;
+ }
std::shared_ptr<KThread> thread;
{
@@ -1536,6 +1547,7 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e
// Set the thread name for debugging purposes.
thread->SetName(
fmt::format("thread[entry_point={:X}, handle={:X}]", entry_point, *new_thread_handle));
+ thread_reservation.Commit();
return RESULT_SUCCESS;
}
@@ -1625,7 +1637,7 @@ static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr address,
cv_key, tag, timeout_ns);
// Validate input.
- if (Memory::IsKernelAddress(address)) {
+ if (IsKernelAddress(address)) {
LOG_ERROR(Kernel_SVC, "Attempted to wait on kernel address (address={:08X})", address);
return ResultInvalidCurrentMemory;
}
@@ -1707,7 +1719,7 @@ static ResultCode WaitForAddress(Core::System& system, VAddr address, Svc::Arbit
address, arb_type, value, timeout_ns);
// Validate input.
- if (Memory::IsKernelAddress(address)) {
+ if (IsKernelAddress(address)) {
LOG_ERROR(Kernel_SVC, "Attempting to wait on kernel address (address={:08X})", address);
return ResultInvalidCurrentMemory;
}
@@ -1752,7 +1764,7 @@ static ResultCode SignalToAddress(Core::System& system, VAddr address, Svc::Sign
address, signal_type, value, count);
// Validate input.
- if (Memory::IsKernelAddress(address)) {
+ if (IsKernelAddress(address)) {
LOG_ERROR(Kernel_SVC, "Attempting to signal to a kernel address (address={:08X})", address);
return ResultInvalidCurrentMemory;
}
@@ -1844,7 +1856,7 @@ static ResultCode ResetSignal(Core::System& system, Handle handle) {
LOG_ERROR(Kernel_SVC, "invalid handle (0x{:08X})", handle);
- return Svc::ResultInvalidHandle;
+ return ResultInvalidHandle;
}
static ResultCode ResetSignal32(Core::System& system, Handle handle) {
@@ -1860,30 +1872,37 @@ static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAd
if (!Common::Is4KBAligned(addr)) {
LOG_ERROR(Kernel_SVC, "Address ({:016X}) is not page aligned!", addr);
- return ERR_INVALID_ADDRESS;
+ return ResultInvalidAddress;
}
if (!Common::Is4KBAligned(size) || size == 0) {
LOG_ERROR(Kernel_SVC, "Size ({:016X}) is not page aligned or equal to zero!", size);
- return ERR_INVALID_ADDRESS;
+ return ResultInvalidAddress;
}
if (!IsValidAddressRange(addr, size)) {
LOG_ERROR(Kernel_SVC, "Address and size cause overflow! (address={:016X}, size={:016X})",
addr, size);
- return ERR_INVALID_ADDRESS_STATE;
+ return ResultInvalidCurrentMemory;
}
- const auto perms{static_cast<Memory::MemoryPermission>(permissions)};
- if (perms > Memory::MemoryPermission::ReadAndWrite ||
- perms == Memory::MemoryPermission::Write) {
+ const auto perms{static_cast<MemoryPermission>(permissions)};
+ if (perms > MemoryPermission::ReadWrite || perms == MemoryPermission::Write) {
LOG_ERROR(Kernel_SVC, "Invalid memory permissions for transfer memory! (perms={:08X})",
permissions);
- return ERR_INVALID_MEMORY_PERMISSIONS;
+ return ResultInvalidMemoryPermissions;
}
auto& kernel = system.Kernel();
- auto transfer_mem_handle = TransferMemory::Create(kernel, system.Memory(), addr, size, perms);
+ // Reserve a new transfer memory from the process resource limit.
+ KScopedResourceReservation trmem_reservation(kernel.CurrentProcess(),
+ LimitableResource::TransferMemory);
+ if (!trmem_reservation.Succeeded()) {
+ LOG_ERROR(Kernel_SVC, "Could not reserve a new transfer memory");
+ return ResultResourceLimitedExceeded;
+ }
+ auto transfer_mem_handle = TransferMemory::Create(kernel, system.Memory(), addr, size,
+ static_cast<KMemoryPermission>(perms));
if (const auto reserve_result{transfer_mem_handle->Reserve()}; reserve_result.IsError()) {
return reserve_result;
@@ -1894,6 +1913,7 @@ static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAd
if (result.Failed()) {
return result.Code();
}
+ trmem_reservation.Commit();
*handle = *result;
return RESULT_SUCCESS;
@@ -1989,7 +2009,6 @@ static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle,
LOG_ERROR(Kernel_SVC, "Unable to successfully set core mask (result={})", set_result.raw);
return set_result;
}
-
return RESULT_SUCCESS;
}
@@ -2002,8 +2021,17 @@ static ResultCode SetThreadCoreMask32(Core::System& system, Handle thread_handle
static ResultCode SignalEvent(Core::System& system, Handle event_handle) {
LOG_DEBUG(Kernel_SVC, "called, event_handle=0x{:08X}", event_handle);
+ auto& kernel = system.Kernel();
// Get the current handle table.
- const HandleTable& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
+ const HandleTable& handle_table = kernel.CurrentProcess()->GetHandleTable();
+
+ // Reserve a new event from the process resource limit.
+ KScopedResourceReservation event_reservation(kernel.CurrentProcess(),
+ LimitableResource::Events);
+ if (!event_reservation.Succeeded()) {
+ LOG_ERROR(Kernel, "Could not reserve a new event");
+ return ResultResourceLimitedExceeded;
+ }
// Get the writable event.
auto writable_event = handle_table.Get<KWritableEvent>(event_handle);
@@ -2012,6 +2040,9 @@ static ResultCode SignalEvent(Core::System& system, Handle event_handle) {
return ResultInvalidHandle;
}
+ // Commit the successful reservation.
+ event_reservation.Commit();
+
return writable_event->Signal();
}
@@ -2043,7 +2074,7 @@ static ResultCode ClearEvent(Core::System& system, Handle event_handle) {
LOG_ERROR(Kernel_SVC, "Event handle does not exist, event_handle=0x{:08X}", event_handle);
- return Svc::ResultInvalidHandle;
+ return ResultInvalidHandle;
}
static ResultCode ClearEvent32(Core::System& system, Handle event_handle) {
@@ -2106,13 +2137,13 @@ static ResultCode GetProcessInfo(Core::System& system, u64* out, Handle process_
if (!process) {
LOG_ERROR(Kernel_SVC, "Process handle does not exist, process_handle=0x{:08X}",
process_handle);
- return ERR_INVALID_HANDLE;
+ return ResultInvalidHandle;
}
const auto info_type = static_cast<InfoType>(type);
if (info_type != InfoType::Status) {
LOG_ERROR(Kernel_SVC, "Expected info_type to be Status but got {} instead", type);
- return ERR_INVALID_ENUM_VALUE;
+ return ResultInvalidEnumValue;
}
*out = static_cast<u64>(process->GetStatus());
@@ -2174,7 +2205,7 @@ static ResultCode SetResourceLimitLimitValue(Core::System& system, Handle resour
const auto type = static_cast<LimitableResource>(resource_type);
if (!IsValidResourceType(type)) {
LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type);
- return ERR_INVALID_ENUM_VALUE;
+ return ResultInvalidEnumValue;
}
auto* const current_process = system.Kernel().CurrentProcess();
@@ -2185,16 +2216,16 @@ static ResultCode SetResourceLimitLimitValue(Core::System& system, Handle resour
if (!resource_limit_object) {
LOG_ERROR(Kernel_SVC, "Handle to non-existent resource limit instance used. Handle={:08X}",
resource_limit);
- return ERR_INVALID_HANDLE;
+ return ResultInvalidHandle;
}
const auto set_result = resource_limit_object->SetLimitValue(type, static_cast<s64>(value));
if (set_result.IsError()) {
- LOG_ERROR(
- Kernel_SVC,
- "Attempted to lower resource limit ({}) for category '{}' below its current value ({})",
- resource_limit_object->GetLimitValue(type), resource_type,
- resource_limit_object->GetCurrentValue(type));
+ LOG_ERROR(Kernel_SVC,
+ "Attempted to lower resource limit ({}) for category '{}' below its current "
+ "value ({})",
+ resource_limit_object->GetLimitValue(type), resource_type,
+ resource_limit_object->GetCurrentValue(type));
return set_result;
}
@@ -2211,7 +2242,7 @@ static ResultCode GetProcessList(Core::System& system, u32* out_num_processes,
LOG_ERROR(Kernel_SVC,
"Supplied size outside [0, 0x0FFFFFFF] range. out_process_ids_size={}",
out_process_ids_size);
- return ERR_OUT_OF_RANGE;
+ return ResultOutOfRange;
}
const auto& kernel = system.Kernel();
@@ -2221,7 +2252,7 @@ static ResultCode GetProcessList(Core::System& system, u32* out_num_processes,
out_process_ids, total_copy_size)) {
LOG_ERROR(Kernel_SVC, "Address range outside address space. begin=0x{:016X}, end=0x{:016X}",
out_process_ids, out_process_ids + total_copy_size);
- return ERR_INVALID_ADDRESS_STATE;
+ return ResultInvalidCurrentMemory;
}
auto& memory = system.Memory();
@@ -2250,7 +2281,7 @@ static ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAdd
if ((out_thread_ids_size & 0xF0000000) != 0) {
LOG_ERROR(Kernel_SVC, "Supplied size outside [0, 0x0FFFFFFF] range. size={}",
out_thread_ids_size);
- return ERR_OUT_OF_RANGE;
+ return ResultOutOfRange;
}
const auto* const current_process = system.Kernel().CurrentProcess();
@@ -2260,7 +2291,7 @@ static ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAdd
!current_process->PageTable().IsInsideAddressSpace(out_thread_ids, total_copy_size)) {
LOG_ERROR(Kernel_SVC, "Address range outside address space. begin=0x{:016X}, end=0x{:016X}",
out_thread_ids, out_thread_ids + total_copy_size);
- return ERR_INVALID_ADDRESS_STATE;
+ return ResultInvalidCurrentMemory;
}
auto& memory = system.Memory();
diff --git a/src/core/hle/kernel/svc_results.h b/src/core/hle/kernel/svc_results.h
index 204cd989d..a26d9f2c9 100644
--- a/src/core/hle/kernel/svc_results.h
+++ b/src/core/hle/kernel/svc_results.h
@@ -1,4 +1,4 @@
-// Copyright 2020 yuzu emulator team
+// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
@@ -6,21 +6,36 @@
#include "core/hle/result.h"
-namespace Kernel::Svc {
+namespace Kernel {
+// Confirmed Switch kernel error codes
+
+constexpr ResultCode ResultMaxConnectionsReached{ErrorModule::Kernel, 7};
+constexpr ResultCode ResultInvalidCapabilityDescriptor{ErrorModule::Kernel, 14};
constexpr ResultCode ResultNoSynchronizationObject{ErrorModule::Kernel, 57};
constexpr ResultCode ResultTerminationRequested{ErrorModule::Kernel, 59};
+constexpr ResultCode ResultInvalidSize{ErrorModule::Kernel, 101};
constexpr ResultCode ResultInvalidAddress{ErrorModule::Kernel, 102};
constexpr ResultCode ResultOutOfResource{ErrorModule::Kernel, 103};
+constexpr ResultCode ResultOutOfMemory{ErrorModule::Kernel, 104};
+constexpr ResultCode ResultHandleTableFull{ErrorModule::Kernel, 105};
constexpr ResultCode ResultInvalidCurrentMemory{ErrorModule::Kernel, 106};
+constexpr ResultCode ResultInvalidMemoryPermissions{ErrorModule::Kernel, 108};
+constexpr ResultCode ResultInvalidMemoryRange{ErrorModule::Kernel, 110};
constexpr ResultCode ResultInvalidPriority{ErrorModule::Kernel, 112};
constexpr ResultCode ResultInvalidCoreId{ErrorModule::Kernel, 113};
constexpr ResultCode ResultInvalidHandle{ErrorModule::Kernel, 114};
+constexpr ResultCode ResultInvalidPointer{ErrorModule::Kernel, 115};
constexpr ResultCode ResultInvalidCombination{ErrorModule::Kernel, 116};
constexpr ResultCode ResultTimedOut{ErrorModule::Kernel, 117};
constexpr ResultCode ResultCancelled{ErrorModule::Kernel, 118};
+constexpr ResultCode ResultOutOfRange{ErrorModule::Kernel, 119};
constexpr ResultCode ResultInvalidEnumValue{ErrorModule::Kernel, 120};
+constexpr ResultCode ResultNotFound{ErrorModule::Kernel, 121};
constexpr ResultCode ResultBusy{ErrorModule::Kernel, 122};
+constexpr ResultCode ResultSessionClosedByRemote{ErrorModule::Kernel, 123};
constexpr ResultCode ResultInvalidState{ErrorModule::Kernel, 125};
+constexpr ResultCode ResultReservedValue{ErrorModule::Kernel, 126};
+constexpr ResultCode ResultResourceLimitedExceeded{ErrorModule::Kernel, 132};
-} // namespace Kernel::Svc
+} // namespace Kernel
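On how the constants above compose: each ResultCode pairs an error module with a description number. A sketch of the raw encoding, assuming the layout in core/hle/result.h (module in bits 0-8, description in bits 9-21, with ErrorModule::Kernel == 1) — that layout is an assumption worth checking against the header:

#include <cstdint>

constexpr std::uint32_t MakeRaw(std::uint32_t module, std::uint32_t description) {
    return (module & 0x1FF) | ((description & 0x1FFF) << 9);
}

constexpr std::uint32_t KernelModule = 1; // assumed value of ErrorModule::Kernel

// e.g. ResultInvalidHandle{ErrorModule::Kernel, 114} would carry this raw value:
static_assert(MakeRaw(KernelModule, 114) == ((114u << 9) | 1u));

int main() {}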
diff --git a/src/core/hle/kernel/transfer_memory.cpp b/src/core/hle/kernel/transfer_memory.cpp
index 765f408c3..cad063e4d 100644
--- a/src/core/hle/kernel/transfer_memory.cpp
+++ b/src/core/hle/kernel/transfer_memory.cpp
@@ -2,8 +2,9 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include "core/hle/kernel/k_page_table.h"
+#include "core/hle/kernel/k_resource_limit.h"
#include "core/hle/kernel/kernel.h"
-#include "core/hle/kernel/memory/page_table.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/transfer_memory.h"
#include "core/hle/result.h"
@@ -17,12 +18,13 @@ TransferMemory::TransferMemory(KernelCore& kernel, Core::Memory::Memory& memory)
TransferMemory::~TransferMemory() {
// Release memory region when transfer memory is destroyed
Reset();
+ owner_process->GetResourceLimit()->Release(LimitableResource::TransferMemory, 1);
}
std::shared_ptr<TransferMemory> TransferMemory::Create(KernelCore& kernel,
Core::Memory::Memory& memory,
VAddr base_address, std::size_t size,
- Memory::MemoryPermission permissions) {
+ KMemoryPermission permissions) {
std::shared_ptr<TransferMemory> transfer_memory{
std::make_shared<TransferMemory>(kernel, memory)};
diff --git a/src/core/hle/kernel/transfer_memory.h b/src/core/hle/kernel/transfer_memory.h
index 777799d12..521951424 100644
--- a/src/core/hle/kernel/transfer_memory.h
+++ b/src/core/hle/kernel/transfer_memory.h
@@ -6,7 +6,7 @@
#include <memory>
-#include "core/hle/kernel/memory/memory_block.h"
+#include "core/hle/kernel/k_memory_block.h"
#include "core/hle/kernel/object.h"
#include "core/hle/kernel/physical_memory.h"
@@ -36,7 +36,7 @@ public:
static std::shared_ptr<TransferMemory> Create(KernelCore& kernel, Core::Memory::Memory& memory,
VAddr base_address, std::size_t size,
- Memory::MemoryPermission permissions);
+ KMemoryPermission permissions);
TransferMemory(const TransferMemory&) = delete;
TransferMemory& operator=(const TransferMemory&) = delete;
@@ -82,7 +82,7 @@ private:
std::size_t size{};
/// The memory permissions that are applied to this instance.
- Memory::MemoryPermission owner_permissions{};
+ KMemoryPermission owner_permissions{};
/// The process that this transfer memory instance was created under.
Process* owner_process{};
diff --git a/src/core/hle/service/acc/acc.cpp b/src/core/hle/service/acc/acc.cpp
index 3ec0e1eca..615e20a54 100644
--- a/src/core/hle/service/acc/acc.cpp
+++ b/src/core/hle/service/acc/acc.cpp
@@ -508,7 +508,7 @@ public:
{1, &IManagerForApplication::GetAccountId, "GetAccountId"},
{2, nullptr, "EnsureIdTokenCacheAsync"},
{3, nullptr, "LoadIdTokenCache"},
- {130, nullptr, "GetNintendoAccountUserResourceCacheForApplication"},
+ {130, &IManagerForApplication::GetNintendoAccountUserResourceCacheForApplication, "GetNintendoAccountUserResourceCacheForApplication"},
{150, nullptr, "CreateAuthorizationRequest"},
{160, &IManagerForApplication::StoreOpenContext, "StoreOpenContext"},
{170, nullptr, "LoadNetworkServiceLicenseKindAsync"},
@@ -534,6 +534,22 @@ private:
rb.PushRaw<u64>(user_id.GetNintendoID());
}
+ void GetNintendoAccountUserResourceCacheForApplication(Kernel::HLERequestContext& ctx) {
+ LOG_WARNING(Service_ACC, "(STUBBED) called");
+
+ std::vector<u8> nas_user_base_for_application(0x68);
+ ctx.WriteBuffer(nas_user_base_for_application, 0);
+
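+ // The second output buffer's contents are unknown; zero-fill it when the guest provides one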
+ if (ctx.CanWriteBuffer(1)) {
+ std::vector<u8> unknown_out_buffer(ctx.GetWriteBufferSize(1));
+ ctx.WriteBuffer(unknown_out_buffer, 1);
+ }
+
+ IPC::ResponseBuilder rb{ctx, 4};
+ rb.Push(RESULT_SUCCESS);
+ rb.PushRaw<u64>(user_id.GetNintendoID());
+ }
+
void StoreOpenContext(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_ACC, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 2};
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index bb77c2569..8e1fe9438 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -1047,20 +1047,21 @@ void IStorageAccessor::Write(Kernel::HLERequestContext& ctx) {
const u64 offset{rp.Pop<u64>()};
const std::vector<u8> data{ctx.ReadBuffer()};
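+ // Clamp the size so writes past the end of the backing storage are truncated rather than rejected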
+ const std::size_t size{std::min(data.size(), backing.GetSize() - offset)};
- LOG_DEBUG(Service_AM, "called, offset={}, size={}", offset, data.size());
+ LOG_DEBUG(Service_AM, "called, offset={}, size={}", offset, size);
- if (data.size() > backing.GetSize() - offset) {
+ if (offset > backing.GetSize()) {
LOG_ERROR(Service_AM,
"offset is out of bounds, backing_buffer_sz={}, data_size={}, offset={}",
- backing.GetSize(), data.size(), offset);
+ backing.GetSize(), size, offset);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ERR_SIZE_OUT_OF_BOUNDS);
return;
}
- std::memcpy(backing.GetData().data() + offset, data.data(), data.size());
+ std::memcpy(backing.GetData().data() + offset, data.data(), size);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
@@ -1070,11 +1071,11 @@ void IStorageAccessor::Read(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const u64 offset{rp.Pop<u64>()};
- const std::size_t size{ctx.GetWriteBufferSize()};
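+ // Clamp reads to the remaining backing storage, mirroring the clamp in Write above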
+ const std::size_t size{std::min(ctx.GetWriteBufferSize(), backing.GetSize() - offset)};
LOG_DEBUG(Service_AM, "called, offset={}, size={}", offset, size);
- if (size > backing.GetSize() - offset) {
+ if (offset > backing.GetSize()) {
LOG_ERROR(Service_AM, "offset is out of bounds, backing_buffer_sz={}, size={}, offset={}",
backing.GetSize(), size, offset);
diff --git a/src/core/hle/service/am/applets/controller.cpp b/src/core/hle/service/am/applets/controller.cpp
index d7d3ee99a..c2bfe698f 100644
--- a/src/core/hle/service/am/applets/controller.cpp
+++ b/src/core/hle/service/am/applets/controller.cpp
@@ -211,7 +211,8 @@ void Controller::Execute() {
case ControllerSupportMode::ShowControllerFirmwareUpdate:
UNIMPLEMENTED_MSG("ControllerSupportMode={} is not implemented",
controller_private_arg.mode);
- [[fallthrough]];
+ ConfigurationComplete();
+ break;
default: {
ConfigurationComplete();
break;
diff --git a/src/core/hle/service/am/applets/software_keyboard.cpp b/src/core/hle/service/am/applets/software_keyboard.cpp
index 3022438b1..79b209c6b 100644
--- a/src/core/hle/service/am/applets/software_keyboard.cpp
+++ b/src/core/hle/service/am/applets/software_keyboard.cpp
@@ -121,6 +121,10 @@ void SoftwareKeyboard::ExecuteInteractive() {
std::memcpy(&request, data.data(), sizeof(Request));
switch (request) {
+ case Request::Finalize:
+ complete = true;
+ broker.SignalStateChanged();
+ break;
case Request::Calc: {
broker.PushNormalDataFromApplet(std::make_shared<IStorage>(system, std::vector<u8>{1}));
broker.SignalStateChanged();
diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp
index dbf198345..70b9f3824 100644
--- a/src/core/hle/service/hid/controllers/npad.cpp
+++ b/src/core/hle/service/hid/controllers/npad.cpp
@@ -21,6 +21,7 @@
namespace Service::HID {
constexpr s32 HID_JOYSTICK_MAX = 0x7fff;
+constexpr s32 HID_TRIGGER_MAX = 0x7fff;
[[maybe_unused]] constexpr s32 HID_JOYSTICK_MIN = -0x7fff;
constexpr std::size_t NPAD_OFFSET = 0x9A00;
constexpr u32 BATTERY_FULL = 2;
@@ -48,6 +49,8 @@ Controller_NPad::NPadControllerType Controller_NPad::MapSettingsTypeToNPad(
return NPadControllerType::JoyRight;
case Settings::ControllerType::Handheld:
return NPadControllerType::Handheld;
+ case Settings::ControllerType::GameCube:
+ return NPadControllerType::GameCube;
default:
UNREACHABLE();
return NPadControllerType::ProController;
@@ -67,6 +70,8 @@ Settings::ControllerType Controller_NPad::MapNPadToSettingsType(
return Settings::ControllerType::RightJoycon;
case NPadControllerType::Handheld:
return Settings::ControllerType::Handheld;
+ case NPadControllerType::GameCube:
+ return Settings::ControllerType::GameCube;
default:
UNREACHABLE();
return Settings::ControllerType::ProController;
@@ -209,6 +214,13 @@ void Controller_NPad::InitNewlyAddedController(std::size_t controller_idx) {
controller.assignment_mode = NpadAssignments::Single;
controller.footer_type = AppletFooterUiType::JoyRightHorizontal;
break;
+ case NPadControllerType::GameCube:
+ controller.style_set.gamecube.Assign(1);
+ // The GC Controller behaves like a wired Pro Controller
+ controller.device_type.fullkey.Assign(1);
+ controller.system_properties.is_vertical.Assign(1);
+ controller.system_properties.use_plus.Assign(1);
+ break;
case NPadControllerType::Pokeball:
controller.style_set.palma.Assign(1);
controller.device_type.palma.Assign(1);
@@ -259,6 +271,7 @@ void Controller_NPad::OnInit() {
style.joycon_right.Assign(1);
style.joycon_dual.Assign(1);
style.fullkey.Assign(1);
+ style.gamecube.Assign(1);
style.palma.Assign(1);
}
@@ -339,6 +352,7 @@ void Controller_NPad::RequestPadStateUpdate(u32 npad_id) {
auto& pad_state = npad_pad_states[controller_idx].pad_states;
auto& lstick_entry = npad_pad_states[controller_idx].l_stick;
auto& rstick_entry = npad_pad_states[controller_idx].r_stick;
+ auto& trigger_entry = npad_trigger_states[controller_idx];
const auto& button_state = buttons[controller_idx];
const auto& analog_state = sticks[controller_idx];
const auto [stick_l_x_f, stick_l_y_f] =
@@ -404,6 +418,17 @@ void Controller_NPad::RequestPadStateUpdate(u32 npad_id) {
pad_state.left_sl.Assign(button_state[SL - BUTTON_HID_BEGIN]->GetStatus());
pad_state.left_sr.Assign(button_state[SR - BUTTON_HID_BEGIN]->GetStatus());
}
+
+ if (controller_type == NPadControllerType::GameCube) {
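+ // Map the digital ZL/ZR inputs to full-scale analog trigger values and remap the shoulder buttons to the GC layout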
+ trigger_entry.l_analog = static_cast<s32>(
+ button_state[ZL - BUTTON_HID_BEGIN]->GetStatus() ? HID_TRIGGER_MAX : 0);
+ trigger_entry.r_analog = static_cast<s32>(
+ button_state[ZR - BUTTON_HID_BEGIN]->GetStatus() ? HID_TRIGGER_MAX : 0);
+ pad_state.zl.Assign(false);
+ pad_state.zr.Assign(button_state[R - BUTTON_HID_BEGIN]->GetStatus());
+ pad_state.l.Assign(button_state[ZL - BUTTON_HID_BEGIN]->GetStatus());
+ pad_state.r.Assign(button_state[ZR - BUTTON_HID_BEGIN]->GetStatus());
+ }
}
void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
@@ -418,6 +443,11 @@ void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8*
&npad.joy_left_states, &npad.joy_right_states, &npad.palma_states,
&npad.system_ext_states};
+ // There is the possibility of more controllers having analog triggers
+ const std::array<TriggerGeneric*, 1> controller_triggers{
+ &npad.gc_trigger_states,
+ };
+
for (auto* main_controller : controller_npads) {
main_controller->common.entry_count = 16;
main_controller->common.total_entry_count = 17;
@@ -435,6 +465,21 @@ void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8*
cur_entry.timestamp2 = cur_entry.timestamp;
}
+ for (auto* analog_trigger : controller_triggers) {
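+ // Advance the 17-entry trigger ring buffer the same way the npad state buffers are advanced above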
+ analog_trigger->entry_count = 16;
+ analog_trigger->total_entry_count = 17;
+
+ const auto& last_entry = analog_trigger->trigger[analog_trigger->last_entry_index];
+
+ analog_trigger->timestamp = core_timing.GetCPUTicks();
+ analog_trigger->last_entry_index = (analog_trigger->last_entry_index + 1) % 17;
+
+ auto& cur_entry = analog_trigger->trigger[analog_trigger->last_entry_index];
+
+ cur_entry.timestamp = last_entry.timestamp + 1;
+ cur_entry.timestamp2 = cur_entry.timestamp;
+ }
+
const auto& controller_type = connected_controllers[i].type;
if (controller_type == NPadControllerType::None || !connected_controllers[i].is_connected) {
@@ -444,6 +489,7 @@ void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8*
RequestPadStateUpdate(npad_index);
auto& pad_state = npad_pad_states[npad_index];
+ auto& trigger_state = npad_trigger_states[npad_index];
auto& main_controller =
npad.fullkey_states.npad[npad.fullkey_states.common.last_entry_index];
@@ -456,6 +502,8 @@ void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8*
auto& pokeball_entry = npad.palma_states.npad[npad.palma_states.common.last_entry_index];
auto& libnx_entry =
npad.system_ext_states.npad[npad.system_ext_states.common.last_entry_index];
+ auto& trigger_entry =
+ npad.gc_trigger_states.trigger[npad.gc_trigger_states.last_entry_index];
libnx_entry.connection_status.raw = 0;
libnx_entry.connection_status.is_connected.Assign(1);
@@ -524,6 +572,18 @@ void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8*
libnx_entry.connection_status.is_right_connected.Assign(1);
break;
+ case NPadControllerType::GameCube:
+ main_controller.connection_status.raw = 0;
+ main_controller.connection_status.is_connected.Assign(1);
+ main_controller.connection_status.is_wired.Assign(1);
+ main_controller.pad.pad_states.raw = pad_state.pad_states.raw;
+ main_controller.pad.l_stick = pad_state.l_stick;
+ main_controller.pad.r_stick = pad_state.r_stick;
+ trigger_entry.l_analog = trigger_state.l_analog;
+ trigger_entry.r_analog = trigger_state.r_analog;
+
+ libnx_entry.connection_status.is_wired.Assign(1);
+ break;
case NPadControllerType::Pokeball:
pokeball_entry.connection_status.raw = 0;
pokeball_entry.connection_status.is_connected.Assign(1);
@@ -674,6 +734,7 @@ void Controller_NPad::OnMotionUpdate(const Core::Timing::CoreTiming& core_timing
right_sixaxis_entry.orientation = motion_devices[1].orientation;
}
break;
+ case NPadControllerType::GameCube:
case NPadControllerType::Pokeball:
break;
}
@@ -1135,6 +1196,8 @@ bool Controller_NPad::IsControllerSupported(NPadControllerType controller) const
return style.joycon_left;
case NPadControllerType::JoyRight:
return style.joycon_right;
+ case NPadControllerType::GameCube:
+ return style.gamecube;
case NPadControllerType::Pokeball:
return style.palma;
default:
diff --git a/src/core/hle/service/hid/controllers/npad.h b/src/core/hle/service/hid/controllers/npad.h
index 48bab988c..bc2e6779d 100644
--- a/src/core/hle/service/hid/controllers/npad.h
+++ b/src/core/hle/service/hid/controllers/npad.h
@@ -51,6 +51,7 @@ public:
JoyDual,
JoyLeft,
JoyRight,
+ GameCube,
Pokeball,
};
@@ -60,6 +61,7 @@ public:
JoyconDual = 5,
JoyconLeft = 6,
JoyconRight = 7,
+ GameCube = 8,
Pokeball = 9,
MaxNpadType = 10,
};
@@ -389,6 +391,25 @@ private:
};
static_assert(sizeof(SixAxisGeneric) == 0x708, "SixAxisGeneric is an invalid size");
+ struct TriggerState {
+ s64_le timestamp{};
+ s64_le timestamp2{};
+ s32_le l_analog{};
+ s32_le r_analog{};
+ };
+ static_assert(sizeof(TriggerState) == 0x18, "TriggerState is an invalid size");
+
+ struct TriggerGeneric {
+ INSERT_PADDING_BYTES(0x4);
+ s64_le timestamp;
+ INSERT_PADDING_BYTES(0x4);
+ s64_le total_entry_count;
+ s64_le last_entry_index;
+ s64_le entry_count;
+ std::array<TriggerState, 17> trigger{};
+ };
+ static_assert(sizeof(TriggerGeneric) == 0x1C8, "TriggerGeneric is an invalid size");
+
struct NPadSystemProperties {
union {
s64_le raw{};
@@ -509,7 +530,9 @@ private:
AppletFooterUiType footer_type;
// nfc_states needs to be checked; switchbrew does not match with HW
NfcXcdHandle nfc_states;
- INSERT_PADDING_BYTES(0xdef);
+ INSERT_PADDING_BYTES(0x8); // Mutex
+ TriggerGeneric gc_trigger_states;
+ INSERT_PADDING_BYTES(0xc1f);
};
static_assert(sizeof(NPadEntry) == 0x5000, "NPadEntry is an invalid size");
@@ -560,6 +583,7 @@ private:
f32 sixaxis_fusion_parameter2{};
bool sixaxis_at_rest{true};
std::array<ControllerPad, 10> npad_pad_states{};
+ std::array<TriggerState, 10> npad_trigger_states{};
bool is_in_lr_assignment_mode{false};
Core::System& system;
};
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index 51a010a55..ba27bbb05 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -15,9 +15,9 @@
#include "core/hle/kernel/client_port.h"
#include "core/hle/kernel/client_session.h"
#include "core/hle/kernel/k_readable_event.h"
+#include "core/hle/kernel/k_shared_memory.h"
#include "core/hle/kernel/k_writable_event.h"
#include "core/hle/kernel/kernel.h"
-#include "core/hle/kernel/shared_memory.h"
#include "core/hle/service/hid/errors.h"
#include "core/hle/service/hid/hid.h"
#include "core/hle/service/hid/irs.h"
@@ -110,6 +110,7 @@ void IAppletResource::DeactivateController(HidController controller) {
IAppletResource ::~IAppletResource() {
system.CoreTiming().UnscheduleEvent(pad_update_event, 0);
+ system.CoreTiming().UnscheduleEvent(motion_update_event, 0);
}
void IAppletResource::GetSharedMemoryHandle(Kernel::HLERequestContext& ctx) {
@@ -272,8 +273,8 @@ Hid::Hid(Core::System& system_) : ServiceFramework{system_, "hid"} {
{204, &Hid::PermitVibration, "PermitVibration"},
{205, &Hid::IsVibrationPermitted, "IsVibrationPermitted"},
{206, &Hid::SendVibrationValues, "SendVibrationValues"},
- {207, nullptr, "SendVibrationGcErmCommand"},
- {208, nullptr, "GetActualVibrationGcErmCommand"},
+ {207, &Hid::SendVibrationGcErmCommand, "SendVibrationGcErmCommand"},
+ {208, &Hid::GetActualVibrationGcErmCommand, "GetActualVibrationGcErmCommand"},
{209, &Hid::BeginPermitVibrationSession, "BeginPermitVibrationSession"},
{210, &Hid::EndPermitVibrationSession, "EndPermitVibrationSession"},
{211, &Hid::IsVibrationDeviceMounted, "IsVibrationDeviceMounted"},
@@ -1092,7 +1093,22 @@ void Hid::GetVibrationDeviceInfo(Kernel::HLERequestContext& ctx) {
VibrationDeviceInfo vibration_device_info;
- vibration_device_info.type = VibrationDeviceType::LinearResonantActuator;
+ switch (vibration_device_handle.npad_type) {
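+ // GameCube controllers report an eccentric rotating mass (ERM) motor and Pokeball an unknown type; everything else reports a linear resonant actuator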
+ case Controller_NPad::NpadType::ProController:
+ case Controller_NPad::NpadType::Handheld:
+ case Controller_NPad::NpadType::JoyconDual:
+ case Controller_NPad::NpadType::JoyconLeft:
+ case Controller_NPad::NpadType::JoyconRight:
+ default:
+ vibration_device_info.type = VibrationDeviceType::LinearResonantActuator;
+ break;
+ case Controller_NPad::NpadType::GameCube:
+ vibration_device_info.type = VibrationDeviceType::GcErm;
+ break;
+ case Controller_NPad::NpadType::Pokeball:
+ vibration_device_info.type = VibrationDeviceType::Unknown;
+ break;
+ }
switch (vibration_device_handle.device_index) {
case Controller_NPad::DeviceIndex::Left:
@@ -1214,6 +1230,108 @@ void Hid::SendVibrationValues(Kernel::HLERequestContext& ctx) {
rb.Push(RESULT_SUCCESS);
}
+void Hid::SendVibrationGcErmCommand(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ struct Parameters {
+ Controller_NPad::DeviceHandle vibration_device_handle;
+ u64 applet_resource_user_id;
+ VibrationGcErmCommand gc_erm_command;
+ };
+ static_assert(sizeof(Parameters) == 0x18, "Parameters has incorrect size.");
+
+ const auto parameters{rp.PopRaw<Parameters>()};
+
+ /**
+ * Note: This uses yuzu-specific behavior such that the StopHard command produces
+ * vibrations where freq_low == 0.0f and freq_high == 0.0f, as defined below,
+ * in order to differentiate between Stop and StopHard commands.
+ * This is done to reuse the controller vibration functions made for regular controllers.
+ */
+ const auto vibration_value = [parameters] {
+ switch (parameters.gc_erm_command) {
+ case VibrationGcErmCommand::Stop:
+ return Controller_NPad::VibrationValue{
+ .amp_low = 0.0f,
+ .freq_low = 160.0f,
+ .amp_high = 0.0f,
+ .freq_high = 320.0f,
+ };
+ case VibrationGcErmCommand::Start:
+ return Controller_NPad::VibrationValue{
+ .amp_low = 1.0f,
+ .freq_low = 160.0f,
+ .amp_high = 1.0f,
+ .freq_high = 320.0f,
+ };
+ case VibrationGcErmCommand::StopHard:
+ return Controller_NPad::VibrationValue{
+ .amp_low = 0.0f,
+ .freq_low = 0.0f,
+ .amp_high = 0.0f,
+ .freq_high = 0.0f,
+ };
+ default:
+ return Controller_NPad::DEFAULT_VIBRATION_VALUE;
+ }
+ }();
+
+ applet_resource->GetController<Controller_NPad>(HidController::NPad)
+ .VibrateController(parameters.vibration_device_handle, vibration_value);
+
+ LOG_DEBUG(Service_HID,
+ "called, npad_type={}, npad_id={}, device_index={}, applet_resource_user_id={}, "
+ "gc_erm_command={}",
+ parameters.vibration_device_handle.npad_type,
+ parameters.vibration_device_handle.npad_id,
+ parameters.vibration_device_handle.device_index, parameters.applet_resource_user_id,
+ parameters.gc_erm_command);
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(RESULT_SUCCESS);
+}
+
+void Hid::GetActualVibrationGcErmCommand(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ struct Parameters {
+ Controller_NPad::DeviceHandle vibration_device_handle;
+ INSERT_PADDING_WORDS_NOINIT(1);
+ u64 applet_resource_user_id;
+ };
+
+ const auto parameters{rp.PopRaw<Parameters>()};
+
+ const auto last_vibration = applet_resource->GetController<Controller_NPad>(HidController::NPad)
+ .GetLastVibration(parameters.vibration_device_handle);
+
+ const auto gc_erm_command = [last_vibration] {
+ if (last_vibration.amp_low != 0.0f || last_vibration.amp_high != 0.0f) {
+ return VibrationGcErmCommand::Start;
+ }
+
+ /**
+ * Note: This uses yuzu-specific behavior such that the StopHard command produces
+ * vibrations where freq_low == 0.0f and freq_high == 0.0f, as defined in the HID function
+ * SendVibrationGcErmCommand, in order to differentiate between Stop and StopHard commands.
+ * This is done to reuse the controller vibration functions made for regular controllers.
+ */
+ if (last_vibration.freq_low == 0.0f && last_vibration.freq_high == 0.0f) {
+ return VibrationGcErmCommand::StopHard;
+ }
+
+ return VibrationGcErmCommand::Stop;
+ }();
+
+ LOG_DEBUG(Service_HID,
+ "called, npad_type={}, npad_id={}, device_index={}, applet_resource_user_id={}",
+ parameters.vibration_device_handle.npad_type,
+ parameters.vibration_device_handle.npad_id,
+ parameters.vibration_device_handle.device_index, parameters.applet_resource_user_id);
+
+ IPC::ResponseBuilder rb{ctx, 4};
+ rb.Push(RESULT_SUCCESS);
+ rb.PushEnum(gc_erm_command);
+}
+
void Hid::BeginPermitVibrationSession(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto applet_resource_user_id{rp.Pop<u64>()};
diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h
index 7cc0433e2..36ed228c8 100644
--- a/src/core/hle/service/hid/hid.h
+++ b/src/core/hle/service/hid/hid.h
@@ -14,7 +14,7 @@ struct EventType;
}
namespace Kernel {
-class SharedMemory;
+class KSharedMemory;
}
namespace Service::SM {
@@ -69,7 +69,7 @@ private:
void UpdateControllers(std::uintptr_t user_data, std::chrono::nanoseconds ns_late);
void UpdateMotion(std::uintptr_t user_data, std::chrono::nanoseconds ns_late);
- std::shared_ptr<Kernel::SharedMemory> shared_mem;
+ std::shared_ptr<Kernel::KSharedMemory> shared_mem;
std::shared_ptr<Core::Timing::EventType> pad_update_event;
std::shared_ptr<Core::Timing::EventType> motion_update_event;
@@ -136,6 +136,8 @@ private:
void PermitVibration(Kernel::HLERequestContext& ctx);
void IsVibrationPermitted(Kernel::HLERequestContext& ctx);
void SendVibrationValues(Kernel::HLERequestContext& ctx);
+ void SendVibrationGcErmCommand(Kernel::HLERequestContext& ctx);
+ void GetActualVibrationGcErmCommand(Kernel::HLERequestContext& ctx);
void BeginPermitVibrationSession(Kernel::HLERequestContext& ctx);
void EndPermitVibrationSession(Kernel::HLERequestContext& ctx);
void IsVibrationDeviceMounted(Kernel::HLERequestContext& ctx);
@@ -154,7 +156,9 @@ private:
void GetNpadCommunicationMode(Kernel::HLERequestContext& ctx);
enum class VibrationDeviceType : u32 {
+ Unknown = 0,
LinearResonantActuator = 1,
+ GcErm = 2,
};
enum class VibrationDevicePosition : u32 {
@@ -163,6 +167,12 @@ private:
Right = 2,
};
+ enum class VibrationGcErmCommand : u64 {
+ Stop = 0,
+ Start = 1,
+ StopHard = 2,
+ };
+
struct VibrationDeviceInfo {
VibrationDeviceType type{};
VibrationDevicePosition position{};
diff --git a/src/core/hle/service/hid/irs.cpp b/src/core/hle/service/hid/irs.cpp
index c8413099f..2dfa936fb 100644
--- a/src/core/hle/service/hid/irs.cpp
+++ b/src/core/hle/service/hid/irs.cpp
@@ -6,8 +6,8 @@
#include "core/core.h"
#include "core/core_timing.h"
#include "core/hle/ipc_helpers.h"
+#include "core/hle/kernel/k_shared_memory.h"
#include "core/hle/kernel/kernel.h"
-#include "core/hle/kernel/shared_memory.h"
#include "core/hle/service/hid/irs.h"
namespace Service::HID {
diff --git a/src/core/hle/service/hid/irs.h b/src/core/hle/service/hid/irs.h
index be0c486ba..b0c8c7168 100644
--- a/src/core/hle/service/hid/irs.h
+++ b/src/core/hle/service/hid/irs.h
@@ -12,7 +12,7 @@ class System;
}
namespace Kernel {
-class SharedMemory;
+class KSharedMemory;
}
namespace Service::HID {
@@ -42,7 +42,7 @@ private:
void StopImageProcessorAsync(Kernel::HLERequestContext& ctx);
void ActivateIrsensorWithFunctionLevel(Kernel::HLERequestContext& ctx);
- std::shared_ptr<Kernel::SharedMemory> shared_mem;
+ std::shared_ptr<Kernel::KSharedMemory> shared_mem;
const u32 device_handle{0xABCD};
};
diff --git a/src/core/hle/service/ldn/errors.h b/src/core/hle/service/ldn/errors.h
new file mode 100644
index 000000000..a718c5c66
--- /dev/null
+++ b/src/core/hle/service/ldn/errors.h
@@ -0,0 +1,13 @@
+// Copyright 2021 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "core/hle/result.h"
+
+namespace Service::LDN {
+
+constexpr ResultCode ERROR_DISABLED{ErrorModule::LDN, 22};
+
+} // namespace Service::LDN
diff --git a/src/core/hle/service/ldn/ldn.cpp b/src/core/hle/service/ldn/ldn.cpp
index ee908f399..c630d93cd 100644
--- a/src/core/hle/service/ldn/ldn.cpp
+++ b/src/core/hle/service/ldn/ldn.cpp
@@ -6,6 +6,7 @@
#include "core/hle/ipc_helpers.h"
#include "core/hle/result.h"
+#include "core/hle/service/ldn/errors.h"
#include "core/hle/service/ldn/ldn.h"
#include "core/hle/service/sm/sm.h"
@@ -103,7 +104,7 @@ public:
: ServiceFramework{system_, "IUserLocalCommunicationService"} {
// clang-format off
static const FunctionInfo functions[] = {
- {0, nullptr, "GetState"},
+ {0, &IUserLocalCommunicationService::GetState, "GetState"},
{1, nullptr, "GetNetworkInfo"},
{2, nullptr, "GetIpv4Address"},
{3, nullptr, "GetDisconnectReason"},
@@ -138,13 +139,38 @@ public:
RegisterHandlers(functions);
}
- void Initialize2(Kernel::HLERequestContext& ctx) {
+ void GetState(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_LDN, "(STUBBED) called");
- // Result success seem make this services start network and continue.
- // If we just pass result error then it will stop and maybe try again and again.
+
+ IPC::ResponseBuilder rb{ctx, 3};
+
+ // Indicate a network error, as we do not actually emulate LDN
+ rb.Push(static_cast<u32>(State::Error));
+
+ rb.Push(RESULT_SUCCESS);
+ }
+
+ void Initialize2(Kernel::HLERequestContext& ctx) {
+ LOG_DEBUG(Service_LDN, "called");
+
+ is_initialized = true;
+
IPC::ResponseBuilder rb{ctx, 2};
- rb.Push(RESULT_UNKNOWN);
+ rb.Push(RESULT_SUCCESS);
}
+
+private:
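+ // These values presumably mirror the LDN service's internal state machine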
+ enum class State {
+ None,
+ Initialized,
+ AccessPointOpened,
+ AccessPointCreated,
+ StationOpened,
+ StationConnected,
+ Error,
+ };
+
+ bool is_initialized{};
};
class LDNS final : public ServiceFramework<LDNS> {
diff --git a/src/core/hle/service/ldr/ldr.cpp b/src/core/hle/service/ldr/ldr.cpp
index 9da786b4e..d111c1357 100644
--- a/src/core/hle/service/ldr/ldr.cpp
+++ b/src/core/hle/service/ldr/ldr.cpp
@@ -11,10 +11,10 @@
#include "common/scope_exit.h"
#include "core/core.h"
#include "core/hle/ipc_helpers.h"
-#include "core/hle/kernel/errors.h"
-#include "core/hle/kernel/memory/page_table.h"
-#include "core/hle/kernel/memory/system_control.h"
+#include "core/hle/kernel/k_page_table.h"
+#include "core/hle/kernel/k_system_control.h"
#include "core/hle/kernel/process.h"
+#include "core/hle/kernel/svc_results.h"
#include "core/hle/service/ldr/ldr.h"
#include "core/hle/service/service.h"
#include "core/loader/nro.h"
@@ -287,12 +287,11 @@ public:
rb.Push(RESULT_SUCCESS);
}
- bool ValidateRegionForMap(Kernel::Memory::PageTable& page_table, VAddr start,
- std::size_t size) const {
- constexpr std::size_t padding_size{4 * Kernel::Memory::PageSize};
+ bool ValidateRegionForMap(Kernel::KPageTable& page_table, VAddr start, std::size_t size) const {
+ constexpr std::size_t padding_size{4 * Kernel::PageSize};
const auto start_info{page_table.QueryInfo(start - 1)};
- if (start_info.state != Kernel::Memory::MemoryState::Free) {
+ if (start_info.state != Kernel::KMemoryState::Free) {
return {};
}
@@ -302,21 +301,20 @@ public:
const auto end_info{page_table.QueryInfo(start + size)};
- if (end_info.state != Kernel::Memory::MemoryState::Free) {
+ if (end_info.state != Kernel::KMemoryState::Free) {
return {};
}
return (start + size + padding_size) <= (end_info.GetAddress() + end_info.GetSize());
}
- VAddr GetRandomMapRegion(const Kernel::Memory::PageTable& page_table, std::size_t size) const {
+ VAddr GetRandomMapRegion(const Kernel::KPageTable& page_table, std::size_t size) const {
VAddr addr{};
const std::size_t end_pages{(page_table.GetAliasCodeRegionSize() - size) >>
- Kernel::Memory::PageBits};
+ Kernel::PageBits};
do {
addr = page_table.GetAliasCodeRegionStart() +
- (Kernel::Memory::SystemControl::GenerateRandomRange(0, end_pages)
- << Kernel::Memory::PageBits);
+ (Kernel::KSystemControl::GenerateRandomRange(0, end_pages) << Kernel::PageBits);
} while (!page_table.IsInsideAddressSpace(addr, size) ||
page_table.IsInsideHeapRegion(addr, size) ||
page_table.IsInsideAliasRegion(addr, size));
@@ -330,7 +328,7 @@ public:
const VAddr addr{GetRandomMapRegion(page_table, size)};
const ResultCode result{page_table.MapProcessCodeMemory(addr, baseAddress, size)};
- if (result == Kernel::ERR_INVALID_ADDRESS_STATE) {
+ if (result == Kernel::ResultInvalidCurrentMemory) {
continue;
}
@@ -361,7 +359,7 @@ public:
const ResultCode result{
page_table.MapProcessCodeMemory(addr + nro_size, bss_addr, bss_size)};
- if (result == Kernel::ERR_INVALID_ADDRESS_STATE) {
+ if (result == Kernel::ResultInvalidCurrentMemory) {
continue;
}
@@ -387,7 +385,7 @@ public:
const VAddr data_start{start + nro_header.segment_headers[DATA_INDEX].memory_offset};
const VAddr bss_start{data_start + nro_header.segment_headers[DATA_INDEX].memory_size};
const VAddr bss_end_addr{
- Common::AlignUp(bss_start + nro_header.bss_size, Kernel::Memory::PageSize)};
+ Common::AlignUp(bss_start + nro_header.bss_size, Kernel::PageSize)};
auto CopyCode{[&](VAddr src_addr, VAddr dst_addr, u64 size) {
std::vector<u8> source_data(size);
@@ -402,12 +400,12 @@ public:
nro_header.segment_headers[DATA_INDEX].memory_size);
CASCADE_CODE(process->PageTable().SetCodeMemoryPermission(
- text_start, ro_start - text_start, Kernel::Memory::MemoryPermission::ReadAndExecute));
- CASCADE_CODE(process->PageTable().SetCodeMemoryPermission(
- ro_start, data_start - ro_start, Kernel::Memory::MemoryPermission::Read));
+ text_start, ro_start - text_start, Kernel::KMemoryPermission::ReadAndExecute));
+ CASCADE_CODE(process->PageTable().SetCodeMemoryPermission(ro_start, data_start - ro_start,
+ Kernel::KMemoryPermission::Read));
return process->PageTable().SetCodeMemoryPermission(
- data_start, bss_end_addr - data_start, Kernel::Memory::MemoryPermission::ReadAndWrite);
+ data_start, bss_end_addr - data_start, Kernel::KMemoryPermission::ReadAndWrite);
}
void LoadNro(Kernel::HLERequestContext& ctx) {
diff --git a/src/core/hle/service/nfp/nfp.cpp b/src/core/hle/service/nfp/nfp.cpp
index 5d6d25696..2d1d4d67f 100644
--- a/src/core/hle/service/nfp/nfp.cpp
+++ b/src/core/hle/service/nfp/nfp.cpp
@@ -215,7 +215,7 @@ private:
const auto& amiibo = nfp_interface.GetAmiiboBuffer();
const TagInfo tag_info{
.uuid = amiibo.uuid,
- .uuid_length = static_cast<u8>(tag_info.uuid.size()),
+ .uuid_length = static_cast<u8>(amiibo.uuid.size()),
.padding_1 = {},
.protocol = 1, // TODO(ogniK): Figure out actual values
.tag_type = 2,
diff --git a/src/core/hle/service/ns/pl_u.cpp b/src/core/hle/service/ns/pl_u.cpp
index b6ac0a81a..fcd15d81f 100644
--- a/src/core/hle/service/ns/pl_u.cpp
+++ b/src/core/hle/service/ns/pl_u.cpp
@@ -19,9 +19,9 @@
#include "core/file_sys/romfs.h"
#include "core/file_sys/system_archive/system_archive.h"
#include "core/hle/ipc_helpers.h"
+#include "core/hle/kernel/k_shared_memory.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/physical_memory.h"
-#include "core/hle/kernel/shared_memory.h"
#include "core/hle/service/filesystem/filesystem.h"
#include "core/hle/service/ns/pl_u.h"
@@ -131,7 +131,7 @@ struct PL_U::Impl {
}
/// Handle to shared memory region designated for a shared font
- std::shared_ptr<Kernel::SharedMemory> shared_font_mem;
+ std::shared_ptr<Kernel::KSharedMemory> shared_font_mem;
/// Backing memory for the shared font data
std::shared_ptr<Kernel::PhysicalMemory> shared_font;
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
index 36970f828..ecba1dba1 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
@@ -34,8 +34,7 @@ NvResult nvhost_nvdec::Ioctl1(Ioctl command, const std::vector<u8>& input,
case 0xa: {
if (command.length == 0x1c) {
LOG_INFO(Service_NVDRV, "NVDEC video stream ended");
- Tegra::ChCommandHeaderList cmdlist(1);
- cmdlist[0] = Tegra::ChCommandHeader{0xDEADB33F};
+ Tegra::ChCommandHeaderList cmdlist{{0xDEADB33F}};
system.GPU().PushCommandBuffer(cmdlist);
}
return UnmapBuffer(input, output);
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
index 72499654c..70849a9bd 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
@@ -28,8 +28,13 @@ NvResult nvhost_vic::Ioctl1(Ioctl command, const std::vector<u8>& input, std::ve
return GetWaitbase(input, output);
case 0x9:
return MapBuffer(input, output);
- case 0xa:
+ case 0xa: {
+ if (command.length == 0x1c) {
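+ // Push the same end-of-stream marker that nvhost_nvdec submits above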
+ Tegra::ChCommandHeaderList cmdlist{{0xDEADB33F}};
+ system.GPU().PushCommandBuffer(cmdlist);
+ }
return UnmapBuffer(input, output);
+ }
default:
break;
}
diff --git a/src/core/hle/service/olsc/olsc.cpp b/src/core/hle/service/olsc/olsc.cpp
index 4440135ed..e2ac71fa1 100644
--- a/src/core/hle/service/olsc/olsc.cpp
+++ b/src/core/hle/service/olsc/olsc.cpp
@@ -17,7 +17,7 @@ public:
static const FunctionInfo functions[] = {
{0, &OLSC::Initialize, "Initialize"},
{10, nullptr, "VerifySaveDataBackupLicenseAsync"},
- {13, nullptr, "GetSaveDataBackupSetting"},
+ {13, &OLSC::GetSaveDataBackupSetting, "GetSaveDataBackupSetting"},
{14, &OLSC::SetSaveDataBackupSettingEnabled, "SetSaveDataBackupSettingEnabled"},
{15, nullptr, "SetCustomData"},
{16, nullptr, "DeleteSaveDataBackupSetting"},
@@ -52,6 +52,17 @@ private:
rb.Push(RESULT_SUCCESS);
}
+ void GetSaveDataBackupSetting(Kernel::HLERequestContext& ctx) {
+ LOG_WARNING(Service_OLSC, "(STUBBED) called");
+
+ // backup_setting is set to 0 since the real value is unknown
+ constexpr u64 backup_setting = 0;
+
+ IPC::ResponseBuilder rb{ctx, 4};
+ rb.Push(RESULT_SUCCESS);
+ rb.Push(backup_setting);
+ }
+
void SetSaveDataBackupSettingEnabled(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_OLSC, "(STUBBED) called");
diff --git a/src/core/hle/service/sockets/bsd.cpp b/src/core/hle/service/sockets/bsd.cpp
index 0b306b87a..78e9cd708 100644
--- a/src/core/hle/service/sockets/bsd.cpp
+++ b/src/core/hle/service/sockets/bsd.cpp
@@ -453,7 +453,8 @@ std::pair<s32, Errno> BSD::SocketImpl(Domain domain, Type type, Protocol protoco
return {-1, Errno::MFILE};
}
- FileDescriptor& descriptor = file_descriptors[fd].emplace();
+ file_descriptors[fd] = FileDescriptor{};
+ FileDescriptor& descriptor = *file_descriptors[fd];
// ENONMEM might be thrown here
LOG_INFO(Service, "New socket fd={}", fd);
@@ -548,7 +549,8 @@ std::pair<s32, Errno> BSD::AcceptImpl(s32 fd, std::vector<u8>& write_buffer) {
return {-1, Translate(bsd_errno)};
}
- FileDescriptor& new_descriptor = file_descriptors[new_fd].emplace();
+ file_descriptors[new_fd] = FileDescriptor{};
+ FileDescriptor& new_descriptor = *file_descriptors[new_fd];
new_descriptor.socket = std::move(result.socket);
new_descriptor.is_connection_based = descriptor.is_connection_based;
diff --git a/src/core/hle/service/time/time_manager.cpp b/src/core/hle/service/time/time_manager.cpp
index 858623e2b..1f7309f6b 100644
--- a/src/core/hle/service/time/time_manager.cpp
+++ b/src/core/hle/service/time/time_manager.cpp
@@ -279,6 +279,10 @@ const SharedMemory& TimeManager::GetSharedMemory() const {
return impl->shared_memory;
}
+void TimeManager::Shutdown() {
+ impl.reset();
+}
+
void TimeManager::UpdateLocalSystemClockTime(s64 posix_time) {
impl->UpdateLocalSystemClockTime(system, posix_time);
}
diff --git a/src/core/hle/service/time/time_manager.h b/src/core/hle/service/time/time_manager.h
index 993c7c288..4db8cc0e1 100644
--- a/src/core/hle/service/time/time_manager.h
+++ b/src/core/hle/service/time/time_manager.h
@@ -61,6 +61,8 @@ public:
const SharedMemory& GetSharedMemory() const;
+ void Shutdown();
+
void SetupTimeZoneManager(std::string location_name,
Clock::SteadyClockTimePoint time_zone_updated_time_point,
std::size_t total_location_name_count, u128 time_zone_rule_version,
diff --git a/src/core/hle/service/time/time_sharedmemory.cpp b/src/core/hle/service/time/time_sharedmemory.cpp
index e0ae9f874..4d8de81be 100644
--- a/src/core/hle/service/time/time_sharedmemory.cpp
+++ b/src/core/hle/service/time/time_sharedmemory.cpp
@@ -22,7 +22,7 @@ SharedMemory::SharedMemory(Core::System& system) : system(system) {
SharedMemory::~SharedMemory() = default;
-std::shared_ptr<Kernel::SharedMemory> SharedMemory::GetSharedMemoryHolder() const {
+std::shared_ptr<Kernel::KSharedMemory> SharedMemory::GetSharedMemoryHolder() const {
return shared_memory_holder;
}
diff --git a/src/core/hle/service/time/time_sharedmemory.h b/src/core/hle/service/time/time_sharedmemory.h
index e0c3e63da..299680517 100644
--- a/src/core/hle/service/time/time_sharedmemory.h
+++ b/src/core/hle/service/time/time_sharedmemory.h
@@ -6,8 +6,8 @@
#include "common/common_types.h"
#include "common/uuid.h"
+#include "core/hle/kernel/k_shared_memory.h"
#include "core/hle/kernel/k_thread.h"
-#include "core/hle/kernel/shared_memory.h"
#include "core/hle/service/time/clock_types.h"
namespace Service::Time {
@@ -18,7 +18,7 @@ public:
~SharedMemory();
// Return the shared memory handle
- std::shared_ptr<Kernel::SharedMemory> GetSharedMemoryHolder() const;
+ std::shared_ptr<Kernel::KSharedMemory> GetSharedMemoryHolder() const;
// TODO(ogniK): We have to properly simulate memory barriers, how are we going to do this?
template <typename T, std::size_t Offset>
@@ -63,7 +63,7 @@ public:
void SetAutomaticCorrectionEnabled(bool is_enabled);
private:
- std::shared_ptr<Kernel::SharedMemory> shared_memory_holder;
+ std::shared_ptr<Kernel::KSharedMemory> shared_memory_holder;
Core::System& system;
Format shared_memory_format{};
};
diff --git a/src/core/loader/deconstructed_rom_directory.cpp b/src/core/loader/deconstructed_rom_directory.cpp
index 79ebf11de..4a10211f6 100644
--- a/src/core/loader/deconstructed_rom_directory.cpp
+++ b/src/core/loader/deconstructed_rom_directory.cpp
@@ -12,8 +12,8 @@
#include "core/file_sys/control_metadata.h"
#include "core/file_sys/patch_manager.h"
#include "core/file_sys/romfs_factory.h"
+#include "core/hle/kernel/k_page_table.h"
#include "core/hle/kernel/kernel.h"
-#include "core/hle/kernel/memory/page_table.h"
#include "core/hle/kernel/process.h"
#include "core/hle/service/filesystem/filesystem.h"
#include "core/loader/deconstructed_rom_directory.h"
diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp
index dca1fcb18..f4a339390 100644
--- a/src/core/loader/elf.cpp
+++ b/src/core/loader/elf.cpp
@@ -10,7 +10,7 @@
#include "common/file_util.h"
#include "common/logging/log.h"
#include "core/hle/kernel/code_set.h"
-#include "core/hle/kernel/memory/page_table.h"
+#include "core/hle/kernel/k_page_table.h"
#include "core/hle/kernel/process.h"
#include "core/loader/elf.h"
#include "core/memory.h"
diff --git a/src/core/loader/kip.cpp b/src/core/loader/kip.cpp
index e162c4ff0..3f4ba233d 100644
--- a/src/core/loader/kip.cpp
+++ b/src/core/loader/kip.cpp
@@ -6,7 +6,7 @@
#include "core/file_sys/kernel_executable.h"
#include "core/file_sys/program_metadata.h"
#include "core/hle/kernel/code_set.h"
-#include "core/hle/kernel/memory/page_table.h"
+#include "core/hle/kernel/k_page_table.h"
#include "core/hle/kernel/process.h"
#include "core/loader/kip.h"
#include "core/memory.h"
diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp
index f976d0a9c..14618cb40 100644
--- a/src/core/loader/nro.cpp
+++ b/src/core/loader/nro.cpp
@@ -15,8 +15,8 @@
#include "core/file_sys/romfs_factory.h"
#include "core/file_sys/vfs_offset.h"
#include "core/hle/kernel/code_set.h"
+#include "core/hle/kernel/k_page_table.h"
#include "core/hle/kernel/k_thread.h"
-#include "core/hle/kernel/memory/page_table.h"
#include "core/hle/kernel/process.h"
#include "core/hle/service/filesystem/filesystem.h"
#include "core/loader/nro.h"
diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp
index ea347ea83..cbd048695 100644
--- a/src/core/loader/nso.cpp
+++ b/src/core/loader/nso.cpp
@@ -15,8 +15,8 @@
#include "core/core.h"
#include "core/file_sys/patch_manager.h"
#include "core/hle/kernel/code_set.h"
+#include "core/hle/kernel/k_page_table.h"
#include "core/hle/kernel/k_thread.h"
-#include "core/hle/kernel/memory/page_table.h"
#include "core/hle/kernel/process.h"
#include "core/loader/nso.h"
#include "core/memory.h"
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 11609682a..b9dd3e275 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -16,7 +16,7 @@
#include "core/arm/arm_interface.h"
#include "core/core.h"
#include "core/device_memory.h"
-#include "core/hle/kernel/memory/page_table.h"
+#include "core/hle/kernel/k_page_table.h"
#include "core/hle/kernel/physical_memory.h"
#include "core/hle/kernel/process.h"
#include "core/memory.h"
diff --git a/src/core/memory.h b/src/core/memory.h
index 705ebb23d..6d34fcfe2 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -116,6 +116,11 @@ public:
*/
u8* GetPointer(VAddr vaddr);
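+ /// Typed convenience overload; reinterprets the bytes at vaddr as T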
+ template <typename T>
+ T* GetPointer(VAddr vaddr) {
+ return reinterpret_cast<T*>(GetPointer(vaddr));
+ }
+
/**
* Gets a pointer to the given address.
*
@@ -126,6 +131,11 @@ public:
*/
const u8* GetPointer(VAddr vaddr) const;
+ template <typename T>
+ const T* GetPointer(VAddr vaddr) const {
+ return reinterpret_cast<const T*>(GetPointer(vaddr));
+ }
+
/**
* Reads an 8-bit unsigned value from the current process' address space
* at the given virtual address.
diff --git a/src/core/memory/cheat_engine.cpp b/src/core/memory/cheat_engine.cpp
index 2dd0eb0f8..8eec567ab 100644
--- a/src/core/memory/cheat_engine.cpp
+++ b/src/core/memory/cheat_engine.cpp
@@ -10,7 +10,7 @@
#include "core/core_timing.h"
#include "core/core_timing_util.h"
#include "core/hardware_properties.h"
-#include "core/hle/kernel/memory/page_table.h"
+#include "core/hle/kernel/k_page_table.h"
#include "core/hle/kernel/process.h"
#include "core/hle/service/hid/controllers/npad.h"
#include "core/hle/service/hid/hid.h"
diff --git a/src/core/reporter.cpp b/src/core/reporter.cpp
index f199c3362..74fb32814 100644
--- a/src/core/reporter.cpp
+++ b/src/core/reporter.cpp
@@ -17,7 +17,7 @@
#include "core/arm/arm_interface.h"
#include "core/core.h"
#include "core/hle/kernel/hle_ipc.h"
-#include "core/hle/kernel/memory/page_table.h"
+#include "core/hle/kernel/k_page_table.h"
#include "core/hle/kernel/process.h"
#include "core/hle/result.h"
#include "core/memory.h"
diff --git a/src/core/settings.h b/src/core/settings.h
index a324530bd..d849dded3 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -181,12 +181,13 @@ struct Values {
std::string motion_device;
std::string udp_input_servers;
- bool emulate_analog_keyboard;
-
+ bool mouse_panning;
+ float mouse_panning_sensitivity;
bool mouse_enabled;
std::string mouse_device;
MouseButtonsRaw mouse_buttons;
+ bool emulate_analog_keyboard;
bool keyboard_enabled;
KeyboardKeysRaw keyboard_keys;
KeyboardModsRaw keyboard_mods;
diff --git a/src/input_common/mouse/mouse_input.cpp b/src/input_common/mouse/mouse_input.cpp
index 10786a541..b864d26f2 100644
--- a/src/input_common/mouse/mouse_input.cpp
+++ b/src/input_common/mouse/mouse_input.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2+
// Refer to the license.txt file included.
+#include "core/settings.h"
#include "input_common/mouse/mouse_input.h"
namespace MouseInput {
@@ -32,10 +33,18 @@ void Mouse::UpdateThread() {
info.motion.UpdateOrientation(update_time * 1000);
info.tilt_speed = 0;
info.data.motion = info.motion.GetMotion();
+ if (Settings::values.mouse_panning) {
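+ // Decay the accumulated mouse delta so the emulated stick eases back toward center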
+ info.last_mouse_change *= 0.96f;
+ info.data.axis = {static_cast<int>(16 * info.last_mouse_change.x),
+ static_cast<int>(16 * -info.last_mouse_change.y)};
+ }
}
if (configuring) {
UpdateYuzuSettings();
}
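+ // Stop panning once no mouse movement has been seen for ~20 update ticks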
+ if (mouse_panning_timout++ > 20) {
+ StopPanning();
+ }
std::this_thread::sleep_for(std::chrono::milliseconds(update_time));
}
}
@@ -65,8 +74,45 @@ void Mouse::PressButton(int x, int y, int button_) {
mouse_info[button_index].data.pressed = true;
}
-void Mouse::MouseMove(int x, int y) {
+void Mouse::StopPanning() {
for (MouseInfo& info : mouse_info) {
+ if (Settings::values.mouse_panning) {
+ info.data.axis = {};
+ info.tilt_speed = 0;
+ info.last_mouse_change = {};
+ }
+ }
+}
+
+void Mouse::MouseMove(int x, int y, int center_x, int center_y) {
+ for (MouseInfo& info : mouse_info) {
+ if (Settings::values.mouse_panning) {
+ auto mouse_change =
+ (Common::MakeVec(x, y) - Common::MakeVec(center_x, center_y)).Cast<float>();
+ mouse_panning_timout = 0;
+
+ if (mouse_change.y == 0 && mouse_change.x == 0) {
+ continue;
+ }
+ const auto mouse_change_length = mouse_change.Length();
+ if (mouse_change_length < 3.0f) {
+ mouse_change /= mouse_change_length / 3.0f;
+ }
+
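+ // Low-pass filter the new delta against the previous one to smooth out jitter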
+ info.last_mouse_change = (info.last_mouse_change * 0.91f) + (mouse_change * 0.09f);
+
+ const auto last_mouse_change_length = info.last_mouse_change.Length();
+ if (last_mouse_change_length > 8.0f) {
+ info.last_mouse_change /= last_mouse_change_length / 8.0f;
+ } else if (last_mouse_change_length < 1.0f) {
+ info.last_mouse_change = mouse_change / mouse_change.Length();
+ }
+
+ info.tilt_direction = info.last_mouse_change;
+ info.tilt_speed = info.tilt_direction.Normalize() * info.sensitivity;
+ continue;
+ }
+
if (info.data.pressed) {
const auto mouse_move = Common::MakeVec(x, y) - info.mouse_origin;
const auto mouse_change = Common::MakeVec(x, y) - info.last_mouse_position;
diff --git a/src/input_common/mouse/mouse_input.h b/src/input_common/mouse/mouse_input.h
index 58803c1bf..46aa676c1 100644
--- a/src/input_common/mouse/mouse_input.h
+++ b/src/input_common/mouse/mouse_input.h
@@ -57,8 +57,10 @@ public:
* Signals that mouse has moved.
* @param x the x-coordinate of the cursor
* @param y the y-coordinate of the cursor
+ * @param center_x the x-coordinate of the middle of the screen
+ * @param center_y the y-coordinate of the middle of the screen
*/
- void MouseMove(int x, int y);
+ void MouseMove(int x, int y, int center_x, int center_y);
/**
* Signals that a motion sensor tilt has ended.
@@ -74,11 +76,13 @@ public:
private:
void UpdateThread();
void UpdateYuzuSettings();
+ void StopPanning();
struct MouseInfo {
InputCommon::MotionInput motion{0.0f, 0.0f, 0.0f};
Common::Vec2<int> mouse_origin;
Common::Vec2<int> last_mouse_position;
+ Common::Vec2<float> last_mouse_change;
bool is_tilting = false;
float sensitivity{0.120f};
@@ -94,5 +98,6 @@ private:
Common::SPSCQueue<MouseStatus> mouse_queue;
bool configuring{false};
bool update_thread_running{true};
+ int mouse_panning_timout{};
};
} // namespace MouseInput
diff --git a/src/input_common/mouse/mouse_poller.cpp b/src/input_common/mouse/mouse_poller.cpp
index 3d799b293..bb56787ee 100644
--- a/src/input_common/mouse/mouse_poller.cpp
+++ b/src/input_common/mouse/mouse_poller.cpp
@@ -6,6 +6,7 @@
#include <utility>
#include "common/threadsafe_queue.h"
+#include "core/settings.h"
#include "input_common/mouse/mouse_input.h"
#include "input_common/mouse/mouse_poller.h"
@@ -71,7 +72,7 @@ public:
std::lock_guard lock{mutex};
const auto axis_value =
static_cast<float>(mouse_input->GetMouseState(button).axis.at(axis));
- return axis_value / (100.0f * range);
+ return axis_value * Settings::values.mouse_panning_sensitivity / (100.0f * range);
}
std::pair<float, float> GetAnalog(u32 analog_axis_x, u32 analog_axis_y) const {
diff --git a/src/input_common/settings.h b/src/input_common/settings.h
index 75486554b..a59f5d461 100644
--- a/src/input_common/settings.h
+++ b/src/input_common/settings.h
@@ -340,6 +340,7 @@ enum class ControllerType {
LeftJoycon,
RightJoycon,
Handheld,
+ GameCube,
};
struct PlayerInput {
diff --git a/src/input_common/udp/client.cpp b/src/input_common/udp/client.cpp
index e7e50d789..c4afa4174 100644
--- a/src/input_common/udp/client.cpp
+++ b/src/input_common/udp/client.cpp
@@ -144,6 +144,10 @@ Client::~Client() {
Reset();
}
+Client::ClientData::ClientData() = default;
+
+Client::ClientData::~ClientData() = default;
+
std::vector<Common::ParamPackage> Client::GetInputDevices() const {
std::vector<Common::ParamPackage> devices;
for (std::size_t client = 0; client < clients.size(); client++) {
diff --git a/src/input_common/udp/client.h b/src/input_common/udp/client.h
index 822f9c550..a523f6124 100644
--- a/src/input_common/udp/client.h
+++ b/src/input_common/udp/client.h
@@ -98,6 +98,9 @@ public:
private:
struct ClientData {
+ ClientData();
+ ~ClientData();
+
std::string host{"127.0.0.1"};
u16 port{26760};
std::size_t pad_index{};
diff --git a/src/input_common/udp/udp.cpp b/src/input_common/udp/udp.cpp
index b630281a0..9829da6f0 100644
--- a/src/input_common/udp/udp.cpp
+++ b/src/input_common/udp/udp.cpp
@@ -84,8 +84,8 @@ public:
private:
const std::string ip;
- const u16 port;
- const u16 pad;
+ [[maybe_unused]] const u16 port;
+ [[maybe_unused]] const u16 pad;
CemuhookUDP::Client* client;
mutable std::mutex mutex;
};
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index 6a5c18945..4ea0076e9 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -1,5 +1,6 @@
add_executable(tests
common/bit_field.cpp
+ common/cityhash.cpp
common/fibers.cpp
common/param_package.cpp
common/ring_buffer.cpp
diff --git a/src/tests/common/cityhash.cpp b/src/tests/common/cityhash.cpp
new file mode 100644
index 000000000..7a40b6c4a
--- /dev/null
+++ b/src/tests/common/cityhash.cpp
@@ -0,0 +1,22 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <catch2/catch.hpp>
+
+#include "common/cityhash.h"
+
+constexpr char msg[] = "The blue frogs are singing under the crimson sky.\n"
+ "It is time to run, Robert.";
+
+using namespace Common;
+
+TEST_CASE("CityHash", "[common]") {
+ // These test results were built against a known good version.
+ REQUIRE(CityHash64(msg, sizeof(msg)) == 0x92d5c2e9cbfbbc01);
+ REQUIRE(CityHash64WithSeed(msg, sizeof(msg), 0xdead) == 0xbfbe93f21a2820dd);
+ REQUIRE(CityHash64WithSeeds(msg, sizeof(msg), 0xbeef, 0xcafe) == 0xb343317955fc8a06);
+ REQUIRE(CityHash128(msg, sizeof(msg)) == u128{0x98e60d0423747eaa, 0xd8694c5b6fcaede9});
+ REQUIRE(CityHash128WithSeed(msg, sizeof(msg), {0xdead, 0xbeef}) ==
+ u128{0xf0307dba81199ebe, 0xd77764e0c4a9eb74});
+}
diff --git a/src/tests/video_core/buffer_base.cpp b/src/tests/video_core/buffer_base.cpp
index 651633e9e..edced69bb 100644
--- a/src/tests/video_core/buffer_base.cpp
+++ b/src/tests/video_core/buffer_base.cpp
@@ -471,3 +471,79 @@ TEST_CASE("BufferBase: Unaligned page region query") {
REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1000));
REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1));
}
+
+TEST_CASE("BufferBase: Cached write") {
+ RasterizerInterface rasterizer;
+ BufferBase buffer(rasterizer, c, WORD);
+ buffer.UnmarkRegionAsCpuModified(c, WORD);
+ buffer.CachedCpuWrite(c + PAGE, PAGE);
+ REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
+ buffer.FlushCachedWrites();
+ REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
+ buffer.MarkRegionAsCpuModified(c, WORD);
+ REQUIRE(rasterizer.Count() == 0);
+}
+
+TEST_CASE("BufferBase: Multiple cached write") {
+ RasterizerInterface rasterizer;
+ BufferBase buffer(rasterizer, c, WORD);
+ buffer.UnmarkRegionAsCpuModified(c, WORD);
+ buffer.CachedCpuWrite(c + PAGE, PAGE);
+ buffer.CachedCpuWrite(c + PAGE * 3, PAGE);
+ REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
+ REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 3, PAGE));
+ buffer.FlushCachedWrites();
+ REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
+ REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 3, PAGE));
+ buffer.MarkRegionAsCpuModified(c, WORD);
+ REQUIRE(rasterizer.Count() == 0);
+}
+
+TEST_CASE("BufferBase: Cached write unmarked") {
+ RasterizerInterface rasterizer;
+ BufferBase buffer(rasterizer, c, WORD);
+ buffer.UnmarkRegionAsCpuModified(c, WORD);
+ buffer.CachedCpuWrite(c + PAGE, PAGE);
+ buffer.UnmarkRegionAsCpuModified(c + PAGE, PAGE);
+ REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
+ buffer.FlushCachedWrites();
+ REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
+ buffer.MarkRegionAsCpuModified(c, WORD);
+ REQUIRE(rasterizer.Count() == 0);
+}
+
+TEST_CASE("BufferBase: Cached write iterated") {
+ RasterizerInterface rasterizer;
+ BufferBase buffer(rasterizer, c, WORD);
+ buffer.UnmarkRegionAsCpuModified(c, WORD);
+ buffer.CachedCpuWrite(c + PAGE, PAGE);
+ int num = 0;
+ buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
+ REQUIRE(num == 0);
+ REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
+ buffer.FlushCachedWrites();
+ REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
+ buffer.MarkRegionAsCpuModified(c, WORD);
+ REQUIRE(rasterizer.Count() == 0);
+}
+
+TEST_CASE("BufferBase: Cached write downloads") {
+ RasterizerInterface rasterizer;
+ BufferBase buffer(rasterizer, c, WORD);
+ buffer.UnmarkRegionAsCpuModified(c, WORD);
+ REQUIRE(rasterizer.Count() == 64);
+ buffer.CachedCpuWrite(c + PAGE, PAGE);
+ REQUIRE(rasterizer.Count() == 63);
+ buffer.MarkRegionAsGpuModified(c + PAGE, PAGE);
+ int num = 0;
+ buffer.ForEachDownloadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
+ buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
+ REQUIRE(num == 0);
+ REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
+ REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE));
+ buffer.FlushCachedWrites();
+ REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
+ REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE));
+ buffer.MarkRegionAsCpuModified(c, WORD);
+ REQUIRE(rasterizer.Count() == 0);
+}
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 2cf95937e..9b931976a 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -2,10 +2,8 @@ add_subdirectory(host_shaders)
add_library(video_core STATIC
buffer_cache/buffer_base.h
- buffer_cache/buffer_block.h
+ buffer_cache/buffer_cache.cpp
buffer_cache/buffer_cache.h
- buffer_cache/map_interval.cpp
- buffer_cache/map_interval.h
cdma_pusher.cpp
cdma_pusher.h
command_classes/codecs/codec.cpp
@@ -152,8 +150,6 @@ add_library(video_core STATIC
renderer_vulkan/vk_staging_buffer_pool.h
renderer_vulkan/vk_state_tracker.cpp
renderer_vulkan/vk_state_tracker.h
- renderer_vulkan/vk_stream_buffer.cpp
- renderer_vulkan/vk_stream_buffer.h
renderer_vulkan/vk_swapchain.cpp
renderer_vulkan/vk_swapchain.h
renderer_vulkan/vk_texture_cache.cpp
@@ -271,14 +267,13 @@ create_target_directory_groups(video_core)
target_link_libraries(video_core PUBLIC common core)
target_link_libraries(video_core PRIVATE glad xbyak)
-if (MSVC)
- target_include_directories(video_core PRIVATE ${FFMPEG_INCLUDE_DIR})
- target_link_libraries(video_core PUBLIC ${FFMPEG_LIBRARY_DIR}/swscale.lib ${FFMPEG_LIBRARY_DIR}/avcodec.lib ${FFMPEG_LIBRARY_DIR}/avutil.lib)
-else()
- target_include_directories(video_core PRIVATE ${FFMPEG_INCLUDE_DIR})
- target_link_libraries(video_core PRIVATE ${FFMPEG_LIBRARIES})
+if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32)
+ add_dependencies(video_core ffmpeg-build)
endif()
+target_include_directories(video_core PRIVATE ${FFmpeg_INCLUDE_DIR})
+target_link_libraries(video_core PRIVATE ${FFmpeg_LIBRARIES})
+
add_dependencies(video_core host_shaders)
target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE})
target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h
index ee8602ce9..0c00ae280 100644
--- a/src/video_core/buffer_cache/buffer_base.h
+++ b/src/video_core/buffer_cache/buffer_base.h
@@ -19,6 +19,7 @@ namespace VideoCommon {
enum class BufferFlagBits {
Picked = 1 << 0,
+ CachedWrites = 1 << 1,
};
DECLARE_ENUM_FLAG_OPERATORS(BufferFlagBits)
@@ -40,7 +41,7 @@ class BufferBase {
static constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE;
/// Vector tracking modified pages tightly packed with small vector optimization
- union WrittenWords {
+ union WordsArray {
/// Returns the pointer to the words state
[[nodiscard]] const u64* Pointer(bool is_short) const noexcept {
return is_short ? &stack : heap;
@@ -55,49 +56,59 @@ class BufferBase {
u64* heap; ///< Not-small buffers pointer to the storage
};
- struct GpuCpuWords {
- explicit GpuCpuWords() = default;
- explicit GpuCpuWords(u64 size_bytes_) : size_bytes{size_bytes_} {
+ struct Words {
+ explicit Words() = default;
+ explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} {
if (IsShort()) {
cpu.stack = ~u64{0};
gpu.stack = 0;
+ cached_cpu.stack = 0;
+ untracked.stack = ~u64{0};
} else {
// Share allocation between CPU and GPU pages and set their default values
const size_t num_words = NumWords();
- u64* const alloc = new u64[num_words * 2];
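+ // One shared allocation now backs four word arrays: cpu, gpu, cached_cpu and untracked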
+ u64* const alloc = new u64[num_words * 4];
cpu.heap = alloc;
gpu.heap = alloc + num_words;
+ cached_cpu.heap = alloc + num_words * 2;
+ untracked.heap = alloc + num_words * 3;
std::fill_n(cpu.heap, num_words, ~u64{0});
std::fill_n(gpu.heap, num_words, 0);
+ std::fill_n(cached_cpu.heap, num_words, 0);
+ std::fill_n(untracked.heap, num_words, ~u64{0});
}
// Clean up trailing bits
- const u64 last_local_page =
- Common::DivCeil(size_bytes % BYTES_PER_WORD, BYTES_PER_PAGE);
+ const u64 last_word_size = size_bytes % BYTES_PER_WORD;
+ const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE);
const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD;
- u64& last_word = cpu.Pointer(IsShort())[NumWords() - 1];
- last_word = (last_word << shift) >> shift;
+ const u64 last_word = (~u64{0} << shift) >> shift;
+ cpu.Pointer(IsShort())[NumWords() - 1] = last_word;
+ untracked.Pointer(IsShort())[NumWords() - 1] = last_word;
}
- ~GpuCpuWords() {
+ ~Words() {
Release();
}
- GpuCpuWords& operator=(GpuCpuWords&& rhs) noexcept {
+ Words& operator=(Words&& rhs) noexcept {
Release();
size_bytes = rhs.size_bytes;
cpu = rhs.cpu;
gpu = rhs.gpu;
+ cached_cpu = rhs.cached_cpu;
+ untracked = rhs.untracked;
rhs.cpu.heap = nullptr;
return *this;
}
- GpuCpuWords(GpuCpuWords&& rhs) noexcept
- : size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu} {
+ Words(Words&& rhs) noexcept
+ : size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu},
+ cached_cpu{rhs.cached_cpu}, untracked{rhs.untracked} {
rhs.cpu.heap = nullptr;
}
- GpuCpuWords& operator=(const GpuCpuWords&) = delete;
- GpuCpuWords(const GpuCpuWords&) = delete;
+ Words& operator=(const Words&) = delete;
+ Words(const Words&) = delete;
/// Returns true when the buffer fits in the small vector optimization
[[nodiscard]] bool IsShort() const noexcept {
@@ -118,8 +129,17 @@ class BufferBase {
}
u64 size_bytes = 0;
- WrittenWords cpu;
- WrittenWords gpu;
+ WordsArray cpu;
+ WordsArray gpu;
+ WordsArray cached_cpu;
+ WordsArray untracked;
+ };
+
+ enum class Type {
+ CPU,
+ GPU,
+ CachedCPU,
+ Untracked,
};
public:
@@ -132,68 +152,93 @@ public:
BufferBase& operator=(const BufferBase&) = delete;
BufferBase(const BufferBase&) = delete;
+ BufferBase& operator=(BufferBase&&) = default;
+ BufferBase(BufferBase&&) = default;
+
/// Returns the inclusive CPU modified range in a begin end pair
[[nodiscard]] std::pair<u64, u64> ModifiedCpuRegion(VAddr query_cpu_addr,
u64 query_size) const noexcept {
const u64 offset = query_cpu_addr - cpu_addr;
- return ModifiedRegion<false>(offset, query_size);
+ return ModifiedRegion<Type::CPU>(offset, query_size);
}
/// Returns the inclusive GPU modified range in a begin end pair
[[nodiscard]] std::pair<u64, u64> ModifiedGpuRegion(VAddr query_cpu_addr,
u64 query_size) const noexcept {
const u64 offset = query_cpu_addr - cpu_addr;
- return ModifiedRegion<true>(offset, query_size);
+ return ModifiedRegion<Type::GPU>(offset, query_size);
}
/// Returns true if a region has been modified from the CPU
[[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept {
const u64 offset = query_cpu_addr - cpu_addr;
- return IsRegionModified<false>(offset, query_size);
+ return IsRegionModified<Type::CPU>(offset, query_size);
}
/// Returns true if a region has been modified from the GPU
[[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept {
const u64 offset = query_cpu_addr - cpu_addr;
- return IsRegionModified<true>(offset, query_size);
+ return IsRegionModified<Type::GPU>(offset, query_size);
}
/// Mark region as CPU modified, notifying the rasterizer about this change
void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) {
- ChangeRegionState<true, true>(words.cpu, dirty_cpu_addr, size);
+ ChangeRegionState<Type::CPU, true>(dirty_cpu_addr, size);
}
/// Unmark region as CPU modified, notifying the rasterizer about this change
void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) {
- ChangeRegionState<false, true>(words.cpu, dirty_cpu_addr, size);
+ ChangeRegionState<Type::CPU, false>(dirty_cpu_addr, size);
}
/// Mark region as modified from the host GPU
void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept {
- ChangeRegionState<true, false>(words.gpu, dirty_cpu_addr, size);
+ ChangeRegionState<Type::GPU, true>(dirty_cpu_addr, size);
}
/// Unmark region as modified from the host GPU
void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept {
- ChangeRegionState<false, false>(words.gpu, dirty_cpu_addr, size);
+ ChangeRegionState<Type::GPU, false>(dirty_cpu_addr, size);
+ }
+
+ /// Record a CPU write to the region, but defer marking it as CPU-modified
+ /// until FlushCachedWrites is called.
+ void CachedCpuWrite(VAddr dirty_cpu_addr, u64 size) {
+ flags |= BufferFlagBits::CachedWrites;
+ ChangeRegionState<Type::CachedCPU, true>(dirty_cpu_addr, size);
+ }
+
+ /// Flushes cached CPU writes and notifies the rasterizer about the deltas
+ void FlushCachedWrites() noexcept {
+ flags &= ~BufferFlagBits::CachedWrites;
+ const u64 num_words = NumWords();
+ const u64* const cached_words = Array<Type::CachedCPU>();
+ u64* const untracked_words = Array<Type::Untracked>();
+ u64* const cpu_words = Array<Type::CPU>();
+ for (u64 word_index = 0; word_index < num_words; ++word_index) {
+ const u64 cached_bits = cached_words[word_index];
+ NotifyRasterizer<false>(word_index, untracked_words[word_index], cached_bits);
+ untracked_words[word_index] |= cached_bits;
+ cpu_words[word_index] |= cached_bits;
+ }
}
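
FlushCachedWrites performs a three-step merge per word: report the delta to the rasterizer, then OR the cached bits into both the untracked and cpu bitmaps. A standalone model of one word with arbitrary example bits (plain integers, not the class itself):

    #include <cstdint>
    #include <cstdio>

    int main() {
        uint64_t cpu = 0x0F;       // pages already CPU-dirty
        uint64_t untracked = 0x0F; // pages the rasterizer is not tracking
        uint64_t cached = 0x30;    // pages recorded by CachedCpuWrite
        // NotifyRasterizer<false> is handed the untracked word and the cached
        // bits; only cached pages that are still tracked produce updates
        const uint64_t delta = ~untracked & cached;
        untracked |= cached;
        cpu |= cached;
        std::printf("delta=%#llx cpu=%#llx untracked=%#llx\n",
                    static_cast<unsigned long long>(delta),
                    static_cast<unsigned long long>(cpu),
                    static_cast<unsigned long long>(untracked));
        // prints: delta=0x30 cpu=0x3f untracked=0x3f
    }
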
/// Call 'func' for each CPU modified range and unmark those pages as CPU modified
template <typename Func>
void ForEachUploadRange(VAddr query_cpu_range, u64 size, Func&& func) {
- ForEachModifiedRange<false, true>(query_cpu_range, size, func);
+ ForEachModifiedRange<Type::CPU>(query_cpu_range, size, func);
}
/// Call 'func' for each GPU modified range and unmark those pages as GPU modified
template <typename Func>
void ForEachDownloadRange(VAddr query_cpu_range, u64 size, Func&& func) {
- ForEachModifiedRange<true, false>(query_cpu_range, size, func);
+ ForEachModifiedRange<Type::GPU>(query_cpu_range, size, func);
}
/// Call 'func' for each GPU modified range and unmark those pages as GPU modified
template <typename Func>
void ForEachDownloadRange(Func&& func) {
- ForEachModifiedRange<true, false>(cpu_addr, SizeBytes(), func);
+ ForEachModifiedRange<Type::GPU>(cpu_addr, SizeBytes(), func);
}
/// Mark buffer as picked
@@ -206,6 +251,16 @@ public:
flags &= ~BufferFlagBits::Picked;
}
+ /// Increases the likelihood of this being a stream buffer
+ void IncreaseStreamScore(int score) noexcept {
+ stream_score += score;
+ }
+
+ /// Returns the likelihood of this being a stream buffer
+ [[nodiscard]] int StreamScore() const noexcept {
+ return stream_score;
+ }
+
/// Returns true when vaddr -> vaddr+size is fully contained in the buffer
[[nodiscard]] bool IsInBounds(VAddr addr, u64 size) const noexcept {
return addr >= cpu_addr && addr + size <= cpu_addr + SizeBytes();
@@ -216,6 +271,11 @@ public:
return True(flags & BufferFlagBits::Picked);
}
+ /// Returns true when the buffer has pending cached writes
+ [[nodiscard]] bool HasCachedWrites() const noexcept {
+ return True(flags & BufferFlagBits::CachedWrites);
+ }
+
/// Returns the base CPU address of the buffer
[[nodiscard]] VAddr CpuAddr() const noexcept {
return cpu_addr;
@@ -233,26 +293,48 @@ public:
}
private:
+ template <Type type>
+ u64* Array() noexcept {
+ if constexpr (type == Type::CPU) {
+ return words.cpu.Pointer(IsShort());
+ } else if constexpr (type == Type::GPU) {
+ return words.gpu.Pointer(IsShort());
+ } else if constexpr (type == Type::CachedCPU) {
+ return words.cached_cpu.Pointer(IsShort());
+ } else if constexpr (type == Type::Untracked) {
+ return words.untracked.Pointer(IsShort());
+ }
+ }
+
+ template <Type type>
+ const u64* Array() const noexcept {
+ if constexpr (type == Type::CPU) {
+ return words.cpu.Pointer(IsShort());
+ } else if constexpr (type == Type::GPU) {
+ return words.gpu.Pointer(IsShort());
+ } else if constexpr (type == Type::CachedCPU) {
+ return words.cached_cpu.Pointer(IsShort());
+ } else if constexpr (type == Type::Untracked) {
+ return words.untracked.Pointer(IsShort());
+ }
+ }
+
/**
* Change the state of a range of pages
*
- * @param written_words Pages to be marked or unmarked as modified
* @param dirty_addr Base address to mark or unmark as modified
* @param size Size in bytes to mark or unmark as modified
- *
- * @tparam enable True when the bits will be set to one, false for zero
- * @tparam notify_rasterizer True when the rasterizer has to be notified about the changes
*/
- template <bool enable, bool notify_rasterizer>
- void ChangeRegionState(WrittenWords& written_words, u64 dirty_addr,
- s64 size) noexcept(!notify_rasterizer) {
+ template <Type type, bool enable>
+ void ChangeRegionState(u64 dirty_addr, s64 size) noexcept(type == Type::GPU) {
const s64 difference = dirty_addr - cpu_addr;
const u64 offset = std::max<s64>(difference, 0);
size += std::min<s64>(difference, 0);
if (offset >= SizeBytes() || size < 0) {
return;
}
- u64* const state_words = written_words.Pointer(IsShort());
+ u64* const untracked_words = Array<Type::Untracked>();
+ u64* const state_words = Array<type>();
const u64 offset_end = std::min(offset + size, SizeBytes());
const u64 begin_page_index = offset / BYTES_PER_PAGE;
const u64 begin_word_index = begin_page_index / PAGES_PER_WORD;
@@ -268,13 +350,19 @@ private:
u64 bits = ~u64{0};
bits = (bits >> right_offset) << right_offset;
bits = (bits << left_offset) >> left_offset;
- if constexpr (notify_rasterizer) {
- NotifyRasterizer<!enable>(word_index, state_words[word_index], bits);
+ if constexpr (type == Type::CPU || type == Type::CachedCPU) {
+ NotifyRasterizer<!enable>(word_index, untracked_words[word_index], bits);
}
if constexpr (enable) {
state_words[word_index] |= bits;
+ if constexpr (type == Type::CPU || type == Type::CachedCPU) {
+ untracked_words[word_index] |= bits;
+ }
} else {
state_words[word_index] &= ~bits;
+ if constexpr (type == Type::CPU || type == Type::CachedCPU) {
+ untracked_words[word_index] &= ~bits;
+ }
}
page_index = 0;
++word_index;
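
The two shift pairs above carve a contiguous run of set bits out of a full word: the first pair clears the low right_offset bits, the second clears the high left_offset bits. A standalone check with illustrative offsets (the values are not taken from the patch):

    #include <cstdint>
    #include <cstdio>

    int main() {
        const uint64_t right_offset = 3; // clears page bits 0..2
        const uint64_t left_offset = 58; // clears page bits 6..63
        uint64_t bits = ~uint64_t{0};
        bits = (bits >> right_offset) << right_offset;
        bits = (bits << left_offset) >> left_offset;
        std::printf("%#llx\n", static_cast<unsigned long long>(bits));
        // prints 0x38: pages 3..5 remain set
    }
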
@@ -291,7 +379,7 @@ private:
* @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages
*/
template <bool add_to_rasterizer>
- void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) {
+ void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const {
u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits;
VAddr addr = cpu_addr + word_index * BYTES_PER_WORD;
while (changed_bits != 0) {
@@ -315,21 +403,20 @@ private:
* @param query_cpu_range Base CPU address to loop over
* @param size Size in bytes of the CPU range to loop over
* @param func Function to call for each turned off region
- *
- * @tparam gpu True for host GPU pages, false for CPU pages
- * @tparam notify_rasterizer True when the rasterizer should be notified about state changes
*/
- template <bool gpu, bool notify_rasterizer, typename Func>
+ template <Type type, typename Func>
void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) {
+ static_assert(type != Type::Untracked);
+
const s64 difference = query_cpu_range - cpu_addr;
const u64 query_begin = std::max<s64>(difference, 0);
size += std::min<s64>(difference, 0);
if (query_begin >= SizeBytes() || size < 0) {
return;
}
- const u64* const cpu_words = words.cpu.Pointer(IsShort());
+ u64* const untracked_words = Array<Type::Untracked>();
+ u64* const state_words = Array<type>();
const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes());
- u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort());
u64* const words_begin = state_words + query_begin / BYTES_PER_WORD;
u64* const words_end = state_words + Common::DivCeil(query_end, BYTES_PER_WORD);
@@ -345,7 +432,8 @@ private:
const u64 word_index_end = std::distance(state_words, last_modified_word);
const unsigned local_page_begin = std::countr_zero(*first_modified_word);
- const unsigned local_page_end = PAGES_PER_WORD - std::countl_zero(last_modified_word[-1]);
+ const unsigned local_page_end =
+ static_cast<unsigned>(PAGES_PER_WORD) - std::countl_zero(last_modified_word[-1]);
const u64 word_page_begin = word_index_begin * PAGES_PER_WORD;
const u64 word_page_end = (word_index_end - 1) * PAGES_PER_WORD;
const u64 query_page_begin = query_begin / BYTES_PER_PAGE;
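
std::countr_zero and std::countl_zero above convert the first and last modified words into local page indices. A runnable one-word example, assuming 64 pages per word:

    #include <bit>
    #include <cstdint>
    #include <cstdio>

    int main() {
        const uint64_t word = 0b111'0000; // pages 4..6 modified
        const unsigned page_begin = static_cast<unsigned>(std::countr_zero(word)); // 4
        const unsigned page_end =
            64u - static_cast<unsigned>(std::countl_zero(word)); // 7
        std::printf("pages [%u, %u)\n", page_begin, page_end);
    }
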
@@ -371,11 +459,13 @@ private:
const u64 current_word = state_words[word_index] & bits;
state_words[word_index] &= ~bits;
- // Exclude CPU modified pages when visiting GPU pages
- const u64 word = current_word & ~(gpu ? cpu_words[word_index] : 0);
- if constexpr (notify_rasterizer) {
- NotifyRasterizer<true>(word_index, word, ~u64{0});
+ if constexpr (type == Type::CPU) {
+ const u64 current_bits = untracked_words[word_index] & bits;
+ untracked_words[word_index] &= ~bits;
+ NotifyRasterizer<true>(word_index, current_bits, ~u64{0});
}
+ // Exclude CPU modified pages when visiting GPU pages
+ const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0);
u64 page = page_begin;
page_begin = 0;
@@ -416,17 +506,20 @@ private:
* @param offset Offset in bytes from the start of the buffer
* @param size Size in bytes of the region to query for modifications
*/
- template <bool gpu>
+ template <Type type>
[[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
- const u64* const cpu_words = words.cpu.Pointer(IsShort());
- const u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort());
+ static_assert(type != Type::Untracked);
+
+ const u64* const untracked_words = Array<Type::Untracked>();
+ const u64* const state_words = Array<type>();
const u64 num_query_words = size / BYTES_PER_WORD + 1;
const u64 word_begin = offset / BYTES_PER_WORD;
const u64 word_end = std::min(word_begin + num_query_words, NumWords());
const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD;
for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) {
- const u64 word = state_words[word_index] & ~(gpu ? cpu_words[word_index] : 0);
+ const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
+ const u64 word = state_words[word_index] & ~off_word;
if (word == 0) {
continue;
}
@@ -445,13 +538,13 @@ private:
*
* @param offset Offset in bytes from the start of the buffer
* @param size Size in bytes of the region to query for modifications
- *
- * @tparam gpu True to query GPU modified pages, false for CPU pages
*/
- template <bool gpu>
+ template <Type type>
[[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept {
- const u64* const cpu_words = words.cpu.Pointer(IsShort());
- const u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort());
+ static_assert(type != Type::Untracked);
+
+ const u64* const untracked_words = Array<Type::Untracked>();
+ const u64* const state_words = Array<type>();
const u64 num_query_words = size / BYTES_PER_WORD + 1;
const u64 word_begin = offset / BYTES_PER_WORD;
const u64 word_end = std::min(word_begin + num_query_words, NumWords());
@@ -460,7 +553,8 @@ private:
u64 begin = std::numeric_limits<u64>::max();
u64 end = 0;
for (u64 word_index = word_begin; word_index < word_end; ++word_index) {
- const u64 word = state_words[word_index] & ~(gpu ? cpu_words[word_index] : 0);
+ const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
+ const u64 word = state_words[word_index] & ~off_word;
if (word == 0) {
continue;
}
@@ -488,8 +582,9 @@ private:
RasterizerInterface* rasterizer = nullptr;
VAddr cpu_addr = 0;
- GpuCpuWords words;
+ Words words;
BufferFlagBits flags{};
+ int stream_score = 0;
};
} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h
deleted file mode 100644
index e9306194a..000000000
--- a/src/video_core/buffer_cache/buffer_block.h
+++ /dev/null
@@ -1,62 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "common/common_types.h"
-
-namespace VideoCommon {
-
-class BufferBlock {
-public:
- [[nodiscard]] bool Overlaps(VAddr start, VAddr end) const {
- return (cpu_addr < end) && (cpu_addr_end > start);
- }
-
- [[nodiscard]] bool IsInside(VAddr other_start, VAddr other_end) const {
- return cpu_addr <= other_start && other_end <= cpu_addr_end;
- }
-
- [[nodiscard]] std::size_t Offset(VAddr in_addr) const {
- return static_cast<std::size_t>(in_addr - cpu_addr);
- }
-
- [[nodiscard]] VAddr CpuAddr() const {
- return cpu_addr;
- }
-
- [[nodiscard]] VAddr CpuAddrEnd() const {
- return cpu_addr_end;
- }
-
- void SetCpuAddr(VAddr new_addr) {
- cpu_addr = new_addr;
- cpu_addr_end = new_addr + size;
- }
-
- [[nodiscard]] std::size_t Size() const {
- return size;
- }
-
- [[nodiscard]] u64 Epoch() const {
- return epoch;
- }
-
- void SetEpoch(u64 new_epoch) {
- epoch = new_epoch;
- }
-
-protected:
- explicit BufferBlock(VAddr cpu_addr_, std::size_t size_) : size{size_} {
- SetCpuAddr(cpu_addr_);
- }
-
-private:
- VAddr cpu_addr{};
- VAddr cpu_addr_end{};
- std::size_t size{};
- u64 epoch{};
-};
-
-} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp
new file mode 100644
index 000000000..ab32294c8
--- /dev/null
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@@ -0,0 +1,13 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/microprofile.h"
+
+namespace VideoCommon {
+
+MICROPROFILE_DEFINE(GPU_PrepareBuffers, "GPU", "Prepare buffers", MP_RGB(224, 128, 128));
+MICROPROFILE_DEFINE(GPU_BindUploadBuffers, "GPU", "Bind and upload buffers", MP_RGB(224, 128, 128));
+MICROPROFILE_DEFINE(GPU_DownloadMemory, "GPU", "Download buffers", MP_RGB(224, 128, 128));
+
+} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 83b9ee871..2a6844ab1 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -4,591 +4,1289 @@
#pragma once
-#include <list>
+#include <algorithm>
+#include <array>
+#include <deque>
#include <memory>
#include <mutex>
+#include <span>
#include <unordered_map>
-#include <unordered_set>
-#include <utility>
#include <vector>
#include <boost/container/small_vector.hpp>
-#include <boost/icl/interval_set.hpp>
-#include <boost/intrusive/set.hpp>
-#include "common/alignment.h"
-#include "common/assert.h"
#include "common/common_types.h"
-#include "common/logging/log.h"
-#include "core/core.h"
+#include "common/div_ceil.h"
+#include "common/microprofile.h"
+#include "common/scope_exit.h"
#include "core/memory.h"
#include "core/settings.h"
-#include "video_core/buffer_cache/buffer_block.h"
-#include "video_core/buffer_cache/map_interval.h"
+#include "video_core/buffer_cache/buffer_base.h"
+#include "video_core/delayed_destruction_ring.h"
+#include "video_core/dirty_flags.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
+#include "video_core/texture_cache/slot_vector.h"
+#include "video_core/texture_cache/types.h"
namespace VideoCommon {
-template <typename Buffer, typename BufferType, typename StreamBuffer>
+MICROPROFILE_DECLARE(GPU_PrepareBuffers);
+MICROPROFILE_DECLARE(GPU_BindUploadBuffers);
+MICROPROFILE_DECLARE(GPU_DownloadMemory);
+
+using BufferId = SlotId;
+
+constexpr u32 NUM_VERTEX_BUFFERS = 32;
+constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4;
+constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18;
+constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8;
+constexpr u32 NUM_STORAGE_BUFFERS = 16;
+constexpr u32 NUM_STAGES = 5;
+
+template <typename P>
class BufferCache {
- using IntervalSet = boost::icl::interval_set<VAddr>;
- using IntervalType = typename IntervalSet::interval_type;
- using VectorMapInterval = boost::container::small_vector<MapInterval*, 1>;
+ // Page size for caching purposes.
+ // This is unrelated to the CPU page size and can be changed if another value proves optimal.
+ static constexpr u32 PAGE_BITS = 16;
+ static constexpr u64 PAGE_SIZE = u64{1} << PAGE_BITS;
+
+ static constexpr bool IS_OPENGL = P::IS_OPENGL;
+ static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS =
+ P::HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS;
+ static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT =
+ P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT;
+ static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX;
+ static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX;
+ static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS;
+
+ static constexpr BufferId NULL_BUFFER_ID{0};
+
+ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
+ using Runtime = typename P::Runtime;
+ using Buffer = typename P::Buffer;
+
+ struct Empty {};
+
+ struct OverlapResult {
+ std::vector<BufferId> ids;
+ VAddr begin;
+ VAddr end;
+ bool has_stream_leap = false;
+ };
- static constexpr u64 WRITE_PAGE_BIT = 11;
- static constexpr u64 BLOCK_PAGE_BITS = 21;
- static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS;
+ struct Binding {
+ VAddr cpu_addr{};
+ u32 size{};
+ BufferId buffer_id;
+ };
-public:
- struct BufferInfo {
- BufferType handle;
- u64 offset;
- u64 address;
+ static constexpr Binding NULL_BINDING{
+ .cpu_addr = 0,
+ .size = 0,
+ .buffer_id = NULL_BUFFER_ID,
};
- BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
- bool is_written = false, bool use_fast_cbuf = false) {
- std::lock_guard lock{mutex};
+public:
+ static constexpr u32 SKIP_CACHE_SIZE = 4096;
- const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
- if (!cpu_addr) {
- return GetEmptyBuffer(size);
- }
+ explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
+ Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::Engines::KeplerCompute& kepler_compute_,
+ Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
+ Runtime& runtime_);
- // Cache management is a big overhead, so only cache entries with a given size.
- // TODO: Figure out which size is the best for given games.
- constexpr std::size_t max_stream_size = 0x800;
- if (use_fast_cbuf || size < max_stream_size) {
- if (!is_written && !IsRegionWritten(*cpu_addr, *cpu_addr + size - 1)) {
- const bool is_granular = gpu_memory.IsGranularRange(gpu_addr, size);
- if (use_fast_cbuf) {
- u8* dest;
- if (is_granular) {
- dest = gpu_memory.GetPointer(gpu_addr);
- } else {
- staging_buffer.resize(size);
- dest = staging_buffer.data();
- gpu_memory.ReadBlockUnsafe(gpu_addr, dest, size);
- }
- return ConstBufferUpload(dest, size);
- }
- if (is_granular) {
- u8* const host_ptr = gpu_memory.GetPointer(gpu_addr);
- return StreamBufferUpload(size, alignment, [host_ptr, size](u8* dest) {
- std::memcpy(dest, host_ptr, size);
- });
- } else {
- return StreamBufferUpload(size, alignment, [this, gpu_addr, size](u8* dest) {
- gpu_memory.ReadBlockUnsafe(gpu_addr, dest, size);
- });
- }
- }
- }
+ void TickFrame();
- Buffer* const block = GetBlock(*cpu_addr, size);
- MapInterval* const map = MapAddress(block, gpu_addr, *cpu_addr, size);
- if (!map) {
- return GetEmptyBuffer(size);
- }
- if (is_written) {
- map->MarkAsModified(true, GetModifiedTicks());
- if (Settings::IsGPULevelHigh() &&
- Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
- MarkForAsyncFlush(map);
- }
- if (!map->is_written) {
- map->is_written = true;
- MarkRegionAsWritten(map->start, map->end - 1);
- }
- }
+ void WriteMemory(VAddr cpu_addr, u64 size);
- return BufferInfo{block->Handle(), block->Offset(*cpu_addr), block->Address()};
- }
+ void CachedWriteMemory(VAddr cpu_addr, u64 size);
- /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
- BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size,
- std::size_t alignment = 4) {
- std::lock_guard lock{mutex};
- return StreamBufferUpload(size, alignment, [raw_pointer, size](u8* dest) {
- std::memcpy(dest, raw_pointer, size);
- });
- }
+ void DownloadMemory(VAddr cpu_addr, u64 size);
- /// Prepares the buffer cache for data uploading
- /// @param max_size Maximum number of bytes that will be uploaded
- /// @return True when a stream buffer invalidation was required, false otherwise
- void Map(std::size_t max_size) {
- std::lock_guard lock{mutex};
+ void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size);
- std::tie(buffer_ptr, buffer_offset_base) = stream_buffer.Map(max_size, 4);
- buffer_offset = buffer_offset_base;
- }
+ void UpdateGraphicsBuffers(bool is_indexed);
- /// Finishes the upload stream
- void Unmap() {
- std::lock_guard lock{mutex};
- stream_buffer.Unmap(buffer_offset - buffer_offset_base);
- }
+ void UpdateComputeBuffers();
- /// Function called at the end of each frame, inteded for deferred operations
- void TickFrame() {
- ++epoch;
+ void BindHostGeometryBuffers(bool is_indexed);
- while (!pending_destruction.empty()) {
- // Delay at least 4 frames before destruction.
- // This is due to triple buffering happening on some drivers.
- static constexpr u64 epochs_to_destroy = 5;
- if (pending_destruction.front()->Epoch() + epochs_to_destroy > epoch) {
- break;
- }
- pending_destruction.pop();
- }
- }
+ void BindHostStageBuffers(size_t stage);
- /// Write any cached resources overlapping the specified region back to memory
- void FlushRegion(VAddr addr, std::size_t size) {
- std::lock_guard lock{mutex};
+ void BindHostComputeBuffers();
- VectorMapInterval objects = GetMapsInRange(addr, size);
- std::sort(objects.begin(), objects.end(),
- [](MapInterval* lhs, MapInterval* rhs) { return lhs->ticks < rhs->ticks; });
- for (MapInterval* object : objects) {
- if (object->is_modified && object->is_registered) {
- mutex.unlock();
- FlushMap(object);
- mutex.lock();
- }
- }
- }
+ void SetEnabledUniformBuffers(size_t stage, u32 enabled);
- bool MustFlushRegion(VAddr addr, std::size_t size) {
- std::lock_guard lock{mutex};
+ void SetEnabledComputeUniformBuffers(u32 enabled);
- const VectorMapInterval objects = GetMapsInRange(addr, size);
- return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval* map) {
- return map->is_modified && map->is_registered;
- });
- }
+ void UnbindGraphicsStorageBuffers(size_t stage);
- /// Mark the specified region as being invalidated
- void InvalidateRegion(VAddr addr, u64 size) {
- std::lock_guard lock{mutex};
+ void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
+ bool is_written);
- for (auto& object : GetMapsInRange(addr, size)) {
- if (object->is_registered) {
- Unregister(object);
- }
- }
- }
+ void UnbindComputeStorageBuffers();
- void OnCPUWrite(VAddr addr, std::size_t size) {
- std::lock_guard lock{mutex};
+ void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
+ bool is_written);
- for (MapInterval* object : GetMapsInRange(addr, size)) {
- if (object->is_memory_marked && object->is_registered) {
- UnmarkMemory(object);
- object->is_sync_pending = true;
- marked_for_unregister.emplace_back(object);
- }
- }
- }
+ void FlushCachedWrites();
- void SyncGuestHost() {
- std::lock_guard lock{mutex};
+ /// Return true when there are uncommitted buffers to be downloaded
+ [[nodiscard]] bool HasUncommittedFlushes() const noexcept;
- for (auto& object : marked_for_unregister) {
- if (object->is_registered) {
- object->is_sync_pending = false;
- Unregister(object);
- }
+ /// Return true when the caller should wait for async downloads
+ [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept;
+
+ /// Commit asynchronous downloads
+ void CommitAsyncFlushes();
+
+ /// Pop asynchronous downloads
+ void PopAsyncFlushes();
+
+ /// Return true when a CPU region is modified from the GPU
+ [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
+
+ std::mutex mutex;
+
+private:
+ template <typename Func>
+ static void ForEachEnabledBit(u32 enabled_mask, Func&& func) {
+ for (u32 index = 0; enabled_mask != 0; ++index, enabled_mask >>= 1) {
+ const int disabled_bits = std::countr_zero(enabled_mask);
+ index += disabled_bits;
+ enabled_mask >>= disabled_bits;
+ func(index);
}
- marked_for_unregister.clear();
}
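
ForEachEnabledBit visits only the set bits of a mask, using countr_zero to skip over runs of zeros in one step. Copied into a standalone demo:

    #include <bit>
    #include <cstdint>
    #include <cstdio>

    template <typename Func>
    static void ForEachEnabledBit(uint32_t enabled_mask, Func&& func) {
        for (uint32_t index = 0; enabled_mask != 0; ++index, enabled_mask >>= 1) {
            const int disabled_bits = std::countr_zero(enabled_mask);
            index += disabled_bits;
            enabled_mask >>= disabled_bits;
            func(index);
        }
    }

    int main() {
        ForEachEnabledBit(0b10110u, [](uint32_t i) { std::printf("%u ", i); });
        // prints: 1 2 4
    }
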
- void CommitAsyncFlushes() {
- if (uncommitted_flushes) {
- auto commit_list = std::make_shared<std::list<MapInterval*>>();
- for (MapInterval* map : *uncommitted_flushes) {
- if (map->is_registered && map->is_modified) {
- // TODO(Blinkhawk): Implement backend asynchronous flushing
- // AsyncFlushMap(map)
- commit_list->push_back(map);
- }
- }
- if (!commit_list->empty()) {
- committed_flushes.push_back(commit_list);
- } else {
- committed_flushes.emplace_back();
+ template <typename Func>
+ void ForEachBufferInRange(VAddr cpu_addr, u64 size, Func&& func) {
+ const u64 page_end = Common::DivCeil(cpu_addr + size, PAGE_SIZE);
+ for (u64 page = cpu_addr >> PAGE_BITS; page < page_end;) {
+ const BufferId buffer_id = page_table[page];
+ if (!buffer_id) {
+ ++page;
+ continue;
}
- } else {
- committed_flushes.emplace_back();
+ Buffer& buffer = slot_buffers[buffer_id];
+ func(buffer_id, buffer);
+
+ const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes();
+ page = Common::DivCeil(end_addr, PAGE_SIZE);
}
- uncommitted_flushes.reset();
}
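
The walk above advances one page at a time across holes, but once a buffer is found it leaps past all of that buffer's pages by resuming at DivCeil(end_addr, PAGE_SIZE). A minimal sketch of the same traversal over a toy page table (FakeBuffer and the map stand in for slot_buffers and page_table):

    #include <cstdint>
    #include <unordered_map>

    struct FakeBuffer {
        uint64_t cpu_addr;
        uint64_t size;
    };

    template <typename Func>
    void ForEachFakeBufferInRange(
        const std::unordered_map<uint64_t, const FakeBuffer*>& table,
        uint64_t cpu_addr, uint64_t size, Func&& func) {
        constexpr uint64_t PAGE_BITS = 16; // 64 KiB cache pages, as above
        constexpr uint64_t PAGE_SIZE = uint64_t{1} << PAGE_BITS;
        const uint64_t page_end = (cpu_addr + size + PAGE_SIZE - 1) >> PAGE_BITS;
        for (uint64_t page = cpu_addr >> PAGE_BITS; page < page_end;) {
            const auto it = table.find(page);
            if (it == table.end()) {
                ++page; // hole: advance a single page
                continue;
            }
            func(*it->second);
            // jump past the rest of this buffer's pages in one step
            const uint64_t end_addr = it->second->cpu_addr + it->second->size;
            page = (end_addr + PAGE_SIZE - 1) >> PAGE_BITS;
        }
    }
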
- bool ShouldWaitAsyncFlushes() const {
- return !committed_flushes.empty() && committed_flushes.front() != nullptr;
+ static bool IsRangeGranular(VAddr cpu_addr, size_t size) {
+ return (cpu_addr & ~Core::Memory::PAGE_MASK) ==
+ ((cpu_addr + size) & ~Core::Memory::PAGE_MASK);
}
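
IsRangeGranular is a same-page test: a range is granular when its first and last byte fall on one guest page. A standalone check, assuming 4 KiB guest pages (treating Core::Memory::PAGE_MASK as 0xFFF is an assumption here):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    static bool IsRangeGranular(uint64_t cpu_addr, size_t size) {
        constexpr uint64_t PAGE_MASK = 0xFFF; // assumed 4 KiB pages
        return (cpu_addr & ~PAGE_MASK) == ((cpu_addr + size) & ~PAGE_MASK);
    }

    int main() {
        std::printf("%d %d\n",
                    IsRangeGranular(0x1000, 0x200), // 1: stays on one page
                    IsRangeGranular(0xF80, 0x100)); // 0: crosses a page boundary
    }
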
- bool HasUncommittedFlushes() const {
- return uncommitted_flushes != nullptr;
- }
+ void BindHostIndexBuffer();
- void PopAsyncFlushes() {
- if (committed_flushes.empty()) {
- return;
- }
- auto& flush_list = committed_flushes.front();
- if (!flush_list) {
- committed_flushes.pop_front();
- return;
- }
- for (MapInterval* map : *flush_list) {
- if (map->is_registered) {
- // TODO(Blinkhawk): Replace this for reading the asynchronous flush
- FlushMap(map);
- }
- }
- committed_flushes.pop_front();
- }
+ void BindHostVertexBuffers();
- virtual BufferInfo GetEmptyBuffer(std::size_t size) = 0;
+ void BindHostGraphicsUniformBuffers(size_t stage);
-protected:
- explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
- Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
- StreamBuffer& stream_buffer_)
- : rasterizer{rasterizer_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_},
- stream_buffer{stream_buffer_} {}
+ void BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, bool needs_bind);
- ~BufferCache() = default;
+ void BindHostGraphicsStorageBuffers(size_t stage);
- virtual std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
+ void BindHostTransformFeedbackBuffers();
- virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) {
- return {};
- }
+ void BindHostComputeUniformBuffers();
- /// Register an object into the cache
- MapInterval* Register(MapInterval new_map, bool inherit_written = false) {
- const VAddr cpu_addr = new_map.start;
- if (!cpu_addr) {
- LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}",
- new_map.gpu_addr);
- return nullptr;
- }
- const std::size_t size = new_map.end - new_map.start;
- new_map.is_registered = true;
- rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
- new_map.is_memory_marked = true;
- if (inherit_written) {
- MarkRegionAsWritten(new_map.start, new_map.end - 1);
- new_map.is_written = true;
- }
- MapInterval* const storage = mapped_addresses_allocator.Allocate();
- *storage = new_map;
- mapped_addresses.insert(*storage);
- return storage;
- }
+ void BindHostComputeStorageBuffers();
- void UnmarkMemory(MapInterval* map) {
- if (!map->is_memory_marked) {
- return;
- }
- const std::size_t size = map->end - map->start;
- rasterizer.UpdatePagesCachedCount(map->start, size, -1);
- map->is_memory_marked = false;
- }
-
- /// Unregisters an object from the cache
- void Unregister(MapInterval* map) {
- UnmarkMemory(map);
- map->is_registered = false;
- if (map->is_sync_pending) {
- map->is_sync_pending = false;
- marked_for_unregister.remove(map);
+ void DoUpdateGraphicsBuffers(bool is_indexed);
+
+ void DoUpdateComputeBuffers();
+
+ void UpdateIndexBuffer();
+
+ void UpdateVertexBuffers();
+
+ void UpdateVertexBuffer(u32 index);
+
+ void UpdateUniformBuffers(size_t stage);
+
+ void UpdateStorageBuffers(size_t stage);
+
+ void UpdateTransformFeedbackBuffers();
+
+ void UpdateTransformFeedbackBuffer(u32 index);
+
+ void UpdateComputeUniformBuffers();
+
+ void UpdateComputeStorageBuffers();
+
+ void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size);
+
+ [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size);
+
+ [[nodiscard]] OverlapResult ResolveOverlaps(VAddr cpu_addr, u32 wanted_size);
+
+ void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score);
+
+ [[nodiscard]] BufferId CreateBuffer(VAddr cpu_addr, u32 wanted_size);
+
+ void Register(BufferId buffer_id);
+
+ void Unregister(BufferId buffer_id);
+
+ template <bool insert>
+ void ChangeRegister(BufferId buffer_id);
+
+ void SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);
+
+ void SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
+
+ void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
+ std::span<BufferCopy> copies);
+
+ void ImmediateUploadMemory(Buffer& buffer, u64 largest_copy,
+ std::span<const BufferCopy> copies);
+
+ void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies);
+
+ void DeleteBuffer(BufferId buffer_id);
+
+ void ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id);
+
+ void NotifyBufferDeletion();
+
+ [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr) const;
+
+ [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size);
+
+ [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity);
+
+ [[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept;
+
+ VideoCore::RasterizerInterface& rasterizer;
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ Tegra::Engines::KeplerCompute& kepler_compute;
+ Tegra::MemoryManager& gpu_memory;
+ Core::Memory::Memory& cpu_memory;
+ Runtime& runtime;
+
+ SlotVector<Buffer> slot_buffers;
+ DelayedDestructionRing<Buffer, 8> delayed_destruction_ring;
+
+ u32 last_index_count = 0;
+
+ Binding index_buffer;
+ std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers;
+ std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers;
+ std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers;
+ std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers;
+
+ std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers;
+ std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers;
+
+ std::array<u32, NUM_STAGES> enabled_uniform_buffers{};
+ u32 enabled_compute_uniform_buffers = 0;
+
+ std::array<u32, NUM_STAGES> enabled_storage_buffers{};
+ std::array<u32, NUM_STAGES> written_storage_buffers{};
+ u32 enabled_compute_storage_buffers = 0;
+ u32 written_compute_storage_buffers = 0;
+
+ std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{};
+
+ bool has_deleted_buffers = false;
+
+ std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty>
+ dirty_uniform_buffers{};
+
+ std::vector<BufferId> cached_write_buffer_ids;
+
+ // TODO: This data structure is not optimal and should be reworked
+ std::vector<BufferId> uncommitted_downloads;
+ std::deque<std::vector<BufferId>> committed_downloads;
+
+ size_t immediate_buffer_capacity = 0;
+ std::unique_ptr<u8[]> immediate_buffer_alloc;
+
+ std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table;
+};
+
+template <class P>
+BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
+ Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::Engines::KeplerCompute& kepler_compute_,
+ Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
+ Runtime& runtime_)
+ : rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_},
+ gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} {
+ // Ensure the first slot is used for the null buffer
+ void(slot_buffers.insert(runtime, NullBufferParams{}));
+}
+
+template <class P>
+void BufferCache<P>::TickFrame() {
+ delayed_destruction_ring.Tick();
+}
+
+template <class P>
+void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) {
+ ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) {
+ buffer.MarkRegionAsCpuModified(cpu_addr, size);
+ });
+}
+
+template <class P>
+void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) {
+ ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
+ if (!buffer.HasCachedWrites()) {
+ cached_write_buffer_ids.push_back(buffer_id);
}
- if (map->is_written) {
- UnmarkRegionAsWritten(map->start, map->end - 1);
+ buffer.CachedCpuWrite(cpu_addr, size);
+ });
+}
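
CachedWriteMemory registers a buffer in cached_write_buffer_ids only on its first deferred write; the CachedWrites flag turns the duplicate check into a constant-time flag test. A toy model of that registration:

    #include <cstdio>
    #include <vector>

    struct Buf {
        bool cached = false;
        bool HasCachedWrites() const { return cached; }
        void CachedCpuWrite() { cached = true; }
    };

    int main() {
        std::vector<int> ids;
        Buf buffer;
        for (int i = 0; i < 3; ++i) { // three writes to the same buffer
            if (!buffer.HasCachedWrites()) {
                ids.push_back(7); // hypothetical buffer id
            }
            buffer.CachedCpuWrite();
        }
        std::printf("%zu\n", ids.size()); // prints 1: registered once
    }
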
+
+template <class P>
+void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) {
+ ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) {
+ boost::container::small_vector<BufferCopy, 1> copies;
+ u64 total_size_bytes = 0;
+ u64 largest_copy = 0;
+ buffer.ForEachDownloadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) {
+ copies.push_back(BufferCopy{
+ .src_offset = range_offset,
+ .dst_offset = total_size_bytes,
+ .size = range_size,
+ });
+ total_size_bytes += range_size;
+ largest_copy = std::max(largest_copy, range_size);
+ });
+ if (total_size_bytes == 0) {
+ return;
}
- const auto it = mapped_addresses.find(*map);
- ASSERT(it != mapped_addresses.end());
- mapped_addresses.erase(it);
- mapped_addresses_allocator.Release(map);
- }
-
-private:
- MapInterval* MapAddress(Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) {
- const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size);
- if (overlaps.empty()) {
- const VAddr cpu_addr_end = cpu_addr + size;
- if (gpu_memory.IsGranularRange(gpu_addr, size)) {
- u8* const host_ptr = gpu_memory.GetPointer(gpu_addr);
- block->Upload(block->Offset(cpu_addr), size, host_ptr);
- } else {
- staging_buffer.resize(size);
- gpu_memory.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
- block->Upload(block->Offset(cpu_addr), size, staging_buffer.data());
+ MICROPROFILE_SCOPE(GPU_DownloadMemory);
+
+ if constexpr (USE_MEMORY_MAPS) {
+ auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
+ const u8* const mapped_memory = download_staging.mapped_span.data();
+ const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size());
+ for (BufferCopy& copy : copies) {
+ // Offset the copies to account for the staging buffer offset
+ copy.dst_offset += download_staging.offset;
}
- return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr));
- }
-
- const VAddr cpu_addr_end = cpu_addr + size;
- if (overlaps.size() == 1) {
- MapInterval* const current_map = overlaps[0];
- if (current_map->IsInside(cpu_addr, cpu_addr_end)) {
- return current_map;
+ runtime.CopyBuffer(download_staging.buffer, buffer, copies_span);
+ runtime.Finish();
+ for (const BufferCopy& copy : copies) {
+ const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
+ // Undo the modified offset
+ const u64 dst_offset = copy.dst_offset - download_staging.offset;
+ const u8* copy_mapped_memory = mapped_memory + dst_offset;
+ cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size);
+ }
+ } else {
+ const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
+ for (const BufferCopy& copy : copies) {
+ buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
+ const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
+ cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size);
}
}
- VAddr new_start = cpu_addr;
- VAddr new_end = cpu_addr_end;
- bool write_inheritance = false;
- bool modified_inheritance = false;
- // Calculate new buffer parameters
- for (MapInterval* overlap : overlaps) {
- new_start = std::min(overlap->start, new_start);
- new_end = std::max(overlap->end, new_end);
- write_inheritance |= overlap->is_written;
- modified_inheritance |= overlap->is_modified;
+ });
+}
+
+template <class P>
+void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
+ u32 size) {
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ if (!cpu_addr) {
+ uniform_buffers[stage][index] = NULL_BINDING;
+ return;
+ }
+ const Binding binding{
+ .cpu_addr = *cpu_addr,
+ .size = size,
+ .buffer_id = BufferId{},
+ };
+ uniform_buffers[stage][index] = binding;
+}
+
+template <class P>
+void BufferCache<P>::UpdateGraphicsBuffers(bool is_indexed) {
+ MICROPROFILE_SCOPE(GPU_PrepareBuffers);
+ do {
+ has_deleted_buffers = false;
+ DoUpdateGraphicsBuffers(is_indexed);
+ } while (has_deleted_buffers);
+}
+
+template <class P>
+void BufferCache<P>::UpdateComputeBuffers() {
+ MICROPROFILE_SCOPE(GPU_PrepareBuffers);
+ do {
+ has_deleted_buffers = false;
+ DoUpdateComputeBuffers();
+ } while (has_deleted_buffers);
+}
+
+template <class P>
+void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) {
+ MICROPROFILE_SCOPE(GPU_BindUploadBuffers);
+ if (is_indexed) {
+ BindHostIndexBuffer();
+ } else if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
+ const auto& regs = maxwell3d.regs;
+ if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
+ runtime.BindQuadArrayIndexBuffer(regs.vertex_buffer.first, regs.vertex_buffer.count);
}
- GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr;
- for (auto& overlap : overlaps) {
- Unregister(overlap);
+ }
+ BindHostVertexBuffers();
+ BindHostTransformFeedbackBuffers();
+}
+
+template <class P>
+void BufferCache<P>::BindHostStageBuffers(size_t stage) {
+ MICROPROFILE_SCOPE(GPU_BindUploadBuffers);
+ BindHostGraphicsUniformBuffers(stage);
+ BindHostGraphicsStorageBuffers(stage);
+}
+
+template <class P>
+void BufferCache<P>::BindHostComputeBuffers() {
+ MICROPROFILE_SCOPE(GPU_BindUploadBuffers);
+ BindHostComputeUniformBuffers();
+ BindHostComputeStorageBuffers();
+}
+
+template <class P>
+void BufferCache<P>::SetEnabledUniformBuffers(size_t stage, u32 enabled) {
+ if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
+ if (enabled_uniform_buffers[stage] != enabled) {
+ dirty_uniform_buffers[stage] = ~u32{0};
}
- UpdateBlock(block, new_start, new_end, overlaps);
-
- const MapInterval new_map{new_start, new_end, new_gpu_addr};
- MapInterval* const map = Register(new_map, write_inheritance);
- if (!map) {
- return nullptr;
+ }
+ enabled_uniform_buffers[stage] = enabled;
+}
+
+template <class P>
+void BufferCache<P>::SetEnabledComputeUniformBuffers(u32 enabled) {
+ enabled_compute_uniform_buffers = enabled;
+}
+
+template <class P>
+void BufferCache<P>::UnbindGraphicsStorageBuffers(size_t stage) {
+ enabled_storage_buffers[stage] = 0;
+ written_storage_buffers[stage] = 0;
+}
+
+template <class P>
+void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index,
+ u32 cbuf_offset, bool is_written) {
+ enabled_storage_buffers[stage] |= 1U << ssbo_index;
+ written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index;
+
+ const auto& cbufs = maxwell3d.state.shader_stages[stage];
+ const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset;
+ storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr);
+}
+
+template <class P>
+void BufferCache<P>::UnbindComputeStorageBuffers() {
+ enabled_compute_storage_buffers = 0;
+ written_compute_storage_buffers = 0;
+}
+
+template <class P>
+void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
+ bool is_written) {
+ enabled_compute_storage_buffers |= 1U << ssbo_index;
+ written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index;
+
+ const auto& launch_desc = kepler_compute.launch_description;
+ ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0);
+
+ const auto& cbufs = launch_desc.const_buffer_config;
+ const GPUVAddr ssbo_addr = cbufs[cbuf_index].Address() + cbuf_offset;
+ compute_storage_buffers[ssbo_index] = StorageBufferBinding(ssbo_addr);
+}
+
+template <class P>
+void BufferCache<P>::FlushCachedWrites() {
+ for (const BufferId buffer_id : cached_write_buffer_ids) {
+ slot_buffers[buffer_id].FlushCachedWrites();
+ }
+ cached_write_buffer_ids.clear();
+}
+
+template <class P>
+bool BufferCache<P>::HasUncommittedFlushes() const noexcept {
+ return !uncommitted_downloads.empty();
+}
+
+template <class P>
+bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept {
+ return !committed_downloads.empty() && !committed_downloads.front().empty();
+}
+
+template <class P>
+void BufferCache<P>::CommitAsyncFlushes() {
+ // Intentionally pass uncommitted_downloads by copy; it is cleared right after
+ committed_downloads.push_front(uncommitted_downloads);
+ uncommitted_downloads.clear();
+}
+
+template <class P>
+void BufferCache<P>::PopAsyncFlushes() {
+ if (committed_downloads.empty()) {
+ return;
+ }
+ auto scope_exit_pop_download = detail::ScopeExit([this] { committed_downloads.pop_back(); });
+ const std::span<const BufferId> download_ids = committed_downloads.back();
+ if (download_ids.empty()) {
+ return;
+ }
+ MICROPROFILE_SCOPE(GPU_DownloadMemory);
+
+ boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads;
+ u64 total_size_bytes = 0;
+ u64 largest_copy = 0;
+ for (const BufferId buffer_id : download_ids) {
+ slot_buffers[buffer_id].ForEachDownloadRange([&](u64 range_offset, u64 range_size) {
+ downloads.push_back({
+ BufferCopy{
+ .src_offset = range_offset,
+ .dst_offset = total_size_bytes,
+ .size = range_size,
+ },
+ buffer_id,
+ });
+ total_size_bytes += range_size;
+ largest_copy = std::max(largest_copy, range_size);
+ });
+ }
+ if (downloads.empty()) {
+ return;
+ }
+ if constexpr (USE_MEMORY_MAPS) {
+ auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
+ for (auto& [copy, buffer_id] : downloads) {
+ // Account for the staging buffer offset in the copy
+ copy.dst_offset += download_staging.offset;
+ const std::array copies{copy};
+ runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies);
}
- if (modified_inheritance) {
- map->MarkAsModified(true, GetModifiedTicks());
- if (Settings::IsGPULevelHigh() &&
- Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
- MarkForAsyncFlush(map);
- }
+ runtime.Finish();
+ for (const auto [copy, buffer_id] : downloads) {
+ const Buffer& buffer = slot_buffers[buffer_id];
+ const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
+ // Undo the modified offset
+ const u64 dst_offset = copy.dst_offset - download_staging.offset;
+ const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset;
+ cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size);
+ }
+ } else {
+ const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
+ for (const auto [copy, buffer_id] : downloads) {
+ Buffer& buffer = slot_buffers[buffer_id];
+ buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
+ const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
+ cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size);
}
- return map;
}
-
- void UpdateBlock(Buffer* block, VAddr start, VAddr end, const VectorMapInterval& overlaps) {
- const IntervalType base_interval{start, end};
- IntervalSet interval_set{};
- interval_set.add(base_interval);
- for (auto& overlap : overlaps) {
- const IntervalType subtract{overlap->start, overlap->end};
- interval_set.subtract(subtract);
+}
+
+template <class P>
+bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
+ const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE);
+ for (u64 page = addr >> PAGE_BITS; page < page_end;) {
+ const BufferId image_id = page_table[page];
+ if (!image_id) {
+ ++page;
+ continue;
}
- for (auto& interval : interval_set) {
- const std::size_t size = interval.upper() - interval.lower();
- if (size == 0) {
- continue;
- }
- staging_buffer.resize(size);
- cpu_memory.ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
- block->Upload(block->Offset(interval.lower()), size, staging_buffer.data());
+ Buffer& buffer = slot_buffers[image_id];
+ if (buffer.IsRegionGpuModified(addr, size)) {
+ return true;
}
+ const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes();
+ page = Common::DivCeil(end_addr, PAGE_SIZE);
}
-
- VectorMapInterval GetMapsInRange(VAddr addr, std::size_t size) {
- VectorMapInterval result;
- if (size == 0) {
- return result;
+ return false;
+}
+
+template <class P>
+void BufferCache<P>::BindHostIndexBuffer() {
+ Buffer& buffer = slot_buffers[index_buffer.buffer_id];
+ const u32 offset = buffer.Offset(index_buffer.cpu_addr);
+ const u32 size = index_buffer.size;
+ SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
+ if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
+ runtime.BindIndexBuffer(buffer, offset, size);
+ } else {
+ runtime.BindIndexBuffer(maxwell3d.regs.draw.topology, maxwell3d.regs.index_array.format,
+ maxwell3d.regs.index_array.first, maxwell3d.regs.index_array.count,
+ buffer, offset, size);
+ }
+}
+
+template <class P>
+void BufferCache<P>::BindHostVertexBuffers() {
+ auto& flags = maxwell3d.dirty.flags;
+ for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
+ const Binding& binding = vertex_buffers[index];
+ Buffer& buffer = slot_buffers[binding.buffer_id];
+ SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
+ if (!flags[Dirty::VertexBuffer0 + index]) {
+ continue;
}
+ flags[Dirty::VertexBuffer0 + index] = false;
- const VAddr addr_end = addr + size;
- auto it = mapped_addresses.lower_bound(addr);
- if (it != mapped_addresses.begin()) {
- --it;
+ const u32 stride = maxwell3d.regs.vertex_array[index].stride;
+ const u32 offset = buffer.Offset(binding.cpu_addr);
+ runtime.BindVertexBuffer(index, buffer, offset, binding.size, stride);
+ }
+}
+
+template <class P>
+void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) {
+ u32 dirty = ~0U;
+ if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
+ dirty = std::exchange(dirty_uniform_buffers[stage], 0);
+ }
+ u32 binding_index = 0;
+ ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) {
+ const bool needs_bind = ((dirty >> index) & 1) != 0;
+ BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind);
+ if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
+ ++binding_index;
}
- while (it != mapped_addresses.end() && it->start < addr_end) {
- if (it->Overlaps(addr, addr_end)) {
- result.push_back(&*it);
+ });
+}
+
+template <class P>
+void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index,
+ bool needs_bind) {
+ const Binding& binding = uniform_buffers[stage][index];
+ const VAddr cpu_addr = binding.cpu_addr;
+ const u32 size = binding.size;
+ Buffer& buffer = slot_buffers[binding.buffer_id];
+ if (size <= SKIP_CACHE_SIZE && !buffer.IsRegionGpuModified(cpu_addr, size)) {
+ if constexpr (IS_OPENGL) {
+ if (runtime.HasFastBufferSubData()) {
+ // Fast path for Nvidia
+ if (!HasFastUniformBufferBound(stage, binding_index)) {
+ // We only have to bind when the currently bound buffer is not the fast version
+ runtime.BindFastUniformBuffer(stage, binding_index, size);
+ }
+ const auto span = ImmediateBufferWithData(cpu_addr, size);
+ runtime.PushFastUniformBuffer(stage, binding_index, span);
+ return;
}
- ++it;
}
- return result;
- }
+ fast_bound_uniform_buffers[stage] |= 1U << binding_index;
- /// Returns a ticks counter used for tracking when cached objects were last modified
- u64 GetModifiedTicks() {
- return ++modified_ticks;
+ // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan
+ const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size);
+ cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size);
+ return;
}
-
- void FlushMap(MapInterval* map) {
- const auto it = blocks.find(map->start >> BLOCK_PAGE_BITS);
- ASSERT_OR_EXECUTE(it != blocks.end(), return;);
-
- std::shared_ptr<Buffer> block = it->second;
-
- const std::size_t size = map->end - map->start;
- staging_buffer.resize(size);
- block->Download(block->Offset(map->start), size, staging_buffer.data());
- cpu_memory.WriteBlockUnsafe(map->start, staging_buffer.data(), size);
- map->MarkAsModified(false, 0);
+ // Classic cached path
+ SynchronizeBuffer(buffer, cpu_addr, size);
+ if (!needs_bind && !HasFastUniformBufferBound(stage, binding_index)) {
+ // Skip binding if it's not needed and if the bound buffer is not the fast version
+ // This exists to avoid instances where the fast buffer is bound and a GPU write happens
+ return;
}
+ fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);
- template <typename Callable>
- BufferInfo StreamBufferUpload(std::size_t size, std::size_t alignment, Callable&& callable) {
- AlignBuffer(alignment);
- const std::size_t uploaded_offset = buffer_offset;
- callable(buffer_ptr);
-
- buffer_ptr += size;
- buffer_offset += size;
- return BufferInfo{stream_buffer.Handle(), uploaded_offset, stream_buffer.Address()};
+ const u32 offset = buffer.Offset(cpu_addr);
+ if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
+ runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size);
+ } else {
+ runtime.BindUniformBuffer(buffer, offset, size);
}
+}
+
+template <class P>
+void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
+ u32 binding_index = 0;
+ ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {
+ const Binding& binding = storage_buffers[stage][index];
+ Buffer& buffer = slot_buffers[binding.buffer_id];
+ const u32 size = binding.size;
+ SynchronizeBuffer(buffer, binding.cpu_addr, size);
+
+ const u32 offset = buffer.Offset(binding.cpu_addr);
+ const bool is_written = ((written_storage_buffers[stage] >> index) & 1) != 0;
+ if constexpr (NEEDS_BIND_STORAGE_INDEX) {
+ runtime.BindStorageBuffer(stage, binding_index, buffer, offset, size, is_written);
+ ++binding_index;
+ } else {
+ runtime.BindStorageBuffer(buffer, offset, size, is_written);
+ }
+ });
+}
- void AlignBuffer(std::size_t alignment) {
- // Align the offset, not the mapped pointer
- const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment);
- buffer_ptr += offset_aligned - buffer_offset;
- buffer_offset = offset_aligned;
+template <class P>
+void BufferCache<P>::BindHostTransformFeedbackBuffers() {
+ if (maxwell3d.regs.tfb_enabled == 0) {
+ return;
}
+ for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
+ const Binding& binding = transform_feedback_buffers[index];
+ Buffer& buffer = slot_buffers[binding.buffer_id];
+ const u32 size = binding.size;
+ SynchronizeBuffer(buffer, binding.cpu_addr, size);
+
+ const u32 offset = buffer.Offset(binding.cpu_addr);
+ runtime.BindTransformFeedbackBuffer(index, buffer, offset, size);
+ }
+}
- std::shared_ptr<Buffer> EnlargeBlock(std::shared_ptr<Buffer> buffer) {
- const std::size_t old_size = buffer->Size();
- const std::size_t new_size = old_size + BLOCK_PAGE_SIZE;
- const VAddr cpu_addr = buffer->CpuAddr();
- std::shared_ptr<Buffer> new_buffer = CreateBlock(cpu_addr, new_size);
- new_buffer->CopyFrom(*buffer, 0, 0, old_size);
- QueueDestruction(std::move(buffer));
-
- const VAddr cpu_addr_end = cpu_addr + new_size - 1;
- const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
- for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
- blocks.insert_or_assign(page_start, new_buffer);
+template <class P>
+void BufferCache<P>::BindHostComputeUniformBuffers() {
+ if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
+ // Mark all uniform buffers as dirty
+ dirty_uniform_buffers.fill(~u32{0});
+ }
+ u32 binding_index = 0;
+ ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) {
+ const Binding& binding = compute_uniform_buffers[index];
+ Buffer& buffer = slot_buffers[binding.buffer_id];
+ const u32 size = binding.size;
+ SynchronizeBuffer(buffer, binding.cpu_addr, size);
+
+ const u32 offset = buffer.Offset(binding.cpu_addr);
+ if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
+ runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size);
+ ++binding_index;
+ } else {
+ runtime.BindUniformBuffer(buffer, offset, size);
}
+ });
+}
+
+template <class P>
+void BufferCache<P>::BindHostComputeStorageBuffers() {
+ u32 binding_index = 0;
+ ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
+ const Binding& binding = compute_storage_buffers[index];
+ Buffer& buffer = slot_buffers[binding.buffer_id];
+ const u32 size = binding.size;
+ SynchronizeBuffer(buffer, binding.cpu_addr, size);
+
+ const u32 offset = buffer.Offset(binding.cpu_addr);
+ const bool is_written = ((written_compute_storage_buffers >> index) & 1) != 0;
+ if constexpr (NEEDS_BIND_STORAGE_INDEX) {
+ runtime.BindComputeStorageBuffer(binding_index, buffer, offset, size, is_written);
+ ++binding_index;
+ } else {
+ runtime.BindStorageBuffer(buffer, offset, size, is_written);
+ }
+ });
+}
- return new_buffer;
+template <class P>
+void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
+ if (is_indexed) {
+ UpdateIndexBuffer();
}
+ UpdateVertexBuffers();
+ UpdateTransformFeedbackBuffers();
+ for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
+ UpdateUniformBuffers(stage);
+ UpdateStorageBuffers(stage);
+ }
+}
+
+template <class P>
+void BufferCache<P>::DoUpdateComputeBuffers() {
+ UpdateComputeUniformBuffers();
+ UpdateComputeStorageBuffers();
+}
+
+template <class P>
+void BufferCache<P>::UpdateIndexBuffer() {
+ // We have to check both the dirty flags and the index count, because
+ // the index count is currently changed without updating the dirty flags
+ const auto& index_array = maxwell3d.regs.index_array;
+ auto& flags = maxwell3d.dirty.flags;
+ if (!flags[Dirty::IndexBuffer] && last_index_count == index_array.count) {
+ return;
+ }
+ flags[Dirty::IndexBuffer] = false;
+ last_index_count = index_array.count;
+
+ const GPUVAddr gpu_addr_begin = index_array.StartAddress();
+ const GPUVAddr gpu_addr_end = index_array.EndAddress();
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
+ const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
+ const u32 draw_size = index_array.count * index_array.FormatSizeInBytes();
+ const u32 size = std::min(address_size, draw_size);
+ if (size == 0 || !cpu_addr) {
+ index_buffer = NULL_BINDING;
+ return;
+ }
+ index_buffer = Binding{
+ .cpu_addr = *cpu_addr,
+ .size = size,
+ .buffer_id = FindBuffer(*cpu_addr, size),
+ };
+}
- std::shared_ptr<Buffer> MergeBlocks(std::shared_ptr<Buffer> first,
- std::shared_ptr<Buffer> second) {
- const std::size_t size_1 = first->Size();
- const std::size_t size_2 = second->Size();
- const VAddr first_addr = first->CpuAddr();
- const VAddr second_addr = second->CpuAddr();
- const VAddr new_addr = std::min(first_addr, second_addr);
- const std::size_t new_size = size_1 + size_2;
-
- std::shared_ptr<Buffer> new_buffer = CreateBlock(new_addr, new_size);
- new_buffer->CopyFrom(*first, 0, new_buffer->Offset(first_addr), size_1);
- new_buffer->CopyFrom(*second, 0, new_buffer->Offset(second_addr), size_2);
- QueueDestruction(std::move(first));
- QueueDestruction(std::move(second));
+template <class P>
+void BufferCache<P>::UpdateVertexBuffers() {
+ auto& flags = maxwell3d.dirty.flags;
+    if (!flags[Dirty::VertexBuffers]) {
+ return;
+ }
+ flags[Dirty::VertexBuffers] = false;
- const VAddr cpu_addr_end = new_addr + new_size - 1;
- const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
- for (u64 page_start = new_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
- blocks.insert_or_assign(page_start, new_buffer);
- }
- return new_buffer;
+ for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
+ UpdateVertexBuffer(index);
}
+}
- Buffer* GetBlock(VAddr cpu_addr, std::size_t size) {
- std::shared_ptr<Buffer> found;
+template <class P>
+void BufferCache<P>::UpdateVertexBuffer(u32 index) {
+ if (!maxwell3d.dirty.flags[Dirty::VertexBuffer0 + index]) {
+ return;
+ }
+ const auto& array = maxwell3d.regs.vertex_array[index];
+ const auto& limit = maxwell3d.regs.vertex_array_limit[index];
+ const GPUVAddr gpu_addr_begin = array.StartAddress();
+ const GPUVAddr gpu_addr_end = limit.LimitAddress() + 1;
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
+ const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
+ const u32 size = address_size; // TODO: Analyze stride and number of vertices
+ if (array.enable == 0 || size == 0 || !cpu_addr) {
+ vertex_buffers[index] = NULL_BINDING;
+ return;
+ }
+ vertex_buffers[index] = Binding{
+ .cpu_addr = *cpu_addr,
+ .size = size,
+ .buffer_id = FindBuffer(*cpu_addr, size),
+ };
+}
+
+template <class P>
+void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
+ ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) {
+ Binding& binding = uniform_buffers[stage][index];
+ if (binding.buffer_id) {
+ // Already updated
+ return;
+ }
+ // Mark as dirty
+ if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
+ dirty_uniform_buffers[stage] |= 1U << index;
+ }
+ // Resolve buffer
+ binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
+ });
+}
+
+template <class P>
+void BufferCache<P>::UpdateStorageBuffers(size_t stage) {
+ const u32 written_mask = written_storage_buffers[stage];
+ ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {
+ // Resolve buffer
+ Binding& binding = storage_buffers[stage][index];
+ const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size);
+ binding.buffer_id = buffer_id;
+ // Mark buffer as written if needed
+ if (((written_mask >> index) & 1) != 0) {
+ MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size);
+ }
+ });
+}
- const VAddr cpu_addr_end = cpu_addr + size - 1;
- const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
- for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
- auto it = blocks.find(page_start);
- if (it == blocks.end()) {
- if (found) {
- found = EnlargeBlock(found);
- continue;
- }
- const VAddr start_addr = page_start << BLOCK_PAGE_BITS;
- found = CreateBlock(start_addr, BLOCK_PAGE_SIZE);
- blocks.insert_or_assign(page_start, found);
- continue;
- }
- if (!found) {
- found = it->second;
- continue;
- }
- if (found != it->second) {
- found = MergeBlocks(std::move(found), it->second);
+template <class P>
+void BufferCache<P>::UpdateTransformFeedbackBuffers() {
+ if (maxwell3d.regs.tfb_enabled == 0) {
+ return;
+ }
+ for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
+ UpdateTransformFeedbackBuffer(index);
+ }
+}
+
+template <class P>
+void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
+ const auto& binding = maxwell3d.regs.tfb_bindings[index];
+ const GPUVAddr gpu_addr = binding.Address() + binding.buffer_offset;
+ const u32 size = binding.buffer_size;
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ if (binding.buffer_enable == 0 || size == 0 || !cpu_addr) {
+ transform_feedback_buffers[index] = NULL_BINDING;
+ return;
+ }
+ const BufferId buffer_id = FindBuffer(*cpu_addr, size);
+ transform_feedback_buffers[index] = Binding{
+ .cpu_addr = *cpu_addr,
+ .size = size,
+ .buffer_id = buffer_id,
+ };
+ MarkWrittenBuffer(buffer_id, *cpu_addr, size);
+}
+
+template <class P>
+void BufferCache<P>::UpdateComputeUniformBuffers() {
+ ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) {
+ Binding& binding = compute_uniform_buffers[index];
+ binding = NULL_BINDING;
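+        // Rebuild the binding from the launch descriptor when its const buffer is enabled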
+ const auto& launch_desc = kepler_compute.launch_description;
+ if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) {
+ const auto& cbuf = launch_desc.const_buffer_config[index];
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(cbuf.Address());
+ if (cpu_addr) {
+ binding.cpu_addr = *cpu_addr;
+ binding.size = cbuf.size;
}
}
- return found.get();
+ binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
+ });
+}
+
+template <class P>
+void BufferCache<P>::UpdateComputeStorageBuffers() {
+ ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
+ // Resolve buffer
+ Binding& binding = compute_storage_buffers[index];
+ const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size);
+ binding.buffer_id = buffer_id;
+ // Mark as written if needed
+ if (((written_compute_storage_buffers >> index) & 1) != 0) {
+ MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size);
+ }
+ });
+}
+
+template <class P>
+void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) {
+ Buffer& buffer = slot_buffers[buffer_id];
+ buffer.MarkRegionAsGpuModified(cpu_addr, size);
+
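+    // Downloads are tracked for async flushing only with high GPU accuracy and async GPU emulation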
+ const bool is_accuracy_high = Settings::IsGPULevelHigh();
+ const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
+ if (!is_accuracy_high || !is_async) {
+ return;
+ }
+ if (std::ranges::find(uncommitted_downloads, buffer_id) != uncommitted_downloads.end()) {
+ // Already inserted
+ return;
}
+ uncommitted_downloads.push_back(buffer_id);
+}
- void MarkRegionAsWritten(VAddr start, VAddr end) {
- const u64 page_end = end >> WRITE_PAGE_BIT;
- for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
- if (const auto [it, inserted] = written_pages.emplace(page_start, 1); !inserted) {
- ++it->second;
- }
+template <class P>
+BufferId BufferCache<P>::FindBuffer(VAddr cpu_addr, u32 size) {
+ if (cpu_addr == 0) {
+ return NULL_BUFFER_ID;
+ }
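+    // Look up the page table entry and create a new buffer when no suitable one exists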
+ const u64 page = cpu_addr >> PAGE_BITS;
+ const BufferId buffer_id = page_table[page];
+ if (!buffer_id) {
+ return CreateBuffer(cpu_addr, size);
+ }
+ const Buffer& buffer = slot_buffers[buffer_id];
+ if (buffer.IsInBounds(cpu_addr, size)) {
+ return buffer_id;
+ }
+ return CreateBuffer(cpu_addr, size);
+}
+
+template <class P>
+typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu_addr,
+ u32 wanted_size) {
+ static constexpr int STREAM_LEAP_THRESHOLD = 16;
+ std::vector<BufferId> overlap_ids;
+ VAddr begin = cpu_addr;
+ VAddr end = cpu_addr + wanted_size;
+ int stream_score = 0;
+ bool has_stream_leap = false;
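+    // Walk the page table across the wanted range; the range grows as overlapping buffers are merged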
+ for (; cpu_addr >> PAGE_BITS < Common::DivCeil(end, PAGE_SIZE); cpu_addr += PAGE_SIZE) {
+ const BufferId overlap_id = page_table[cpu_addr >> PAGE_BITS];
+ if (!overlap_id) {
+ continue;
+ }
+ Buffer& overlap = slot_buffers[overlap_id];
+ if (overlap.IsPicked()) {
+ continue;
+ }
+ overlap_ids.push_back(overlap_id);
+ overlap.Pick();
+ const VAddr overlap_cpu_addr = overlap.CpuAddr();
+ if (overlap_cpu_addr < begin) {
+ cpu_addr = begin = overlap_cpu_addr;
+ }
+ end = std::max(end, overlap_cpu_addr + overlap.SizeBytes());
+
+ stream_score += overlap.StreamScore();
+ if (stream_score > STREAM_LEAP_THRESHOLD && !has_stream_leap) {
+            // When this memory region has been joined many times, assume it is being used
+            // as a stream buffer. Increase its size to avoid constantly recreating buffers.
+ has_stream_leap = true;
+ end += PAGE_SIZE * 256;
}
}
-
- void UnmarkRegionAsWritten(VAddr start, VAddr end) {
- const u64 page_end = end >> WRITE_PAGE_BIT;
- for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
- auto it = written_pages.find(page_start);
- if (it != written_pages.end()) {
- if (it->second > 1) {
- --it->second;
- } else {
- written_pages.erase(it);
- }
- }
+ return OverlapResult{
+ .ids = std::move(overlap_ids),
+ .begin = begin,
+ .end = end,
+ .has_stream_leap = has_stream_leap,
+ };
+}
+
+template <class P>
+void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
+ bool accumulate_stream_score) {
+ Buffer& new_buffer = slot_buffers[new_buffer_id];
+ Buffer& overlap = slot_buffers[overlap_id];
+ if (accumulate_stream_score) {
+ new_buffer.IncreaseStreamScore(overlap.StreamScore() + 1);
+ }
+ std::vector<BufferCopy> copies;
+ const size_t dst_base_offset = overlap.CpuAddr() - new_buffer.CpuAddr();
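+    // Copy the GPU-modified ranges of the overlap into the new buffer and mark them as GPU modified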
+ overlap.ForEachDownloadRange([&](u64 begin, u64 range_size) {
+ copies.push_back(BufferCopy{
+ .src_offset = begin,
+ .dst_offset = dst_base_offset + begin,
+ .size = range_size,
+ });
+ new_buffer.UnmarkRegionAsCpuModified(begin, range_size);
+ new_buffer.MarkRegionAsGpuModified(begin, range_size);
+ });
+ if (!copies.empty()) {
+ runtime.CopyBuffer(slot_buffers[new_buffer_id], overlap, copies);
+ }
+ ReplaceBufferDownloads(overlap_id, new_buffer_id);
+ DeleteBuffer(overlap_id);
+}
+
+template <class P>
+BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
+ const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size);
+ const u32 size = static_cast<u32>(overlap.end - overlap.begin);
+ const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
+ for (const BufferId overlap_id : overlap.ids) {
+ JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
+ }
+ Register(new_buffer_id);
+ return new_buffer_id;
+}
+
+template <class P>
+void BufferCache<P>::Register(BufferId buffer_id) {
+ ChangeRegister<true>(buffer_id);
+}
+
+template <class P>
+void BufferCache<P>::Unregister(BufferId buffer_id) {
+ ChangeRegister<false>(buffer_id);
+}
+
+template <class P>
+template <bool insert>
+void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
+ const Buffer& buffer = slot_buffers[buffer_id];
+ const VAddr cpu_addr_begin = buffer.CpuAddr();
+ const VAddr cpu_addr_end = cpu_addr_begin + buffer.SizeBytes();
+ const u64 page_begin = cpu_addr_begin / PAGE_SIZE;
+ const u64 page_end = Common::DivCeil(cpu_addr_end, PAGE_SIZE);
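+    // Point every page covered by the buffer at its id, or clear the entries when unregistering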
+ for (u64 page = page_begin; page != page_end; ++page) {
+ if constexpr (insert) {
+ page_table[page] = buffer_id;
+ } else {
+ page_table[page] = BufferId{};
}
}
+}
- bool IsRegionWritten(VAddr start, VAddr end) const {
- const u64 page_end = end >> WRITE_PAGE_BIT;
- for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
- if (written_pages.contains(page_start)) {
- return true;
+template <class P>
+void BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) {
+ if (buffer.CpuAddr() == 0) {
+ return;
+ }
+ SynchronizeBufferImpl(buffer, cpu_addr, size);
+}
+
+template <class P>
+void BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size) {
+ boost::container::small_vector<BufferCopy, 4> copies;
+ u64 total_size_bytes = 0;
+ u64 largest_copy = 0;
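+    // Gather the CPU-modified ranges that have to be uploaded to the host buffer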
+ buffer.ForEachUploadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) {
+ copies.push_back(BufferCopy{
+ .src_offset = total_size_bytes,
+ .dst_offset = range_offset,
+ .size = range_size,
+ });
+ total_size_bytes += range_size;
+ largest_copy = std::max(largest_copy, range_size);
+ });
+ if (total_size_bytes == 0) {
+ return;
+ }
+ const std::span<BufferCopy> copies_span(copies.data(), copies.size());
+ UploadMemory(buffer, total_size_bytes, largest_copy, copies_span);
+}
+
+template <class P>
+void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
+ std::span<BufferCopy> copies) {
+ if constexpr (USE_MEMORY_MAPS) {
+ MappedUploadMemory(buffer, total_size_bytes, copies);
+ } else {
+ ImmediateUploadMemory(buffer, largest_copy, copies);
+ }
+}
+
+template <class P>
+void BufferCache<P>::ImmediateUploadMemory(Buffer& buffer, u64 largest_copy,
+ std::span<const BufferCopy> copies) {
+ std::span<u8> immediate_buffer;
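+    // Read directly from guest memory when the range maps to contiguous host memory;
+    // otherwise stage the data through a temporary bounce buffer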
+ for (const BufferCopy& copy : copies) {
+ std::span<const u8> upload_span;
+ const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset;
+ if (IsRangeGranular(cpu_addr, copy.size)) {
+ upload_span = std::span(cpu_memory.GetPointer(cpu_addr), copy.size);
+ } else {
+ if (immediate_buffer.empty()) {
+ immediate_buffer = ImmediateBuffer(largest_copy);
}
+ cpu_memory.ReadBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size);
+ upload_span = immediate_buffer.subspan(0, copy.size);
}
- return false;
+ buffer.ImmediateUpload(copy.dst_offset, upload_span);
}
-
- void QueueDestruction(std::shared_ptr<Buffer> buffer) {
- buffer->SetEpoch(epoch);
- pending_destruction.push(std::move(buffer));
+}
+
+template <class P>
+void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes,
+ std::span<BufferCopy> copies) {
+ auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes);
+ const std::span<u8> staging_pointer = upload_staging.mapped_span;
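+    // Read guest memory into the staging map, then batch all copies into the device buffer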
+ for (BufferCopy& copy : copies) {
+ u8* const src_pointer = staging_pointer.data() + copy.src_offset;
+ const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset;
+ cpu_memory.ReadBlockUnsafe(cpu_addr, src_pointer, copy.size);
+
+ // Apply the staging offset
+ copy.src_offset += upload_staging.offset;
}
-
- void MarkForAsyncFlush(MapInterval* map) {
- if (!uncommitted_flushes) {
- uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval*>>();
+ runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
+}
+
+template <class P>
+void BufferCache<P>::DeleteBuffer(BufferId buffer_id) {
+ const auto scalar_replace = [buffer_id](Binding& binding) {
+ if (binding.buffer_id == buffer_id) {
+ binding.buffer_id = BufferId{};
+ }
+ };
+ const auto replace = [scalar_replace](std::span<Binding> bindings) {
+ std::ranges::for_each(bindings, scalar_replace);
+ };
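+    // Drop every binding that still references the deleted buffer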
+ scalar_replace(index_buffer);
+ replace(vertex_buffers);
+ std::ranges::for_each(uniform_buffers, replace);
+ std::ranges::for_each(storage_buffers, replace);
+ replace(transform_feedback_buffers);
+ replace(compute_uniform_buffers);
+ replace(compute_storage_buffers);
+ std::erase(cached_write_buffer_ids, buffer_id);
+
+ // Mark the whole buffer as CPU written to stop tracking CPU writes
+ Buffer& buffer = slot_buffers[buffer_id];
+ buffer.MarkRegionAsCpuModified(buffer.CpuAddr(), buffer.SizeBytes());
+
+ Unregister(buffer_id);
+ delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id]));
+
+ NotifyBufferDeletion();
+}
+
+template <class P>
+void BufferCache<P>::ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id) {
+ const auto replace = [old_buffer_id, new_buffer_id](std::vector<BufferId>& buffers) {
+ std::ranges::replace(buffers, old_buffer_id, new_buffer_id);
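+        // Remove any duplicates introduced when the old id was replaced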
+ if (auto it = std::ranges::find(buffers, new_buffer_id); it != buffers.end()) {
+ buffers.erase(std::remove(it + 1, buffers.end(), new_buffer_id), buffers.end());
}
- uncommitted_flushes->insert(map);
+ };
+ replace(uncommitted_downloads);
+ std::ranges::for_each(committed_downloads, replace);
+}
+
+template <class P>
+void BufferCache<P>::NotifyBufferDeletion() {
+ if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
+ dirty_uniform_buffers.fill(~u32{0});
}
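+    // Dirty index and vertex buffer state so stale bindings are rebuilt on the next draw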
+ auto& flags = maxwell3d.dirty.flags;
+ flags[Dirty::IndexBuffer] = true;
+ flags[Dirty::VertexBuffers] = true;
+ for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
+ flags[Dirty::VertexBuffer0 + index] = true;
+ }
+ has_deleted_buffers = true;
+}
+
+template <class P>
+typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr) const {
+ const GPUVAddr gpu_addr = gpu_memory.Read<u64>(ssbo_addr);
+ const u32 size = gpu_memory.Read<u32>(ssbo_addr + 8);
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ if (!cpu_addr || size == 0) {
+ return NULL_BINDING;
+ }
+ // HACK(Rodrigo): This is the number of bytes bound in host beyond the guest API's range.
+    // It exists because some games, like Astral Chain, operate out of bounds.
+    // Binding the whole map range would be technically correct, but games have maps large
+    // enough to make this approach unaffordable for now.
+ static constexpr u32 arbitrary_extra_bytes = 0xc000;
+ const u32 bytes_to_map_end = static_cast<u32>(gpu_memory.BytesToMapEnd(gpu_addr));
+ const Binding binding{
+ .cpu_addr = *cpu_addr,
+ .size = std::min(size + arbitrary_extra_bytes, bytes_to_map_end),
+ .buffer_id = BufferId{},
+ };
+ return binding;
+}
+
+template <class P>
+std::span<const u8> BufferCache<P>::ImmediateBufferWithData(VAddr cpu_addr, size_t size) {
+ u8* const base_pointer = cpu_memory.GetPointer(cpu_addr);
+ if (IsRangeGranular(cpu_addr, size) ||
+ base_pointer + size == cpu_memory.GetPointer(cpu_addr + size)) {
+ return std::span(base_pointer, size);
+ } else {
+ const std::span<u8> span = ImmediateBuffer(size);
+ cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size);
+ return span;
+ }
+}
- VideoCore::RasterizerInterface& rasterizer;
- Tegra::MemoryManager& gpu_memory;
- Core::Memory::Memory& cpu_memory;
- StreamBuffer& stream_buffer;
-
- u8* buffer_ptr = nullptr;
- u64 buffer_offset = 0;
- u64 buffer_offset_base = 0;
-
- MapIntervalAllocator mapped_addresses_allocator;
- boost::intrusive::set<MapInterval, boost::intrusive::compare<MapIntervalCompare>>
- mapped_addresses;
-
- std::unordered_map<u64, u32> written_pages;
- std::unordered_map<u64, std::shared_ptr<Buffer>> blocks;
-
- std::queue<std::shared_ptr<Buffer>> pending_destruction;
- u64 epoch = 0;
- u64 modified_ticks = 0;
-
- std::vector<u8> staging_buffer;
-
- std::list<MapInterval*> marked_for_unregister;
-
- std::shared_ptr<std::unordered_set<MapInterval*>> uncommitted_flushes;
- std::list<std::shared_ptr<std::list<MapInterval*>>> committed_flushes;
-
- std::recursive_mutex mutex;
-};
+template <class P>
+std::span<u8> BufferCache<P>::ImmediateBuffer(size_t wanted_capacity) {
+ if (wanted_capacity > immediate_buffer_capacity) {
+ immediate_buffer_capacity = wanted_capacity;
+ immediate_buffer_alloc = std::make_unique<u8[]>(wanted_capacity);
+ }
+ return std::span<u8>(immediate_buffer_alloc.get(), wanted_capacity);
+}
+
+template <class P>
+bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept {
+ if constexpr (IS_OPENGL) {
+ return ((fast_bound_uniform_buffers[stage] >> binding_index) & 1) != 0;
+ } else {
+ // Only OpenGL has fast uniform buffers
+ return false;
+ }
+}
} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/map_interval.cpp b/src/video_core/buffer_cache/map_interval.cpp
deleted file mode 100644
index 62587e18a..000000000
--- a/src/video_core/buffer_cache/map_interval.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <array>
-#include <cstddef>
-#include <memory>
-
-#include "video_core/buffer_cache/map_interval.h"
-
-namespace VideoCommon {
-
-MapIntervalAllocator::MapIntervalAllocator() {
- FillFreeList(first_chunk);
-}
-
-MapIntervalAllocator::~MapIntervalAllocator() = default;
-
-void MapIntervalAllocator::AllocateNewChunk() {
- *new_chunk = std::make_unique<Chunk>();
- FillFreeList(**new_chunk);
- new_chunk = &(*new_chunk)->next;
-}
-
-void MapIntervalAllocator::FillFreeList(Chunk& chunk) {
- const std::size_t old_size = free_list.size();
- free_list.resize(old_size + chunk.data.size());
- std::transform(chunk.data.rbegin(), chunk.data.rend(), free_list.begin() + old_size,
- [](MapInterval& interval) { return &interval; });
-}
-
-} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h
deleted file mode 100644
index ef974b08a..000000000
--- a/src/video_core/buffer_cache/map_interval.h
+++ /dev/null
@@ -1,93 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <cstddef>
-#include <memory>
-#include <vector>
-
-#include <boost/intrusive/set_hook.hpp>
-
-#include "common/common_types.h"
-#include "video_core/gpu.h"
-
-namespace VideoCommon {
-
-struct MapInterval : public boost::intrusive::set_base_hook<boost::intrusive::optimize_size<true>> {
- MapInterval() = default;
-
- /*implicit*/ MapInterval(VAddr start_) noexcept : start{start_} {}
-
- explicit MapInterval(VAddr start_, VAddr end_, GPUVAddr gpu_addr_) noexcept
- : start{start_}, end{end_}, gpu_addr{gpu_addr_} {}
-
- bool IsInside(VAddr other_start, VAddr other_end) const noexcept {
- return start <= other_start && other_end <= end;
- }
-
- bool Overlaps(VAddr other_start, VAddr other_end) const noexcept {
- return start < other_end && other_start < end;
- }
-
- void MarkAsModified(bool is_modified_, u64 ticks_) noexcept {
- is_modified = is_modified_;
- ticks = ticks_;
- }
-
- boost::intrusive::set_member_hook<> member_hook_;
- VAddr start = 0;
- VAddr end = 0;
- GPUVAddr gpu_addr = 0;
- u64 ticks = 0;
- bool is_written = false;
- bool is_modified = false;
- bool is_registered = false;
- bool is_memory_marked = false;
- bool is_sync_pending = false;
-};
-
-struct MapIntervalCompare {
- constexpr bool operator()(const MapInterval& lhs, const MapInterval& rhs) const noexcept {
- return lhs.start < rhs.start;
- }
-};
-
-class MapIntervalAllocator {
-public:
- MapIntervalAllocator();
- ~MapIntervalAllocator();
-
- MapInterval* Allocate() {
- if (free_list.empty()) {
- AllocateNewChunk();
- }
- MapInterval* const interval = free_list.back();
- free_list.pop_back();
- return interval;
- }
-
- void Release(MapInterval* interval) {
- free_list.push_back(interval);
- }
-
-private:
- struct Chunk {
- std::unique_ptr<Chunk> next;
- std::array<MapInterval, 0x8000> data;
- };
-
- void AllocateNewChunk();
-
- void FillFreeList(Chunk& chunk);
-
- std::vector<MapInterval*> free_list;
-
- Chunk first_chunk;
-
- std::unique_ptr<Chunk>* new_chunk = &first_chunk.next;
-};
-
-} // namespace VideoCommon
diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp
index 33b3c060b..a3fda1094 100644
--- a/src/video_core/cdma_pusher.cpp
+++ b/src/video_core/cdma_pusher.cpp
@@ -37,59 +37,43 @@ CDmaPusher::CDmaPusher(GPU& gpu_)
CDmaPusher::~CDmaPusher() = default;
-void CDmaPusher::Push(ChCommandHeaderList&& entries) {
- cdma_queue.push(std::move(entries));
-}
-
-void CDmaPusher::DispatchCalls() {
- while (!cdma_queue.empty()) {
- Step();
- }
-}
-
-void CDmaPusher::Step() {
- const auto entries{cdma_queue.front()};
- cdma_queue.pop();
-
- std::vector<u32> values(entries.size());
- std::memcpy(values.data(), entries.data(), entries.size() * sizeof(u32));
-
- for (const u32 value : values) {
+void CDmaPusher::ProcessEntries(ChCommandHeaderList&& entries) {
+ for (const auto& value : entries) {
if (mask != 0) {
const auto lbs = static_cast<u32>(std::countr_zero(mask));
mask &= ~(1U << lbs);
- ExecuteCommand(static_cast<u32>(offset + lbs), value);
+ ExecuteCommand(offset + lbs, value.raw);
continue;
} else if (count != 0) {
--count;
- ExecuteCommand(static_cast<u32>(offset), value);
+ ExecuteCommand(offset, value.raw);
if (incrementing) {
++offset;
}
continue;
}
- const auto mode = static_cast<ChSubmissionMode>((value >> 28) & 0xf);
+ const auto mode = value.submission_mode.Value();
switch (mode) {
case ChSubmissionMode::SetClass: {
- mask = value & 0x3f;
- offset = (value >> 16) & 0xfff;
- current_class = static_cast<ChClassId>((value >> 6) & 0x3ff);
+ mask = value.value & 0x3f;
+ offset = value.method_offset;
+ current_class = static_cast<ChClassId>((value.value >> 6) & 0x3ff);
break;
}
case ChSubmissionMode::Incrementing:
case ChSubmissionMode::NonIncrementing:
- count = value & 0xffff;
- offset = (value >> 16) & 0xfff;
+ count = value.value;
+ offset = value.method_offset;
incrementing = mode == ChSubmissionMode::Incrementing;
break;
case ChSubmissionMode::Mask:
- mask = value & 0xffff;
- offset = (value >> 16) & 0xfff;
+ mask = value.value;
+ offset = value.method_offset;
break;
case ChSubmissionMode::Immediate: {
- const u32 data = value & 0xfff;
- offset = (value >> 16) & 0xfff;
- ExecuteCommand(static_cast<u32>(offset), data);
+ const u32 data = value.value & 0xfff;
+ offset = value.method_offset;
+ ExecuteCommand(offset, data);
break;
}
default:
@@ -102,8 +86,8 @@ void CDmaPusher::Step() {
void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
switch (current_class) {
case ChClassId::NvDec:
- ThiStateWrite(nvdec_thi_state, state_offset, {data});
- switch (static_cast<ThiMethod>(state_offset)) {
+ ThiStateWrite(nvdec_thi_state, offset, data);
+ switch (static_cast<ThiMethod>(offset)) {
case ThiMethod::IncSyncpt: {
LOG_DEBUG(Service_NVDRV, "NVDEC Class IncSyncpt Method");
const auto syncpoint_id = static_cast<u32>(data & 0xFF);
@@ -120,7 +104,7 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}",
static_cast<u32>(nvdec_thi_state.method_0));
nvdec_processor->ProcessMethod(static_cast<Nvdec::Method>(nvdec_thi_state.method_0),
- {data});
+ data);
break;
default:
break;
@@ -144,7 +128,7 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
case ThiMethod::SetMethod1:
LOG_DEBUG(Service_NVDRV, "VIC method 0x{:X}, Args=({})",
static_cast<u32>(vic_thi_state.method_0), data);
- vic_processor->ProcessMethod(static_cast<Vic::Method>(vic_thi_state.method_0), {data});
+ vic_processor->ProcessMethod(static_cast<Vic::Method>(vic_thi_state.method_0), data);
break;
default:
break;
@@ -153,7 +137,7 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
case ChClassId::Host1x:
// This device is mainly for syncpoint synchronization
LOG_DEBUG(Service_NVDRV, "Host1X Class Method");
- host1x_processor->ProcessMethod(static_cast<Host1x::Method>(state_offset), {data});
+ host1x_processor->ProcessMethod(static_cast<Host1x::Method>(offset), data);
break;
default:
UNIMPLEMENTED_MSG("Current class not implemented {:X}", static_cast<u32>(current_class));
@@ -161,10 +145,9 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
}
}
-void CDmaPusher::ThiStateWrite(ThiRegisters& state, u32 state_offset,
- const std::vector<u32>& arguments) {
- u8* const state_offset_ptr = reinterpret_cast<u8*>(&state) + sizeof(u32) * state_offset;
- std::memcpy(state_offset_ptr, arguments.data(), sizeof(u32) * arguments.size());
+void CDmaPusher::ThiStateWrite(ThiRegisters& state, u32 state_offset, u32 argument) {
+ u8* const offset_ptr = reinterpret_cast<u8*>(&state) + sizeof(u32) * state_offset;
+ std::memcpy(offset_ptr, &argument, sizeof(u32));
}
} // namespace Tegra
diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h
index e5f212c1a..1bada44dd 100644
--- a/src/video_core/cdma_pusher.h
+++ b/src/video_core/cdma_pusher.h
@@ -5,9 +5,7 @@
#pragma once
#include <memory>
-#include <unordered_map>
#include <vector>
-#include <queue>
#include "common/bit_field.h"
#include "common/common_types.h"
@@ -16,9 +14,9 @@
namespace Tegra {
class GPU;
+class Host1x;
class Nvdec;
class Vic;
-class Host1x;
enum class ChSubmissionMode : u32 {
SetClass = 0,
@@ -48,16 +46,10 @@ enum class ChClassId : u32 {
NvDec = 0xf0
};
-enum class ChMethod : u32 {
- Empty = 0,
- SetMethod = 0x10,
- SetData = 0x11,
-};
-
union ChCommandHeader {
u32 raw;
BitField<0, 16, u32> value;
- BitField<16, 12, ChMethod> method_offset;
+ BitField<16, 12, u32> method_offset;
BitField<28, 4, ChSubmissionMode> submission_mode;
};
static_assert(sizeof(ChCommandHeader) == sizeof(u32), "ChCommand header is an invalid size");
@@ -99,21 +91,15 @@ public:
explicit CDmaPusher(GPU& gpu_);
~CDmaPusher();
- /// Push NVDEC command buffer entries into queue
- void Push(ChCommandHeaderList&& entries);
-
- /// Process queued command buffer entries
- void DispatchCalls();
-
- /// Process one queue element
- void Step();
+    /// Process a list of command buffer entries
+ void ProcessEntries(ChCommandHeaderList&& entries);
+private:
/// Invoke command class devices to execute the command based on the current state
void ExecuteCommand(u32 state_offset, u32 data);
-private:
/// Write the argument value to the ThiRegisters member at the specified offset
- void ThiStateWrite(ThiRegisters& state, u32 state_offset, const std::vector<u32>& arguments);
+ void ThiStateWrite(ThiRegisters& state, u32 offset, u32 argument);
GPU& gpu;
std::shared_ptr<Tegra::Nvdec> nvdec_processor;
@@ -124,13 +110,10 @@ private:
ThiRegisters vic_thi_state{};
ThiRegisters nvdec_thi_state{};
- s32 count{};
- s32 offset{};
+ u32 count{};
+ u32 offset{};
u32 mask{};
bool incrementing{};
-
- // Queue of command lists to be processed
- std::queue<ChCommandHeaderList> cdma_queue;
};
} // namespace Tegra
diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp
index 39bc923a5..d02dc6260 100644
--- a/src/video_core/command_classes/codecs/codec.cpp
+++ b/src/video_core/command_classes/codecs/codec.cpp
@@ -44,8 +44,10 @@ Codec::~Codec() {
}
void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) {
- LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", codec);
- current_codec = codec;
+ if (current_codec != codec) {
+ LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", static_cast<u32>(codec));
+ current_codec = codec;
+ }
}
void Codec::StateWrite(u32 offset, u64 arguments) {
@@ -55,7 +57,6 @@ void Codec::StateWrite(u32 offset, u64 arguments) {
void Codec::Decode() {
bool is_first_frame = false;
-
if (!initialized) {
if (current_codec == NvdecCommon::VideoCodec::H264) {
av_codec = avcodec_find_decoder(AV_CODEC_ID_H264);
diff --git a/src/video_core/command_classes/nvdec.cpp b/src/video_core/command_classes/nvdec.cpp
index 79e1f4e13..e4f919afd 100644
--- a/src/video_core/command_classes/nvdec.cpp
+++ b/src/video_core/command_classes/nvdec.cpp
@@ -12,16 +12,16 @@ Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), codec(std::make_unique<Codec>(gpu)) {}
Nvdec::~Nvdec() = default;
-void Nvdec::ProcessMethod(Method method, const std::vector<u32>& arguments) {
+void Nvdec::ProcessMethod(Method method, u32 argument) {
if (method == Method::SetVideoCodec) {
- codec->StateWrite(static_cast<u32>(method), arguments[0]);
+ codec->StateWrite(static_cast<u32>(method), argument);
} else {
- codec->StateWrite(static_cast<u32>(method), static_cast<u64>(arguments[0]) << 8);
+ codec->StateWrite(static_cast<u32>(method), static_cast<u64>(argument) << 8);
}
switch (method) {
case Method::SetVideoCodec:
- codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(arguments[0]));
+ codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(argument));
break;
case Method::Execute:
Execute();
diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/command_classes/nvdec.h
index e4877c533..e66be80b8 100644
--- a/src/video_core/command_classes/nvdec.h
+++ b/src/video_core/command_classes/nvdec.h
@@ -23,7 +23,7 @@ public:
~Nvdec();
/// Writes the method into the state, invoking Execute() if encountered
- void ProcessMethod(Method method, const std::vector<u32>& arguments);
+ void ProcessMethod(Method method, u32 argument);
/// Return most recently decoded frame
[[nodiscard]] AVFramePtr GetFrame();
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp
index 55e632346..0a8b82f2b 100644
--- a/src/video_core/command_classes/vic.cpp
+++ b/src/video_core/command_classes/vic.cpp
@@ -18,18 +18,14 @@ extern "C" {
namespace Tegra {
Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_)
- : gpu(gpu_), nvdec_processor(std::move(nvdec_processor_)) {}
-Vic::~Vic() = default;
+ : gpu(gpu_),
+ nvdec_processor(std::move(nvdec_processor_)), converted_frame_buffer{nullptr, av_free} {}
-void Vic::VicStateWrite(u32 offset, u32 arguments) {
- u8* const state_offset = reinterpret_cast<u8*>(&vic_state) + offset * sizeof(u32);
- std::memcpy(state_offset, &arguments, sizeof(u32));
-}
+Vic::~Vic() = default;
-void Vic::ProcessMethod(Method method, const std::vector<u32>& arguments) {
- LOG_DEBUG(HW_GPU, "Vic method 0x{:X}", method);
- VicStateWrite(static_cast<u32>(method), arguments[0]);
- const u64 arg = static_cast<u64>(arguments[0]) << 8;
+void Vic::ProcessMethod(Method method, u32 argument) {
+ LOG_DEBUG(HW_GPU, "Vic method 0x{:X}", static_cast<u32>(method));
+ const u64 arg = static_cast<u64>(argument) << 8;
switch (method) {
case Method::Execute:
Execute();
@@ -53,8 +49,7 @@ void Vic::ProcessMethod(Method method, const std::vector<u32>& arguments) {
void Vic::Execute() {
if (output_surface_luma_address == 0) {
- LOG_ERROR(Service_NVDRV, "VIC Luma address not set. Received 0x{:X}",
- vic_state.output_surface.luma_offset);
+ LOG_ERROR(Service_NVDRV, "VIC Luma address not set.");
return;
}
const VicConfig config{gpu.MemoryManager().Read<u64>(config_struct_address + 0x20)};
@@ -89,8 +84,10 @@ void Vic::Execute() {
// Get Converted frame
const std::size_t linear_size = frame->width * frame->height * 4;
- using AVMallocPtr = std::unique_ptr<u8, decltype(&av_free)>;
- AVMallocPtr converted_frame_buffer{static_cast<u8*>(av_malloc(linear_size)), av_free};
+    // Only allocate converted_frame_buffer once per stream, as its size is not expected to change
+ if (!converted_frame_buffer) {
+ converted_frame_buffer = AVMallocPtr{static_cast<u8*>(av_malloc(linear_size)), av_free};
+ }
const int converted_stride{frame->width * 4};
u8* const converted_frame_buf_addr{converted_frame_buffer.get()};
@@ -104,18 +101,16 @@ void Vic::Execute() {
const u32 block_height = static_cast<u32>(config.block_linear_height_log2);
const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height, 1,
block_height, 0);
- std::vector<u8> swizzled_data(size);
+ luma_buffer.resize(size);
Tegra::Texture::SwizzleSubrect(frame->width, frame->height, frame->width * 4,
- frame->width, 4, swizzled_data.data(),
+ frame->width, 4, luma_buffer.data(),
converted_frame_buffer.get(), block_height, 0, 0);
- gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size);
- gpu.Maxwell3D().OnMemoryWrite();
+ gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), size);
} else {
// send pitch linear frame
gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr,
linear_size);
- gpu.Maxwell3D().OnMemoryWrite();
}
break;
}
@@ -134,15 +129,15 @@ void Vic::Execute() {
const auto stride = frame->linesize[0];
const auto half_stride = frame->linesize[1];
- std::vector<u8> luma_buffer(aligned_width * surface_height);
- std::vector<u8> chroma_buffer(aligned_width * half_height);
+ luma_buffer.resize(aligned_width * surface_height);
+ chroma_buffer.resize(aligned_width * half_height);
// Populate luma buffer
for (std::size_t y = 0; y < surface_height - 1; ++y) {
- std::size_t src = y * stride;
- std::size_t dst = y * aligned_width;
+ const std::size_t src = y * stride;
+ const std::size_t dst = y * aligned_width;
- std::size_t size = surface_width;
+ const std::size_t size = surface_width;
for (std::size_t offset = 0; offset < size; ++offset) {
luma_buffer[dst + offset] = luma_ptr[src + offset];
@@ -153,8 +148,8 @@ void Vic::Execute() {
// Populate chroma buffer from both channels with interleaving.
for (std::size_t y = 0; y < half_height; ++y) {
- std::size_t src = y * half_stride;
- std::size_t dst = y * aligned_width;
+ const std::size_t src = y * half_stride;
+ const std::size_t dst = y * aligned_width;
for (std::size_t x = 0; x < half_width; ++x) {
chroma_buffer[dst + x * 2] = chroma_b_ptr[src + x];
@@ -163,7 +158,6 @@ void Vic::Execute() {
}
gpu.MemoryManager().WriteBlock(output_surface_chroma_u_address, chroma_buffer.data(),
chroma_buffer.size());
- gpu.Maxwell3D().OnMemoryWrite();
break;
}
default:
diff --git a/src/video_core/command_classes/vic.h b/src/video_core/command_classes/vic.h
index 8c4e284a1..f5a2ed100 100644
--- a/src/video_core/command_classes/vic.h
+++ b/src/video_core/command_classes/vic.h
@@ -15,43 +15,6 @@ namespace Tegra {
class GPU;
class Nvdec;
-struct PlaneOffsets {
- u32 luma_offset{};
- u32 chroma_u_offset{};
- u32 chroma_v_offset{};
-};
-
-struct VicRegisters {
- INSERT_PADDING_WORDS(64);
- u32 nop{};
- INSERT_PADDING_WORDS(15);
- u32 pm_trigger{};
- INSERT_PADDING_WORDS(47);
- u32 set_application_id{};
- u32 set_watchdog_timer{};
- INSERT_PADDING_WORDS(17);
- u32 context_save_area{};
- u32 context_switch{};
- INSERT_PADDING_WORDS(43);
- u32 execute{};
- INSERT_PADDING_WORDS(63);
- std::array<std::array<PlaneOffsets, 8>, 8> surfacex_slots{};
- u32 picture_index{};
- u32 control_params{};
- u32 config_struct_offset{};
- u32 filter_struct_offset{};
- u32 palette_offset{};
- u32 hist_offset{};
- u32 context_id{};
- u32 fce_ucode_size{};
- PlaneOffsets output_surface{};
- u32 fce_ucode_offset{};
- INSERT_PADDING_WORDS(4);
- std::array<u32, 8> slot_context_id{};
- INSERT_PADDING_WORDS(16);
-};
-static_assert(sizeof(VicRegisters) == 0x7A0, "VicRegisters is an invalid size");
-
class Vic {
public:
enum class Method : u32 {
@@ -67,14 +30,11 @@ public:
~Vic();
/// Write to the device state.
- void ProcessMethod(Method method, const std::vector<u32>& arguments);
+ void ProcessMethod(Method method, u32 argument);
private:
void Execute();
- void VicStateWrite(u32 offset, u32 arguments);
- VicRegisters vic_state{};
-
enum class VideoPixelFormat : u64_le {
RGBA8 = 0x1f,
BGRA8 = 0x20,
@@ -88,8 +48,6 @@ private:
BitField<9, 2, u64_le> chroma_loc_vert;
BitField<11, 4, u64_le> block_linear_kind;
BitField<15, 4, u64_le> block_linear_height_log2;
- BitField<19, 3, u64_le> reserved0;
- BitField<22, 10, u64_le> reserved1;
BitField<32, 14, u64_le> surface_width_minus1;
BitField<46, 14, u64_le> surface_height_minus1;
};
@@ -97,6 +55,13 @@ private:
GPU& gpu;
std::shared_ptr<Tegra::Nvdec> nvdec_processor;
+ /// Avoid reallocation of the following buffers every frame, as their
+ /// size does not change during a stream
+ using AVMallocPtr = std::unique_ptr<u8, decltype(&av_free)>;
+ AVMallocPtr converted_frame_buffer;
+ std::vector<u8> luma_buffer;
+ std::vector<u8> chroma_buffer;
+
GPUVAddr config_struct_address{};
GPUVAddr output_surface_luma_address{};
GPUVAddr output_surface_chroma_u_address{};
diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp
index b1eaac00c..7149af290 100644
--- a/src/video_core/dirty_flags.cpp
+++ b/src/video_core/dirty_flags.cpp
@@ -12,13 +12,30 @@
#define NUM(field_name) (sizeof(::Tegra::Engines::Maxwell3D::Regs::field_name) / (sizeof(u32)))
namespace VideoCommon::Dirty {
-
+namespace {
using Tegra::Engines::Maxwell3D;
-void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) {
+void SetupDirtyVertexBuffers(Maxwell3D::DirtyState::Tables& tables) {
+ static constexpr std::size_t num_array = 3;
+ for (std::size_t i = 0; i < Maxwell3D::Regs::NumVertexArrays; ++i) {
+ const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]);
+ const std::size_t limit_offset = OFF(vertex_array_limit) + i * NUM(vertex_array_limit[0]);
+
+ FillBlock(tables, array_offset, num_array, VertexBuffer0 + i, VertexBuffers);
+ FillBlock(tables, limit_offset, NUM(vertex_array_limit), VertexBuffer0 + i, VertexBuffers);
+ }
+}
+
+void SetupIndexBuffer(Maxwell3D::DirtyState::Tables& tables) {
+ FillBlock(tables[0], OFF(index_array), NUM(index_array), IndexBuffer);
+}
+
+void SetupDirtyDescriptors(Maxwell3D::DirtyState::Tables& tables) {
FillBlock(tables[0], OFF(tic), NUM(tic), Descriptors);
FillBlock(tables[0], OFF(tsc), NUM(tsc), Descriptors);
+}
+void SetupDirtyRenderTargets(Maxwell3D::DirtyState::Tables& tables) {
static constexpr std::size_t num_per_rt = NUM(rt[0]);
static constexpr std::size_t begin = OFF(rt);
static constexpr std::size_t num = num_per_rt * Maxwell3D::Regs::NumRenderTargets;
@@ -41,5 +58,13 @@ void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tabl
FillBlock(table, OFF(zeta), NUM(zeta), flag);
}
}
+} // Anonymous namespace
+
+void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) {
+ SetupDirtyVertexBuffers(tables);
+ SetupIndexBuffer(tables);
+ SetupDirtyDescriptors(tables);
+ SetupDirtyRenderTargets(tables);
+}
} // namespace VideoCommon::Dirty
diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h
index 875527ddd..702688ace 100644
--- a/src/video_core/dirty_flags.h
+++ b/src/video_core/dirty_flags.h
@@ -30,6 +30,12 @@ enum : u8 {
ColorBuffer7,
ZetaBuffer,
+ VertexBuffers,
+ VertexBuffer0,
+ VertexBuffer31 = VertexBuffer0 + 31,
+
+ IndexBuffer,
+
LastCommonEntry,
};
@@ -47,6 +53,6 @@ void FillBlock(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables, std::size_
FillBlock(tables[1], begin, num, index_b);
}
-void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables);
+void SetupDirtyFlags(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables);
} // namespace VideoCommon::Dirty
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 2c8b20024..8b33c04ab 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -23,8 +23,6 @@ void DmaPusher::DispatchCalls() {
MICROPROFILE_SCOPE(DispatchCalls);
gpu.SyncGuestHost();
- // On entering GPU code, assume all memory may be touched by the ARM core.
- gpu.Maxwell3D().OnMemoryWrite();
dma_pushbuffer_subindex = 0;
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index a01d334ad..0f640fdae 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -18,8 +18,8 @@ Fermi2D::Fermi2D() {
Fermi2D::~Fermi2D() = default;
-void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
- rasterizer = &rasterizer_;
+void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
+ rasterizer = rasterizer_;
}
void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 0de3280a2..c808a577d 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -38,7 +38,7 @@ public:
~Fermi2D();
/// Binds a rasterizer to this engine.
- void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
+ void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
/// Write the value to the register identified by method.
void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index ba387506e..a9b75091e 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -21,8 +21,8 @@ KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manage
KeplerCompute::~KeplerCompute() = default;
-void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
- rasterizer = &rasterizer_;
+void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
+ rasterizer = rasterizer_;
}
void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
@@ -39,7 +39,6 @@ void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_cal
case KEPLER_COMPUTE_REG_INDEX(data_upload): {
upload_state.ProcessData(method_argument, is_last_call);
if (is_last_call) {
- system.GPU().Maxwell3D().OnMemoryWrite();
}
break;
}
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 9f0a7b76d..7c40cba38 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -46,7 +46,7 @@ public:
~KeplerCompute();
/// Binds a rasterizer to this engine.
- void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
+ void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
static constexpr std::size_t NumConstBuffers = 8;
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 9911140e9..560551157 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -33,7 +33,6 @@ void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call
case KEPLERMEMORY_REG_INDEX(data): {
upload_state.ProcessData(method_argument, is_last_call);
if (is_last_call) {
- system.GPU().Maxwell3D().OnMemoryWrite();
}
break;
}
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 116ad1722..75517a4f7 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -30,8 +30,8 @@ Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_)
Maxwell3D::~Maxwell3D() = default;
-void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
- rasterizer = &rasterizer_;
+void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
+ rasterizer = rasterizer_;
}
void Maxwell3D::InitializeRegisterDefaults() {
@@ -223,7 +223,6 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
case MAXWELL3D_REG_INDEX(data_upload):
upload_state.ProcessData(argument, is_last_call);
if (is_last_call) {
- OnMemoryWrite();
}
return;
case MAXWELL3D_REG_INDEX(fragment_barrier):
@@ -570,17 +569,18 @@ std::optional<u64> Maxwell3D::GetQueryResult() {
}
}
-void Maxwell3D::ProcessCBBind(std::size_t stage_index) {
+void Maxwell3D::ProcessCBBind(size_t stage_index) {
// Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
- auto& shader = state.shader_stages[stage_index];
- auto& bind_data = regs.cb_bind[stage_index];
-
- ASSERT(bind_data.index < Regs::MaxConstBuffers);
- auto& buffer = shader.const_buffers[bind_data.index];
-
+ const auto& bind_data = regs.cb_bind[stage_index];
+ auto& buffer = state.shader_stages[stage_index].const_buffers[bind_data.index];
buffer.enabled = bind_data.valid.Value() != 0;
buffer.address = regs.const_buffer.BufferAddress();
buffer.size = regs.const_buffer.cb_size;
+
+ const bool is_enabled = bind_data.valid.Value() != 0;
+ const GPUVAddr gpu_addr = is_enabled ? regs.const_buffer.BufferAddress() : 0;
+ const u32 size = is_enabled ? regs.const_buffer.cb_size : 0;
+ rasterizer->BindGraphicsUniformBuffer(stage_index, bind_data.index, gpu_addr, size);
}
void Maxwell3D::ProcessCBData(u32 value) {
@@ -635,7 +635,6 @@ void Maxwell3D::FinishCBData() {
const u32 id = cb_data_state.id;
memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size);
- OnMemoryWrite();
cb_data_state.id = null_cb_data;
cb_data_state.current = null_cb_data;
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 002d1b3f9..ffed42a29 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -55,7 +55,7 @@ public:
~Maxwell3D();
/// Binds a rasterizer to this engine.
- void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
+ void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
/// Register structure of the Maxwell3D engine.
/// TODO(Subv): This structure will need to be made bigger as more registers are discovered.
@@ -1314,8 +1314,7 @@ public:
GPUVAddr LimitAddress() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(limit_high) << 32) |
- limit_low) +
- 1;
+ limit_low);
}
} vertex_array_limit[NumVertexArrays];
@@ -1403,6 +1402,7 @@ public:
};
std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages;
+
u32 current_instance = 0; ///< Current instance to be used to simulate instanced rendering.
};
@@ -1452,11 +1452,6 @@ public:
return *rasterizer;
}
- /// Notify a memory write has happened.
- void OnMemoryWrite() {
- dirty.flags |= dirty.on_write_stores;
- }
-
enum class MMEDrawMode : u32 {
Undefined,
Array,
@@ -1478,7 +1473,6 @@ public:
using Tables = std::array<Table, 2>;
Flags flags;
- Flags on_write_stores;
Tables tables{};
} dirty;
@@ -1541,7 +1535,7 @@ private:
void FinishCBData();
/// Handles a write to the CB_BIND register.
- void ProcessCBBind(std::size_t stage_index);
+ void ProcessCBBind(size_t stage_index);
/// Handles a write to the VERTEX_END_GL register, triggering a draw.
void DrawArrays();
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index ba750748c..a2f19559f 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -60,9 +60,6 @@ void MaxwellDMA::Launch() {
return;
}
- // All copies here update the main memory, so mark all rasterizer states as invalid.
- system.GPU().Maxwell3D().OnMemoryWrite();
-
if (is_src_pitch && is_dst_pitch) {
CopyPitchToPitch();
} else {
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index 3512283ff..f055b61e9 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -143,22 +143,26 @@ private:
}
bool ShouldWait() const {
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
return texture_cache.ShouldWaitAsyncFlushes() || buffer_cache.ShouldWaitAsyncFlushes() ||
query_cache.ShouldWaitAsyncFlushes();
}
bool ShouldFlush() const {
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
return texture_cache.HasUncommittedFlushes() || buffer_cache.HasUncommittedFlushes() ||
query_cache.HasUncommittedFlushes();
}
void PopAsyncFlushes() {
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
texture_cache.PopAsyncFlushes();
buffer_cache.PopAsyncFlushes();
query_cache.PopAsyncFlushes();
}
void CommitAsyncFlushes() {
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
texture_cache.CommitAsyncFlushes();
buffer_cache.CommitAsyncFlushes();
query_cache.CommitAsyncFlushes();
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 6ab06775f..51c63af4a 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -30,8 +30,7 @@ MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
GPU::GPU(Core::System& system_, bool is_async_, bool use_nvdec_)
: system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(system)},
- dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)},
- cdma_pusher{std::make_unique<Tegra::CDmaPusher>(*this)}, use_nvdec{use_nvdec_},
+ dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)}, use_nvdec{use_nvdec_},
maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)},
fermi_2d{std::make_unique<Engines::Fermi2D>()},
kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)},
@@ -44,8 +43,8 @@ GPU::~GPU() = default;
void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
renderer = std::move(renderer_);
+ rasterizer = renderer->ReadRasterizer();
- VideoCore::RasterizerInterface& rasterizer = renderer->Rasterizer();
memory_manager->BindRasterizer(rasterizer);
maxwell_3d->BindRasterizer(rasterizer);
fermi_2d->BindRasterizer(rasterizer);
@@ -171,7 +170,7 @@ void GPU::TickWork() {
const std::size_t size = request.size;
flush_requests.pop_front();
flush_request_mutex.unlock();
- renderer->Rasterizer().FlushRegion(addr, size);
+ rasterizer->FlushRegion(addr, size);
current_flush_fence.store(fence);
flush_request_mutex.lock();
}
@@ -193,11 +192,11 @@ u64 GPU::GetTicks() const {
}
void GPU::FlushCommands() {
- renderer->Rasterizer().FlushCommands();
+ rasterizer->FlushCommands();
}
void GPU::SyncGuestHost() {
- renderer->Rasterizer().SyncGuestHost();
+ rasterizer->SyncGuestHost();
}
enum class GpuSemaphoreOperation {
@@ -494,8 +493,7 @@ void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
// TODO(ameerj): RE proper async nvdec operation
// gpu_thread.SubmitCommandBuffer(std::move(entries));
- cdma_pusher->Push(std::move(entries));
- cdma_pusher->DispatchCalls();
+ cdma_pusher->ProcessEntries(std::move(entries));
}
void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index b4ce6b154..b2ee45496 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -366,6 +366,7 @@ protected:
std::unique_ptr<Tegra::DmaPusher> dma_pusher;
std::unique_ptr<Tegra::CDmaPusher> cdma_pusher;
std::unique_ptr<VideoCore::RendererBase> renderer;
+ VideoCore::RasterizerInterface* rasterizer = nullptr;
const bool use_nvdec;
private:
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 7e490bcc3..eb0e43c0c 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -38,6 +38,7 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
}
auto current_context = context.Acquire();
+ VideoCore::RasterizerInterface* const rasterizer = renderer.ReadRasterizer();
CommandDataContainer next;
while (state.is_running) {
@@ -47,18 +48,17 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
dma_pusher.DispatchCalls();
} else if (auto* command_list = std::get_if<SubmitChCommandEntries>(&next.data)) {
// NVDEC
- cdma_pusher.Push(std::move(command_list->entries));
- cdma_pusher.DispatchCalls();
+ cdma_pusher.ProcessEntries(std::move(command_list->entries));
} else if (const auto* data = std::get_if<SwapBuffersCommand>(&next.data)) {
renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
} else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) {
- renderer.Rasterizer().ReleaseFences();
+ rasterizer->ReleaseFences();
} else if (std::holds_alternative<GPUTickCommand>(next.data)) {
system.GPU().TickWork();
} else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) {
- renderer.Rasterizer().FlushRegion(flush->addr, flush->size);
+ rasterizer->FlushRegion(flush->addr, flush->size);
} else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) {
- renderer.Rasterizer().OnCPUWrite(invalidate->addr, invalidate->size);
+ rasterizer->OnCPUWrite(invalidate->addr, invalidate->size);
} else if (std::holds_alternative<EndProcessingCommand>(next.data)) {
return;
} else {
@@ -84,6 +84,7 @@ ThreadManager::~ThreadManager() {
void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
Core::Frontend::GraphicsContext& context,
Tegra::DmaPusher& dma_pusher, Tegra::CDmaPusher& cdma_pusher) {
+ rasterizer = renderer.ReadRasterizer();
thread = std::thread(RunThread, std::ref(system), std::ref(renderer), std::ref(context),
std::ref(dma_pusher), std::ref(state), std::ref(cdma_pusher));
}
@@ -129,12 +130,12 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) {
}
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
- system.Renderer().Rasterizer().OnCPUWrite(addr, size);
+ rasterizer->OnCPUWrite(addr, size);
}
void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
// Skip flush in async mode, as FlushAndInvalidateRegion is not used for anything too important
- system.Renderer().Rasterizer().OnCPUWrite(addr, size);
+ rasterizer->OnCPUWrite(addr, size);
}
void ThreadManager::WaitIdle() const {
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 2775629e7..4cd951169 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -27,6 +27,7 @@ class System;
} // namespace Core
namespace VideoCore {
+class RasterizerInterface;
class RendererBase;
} // namespace VideoCore
@@ -151,11 +152,12 @@ private:
/// Pushes a command to be executed by the GPU thread
u64 PushCommand(CommandData&& command_data);
- SynchState state;
Core::System& system;
- std::thread thread;
- std::thread::id thread_id;
const bool is_async;
+ VideoCore::RasterizerInterface* rasterizer = nullptr;
+
+ SynchState state;
+ std::thread thread;
};
} // namespace VideoCommon::GPUThread
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index 28f2b8614..970120acc 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -12,7 +12,6 @@ set(SHADER_FILES
vulkan_blit_depth_stencil.frag
vulkan_present.frag
vulkan_present.vert
- vulkan_quad_array.comp
vulkan_quad_indexed.comp
vulkan_uint8.comp
)
diff --git a/src/video_core/host_shaders/vulkan_quad_array.comp b/src/video_core/host_shaders/vulkan_quad_array.comp
deleted file mode 100644
index 212f4e998..000000000
--- a/src/video_core/host_shaders/vulkan_quad_array.comp
+++ /dev/null
@@ -1,28 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#version 460 core
-
-layout (local_size_x = 1024) in;
-
-layout (std430, set = 0, binding = 0) buffer OutputBuffer {
- uint output_indexes[];
-};
-
-layout (push_constant) uniform PushConstants {
- uint first;
-};
-
-void main() {
- uint primitive = gl_GlobalInvocationID.x;
- if (primitive * 6 >= output_indexes.length()) {
- return;
- }
-
- const uint quad_map[6] = uint[](0, 1, 2, 0, 2, 3);
- for (uint vertex = 0; vertex < 6; ++vertex) {
- uint index = first + primitive * 4 + quad_map[vertex];
- output_indexes[primitive * 6 + vertex] = index;
- }
-}
diff --git a/src/video_core/host_shaders/vulkan_uint8.comp b/src/video_core/host_shaders/vulkan_uint8.comp
index ad74d7af9..872291670 100644
--- a/src/video_core/host_shaders/vulkan_uint8.comp
+++ b/src/video_core/host_shaders/vulkan_uint8.comp
@@ -16,9 +16,16 @@ layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer {
uint16_t output_indexes[];
};
+uint AssembleIndex(uint id) {
+ // Most primitive restart indices are 0xFF
+ // Hardcode this to 0xFF for now
+ uint index = uint(input_indexes[id]);
+ return index == 0xFF ? 0xFFFF : index;
+}
+
void main() {
uint id = gl_GlobalInvocationID.x;
if (id < input_indexes.length()) {
- output_indexes[id] = uint16_t(input_indexes[id]);
+ output_indexes[id] = uint16_t(AssembleIndex(id));
}
}
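The AssembleIndex change widens 8-bit indices to 16 bits while promoting the assumed 0xFF restart marker to its 16-bit counterpart 0xFFFF. A CPU-side C++ sketch of the same transform (the shader itself is GLSL; this is only an illustration of the rule):

    #include <cstdint>
    #include <vector>

    std::vector<std::uint16_t> WidenIndices(const std::vector<std::uint8_t>& input) {
        std::vector<std::uint16_t> output;
        output.reserve(input.size());
        for (const std::uint8_t index : input) {
            // 0xFF is assumed to be the guest's restart index; promote it to 0xFFFF
            output.push_back(index == 0xFF ? std::uint16_t{0xFFFF} : std::uint16_t{index});
        }
        return output;
    }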
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index c841f3cd7..4eb71efbd 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -6,7 +6,7 @@
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
-#include "core/hle/kernel/memory/page_table.h"
+#include "core/hle/kernel/k_page_table.h"
#include "core/hle/kernel/process.h"
#include "core/memory.h"
#include "video_core/gpu.h"
@@ -21,8 +21,8 @@ MemoryManager::MemoryManager(Core::System& system_)
MemoryManager::~MemoryManager() = default;
-void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
- rasterizer = &rasterizer_;
+void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
+ rasterizer = rasterizer_;
}
GPUVAddr MemoryManager::UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size) {
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index b468a67de..b3538d503 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -72,7 +72,7 @@ public:
~MemoryManager();
/// Binds a renderer to the memory manager.
- void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
+ void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
[[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const;
@@ -157,6 +157,8 @@ private:
using MapRange = std::pair<GPUVAddr, size_t>;
std::vector<MapRange> map_ranges;
+
+ std::vector<std::pair<VAddr, std::size_t>> cache_invalidate_queue;
};
} // namespace Tegra
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 0cb0f387d..50491b758 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -7,6 +7,7 @@
#include <atomic>
#include <functional>
#include <optional>
+#include <span>
#include "common/common_types.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/gpu.h"
@@ -49,6 +50,10 @@ public:
/// Records a GPU query and caches it
virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0;
+ /// Signal a uniform buffer binding
+ virtual void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
+ u32 size) = 0;
+
/// Signal a GPU based semaphore as a fence
virtual void SignalSemaphore(GPUVAddr addr, u32 value) = 0;
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 51dde8eb5..320ee8d30 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -37,15 +37,11 @@ public:
std::unique_ptr<Core::Frontend::GraphicsContext> context);
virtual ~RendererBase();
- /// Initialize the renderer
- [[nodiscard]] virtual bool Init() = 0;
-
- /// Shutdown the renderer
- virtual void ShutDown() = 0;
-
/// Finalize rendering the guest frame and draw into the presentation texture
virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
+ [[nodiscard]] virtual RasterizerInterface* ReadRasterizer() = 0;
+
// Getter/setter functions:
// ------------------------
@@ -57,14 +53,6 @@ public:
return m_current_frame;
}
- [[nodiscard]] RasterizerInterface& Rasterizer() {
- return *rasterizer;
- }
-
- [[nodiscard]] const RasterizerInterface& Rasterizer() const {
- return *rasterizer;
- }
-
[[nodiscard]] Core::Frontend::GraphicsContext& Context() {
return *context;
}
@@ -98,7 +86,6 @@ public:
protected:
Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle.
- std::unique_ptr<RasterizerInterface> rasterizer;
std::unique_ptr<Core::Frontend::GraphicsContext> context;
f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer
int m_current_frame = 0; ///< Current frame, should be set by the renderer
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 5772cad87..6da3906a4 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -2,98 +2,208 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <memory>
+#include <span>
-#include <glad/glad.h>
-
-#include "common/assert.h"
-#include "common/microprofile.h"
#include "video_core/buffer_cache/buffer_cache.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
-#include "video_core/renderer_opengl/gl_rasterizer.h"
-#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
+namespace {
+struct BindlessSSBO {
+ GLuint64EXT address;
+ GLsizei length;
+ GLsizei padding;
+};
+static_assert(sizeof(BindlessSSBO) == sizeof(GLuint) * 4);
+
+constexpr std::array PROGRAM_LUT{
+ GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
+ GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
+};
+} // Anonymous namespace
+
+Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
+ : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
+
+Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
+ VAddr cpu_addr_, u64 size_bytes_)
+ : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) {
+ buffer.Create();
+ const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr());
+ glObjectLabel(GL_BUFFER, buffer.handle, static_cast<GLsizei>(name.size()), name.data());
+ glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW);
+
+ if (runtime.has_unified_vertex_buffers) {
+ glGetNamedBufferParameterui64vNV(buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &address);
+ }
+}
-using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+void Buffer::ImmediateUpload(size_t offset, std::span<const u8> data) noexcept {
+ glNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
+ static_cast<GLsizeiptr>(data.size_bytes()), data.data());
+}
-MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
+void Buffer::ImmediateDownload(size_t offset, std::span<u8> data) noexcept {
+ glGetNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
+ static_cast<GLsizeiptr>(data.size_bytes()), data.data());
+}
-Buffer::Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_)
- : BufferBlock{cpu_addr_, size_} {
- gl_buffer.Create();
- glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size_), nullptr, GL_DYNAMIC_DRAW);
- if (device_.UseAssemblyShaders() || device_.HasVertexBufferUnifiedMemory()) {
- glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE);
- glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
+void Buffer::MakeResident(GLenum access) noexcept {
+ // Abuse GLenum's order to exit early
+ // GL_NONE (default) < GL_READ_ONLY < GL_READ_WRITE
+ if (access <= current_residency_access || buffer.handle == 0) {
+ return;
+ }
+ if (std::exchange(current_residency_access, access) != GL_NONE) {
+ // If the buffer is already resident, remove its residency before promoting it
+ glMakeNamedBufferNonResidentNV(buffer.handle);
}
+ glMakeNamedBufferResidentNV(buffer.handle, access);
}
-Buffer::~Buffer() = default;
-
-void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) {
- glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset),
- static_cast<GLsizeiptr>(data_size), data);
+BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
+ : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()},
+ use_assembly_shaders{device.UseAssemblyShaders()},
+ has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()},
+ stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} {
+ GLint gl_max_attributes;
+ glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes);
+ max_attributes = static_cast<u32>(gl_max_attributes);
+ for (auto& stage_uniforms : fast_uniforms) {
+ for (OGLBuffer& buffer : stage_uniforms) {
+ buffer.Create();
+ glNamedBufferData(buffer.handle, BufferCache::SKIP_CACHE_SIZE, nullptr, GL_STREAM_DRAW);
+ }
+ }
+ for (auto& stage_uniforms : copy_uniforms) {
+ for (OGLBuffer& buffer : stage_uniforms) {
+ buffer.Create();
+ glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY);
+ }
+ }
+ for (OGLBuffer& buffer : copy_compute_uniforms) {
+ buffer.Create();
+ glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY);
+ }
}
-void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) {
- MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
- const GLsizeiptr gl_size = static_cast<GLsizeiptr>(data_size);
- const GLintptr gl_offset = static_cast<GLintptr>(offset);
- if (read_buffer.handle == 0) {
- read_buffer.Create();
- glNamedBufferData(read_buffer.handle, static_cast<GLsizeiptr>(Size()), nullptr,
- GL_STREAM_READ);
+void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
+ std::span<const VideoCommon::BufferCopy> copies) {
+ for (const VideoCommon::BufferCopy& copy : copies) {
+ glCopyNamedBufferSubData(
+ src_buffer.Handle(), dst_buffer.Handle(), static_cast<GLintptr>(copy.src_offset),
+ static_cast<GLintptr>(copy.dst_offset), static_cast<GLsizeiptr>(copy.size));
}
- glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
- glCopyNamedBufferSubData(gl_buffer.handle, read_buffer.handle, gl_offset, gl_offset, gl_size);
- glGetNamedBufferSubData(read_buffer.handle, gl_offset, gl_size, data);
}
-void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
- std::size_t copy_size) {
- glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset),
- static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(copy_size));
+void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) {
+ if (has_unified_vertex_buffers) {
+ buffer.MakeResident(GL_READ_ONLY);
+ glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, buffer.HostGpuAddr() + offset,
+ static_cast<GLsizeiptr>(size));
+ } else {
+ glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle());
+ index_buffer_offset = offset;
+ }
}
-OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_,
- Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
- const Device& device_, OGLStreamBuffer& stream_buffer_,
- StateTracker& state_tracker)
- : GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, stream_buffer_}, device{device_} {
- if (!device.HasFastBufferSubData()) {
+void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size,
+ u32 stride) {
+ if (index >= max_attributes) {
return;
}
-
- static constexpr GLsizeiptr size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize);
- glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
- for (const GLuint cbuf : cbufs) {
- glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW);
+ if (has_unified_vertex_buffers) {
+ buffer.MakeResident(GL_READ_ONLY);
+ glBindVertexBuffer(index, 0, 0, static_cast<GLsizei>(stride));
+ glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, index,
+ buffer.HostGpuAddr() + offset, static_cast<GLsizeiptr>(size));
+ } else {
+ glBindVertexBuffer(index, buffer.Handle(), static_cast<GLintptr>(offset),
+ static_cast<GLsizei>(stride));
}
}
-OGLBufferCache::~OGLBufferCache() {
- glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
+void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer,
+ u32 offset, u32 size) {
+ if (use_assembly_shaders) {
+ GLuint handle;
+ if (offset != 0) {
+ handle = copy_uniforms[stage][binding_index].handle;
+ glCopyNamedBufferSubData(buffer.Handle(), handle, offset, 0, size);
+ } else {
+ handle = buffer.Handle();
+ }
+ glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0,
+ static_cast<GLsizeiptr>(size));
+ } else {
+ const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
+ const GLuint binding = base_binding + binding_index;
+ glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(),
+ static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+ }
}
-std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
- return std::make_shared<Buffer>(device, cpu_addr, size);
+void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset,
+ u32 size) {
+ if (use_assembly_shaders) {
+ GLuint handle;
+ if (offset != 0) {
+ handle = copy_compute_uniforms[binding_index].handle;
+ glCopyNamedBufferSubData(buffer.Handle(), handle, offset, 0, size);
+ } else {
+ handle = buffer.Handle();
+ }
+ glBindBufferRangeNV(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding_index, handle, 0,
+ static_cast<GLsizeiptr>(size));
+ } else {
+ glBindBufferRange(GL_UNIFORM_BUFFER, binding_index, buffer.Handle(),
+ static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+ }
}
-OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) {
- return {0, 0, 0};
+void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer,
+ u32 offset, u32 size, bool is_written) {
+ if (use_assembly_shaders) {
+ const BindlessSSBO ssbo{
+ .address = buffer.HostGpuAddr() + offset,
+ .length = static_cast<GLsizei>(size),
+ .padding = 0,
+ };
+ buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
+ glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1,
+ reinterpret_cast<const GLuint*>(&ssbo));
+ } else {
+ const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer;
+ const GLuint binding = base_binding + binding_index;
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
+ static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+ }
}
-OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer,
- std::size_t size) {
- DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));
- const GLuint cbuf = cbufs[cbuf_cursor++];
+void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset,
+ u32 size, bool is_written) {
+ if (use_assembly_shaders) {
+ const BindlessSSBO ssbo{
+ .address = buffer.HostGpuAddr() + offset,
+ .length = static_cast<GLsizei>(size),
+ .padding = 0,
+ };
+ buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
+ glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1,
+ reinterpret_cast<const GLuint*>(&ssbo));
+ } else if (size == 0) {
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0);
+ } else {
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(),
+ static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+ }
+}
- glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer);
- return {cbuf, 0, 0};
+void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset,
+ u32 size) {
+ glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, index, buffer.Handle(),
+ static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}
} // namespace OpenGL
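Buffer::MakeResident above leans on the numeric ordering of the access enums (GL_NONE is 0, and GL_READ_ONLY, 0x88B8, is less than GL_READ_WRITE, 0x88BA) to treat residency as a monotonic promotion. A free-standing sketch of the same rule, assuming a loaded GL context with NV_shader_buffer_load available:

    #include <glad/glad.h>
    #include <utility>

    // Promote a buffer's residency, never demoting an equal or stronger access
    void PromoteResidency(GLuint handle, GLenum& current_access, GLenum requested) {
        if (requested <= current_access || handle == 0) {
            return; // already resident with at least the requested access
        }
        if (std::exchange(current_access, requested) != GL_NONE) {
            glMakeNamedBufferNonResidentNV(handle); // drop the weaker residency first
        }
        glMakeNamedBufferResidentNV(handle, requested);
    }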
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 17ee90316..d8b20a9af 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -5,79 +5,157 @@
#pragma once
#include <array>
-#include <memory>
+#include <span>
+#include "common/alignment.h"
#include "common/common_types.h"
+#include "common/dynamic_library.h"
#include "video_core/buffer_cache/buffer_cache.h"
-#include "video_core/engines/maxwell_3d.h"
+#include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
-namespace Core {
-class System;
-}
-
namespace OpenGL {
-class Device;
-class OGLStreamBuffer;
-class RasterizerOpenGL;
-class StateTracker;
+class BufferCacheRuntime;
-class Buffer : public VideoCommon::BufferBlock {
+class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> {
public:
- explicit Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_);
- ~Buffer();
+ explicit Buffer(BufferCacheRuntime&, VideoCore::RasterizerInterface& rasterizer, VAddr cpu_addr,
+ u64 size_bytes);
+ explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams);
- void Upload(std::size_t offset, std::size_t data_size, const u8* data);
+ void ImmediateUpload(size_t offset, std::span<const u8> data) noexcept;
- void Download(std::size_t offset, std::size_t data_size, u8* data);
+ void ImmediateDownload(size_t offset, std::span<u8> data) noexcept;
- void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
- std::size_t copy_size);
+ void MakeResident(GLenum access) noexcept;
- GLuint Handle() const noexcept {
- return gl_buffer.handle;
+ [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {
+ return address;
}
- u64 Address() const noexcept {
- return gpu_address;
+ [[nodiscard]] GLuint Handle() const noexcept {
+ return buffer.handle;
}
private:
- OGLBuffer gl_buffer;
- OGLBuffer read_buffer;
- u64 gpu_address = 0;
+ GLuint64EXT address = 0;
+ OGLBuffer buffer;
+ GLenum current_residency_access = GL_NONE;
};
-using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
-class OGLBufferCache final : public GenericBufferCache {
+class BufferCacheRuntime {
+ friend Buffer;
+
public:
- explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer,
- Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
- const Device& device, OGLStreamBuffer& stream_buffer,
- StateTracker& state_tracker);
- ~OGLBufferCache();
+ static constexpr u8 INVALID_BINDING = std::numeric_limits<u8>::max();
+
+ explicit BufferCacheRuntime(const Device& device_);
+
+ void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
+ std::span<const VideoCommon::BufferCopy> copies);
+
+ void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size);
+
+ void BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, u32 stride);
+
+ void BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size);
+
+ void BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size);
+
+ void BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size,
+ bool is_written);
+
+ void BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size,
+ bool is_written);
+
+ void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size);
+
+ void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) {
+ if (use_assembly_shaders) {
+ const GLuint handle = fast_uniforms[stage][binding_index].handle;
+ const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
+ glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size);
+ } else {
+ const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
+ const GLuint binding = base_binding + binding_index;
+ glBindBufferRange(GL_UNIFORM_BUFFER, binding,
+ fast_uniforms[stage][binding_index].handle, 0,
+ static_cast<GLsizeiptr>(size));
+ }
+ }
- BufferInfo GetEmptyBuffer(std::size_t) override;
+ void PushFastUniformBuffer(size_t stage, u32 binding_index, std::span<const u8> data) {
+ if (use_assembly_shaders) {
+ glProgramBufferParametersIuivNV(
+ PABO_LUT[stage], binding_index, 0,
+ static_cast<GLsizei>(data.size_bytes() / sizeof(GLuint)),
+ reinterpret_cast<const GLuint*>(data.data()));
+ } else {
+ glNamedBufferSubData(fast_uniforms[stage][binding_index].handle, 0,
+ static_cast<GLsizeiptr>(data.size_bytes()), data.data());
+ }
+ }
- void Acquire() noexcept {
- cbuf_cursor = 0;
+ std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept {
+ const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size));
+ const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
+ const GLuint binding = base_binding + binding_index;
+ glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(),
+ static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+ return mapped_span;
}
-protected:
- std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
+ [[nodiscard]] const GLvoid* IndexOffset() const noexcept {
+ return reinterpret_cast<const GLvoid*>(static_cast<uintptr_t>(index_buffer_offset));
+ }
- BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override;
+ [[nodiscard]] bool HasFastBufferSubData() const noexcept {
+ return has_fast_buffer_sub_data;
+ }
private:
- static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
- Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
+ static constexpr std::array PABO_LUT{
+ GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
+ GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
+ GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV,
+ };
const Device& device;
- std::size_t cbuf_cursor = 0;
- std::array<GLuint, NUM_CBUFS> cbufs{};
+ bool has_fast_buffer_sub_data = false;
+ bool use_assembly_shaders = false;
+ bool has_unified_vertex_buffers = false;
+
+ u32 max_attributes = 0;
+
+ std::optional<StreamBuffer> stream_buffer;
+
+ std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
+ VideoCommon::NUM_STAGES>
+ fast_uniforms;
+ std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
+ VideoCommon::NUM_STAGES>
+ copy_uniforms;
+ std::array<OGLBuffer, VideoCommon::NUM_COMPUTE_UNIFORM_BUFFERS> copy_compute_uniforms;
+
+ u32 index_buffer_offset = 0;
+};
+
+struct BufferCacheParams {
+ using Runtime = OpenGL::BufferCacheRuntime;
+ using Buffer = OpenGL::Buffer;
+
+ static constexpr bool IS_OPENGL = true;
+ static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true;
+ static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true;
+ static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true;
+ static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
+ static constexpr bool USE_MEMORY_MAPS = false;
};
+using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
+
} // namespace OpenGL
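BufferCacheParams configures the shared VideoCommon::BufferCache through compile-time traits rather than virtual dispatch. A minimal sketch of that policy-traits shape, with illustrative names standing in for the real template:

    #include <cstdio>

    struct Params {
        static constexpr bool USE_MEMORY_MAPS = false;
    };

    template <class P>
    struct Cache {
        void Upload() {
            if constexpr (P::USE_MEMORY_MAPS) {
                std::puts("staging map + GPU copy path");
            } else {
                std::puts("immediate glNamedBufferSubData-style path");
            }
        }
    };

    int main() {
        Cache<Params>{}.Upload(); // selects the immediate-upload branch at compile time
    }

Each backend pays only for the paths its flags enable, since the unused branches are discarded at compile time.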
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 04c267ee4..1ae5f1d62 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -21,9 +21,7 @@
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
-
namespace {
-
// One uniform block is reserved for emulation purposes
constexpr u32 ReservedUniformBlocks = 1;
@@ -197,11 +195,13 @@ bool IsASTCSupported() {
const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
return nsight || HasExtension(extensions, "GL_EXT_debug_tool");
}
-
} // Anonymous namespace
-Device::Device()
- : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} {
+Device::Device() {
+ if (!GLAD_GL_VERSION_4_6) {
+ LOG_ERROR(Render_OpenGL, "OpenGL 4.6 is not available");
+ throw std::runtime_error{"Insufficient version"};
+ }
const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
const std::vector extensions = GetExtensions();
@@ -217,6 +217,9 @@ Device::Device()
"Beta driver 443.24 is known to have issues. There might be performance issues.");
disable_fast_buffer_sub_data = true;
}
+
+ max_uniform_buffers = BuildMaxUniformBuffers();
+ base_bindings = BuildBaseBindings();
uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
@@ -236,6 +239,7 @@ Device::Device()
has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory;
has_debugging_tool_attached = IsDebugToolAttached(extensions);
+ has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float");
// At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
// uniform buffers as "push constants"
@@ -272,6 +276,7 @@ Device::Device(std::nullptr_t) {
has_image_load_formatted = true;
has_texture_shadow_lod = true;
has_variable_aoffi = true;
+ has_depth_buffer_float = true;
}
bool Device::TestVariableAoffi() {
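The Device constructor now fails fast when OpenGL 4.6 is unavailable instead of continuing with a partially working context. A sketch of the guard, with a plain bool standing in for GLAD_GL_VERSION_4_6:

    #include <stdexcept>

    struct Device {
        explicit Device(bool has_gl_4_6) {
            if (!has_gl_4_6) {
                // Refuse to build a half-functional device; the frontend reports the error
                throw std::runtime_error{"Insufficient version"};
            }
            // ... query limits and extensions only after the version check passes
        }
    };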
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 9141de635..f24bd0c7b 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -10,11 +10,9 @@
namespace OpenGL {
-static constexpr u32 EmulationUniformBlockBinding = 0;
-
-class Device final {
+class Device {
public:
- struct BaseBindings final {
+ struct BaseBindings {
u32 uniform_buffer{};
u32 shader_storage_buffer{};
u32 sampler{};
@@ -124,6 +122,10 @@ public:
return use_driver_cache;
}
+ bool HasDepthBufferFloat() const {
+ return has_depth_buffer_float;
+ }
+
private:
static bool TestVariableAoffi();
static bool TestPreciseBug();
@@ -152,6 +154,7 @@ private:
bool use_assembly_shaders{};
bool use_asynchronous_shaders{};
bool use_driver_cache{};
+ bool has_depth_buffer_float{};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
index 3e9c922f5..151290101 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -47,7 +47,7 @@ void GLInnerFence::Wait() {
FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_,
Tegra::GPU& gpu_, TextureCache& texture_cache_,
- OGLBufferCache& buffer_cache_, QueryCache& query_cache_)
+ BufferCache& buffer_cache_, QueryCache& query_cache_)
: GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {}
Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h
index 30dbee613..e714aa115 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.h
+++ b/src/video_core/renderer_opengl/gl_fence_manager.h
@@ -32,14 +32,13 @@ private:
};
using Fence = std::shared_ptr<GLInnerFence>;
-using GenericFenceManager =
- VideoCommon::FenceManager<Fence, TextureCache, OGLBufferCache, QueryCache>;
+using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>;
class FenceManagerOpenGL final : public GenericFenceManager {
public:
- explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
- TextureCache& texture_cache_, OGLBufferCache& buffer_cache_,
- QueryCache& query_cache_);
+ explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
+ TextureCache& texture_cache, BufferCache& buffer_cache,
+ QueryCache& query_cache);
protected:
Fence CreateFence(u32 value, bool is_stubbed) override;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 8aa63d329..4610fd160 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -38,37 +38,21 @@
namespace OpenGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+using GLvec4 = std::array<GLfloat, 4>;
using Tegra::Engines::ShaderType;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceTarget;
using VideoCore::Surface::SurfaceType;
-MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Format Setup", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_VB, "OpenGL", "Vertex Buffer Setup", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_Shader, "OpenGL", "Shader Setup", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_UBO, "OpenGL", "Const Buffer Setup", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_Index, "OpenGL", "Index Buffer Setup", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_Texture, "OpenGL", "Texture Setup", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_Framebuffer, "OpenGL", "Framebuffer Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
+MICROPROFILE_DEFINE(OpenGL_Clears, "OpenGL", "Clears", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
-MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255, 100, 100));
+MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Management", MP_RGB(100, 255, 100));
namespace {
-constexpr size_t NUM_CONST_BUFFERS_PER_STAGE = 18;
-constexpr size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
- NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize;
-constexpr size_t TOTAL_CONST_BUFFER_BYTES =
- NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage;
-
constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
-constexpr size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16;
-
-constexpr size_t MAX_TEXTURES = 192;
-constexpr size_t MAX_IMAGES = 48;
struct TextureHandle {
constexpr TextureHandle(u32 data, bool via_header_index) {
@@ -104,20 +88,6 @@ TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const
return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
}
-std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
- const ConstBufferEntry& entry) {
- if (!entry.IsIndirect()) {
- return entry.GetSize();
- }
- if (buffer.size > Maxwell::MaxConstBufferSize) {
- LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size,
- Maxwell::MaxConstBufferSize);
- return Maxwell::MaxConstBufferSize;
- }
-
- return buffer.size;
-}
-
/// Translates hardware transform feedback indices
/// @param location Hardware location
/// @return Pair of ARB_transform_feedback3 token stream first and third arguments
@@ -150,14 +120,6 @@ void oglEnable(GLenum cap, bool state) {
(state ? glEnable : glDisable)(cap);
}
-void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ssbos) {
- if (num_ssbos == 0) {
- return;
- }
- glProgramLocalParametersI4uivNV(target, 0, static_cast<GLsizei>(num_ssbos),
- reinterpret_cast<const GLuint*>(ssbos));
-}
-
ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
if (entry.is_buffer) {
return ImageViewType::Buffer;
@@ -204,44 +166,28 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
: RasterizerAccelerated(cpu_memory_), gpu(gpu_), maxwell3d(gpu.Maxwell3D()),
kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_),
screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_),
- stream_buffer(device, state_tracker),
texture_cache_runtime(device, program_manager, state_tracker),
texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
+ buffer_cache_runtime(device),
+ buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device),
query_cache(*this, maxwell3d, gpu_memory),
- buffer_cache(*this, gpu_memory, cpu_memory_, device, stream_buffer, state_tracker),
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
async_shaders(emu_window_) {
- unified_uniform_buffer.Create();
- glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0);
-
- if (device.UseAssemblyShaders()) {
- glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
- for (const GLuint cbuf : staging_cbufs) {
- glNamedBufferStorage(cbuf, static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize),
- nullptr, 0);
- }
- }
if (device.UseAsynchronousShaders()) {
async_shaders.AllocateWorkers();
}
}
-RasterizerOpenGL::~RasterizerOpenGL() {
- if (device.UseAssemblyShaders()) {
- glDeleteBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
- }
-}
+RasterizerOpenGL::~RasterizerOpenGL() = default;
-void RasterizerOpenGL::SetupVertexFormat() {
+void RasterizerOpenGL::SyncVertexFormats() {
auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::VertexFormats]) {
return;
}
flags[Dirty::VertexFormats] = false;
- MICROPROFILE_SCOPE(OpenGL_VAO);
-
// Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. Enables
// the first 16 vertex attributes always, as we don't know which ones are actually used until
// shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now to
@@ -277,55 +223,7 @@ void RasterizerOpenGL::SetupVertexFormat() {
}
}
-void RasterizerOpenGL::SetupVertexBuffer() {
- auto& flags = maxwell3d.dirty.flags;
- if (!flags[Dirty::VertexBuffers]) {
- return;
- }
- flags[Dirty::VertexBuffers] = false;
-
- MICROPROFILE_SCOPE(OpenGL_VB);
-
- const bool use_unified_memory = device.HasVertexBufferUnifiedMemory();
-
- // Upload all guest vertex arrays sequentially to our buffer
- const auto& regs = maxwell3d.regs;
- for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) {
- if (!flags[Dirty::VertexBuffer0 + index]) {
- continue;
- }
- flags[Dirty::VertexBuffer0 + index] = false;
-
- const auto& vertex_array = regs.vertex_array[index];
- if (!vertex_array.IsEnabled()) {
- continue;
- }
-
- const GPUVAddr start = vertex_array.StartAddress();
- const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
- ASSERT(end >= start);
-
- const GLuint gl_index = static_cast<GLuint>(index);
- const u64 size = end - start;
- if (size == 0) {
- glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
- if (use_unified_memory) {
- glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, 0, 0);
- }
- continue;
- }
- const auto info = buffer_cache.UploadMemory(start, size);
- if (use_unified_memory) {
- glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
- glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index,
- info.address + info.offset, size);
- } else {
- glBindVertexBuffer(gl_index, info.handle, info.offset, vertex_array.stride);
- }
- }
-}
-
-void RasterizerOpenGL::SetupVertexInstances() {
+void RasterizerOpenGL::SyncVertexInstances() {
auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::VertexInstances]) {
return;
@@ -346,17 +244,7 @@ void RasterizerOpenGL::SetupVertexInstances() {
}
}
-GLintptr RasterizerOpenGL::SetupIndexBuffer() {
- MICROPROFILE_SCOPE(OpenGL_Index);
- const auto& regs = maxwell3d.regs;
- const std::size_t size = CalculateIndexBufferSize();
- const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
- glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle);
- return info.offset;
-}
-
-void RasterizerOpenGL::SetupShaders() {
- MICROPROFILE_SCOPE(OpenGL_Shader);
+void RasterizerOpenGL::SetupShaders(bool is_indexed) {
u32 clip_distances = 0;
std::array<Shader*, Maxwell::MaxShaderStage> shaders{};
@@ -413,11 +301,19 @@ void RasterizerOpenGL::SetupShaders() {
const size_t stage = index == 0 ? 0 : index - 1;
shaders[stage] = shader;
- SetupDrawConstBuffers(stage, shader);
- SetupDrawGlobalMemory(stage, shader);
SetupDrawTextures(shader, stage);
SetupDrawImages(shader, stage);
+ buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers);
+
+ buffer_cache.UnbindGraphicsStorageBuffers(stage);
+ u32 ssbo_index = 0;
+ for (const auto& buffer : shader->GetEntries().global_memory_entries) {
+ buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
+ buffer.cbuf_offset, buffer.is_written);
+ ++ssbo_index;
+ }
+
// Workaround for Intel drivers.
// When a clip distance is enabled but not set in the shader it crops parts of the screen
// (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
@@ -433,43 +329,26 @@ void RasterizerOpenGL::SetupShaders() {
SyncClipEnabled(clip_distances);
maxwell3d.dirty.flags[Dirty::Shaders] = false;
+ buffer_cache.UpdateGraphicsBuffers(is_indexed);
+
const std::span indices_span(image_view_indices.data(), image_view_indices.size());
texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
+ buffer_cache.BindHostGeometryBuffers(is_indexed);
+
size_t image_view_index = 0;
size_t texture_index = 0;
size_t image_index = 0;
for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
const Shader* const shader = shaders[stage];
- if (shader) {
- const auto base = device.GetBaseBindings(stage);
- BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
- texture_index, image_index);
- }
- }
-}
-
-std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
- const auto& regs = maxwell3d.regs;
-
- std::size_t size = 0;
- for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
- if (!regs.vertex_array[index].IsEnabled())
+ if (!shader) {
continue;
-
- const GPUVAddr start = regs.vertex_array[index].StartAddress();
- const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
-
- size += end - start;
- ASSERT(end >= start);
+ }
+ buffer_cache.BindHostStageBuffers(stage);
+ const auto& base = device.GetBaseBindings(stage);
+ BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
+ texture_index, image_index);
}
-
- return size;
-}
-
-std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const {
- return static_cast<std::size_t>(maxwell3d.regs.index_array.count) *
- static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes());
}
void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
@@ -478,6 +357,7 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& s
}
void RasterizerOpenGL::Clear() {
+ MICROPROFILE_SCOPE(OpenGL_Clears);
if (!maxwell3d.ShouldExecute()) {
return;
}
@@ -528,11 +408,9 @@ void RasterizerOpenGL::Clear() {
}
UNIMPLEMENTED_IF(regs.clear_flags.viewport);
- {
- auto lock = texture_cache.AcquireLock();
- texture_cache.UpdateRenderTargets(true);
- state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
- }
+ std::scoped_lock lock{texture_cache.mutex};
+ texture_cache.UpdateRenderTargets(true);
+ state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
if (use_color) {
glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
@@ -544,7 +422,6 @@ void RasterizerOpenGL::Clear() {
} else if (use_stencil) {
glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
}
-
++num_queued_commands;
}
@@ -553,75 +430,12 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
query_cache.UpdateCounters();
- SyncViewport();
- SyncRasterizeEnable();
- SyncPolygonModes();
- SyncColorMask();
- SyncFragmentColorClampState();
- SyncMultiSampleState();
- SyncDepthTestState();
- SyncDepthClamp();
- SyncStencilTestState();
- SyncBlendState();
- SyncLogicOpState();
- SyncCullMode();
- SyncPrimitiveRestart();
- SyncScissorTest();
- SyncPointState();
- SyncLineState();
- SyncPolygonOffset();
- SyncAlphaTest();
- SyncFramebufferSRGB();
-
- buffer_cache.Acquire();
- current_cbuf = 0;
-
- std::size_t buffer_size = CalculateVertexArraysSize();
-
- // Add space for index buffer
- if (is_indexed) {
- buffer_size = Common::AlignUp(buffer_size, 4) + CalculateIndexBufferSize();
- }
-
- // Uniform space for the 5 shader stages
- buffer_size =
- Common::AlignUp<std::size_t>(buffer_size, 4) +
- (sizeof(MaxwellUniformData) + device.GetUniformBufferAlignment()) * Maxwell::MaxShaderStage;
-
- // Add space for at least 18 constant buffers
- buffer_size += Maxwell::MaxConstBuffers *
- (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
-
- // Prepare the vertex array.
- buffer_cache.Map(buffer_size);
-
- // Prepare vertex array format.
- SetupVertexFormat();
-
- // Upload vertex and index data.
- SetupVertexBuffer();
- SetupVertexInstances();
- GLintptr index_buffer_offset = 0;
- if (is_indexed) {
- index_buffer_offset = SetupIndexBuffer();
- }
-
- // Setup emulation uniform buffer.
- if (!device.UseAssemblyShaders()) {
- MaxwellUniformData ubo;
- ubo.SetFromRegs(maxwell3d);
- const auto info =
- buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
- glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset,
- static_cast<GLsizeiptr>(sizeof(ubo)));
- }
+ SyncState();
// Setup shaders and their used resources.
- auto lock = texture_cache.AcquireLock();
- SetupShaders();
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+ SetupShaders(is_indexed);
- // Signal the buffer cache that we are not going to upload more things.
- buffer_cache.Unmap();
texture_cache.UpdateRenderTargets(false);
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
program_manager.BindGraphicsPipeline();
@@ -635,7 +449,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
if (is_indexed) {
const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vb_element_base);
const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.index_array.count);
- const GLvoid* offset = reinterpret_cast<const GLvoid*>(index_buffer_offset);
+ const GLvoid* const offset = buffer_cache_runtime.IndexOffset();
const GLenum format = MaxwellToGL::IndexFormat(maxwell3d.regs.index_array.format);
if (num_instances == 1 && base_instance == 0 && base_vertex == 0) {
glDrawElements(primitive_mode, num_vertices, format, offset);
@@ -675,22 +489,22 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
}
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
- buffer_cache.Acquire();
- current_cbuf = 0;
-
Shader* const kernel = shader_cache.GetComputeKernel(code_addr);
- auto lock = texture_cache.AcquireLock();
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
BindComputeTextures(kernel);
- const size_t buffer_size = Tegra::Engines::KeplerCompute::NumConstBuffers *
- (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
- buffer_cache.Map(buffer_size);
-
- SetupComputeConstBuffers(kernel);
- SetupComputeGlobalMemory(kernel);
-
- buffer_cache.Unmap();
+ const auto& entries = kernel->GetEntries();
+ buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers);
+ buffer_cache.UnbindComputeStorageBuffers();
+ u32 ssbo_index = 0;
+ for (const auto& buffer : entries.global_memory_entries) {
+ buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
+ buffer.is_written);
+ ++ssbo_index;
+ }
+ buffer_cache.UpdateComputeBuffers();
+ buffer_cache.BindHostComputeBuffers();
const auto& launch_desc = kepler_compute.launch_description;
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
@@ -706,6 +520,12 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
query_cache.Query(gpu_addr, type, timestamp);
}
+void RasterizerOpenGL::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
+ u32 size) {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.BindGraphicsUniformBuffer(stage, index, gpu_addr, size);
+}
+
void RasterizerOpenGL::FlushAll() {}
void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
@@ -714,19 +534,23 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
return;
}
{
- auto lock = texture_cache.AcquireLock();
+ std::scoped_lock lock{texture_cache.mutex};
texture_cache.DownloadMemory(addr, size);
}
- buffer_cache.FlushRegion(addr, size);
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.DownloadMemory(addr, size);
+ }
query_cache.FlushRegion(addr, size);
}
bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
if (!Settings::IsGPULevelHigh()) {
- return buffer_cache.MustFlushRegion(addr, size);
+ return buffer_cache.IsRegionGpuModified(addr, size);
}
return texture_cache.IsRegionGpuModified(addr, size) ||
- buffer_cache.MustFlushRegion(addr, size);
+ buffer_cache.IsRegionGpuModified(addr, size);
}
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
@@ -735,11 +559,14 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
return;
}
{
- auto lock = texture_cache.AcquireLock();
+ std::scoped_lock lock{texture_cache.mutex};
texture_cache.WriteMemory(addr, size);
}
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.WriteMemory(addr, size);
+ }
shader_cache.InvalidateRegion(addr, size);
- buffer_cache.InvalidateRegion(addr, size);
query_cache.InvalidateRegion(addr, size);
}
@@ -748,26 +575,35 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
+ shader_cache.OnCPUWrite(addr, size);
{
- auto lock = texture_cache.AcquireLock();
+ std::scoped_lock lock{texture_cache.mutex};
texture_cache.WriteMemory(addr, size);
}
- shader_cache.OnCPUWrite(addr, size);
- buffer_cache.OnCPUWrite(addr, size);
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.CachedWriteMemory(addr, size);
+ }
}
void RasterizerOpenGL::SyncGuestHost() {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
- buffer_cache.SyncGuestHost();
shader_cache.SyncGuestHost();
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.FlushCachedWrites();
+ }
}
void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
{
- auto lock = texture_cache.AcquireLock();
+ std::scoped_lock lock{texture_cache.mutex};
texture_cache.UnmapMemory(addr, size);
}
- buffer_cache.OnCPUWrite(addr, size);
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.WriteMemory(addr, size);
+ }
shader_cache.OnCPUWrite(addr, size);
}
@@ -802,14 +638,7 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
}
void RasterizerOpenGL::WaitForIdle() {
- // Place a barrier on everything that is not framebuffer related.
- // This is related to another flag that is not currently implemented.
- glMemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT | GL_ELEMENT_ARRAY_BARRIER_BIT |
- GL_UNIFORM_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT |
- GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_COMMAND_BARRIER_BIT |
- GL_PIXEL_BUFFER_BARRIER_BIT | GL_TEXTURE_UPDATE_BARRIER_BIT |
- GL_BUFFER_UPDATE_BARRIER_BIT | GL_TRANSFORM_FEEDBACK_BARRIER_BIT |
- GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT);
+ glMemoryBarrier(GL_ALL_BARRIER_BITS);
}
void RasterizerOpenGL::FragmentBarrier() {
@@ -834,18 +663,21 @@ void RasterizerOpenGL::TickFrame() {
num_queued_commands = 0;
fence_manager.TickFrame();
- buffer_cache.TickFrame();
{
- auto lock = texture_cache.AcquireLock();
+ std::scoped_lock lock{texture_cache.mutex};
texture_cache.TickFrame();
}
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.TickFrame();
+ }
}
bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) {
MICROPROFILE_SCOPE(OpenGL_Blits);
- auto lock = texture_cache.AcquireLock();
+ std::scoped_lock lock{texture_cache.mutex};
texture_cache.BlitImage(dst, src, copy_config);
return true;
}
@@ -857,7 +689,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
}
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
- auto lock = texture_cache.AcquireLock();
+ std::scoped_lock lock{texture_cache.mutex};
ImageView* const image_view{texture_cache.TryFindFramebufferImageView(framebuffer_addr)};
if (!image_view) {
return false;
@@ -924,166 +756,6 @@ void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_te
}
}
-void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
- static constexpr std::array PARAMETER_LUT{
- GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
- GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
- GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV,
- };
- MICROPROFILE_SCOPE(OpenGL_UBO);
- const auto& stages = maxwell3d.state.shader_stages;
- const auto& shader_stage = stages[stage_index];
- const auto& entries = shader->GetEntries();
- const bool use_unified = entries.use_unified_uniforms;
- const std::size_t base_unified_offset = stage_index * NUM_CONST_BUFFERS_BYTES_PER_STAGE;
-
- const auto base_bindings = device.GetBaseBindings(stage_index);
- u32 binding = device.UseAssemblyShaders() ? 0 : base_bindings.uniform_buffer;
- for (const auto& entry : entries.const_buffers) {
- const u32 index = entry.GetIndex();
- const auto& buffer = shader_stage.const_buffers[index];
- SetupConstBuffer(PARAMETER_LUT[stage_index], binding, buffer, entry, use_unified,
- base_unified_offset + index * Maxwell::MaxConstBufferSize);
- ++binding;
- }
- if (use_unified) {
- const u32 index = static_cast<u32>(base_bindings.shader_storage_buffer +
- entries.global_memory_entries.size());
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle,
- base_unified_offset, NUM_CONST_BUFFERS_BYTES_PER_STAGE);
- }
-}
-
-void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) {
- MICROPROFILE_SCOPE(OpenGL_UBO);
- const auto& launch_desc = kepler_compute.launch_description;
- const auto& entries = kernel->GetEntries();
- const bool use_unified = entries.use_unified_uniforms;
-
- u32 binding = 0;
- for (const auto& entry : entries.const_buffers) {
- const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
- const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
- Tegra::Engines::ConstBufferInfo buffer;
- buffer.address = config.Address();
- buffer.size = config.size;
- buffer.enabled = mask[entry.GetIndex()];
- SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding, buffer, entry,
- use_unified, entry.GetIndex() * Maxwell::MaxConstBufferSize);
- ++binding;
- }
- if (use_unified) {
- const GLuint index = static_cast<GLuint>(entries.global_memory_entries.size());
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle, 0,
- NUM_CONST_BUFFERS_BYTES_PER_STAGE);
- }
-}
-
-void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
- const Tegra::Engines::ConstBufferInfo& buffer,
- const ConstBufferEntry& entry, bool use_unified,
- std::size_t unified_offset) {
- if (!buffer.enabled) {
- // Set values to zero to unbind buffers
- if (device.UseAssemblyShaders()) {
- glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0);
- } else {
- glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float));
- }
- return;
- }
-
- // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140
- // UBO alignment requirements.
- const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
-
- const bool fast_upload = !use_unified && device.HasFastBufferSubData();
-
- const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment();
- const GPUVAddr gpu_addr = buffer.address;
- auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload);
-
- if (device.UseAssemblyShaders()) {
- UNIMPLEMENTED_IF(use_unified);
- if (info.offset != 0) {
- const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
- glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size);
- info.handle = staging_cbuf;
- info.offset = 0;
- }
- glBindBufferRangeNV(stage, binding, info.handle, info.offset, size);
- return;
- }
-
- if (use_unified) {
- glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset,
- unified_offset, size);
- } else {
- glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size);
- }
-}
-
-void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) {
- static constexpr std::array TARGET_LUT = {
- GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
- GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
- };
- const auto& cbufs{maxwell3d.state.shader_stages[stage_index]};
- const auto& entries{shader->GetEntries().global_memory_entries};
-
- std::array<BindlessSSBO, 32> ssbos;
- ASSERT(entries.size() < ssbos.size());
-
- const bool assembly_shaders = device.UseAssemblyShaders();
- u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
- for (const auto& entry : entries) {
- const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
- const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
- const u32 size{gpu_memory.Read<u32>(addr + 8)};
- SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
- ++binding;
- }
- if (assembly_shaders) {
- UpdateBindlessSSBOs(TARGET_LUT[stage_index], ssbos.data(), entries.size());
- }
-}
-
-void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
- const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
- const auto& entries{kernel->GetEntries().global_memory_entries};
-
- std::array<BindlessSSBO, 32> ssbos;
- ASSERT(entries.size() < ssbos.size());
-
- u32 binding = 0;
- for (const auto& entry : entries) {
- const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset};
- const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
- const u32 size{gpu_memory.Read<u32>(addr + 8)};
- SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
- ++binding;
- }
- if (device.UseAssemblyShaders()) {
- UpdateBindlessSSBOs(GL_COMPUTE_PROGRAM_NV, ssbos.data(), ssbos.size());
- }
-}
-
-void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
- GPUVAddr gpu_addr, size_t size, BindlessSSBO* ssbo) {
- const size_t alignment{device.GetShaderStorageBufferAlignment()};
- const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
- if (device.UseAssemblyShaders()) {
- *ssbo = BindlessSSBO{
- .address = static_cast<GLuint64EXT>(info.address + info.offset),
- .length = static_cast<GLsizei>(size),
- .padding = 0,
- };
- } else {
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
- static_cast<GLsizeiptr>(size));
- }
-}
-
void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) {
const bool via_header_index =
maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
@@ -1131,6 +803,30 @@ void RasterizerOpenGL::SetupComputeImages(const Shader* shader) {
}
}
+void RasterizerOpenGL::SyncState() {
+ SyncViewport();
+ SyncRasterizeEnable();
+ SyncPolygonModes();
+ SyncColorMask();
+ SyncFragmentColorClampState();
+ SyncMultiSampleState();
+ SyncDepthTestState();
+ SyncDepthClamp();
+ SyncStencilTestState();
+ SyncBlendState();
+ SyncLogicOpState();
+ SyncCullMode();
+ SyncPrimitiveRestart();
+ SyncScissorTest();
+ SyncPointState();
+ SyncLineState();
+ SyncPolygonOffset();
+ SyncAlphaTest();
+ SyncFramebufferSRGB();
+ SyncVertexFormats();
+ SyncVertexInstances();
+}
+
void RasterizerOpenGL::SyncViewport() {
auto& flags = maxwell3d.dirty.flags;
const auto& regs = maxwell3d.regs;
@@ -1166,9 +862,11 @@ void RasterizerOpenGL::SyncViewport() {
if (regs.screen_y_control.y_negate != 0) {
flip_y = !flip_y;
}
- glClipControl(flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT,
- regs.depth_mode == Maxwell::DepthMode::ZeroToOne ? GL_ZERO_TO_ONE
- : GL_NEGATIVE_ONE_TO_ONE);
+ const bool is_zero_to_one = regs.depth_mode == Maxwell::DepthMode::ZeroToOne;
+ const GLenum origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT;
+ const GLenum depth = is_zero_to_one ? GL_ZERO_TO_ONE : GL_NEGATIVE_ONE_TO_ONE;
+ state_tracker.ClipControl(origin, depth);
+ state_tracker.SetYNegate(regs.screen_y_control.y_negate != 0);
}
if (dirty_viewport) {
@@ -1191,7 +889,11 @@ void RasterizerOpenGL::SyncViewport() {
const GLdouble reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
const GLdouble near_depth = src.translate_z - src.scale_z * reduce_z;
const GLdouble far_depth = src.translate_z + src.scale_z;
- glDepthRangeIndexed(static_cast<GLuint>(i), near_depth, far_depth);
+ if (device.HasDepthBufferFloat()) {
+ glDepthRangeIndexeddNV(static_cast<GLuint>(i), near_depth, far_depth);
+ } else {
+ glDepthRangeIndexed(static_cast<GLuint>(i), near_depth, far_depth);
+ }
if (!GLAD_GL_NV_viewport_swizzle) {
continue;
@@ -1652,36 +1354,13 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
if (regs.tfb_enabled == 0) {
return;
}
-
if (device.UseAssemblyShaders()) {
SyncTransformFeedback();
}
-
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
-
- for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
- const auto& binding = regs.tfb_bindings[index];
- if (!binding.buffer_enable) {
- if (enabled_transform_feedback_buffers[index]) {
- glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), 0, 0,
- 0);
- }
- enabled_transform_feedback_buffers[index] = false;
- continue;
- }
- enabled_transform_feedback_buffers[index] = true;
-
- auto& tfb_buffer = transform_feedback_buffers[index];
- tfb_buffer.Create();
-
- const GLuint handle = tfb_buffer.handle;
- const std::size_t size = binding.buffer_size;
- glNamedBufferData(handle, static_cast<GLsizeiptr>(size), nullptr, GL_STREAM_COPY);
- glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), handle, 0,
- static_cast<GLsizeiptr>(size));
- }
+ UNIMPLEMENTED_IF(primitive_mode != GL_POINTS);
// We may have to call BeginTransformFeedbackNV here since they seem to call different
// implementations on Nvidia's driver (the pointer is different) but we are using
@@ -1695,23 +1374,7 @@ void RasterizerOpenGL::EndTransformFeedback() {
if (regs.tfb_enabled == 0) {
return;
}
-
glEndTransformFeedback();
-
- for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
- const auto& binding = regs.tfb_bindings[index];
- if (!binding.buffer_enable) {
- continue;
- }
- UNIMPLEMENTED_IF(binding.buffer_offset != 0);
-
- const GLuint handle = transform_feedback_buffers[index].handle;
- const GPUVAddr gpu_addr = binding.Address();
- const std::size_t size = binding.buffer_size;
- const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
- glCopyNamedBufferSubData(handle, info.handle, 0, info.offset,
- static_cast<GLsizeiptr>(size));
- }
}
} // namespace OpenGL
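SyncState fans out to one helper per piece of fixed-function state; this stays cheap per draw because each Sync* helper is expected to early-out on its dirty flag. A minimal sketch of that shape, assuming yuzu's Dirty::RasterizeEnable flag name and an oglEnable helper that wraps glEnable/glDisable:

    // Hedged sketch, not the literal implementation:
    void RasterizerOpenGL::SyncRasterizeEnable() {
        auto& flags = maxwell3d.dirty.flags;
        if (!flags[Dirty::RasterizeEnable]) {
            return; // Registers untouched since the last draw
        }
        flags[Dirty::RasterizeEnable] = false;
        oglEnable(GL_RASTERIZER_DISCARD, maxwell3d.regs.rasterize_enable == 0);
    }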
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 82e03e677..3745cf637 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -30,7 +30,6 @@
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
-#include "video_core/renderer_opengl/gl_stream_buffer.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/shader/async_shaders.h"
#include "video_core/textures/texture.h"
@@ -72,6 +71,7 @@ public:
void DispatchCompute(GPUVAddr code_addr) override;
void ResetCounter(VideoCore::QueryType type) override;
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
+ void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
void FlushAll() override;
void FlushRegion(VAddr addr, u64 size) override;
bool MustFlushRegion(VAddr addr, u64 size) override;
@@ -119,27 +119,6 @@ private:
void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image,
size_t& image_view_index, size_t& texture_index, size_t& image_index);
- /// Configures the current constbuffers to use for the draw command.
- void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader);
-
- /// Configures the current constbuffers to use for the kernel invocation.
- void SetupComputeConstBuffers(Shader* kernel);
-
- /// Configures a constant buffer.
- void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
- const ConstBufferEntry& entry, bool use_unified,
- std::size_t unified_offset);
-
- /// Configures the current global memory entries to use for the draw command.
- void SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader);
-
- /// Configures the current global memory entries to use for the kernel invocation.
- void SetupComputeGlobalMemory(Shader* kernel);
-
- /// Configures a global memory buffer.
- void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
- size_t size, BindlessSSBO* ssbo);
-
/// Configures the current textures to use for the draw command.
void SetupDrawTextures(const Shader* shader, size_t stage_index);
@@ -152,6 +131,9 @@ private:
/// Configures images in a compute shader.
void SetupComputeImages(const Shader* shader);
+ /// Syncs the whole OpenGL state to match the guest state
+ void SyncState();
+
/// Syncs the viewport and depth range to match the guest state
void SyncViewport();
@@ -215,6 +197,12 @@ private:
/// Syncs the framebuffer sRGB state to match the guest state
void SyncFramebufferSRGB();
+ /// Syncs vertex formats to match the guest state
+ void SyncVertexFormats();
+
+ /// Syncs vertex instances to match the guest state
+ void SyncVertexInstances();
+
/// Syncs transform feedback state to match guest state
/// @note Only valid on assembly shaders
void SyncTransformFeedback();
@@ -225,19 +213,7 @@ private:
/// End a transform feedback
void EndTransformFeedback();
- std::size_t CalculateVertexArraysSize() const;
-
- std::size_t CalculateIndexBufferSize() const;
-
- /// Updates the current vertex format
- void SetupVertexFormat();
-
- void SetupVertexBuffer();
- void SetupVertexInstances();
-
- GLintptr SetupIndexBuffer();
-
- void SetupShaders();
+ void SetupShaders(bool is_indexed);
Tegra::GPU& gpu;
Tegra::Engines::Maxwell3D& maxwell3d;
@@ -249,12 +225,12 @@ private:
ProgramManager& program_manager;
StateTracker& state_tracker;
- OGLStreamBuffer stream_buffer;
TextureCacheRuntime texture_cache_runtime;
TextureCache texture_cache;
+ BufferCacheRuntime buffer_cache_runtime;
+ BufferCache buffer_cache;
ShaderCacheOpenGL shader_cache;
QueryCache query_cache;
- OGLBufferCache buffer_cache;
FenceManagerOpenGL fence_manager;
VideoCommon::Shader::AsyncShaders async_shaders;
@@ -262,20 +238,8 @@ private:
boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
- std::array<GLuint, MAX_TEXTURES> texture_handles;
- std::array<GLuint, MAX_IMAGES> image_handles;
-
- std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
- transform_feedback_buffers;
- std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
- enabled_transform_feedback_buffers;
-
- static constexpr std::size_t NUM_CONSTANT_BUFFERS =
- Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
- Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
- std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{};
- std::size_t current_cbuf = 0;
- OGLBuffer unified_uniform_buffer;
+ std::array<GLuint, MAX_TEXTURES> texture_handles{};
+ std::array<GLuint, MAX_IMAGES> image_handles{};
/// Number of commands queued to the OpenGL driver. Reset on flush.
std::size_t num_queued_commands = 0;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index 0e34a0f20..3428e5e21 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -171,12 +171,6 @@ void OGLBuffer::Release() {
handle = 0;
}
-void OGLBuffer::MakeStreamCopy(std::size_t buffer_size) {
- ASSERT_OR_EXECUTE((handle != 0 && buffer_size != 0), { return; });
-
- glNamedBufferData(handle, buffer_size, nullptr, GL_STREAM_COPY);
-}
-
void OGLSync::Create() {
if (handle != 0)
return;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index f48398669..552d79db4 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -234,9 +234,6 @@ public:
/// Deletes the internal OpenGL resource
void Release();
- // Converts the buffer into a stream copy buffer with a fixed size
- void MakeStreamCopy(std::size_t buffer_size);
-
GLuint handle = 0;
};
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 529570ff0..5cf7cd151 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -335,6 +335,10 @@ void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, const std::atomic_bool& stop
const VideoCore::DiskResourceLoadCallback& callback) {
disk_cache.BindTitleID(title_id);
const std::optional transferable = disk_cache.LoadTransferable();
+
+ LOG_INFO(Render_OpenGL, "Total Shader Count: {}",
+ transferable.has_value() ? transferable->size() : 0);
+
if (!transferable) {
return;
}
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index c35b71b6b..ac78d344c 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -64,7 +64,7 @@ using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>
constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32);
constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32);
-constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt
+constexpr std::string_view COMMON_DECLARATIONS = R"(#define ftoi floatBitsToInt
#define ftou floatBitsToUint
#define itof intBitsToFloat
#define utof uintBitsToFloat
@@ -77,10 +77,6 @@ bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{
const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
-
-layout (std140, binding = {}) uniform vs_config {{
- float y_direction;
-}};
)";
class ShaderWriter final {
@@ -402,13 +398,6 @@ std::string FlowStackTopName(MetaStackClass stack) {
return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
}
-bool UseUnifiedUniforms(const Device& device, const ShaderIR& ir, ShaderType stage) {
- const u32 num_ubos = static_cast<u32>(ir.GetConstantBuffers().size());
- // We waste one UBO for emulation
- const u32 num_available_ubos = device.GetMaxUniformBuffers(stage) - 1;
- return num_ubos > num_available_ubos;
-}
-
struct GenericVaryingDescription {
std::string name;
u8 first_element = 0;
@@ -420,9 +409,8 @@ public:
explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
ShaderType stage_, std::string_view identifier_,
std::string_view suffix_)
- : device{device_}, ir{ir_}, registry{registry_}, stage{stage_}, identifier{identifier_},
- suffix{suffix_}, header{ir.GetHeader()}, use_unified_uniforms{
- UseUnifiedUniforms(device_, ir_, stage_)} {
+ : device{device_}, ir{ir_}, registry{registry_}, stage{stage_},
+ identifier{identifier_}, suffix{suffix_}, header{ir.GetHeader()} {
if (stage != ShaderType::Compute) {
transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
}
@@ -516,7 +504,8 @@ private:
if (!identifier.empty()) {
code.AddLine("// {}", identifier);
}
- code.AddLine("#version 440 {}", ir.UsesLegacyVaryings() ? "compatibility" : "core");
+ const bool use_compatibility = ir.UsesLegacyVaryings() || ir.UsesYNegate();
+ code.AddLine("#version 440 {}", use_compatibility ? "compatibility" : "core");
code.AddLine("#extension GL_ARB_separate_shader_objects : enable");
if (device.HasShaderBallot()) {
code.AddLine("#extension GL_ARB_shader_ballot : require");
@@ -542,7 +531,7 @@ private:
code.AddNewLine();
- code.AddLine(CommonDeclarations, EmulationUniformBlockBinding);
+ code.AddLine(COMMON_DECLARATIONS);
}
void DeclareVertex() {
@@ -865,17 +854,6 @@ private:
}
void DeclareConstantBuffers() {
- if (use_unified_uniforms) {
- const u32 binding = device.GetBaseBindings(stage).shader_storage_buffer +
- static_cast<u32>(ir.GetGlobalMemory().size());
- code.AddLine("layout (std430, binding = {}) readonly buffer UnifiedUniforms {{",
- binding);
- code.AddLine(" uint cbufs[];");
- code.AddLine("}};");
- code.AddNewLine();
- return;
- }
-
u32 binding = device.GetBaseBindings(stage).uniform_buffer;
for (const auto& [index, info] : ir.GetConstantBuffers()) {
const u32 num_elements = Common::DivCeil(info.GetSize(), 4 * sizeof(u32));
@@ -1081,29 +1059,17 @@ private:
if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
const Node offset = cbuf->GetOffset();
- const u32 base_unified_offset = cbuf->GetIndex() * MAX_CONSTBUFFER_SCALARS;
if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
// Direct access
const u32 offset_imm = immediate->GetValue();
ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
- if (use_unified_uniforms) {
- return {fmt::format("cbufs[{}]", base_unified_offset + offset_imm / 4),
- Type::Uint};
- } else {
- return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
- offset_imm / (4 * 4), (offset_imm / 4) % 4),
- Type::Uint};
- }
- }
-
- // Indirect access
- if (use_unified_uniforms) {
- return {fmt::format("cbufs[{} + ({} >> 2)]", base_unified_offset,
- Visit(offset).AsUint()),
+ return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
+ offset_imm / (4 * 4), (offset_imm / 4) % 4),
Type::Uint};
}
+ // Indirect access
const std::string final_offset = code.GenerateTemporary();
code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint());
@@ -2293,7 +2259,6 @@ private:
}
}
}
-
if (header.ps.omap.depth) {
// The depth output is always 2 registers after the last color output, and current_reg
// already contains one past the last color register.
@@ -2337,7 +2302,8 @@ private:
}
Expression YNegate(Operation operation) {
- return {"y_direction", Type::Float};
+ // Y_NEGATE is mapped to the gl_FrontMaterial.ambient.a compatibility value
+ return {"gl_FrontMaterial.ambient.a", Type::Float};
}
template <u32 element>
@@ -2787,7 +2753,6 @@ private:
const std::string_view identifier;
const std::string_view suffix;
const Header header;
- const bool use_unified_uniforms;
std::unordered_map<u8, VaryingTFB> transform_feedback;
ShaderWriter code;
@@ -3003,8 +2968,10 @@ ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType s
for (std::size_t i = 0; i < std::size(clip_distances); ++i) {
entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i;
}
+ for (const auto& buffer : entries.const_buffers) {
+ entries.enabled_uniform_buffers |= 1U << buffer.GetIndex();
+ }
entries.shader_length = ir.GetLength();
- entries.use_unified_uniforms = UseUnifiedUniforms(device, ir, stage);
return entries;
}
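The enabled_uniform_buffers bitmask replaces the old unified-uniforms boolean: bit N is set when constant buffer N is declared by the shader, so a consumer can bind only the buffers that are actually used. A hedged C++20 sketch of walking the mask (BindUniformBuffer is a hypothetical per-index bind):

    #include <bit>

    u32 mask = entries.enabled_uniform_buffers;
    while (mask != 0) {
        const u32 index = static_cast<u32>(std::countr_zero(mask));
        mask &= mask - 1;          // Clear the lowest set bit
        BindUniformBuffer(index);  // Bind constant buffer `index` only
    }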
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index be68994bb..0397a000c 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -55,7 +55,7 @@ struct ShaderEntries {
std::vector<ImageEntry> images;
std::size_t shader_length{};
u32 clip_distances{};
- bool use_unified_uniforms{};
+ u32 enabled_uniform_buffers{};
};
ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp
index 60e6fa39f..dbdf5230f 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.cpp
+++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp
@@ -36,16 +36,10 @@ void SetupDirtyColorMasks(Tables& tables) {
FillBlock(tables[1], OFF(color_mask), NUM(color_mask), ColorMasks);
}
-void SetupDirtyVertexArrays(Tables& tables) {
- static constexpr std::size_t num_array = 3;
+void SetupDirtyVertexInstances(Tables& tables) {
static constexpr std::size_t instance_base_offset = 3;
for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]);
- const std::size_t limit_offset = OFF(vertex_array_limit) + i * NUM(vertex_array_limit[0]);
-
- FillBlock(tables, array_offset, num_array, VertexBuffer0 + i, VertexBuffers);
- FillBlock(tables, limit_offset, NUM(vertex_array_limit), VertexBuffer0 + i, VertexBuffers);
-
const std::size_t instance_array_offset = array_offset + instance_base_offset;
tables[0][instance_array_offset] = static_cast<u8>(VertexInstance0 + i);
tables[1][instance_array_offset] = VertexInstances;
@@ -217,11 +211,11 @@ void SetupDirtyMisc(Tables& tables) {
StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} {
auto& dirty = gpu.Maxwell3D().dirty;
auto& tables = dirty.tables;
- SetupDirtyRenderTargets(tables);
+ SetupDirtyFlags(tables);
SetupDirtyColorMasks(tables);
SetupDirtyViewports(tables);
SetupDirtyScissors(tables);
- SetupDirtyVertexArrays(tables);
+ SetupDirtyVertexInstances(tables);
SetupDirtyVertexFormat(tables);
SetupDirtyShaders(tables);
SetupDirtyPolygonModes(tables);
@@ -241,19 +235,6 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags}
SetupDirtyClipControl(tables);
SetupDirtyDepthClampEnabled(tables);
SetupDirtyMisc(tables);
-
- auto& store = dirty.on_write_stores;
- store[VertexBuffers] = true;
- for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
- store[VertexBuffer0 + i] = true;
- }
-}
-
-void StateTracker::InvalidateStreamBuffer() {
- flags[Dirty::VertexBuffers] = true;
- for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
- flags[index] = true;
- }
}
} // namespace OpenGL
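The two dirty tables encode one fine-grained and one coarse flag per register: tables[0] maps the instance-divisor register of array i to VertexInstance0 + i, while tables[1] maps it to the group flag VertexInstances. A hedged sketch of how a Maxwell3D register write is presumed to consume them:

    // Assumed shape of the dirty-flag raise on a register write:
    void OnRegisterWrite(std::size_t method) {
        flags[tables[0][method]] = true; // Fine-grained, e.g. VertexInstance3
        flags[tables[1][method]] = true; // Coarse group, e.g. VertexInstances
    }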
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h
index 574615d3c..94c905116 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.h
+++ b/src/video_core/renderer_opengl/gl_state_tracker.h
@@ -28,10 +28,6 @@ enum : u8 {
VertexFormat0,
VertexFormat31 = VertexFormat0 + 31,
- VertexBuffers,
- VertexBuffer0,
- VertexBuffer31 = VertexBuffer0 + 31,
-
VertexInstances,
VertexInstance0,
VertexInstance31 = VertexInstance0 + 31,
@@ -92,8 +88,6 @@ class StateTracker {
public:
explicit StateTracker(Tegra::GPU& gpu);
- void InvalidateStreamBuffer();
-
void BindIndexBuffer(GLuint new_index_buffer) {
if (index_buffer == new_index_buffer) {
return;
@@ -110,13 +104,32 @@ public:
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer);
}
+ void ClipControl(GLenum new_origin, GLenum new_depth) {
+ if (new_origin == origin && new_depth == depth) {
+ return;
+ }
+ origin = new_origin;
+ depth = new_depth;
+ glClipControl(origin, depth);
+ }
+
+ void SetYNegate(bool new_y_negate) {
+ if (new_y_negate == y_negate) {
+ return;
+ }
+ // Y_NEGATE is mapped to gl_FrontMaterial.ambient.a
+ y_negate = new_y_negate;
+ const std::array ambient{0.0f, 0.0f, 0.0f, y_negate ? -1.0f : 1.0f};
+ glMaterialfv(GL_FRONT, GL_AMBIENT, ambient.data());
+ }
+
void NotifyScreenDrawVertexArray() {
flags[OpenGL::Dirty::VertexFormats] = true;
flags[OpenGL::Dirty::VertexFormat0 + 0] = true;
flags[OpenGL::Dirty::VertexFormat0 + 1] = true;
- flags[OpenGL::Dirty::VertexBuffers] = true;
- flags[OpenGL::Dirty::VertexBuffer0] = true;
+ flags[VideoCommon::Dirty::VertexBuffers] = true;
+ flags[VideoCommon::Dirty::VertexBuffer0] = true;
flags[OpenGL::Dirty::VertexInstances] = true;
flags[OpenGL::Dirty::VertexInstance0 + 0] = true;
@@ -202,6 +215,9 @@ private:
GLuint framebuffer = 0;
GLuint index_buffer = 0;
+ GLenum origin = GL_LOWER_LEFT;
+ GLenum depth = GL_NEGATIVE_ONE_TO_ONE;
+ bool y_negate = false;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index e0819cdf2..77b3ee0fe 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -1,70 +1,64 @@
-// Copyright 2018 Citra Emulator Project
+// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <tuple>
-#include <vector>
+#include <array>
+#include <memory>
+#include <span>
+
+#include <glad/glad.h>
#include "common/alignment.h"
#include "common/assert.h"
-#include "common/microprofile.h"
-#include "video_core/renderer_opengl/gl_device.h"
-#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
-MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
- MP_RGB(128, 128, 192));
-
namespace OpenGL {
-OGLStreamBuffer::OGLStreamBuffer(const Device& device, StateTracker& state_tracker_)
- : state_tracker{state_tracker_} {
- gl_buffer.Create();
-
- static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
- glNamedBufferStorage(gl_buffer.handle, BUFFER_SIZE, nullptr, flags);
- mapped_ptr = static_cast<u8*>(
- glMapNamedBufferRange(gl_buffer.handle, 0, BUFFER_SIZE, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
-
- if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {
- glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
- glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
+StreamBuffer::StreamBuffer() {
+ static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
+ buffer.Create();
+ glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer");
+ glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags);
+ mapped_pointer =
+ static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags));
+ for (OGLSync& sync : fences) {
+ sync.Create();
}
}
-OGLStreamBuffer::~OGLStreamBuffer() {
- glUnmapNamedBuffer(gl_buffer.handle);
- gl_buffer.Release();
-}
-
-std::pair<u8*, GLintptr> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
- ASSERT(size <= BUFFER_SIZE);
- ASSERT(alignment <= BUFFER_SIZE);
- mapped_size = size;
-
- if (alignment > 0) {
- buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
+std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept {
+ ASSERT(size < REGION_SIZE);
+ for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end;
+ ++region) {
+ fences[region].Create();
}
+ used_iterator = iterator;
- if (buffer_pos + size > BUFFER_SIZE) {
- MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
- glInvalidateBufferData(gl_buffer.handle);
- state_tracker.InvalidateStreamBuffer();
-
- buffer_pos = 0;
+ for (size_t region = Region(free_iterator) + 1,
+ region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS);
+ region < region_end; ++region) {
+ glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
+ fences[region].Release();
}
-
- return std::make_pair(mapped_ptr + buffer_pos, buffer_pos);
-}
-
-void OGLStreamBuffer::Unmap(GLsizeiptr size) {
- ASSERT(size <= mapped_size);
-
- if (size > 0) {
- glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos, size);
+ if (iterator + size >= free_iterator) {
+ free_iterator = iterator + size;
}
-
- buffer_pos += size;
+ if (iterator + size > STREAM_BUFFER_SIZE) {
+ for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) {
+ fences[region].Create();
+ }
+ used_iterator = 0;
+ iterator = 0;
+ free_iterator = size;
+
+ for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) {
+ glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
+ fences[region].Release();
+ }
+ }
+ const size_t offset = iterator;
+ iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
+ return {std::span(mapped_pointer + offset, size), offset};
}
} // namespace OpenGL
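Because the buffer is created with GL_MAP_PERSISTENT_BIT and GL_MAP_COHERENT_BIT, callers can write through the returned span and bind the same offset without any explicit flush. A hedged usage sketch (upload_size, src_data, and the uniform binding index are assumptions):

    #include <cstring>

    const auto [span, offset] = stream_buffer.Request(upload_size);
    std::memcpy(span.data(), src_data, upload_size); // Coherent mapping: visible to the GPU
    glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer.Handle(),
                      static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(upload_size));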
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h
index dd9cf67eb..6dbb6bfba 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.h
@@ -1,9 +1,12 @@
-// Copyright 2018 Citra Emulator Project
+// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
+#include <array>
+#include <memory>
+#include <span>
#include <utility>
#include <glad/glad.h>
@@ -13,48 +16,35 @@
namespace OpenGL {
-class Device;
-class StateTracker;
+class StreamBuffer {
+ static constexpr size_t STREAM_BUFFER_SIZE = 64 * 1024 * 1024;
+ static constexpr size_t NUM_SYNCS = 16;
+ static constexpr size_t REGION_SIZE = STREAM_BUFFER_SIZE / NUM_SYNCS;
+ static constexpr size_t MAX_ALIGNMENT = 256;
+ static_assert(STREAM_BUFFER_SIZE % MAX_ALIGNMENT == 0);
+ static_assert(STREAM_BUFFER_SIZE % NUM_SYNCS == 0);
+ static_assert(REGION_SIZE % MAX_ALIGNMENT == 0);
-class OGLStreamBuffer : private NonCopyable {
public:
- explicit OGLStreamBuffer(const Device& device, StateTracker& state_tracker_);
- ~OGLStreamBuffer();
-
- /*
- * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
- * and the optional alignment requirement.
- * If the buffer is full, the whole buffer is reallocated which invalidates old chunks.
- * The return values are the pointer to the new chunk, and the offset within the buffer.
- * The actual used size must be specified on unmapping the chunk.
- */
- std::pair<u8*, GLintptr> Map(GLsizeiptr size, GLintptr alignment = 0);
-
- void Unmap(GLsizeiptr size);
-
- GLuint Handle() const {
- return gl_buffer.handle;
- }
+ explicit StreamBuffer();
- u64 Address() const {
- return gpu_address;
- }
+ [[nodiscard]] std::pair<std::span<u8>, size_t> Request(size_t size) noexcept;
- GLsizeiptr Size() const noexcept {
- return BUFFER_SIZE;
+ [[nodiscard]] GLuint Handle() const noexcept {
+ return buffer.handle;
}
private:
- static constexpr GLsizeiptr BUFFER_SIZE = 256 * 1024 * 1024;
-
- StateTracker& state_tracker;
-
- OGLBuffer gl_buffer;
+ [[nodiscard]] static size_t Region(size_t offset) noexcept {
+ return offset / REGION_SIZE;
+ }
- GLuint64EXT gpu_address = 0;
- GLintptr buffer_pos = 0;
- GLsizeiptr mapped_size = 0;
- u8* mapped_ptr = nullptr;
+ size_t iterator = 0;
+ size_t used_iterator = 0;
+ size_t free_iterator = 0;
+ u8* mapped_pointer = nullptr;
+ OGLBuffer buffer;
+ std::array<OGLSync, NUM_SYNCS> fences;
};
} // namespace OpenGL
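For scale: 64 MiB split across 16 fences gives 4 MiB per synced region, which is also the hard ceiling on a single Request (its ASSERT requires size < REGION_SIZE). The arithmetic as a compile-time check:

    static_assert(64 * 1024 * 1024 / 16 == 4 * 1024 * 1024, "one region per fence is 4 MiB");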
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 546cb6d00..12434db67 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -398,9 +398,6 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
} // Anonymous namespace
-ImageBufferMap::ImageBufferMap(GLuint handle_, u8* map, size_t size, OGLSync* sync_)
- : span(map, size), sync{sync_}, handle{handle_} {}
-
ImageBufferMap::~ImageBufferMap() {
if (sync) {
sync->Create();
@@ -487,11 +484,11 @@ void TextureCacheRuntime::Finish() {
glFinish();
}
-ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) {
+ImageBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) {
return upload_buffers.RequestMap(size, true);
}
-ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) {
+ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
return download_buffers.RequestMap(size, false);
}
@@ -553,15 +550,14 @@ void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
}
void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map,
- size_t buffer_offset,
std::span<const SwizzleParameters> swizzles) {
switch (image.info.type) {
case ImageType::e2D:
- return util_shaders.BlockLinearUpload2D(image, map, buffer_offset, swizzles);
+ return util_shaders.BlockLinearUpload2D(image, map, swizzles);
case ImageType::e3D:
- return util_shaders.BlockLinearUpload3D(image, map, buffer_offset, swizzles);
+ return util_shaders.BlockLinearUpload3D(image, map, swizzles);
case ImageType::Linear:
- return util_shaders.PitchUpload(image, map, buffer_offset, swizzles);
+ return util_shaders.PitchUpload(image, map, swizzles);
default:
UNREACHABLE();
break;
@@ -596,7 +592,11 @@ ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_
bool insert_fence) {
const size_t index = RequestBuffer(requested_size);
OGLSync* const sync = insert_fence ? &syncs[index] : nullptr;
- return ImageBufferMap(buffers[index].handle, maps[index], requested_size, sync);
+ return ImageBufferMap{
+ .mapped_span = std::span(maps[index], requested_size),
+ .sync = sync,
+ .buffer = buffers[index].handle,
+ };
}
size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) {
@@ -709,10 +709,10 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
}
}
-void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
+void Image::UploadMemory(const ImageBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies) {
- glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.Handle());
- glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, buffer_offset, unswizzled_size_bytes);
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer);
+ glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, map.offset, unswizzled_size_bytes);
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
@@ -728,23 +728,23 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
current_image_height = copy.buffer_image_height;
glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, current_image_height);
}
- CopyBufferToImage(copy, buffer_offset);
+ CopyBufferToImage(copy, map.offset);
}
}
-void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
+void Image::UploadMemory(const ImageBufferMap& map,
std::span<const VideoCommon::BufferCopy> copies) {
for (const VideoCommon::BufferCopy& copy : copies) {
- glCopyNamedBufferSubData(map.Handle(), buffer.handle, copy.src_offset + buffer_offset,
+ glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + map.offset,
copy.dst_offset, copy.size);
}
}
-void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset,
+void Image::DownloadMemory(ImageBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies) {
glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
- glBindBuffer(GL_PIXEL_PACK_BUFFER, map.Handle());
+ glBindBuffer(GL_PIXEL_PACK_BUFFER, map.buffer);
glPixelStorei(GL_PACK_ALIGNMENT, 1);
u32 current_row_length = std::numeric_limits<u32>::max();
@@ -759,7 +759,38 @@ void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset,
current_image_height = copy.buffer_image_height;
glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height);
}
- CopyImageToBuffer(copy, buffer_offset);
+ CopyImageToBuffer(copy, map.offset);
+ }
+}
+
+GLuint Image::StorageHandle() noexcept {
+ switch (info.format) {
+ case PixelFormat::A8B8G8R8_SRGB:
+ case PixelFormat::B8G8R8A8_SRGB:
+ case PixelFormat::BC1_RGBA_SRGB:
+ case PixelFormat::BC2_SRGB:
+ case PixelFormat::BC3_SRGB:
+ case PixelFormat::BC7_SRGB:
+ case PixelFormat::ASTC_2D_4X4_SRGB:
+ case PixelFormat::ASTC_2D_8X8_SRGB:
+ case PixelFormat::ASTC_2D_8X5_SRGB:
+ case PixelFormat::ASTC_2D_5X4_SRGB:
+ case PixelFormat::ASTC_2D_5X5_SRGB:
+ case PixelFormat::ASTC_2D_10X8_SRGB:
+ case PixelFormat::ASTC_2D_6X6_SRGB:
+ case PixelFormat::ASTC_2D_10X10_SRGB:
+ case PixelFormat::ASTC_2D_12X12_SRGB:
+ case PixelFormat::ASTC_2D_8X6_SRGB:
+ case PixelFormat::ASTC_2D_6X5_SRGB:
+ if (store_view.handle != 0) {
+ return store_view.handle;
+ }
+ store_view.Create();
+ glTextureView(store_view.handle, ImageTarget(info), texture.handle, GL_RGBA8, 0,
+ info.resources.levels, 0, info.resources.layers);
+ return store_view.handle;
+ default:
+ return texture.handle;
}
}
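StorageHandle exists because OpenGL image load/store does not accept sRGB internal formats: aliasing the texture with a linear GL_RGBA8 view lets compute shaders store already-encoded bytes without a second sRGB conversion on write. A micro-example mirroring the util-shader call sites further down (binding and level are placeholders):

    // Bind a possibly-sRGB image for compute writes through its linear view
    glBindImageTexture(binding, image.StorageHandle(), level, GL_TRUE, 0, GL_WRITE_ONLY, GL_RGBA8);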
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 15b7c3676..a6172f009 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -31,23 +31,13 @@ using VideoCommon::NUM_RT;
using VideoCommon::Offset2D;
using VideoCommon::RenderTargets;
-class ImageBufferMap {
-public:
- explicit ImageBufferMap(GLuint handle, u8* map, size_t size, OGLSync* sync);
+struct ImageBufferMap {
~ImageBufferMap();
- GLuint Handle() const noexcept {
- return handle;
- }
-
- std::span<u8> Span() const noexcept {
- return span;
- }
-
-private:
- std::span<u8> span;
+ std::span<u8> mapped_span;
+ size_t offset = 0;
OGLSync* sync;
- GLuint handle;
+ GLuint buffer;
};
struct FormatProperties {
@@ -69,9 +59,9 @@ public:
void Finish();
- ImageBufferMap MapUploadBuffer(size_t size);
+ ImageBufferMap UploadStagingBuffer(size_t size);
- ImageBufferMap MapDownloadBuffer(size_t size);
+ ImageBufferMap DownloadStagingBuffer(size_t size);
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
@@ -89,7 +79,7 @@ public:
Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation);
- void AccelerateImageUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+ void AccelerateImageUpload(Image& image, const ImageBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
void InsertUploadMemoryBarrier();
@@ -148,14 +138,14 @@ public:
explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
VAddr cpu_addr);
- void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
+ void UploadMemory(const ImageBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies);
- void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
- std::span<const VideoCommon::BufferCopy> copies);
+ void UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferCopy> copies);
+
+ void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies);
- void DownloadMemory(ImageBufferMap& map, size_t buffer_offset,
- std::span<const VideoCommon::BufferImageCopy> copies);
+ GLuint StorageHandle() noexcept;
GLuint Handle() const noexcept {
return texture.handle;
@@ -167,8 +157,8 @@ private:
void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
OGLTexture texture;
- OGLTextureView store_view;
OGLBuffer buffer;
+ OGLTextureView store_view;
GLenum gl_internal_format = GL_NONE;
GLenum gl_format = GL_NONE;
GLenum gl_type = GL_NONE;
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index cbccfdeb4..f7ad8f370 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -4,23 +4,10 @@
#pragma once
-#include <array>
#include <glad/glad.h>
-#include "common/common_types.h"
-#include "common/logging/log.h"
#include "video_core/engines/maxwell_3d.h"
-namespace OpenGL {
-
-using GLvec2 = std::array<GLfloat, 2>;
-using GLvec3 = std::array<GLfloat, 3>;
-using GLvec4 = std::array<GLfloat, 4>;
-
-using GLuvec2 = std::array<GLuint, 2>;
-using GLuvec3 = std::array<GLuint, 3>;
-using GLuvec4 = std::array<GLuint, 4>;
-
-namespace MaxwellToGL {
+namespace OpenGL::MaxwellToGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
@@ -317,26 +304,6 @@ inline GLenum BlendFunc(Maxwell::Blend::Factor factor) {
return GL_ZERO;
}
-inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) {
- switch (source) {
- case Tegra::Texture::SwizzleSource::Zero:
- return GL_ZERO;
- case Tegra::Texture::SwizzleSource::R:
- return GL_RED;
- case Tegra::Texture::SwizzleSource::G:
- return GL_GREEN;
- case Tegra::Texture::SwizzleSource::B:
- return GL_BLUE;
- case Tegra::Texture::SwizzleSource::A:
- return GL_ALPHA;
- case Tegra::Texture::SwizzleSource::OneInt:
- case Tegra::Texture::SwizzleSource::OneFloat:
- return GL_ONE;
- }
- UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", source);
- return GL_ZERO;
-}
-
inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) {
switch (comparison) {
case Maxwell::ComparisonOp::Never:
@@ -493,5 +460,4 @@ inline GLenum ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) {
return GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV + static_cast<GLenum>(swizzle);
}
-} // namespace MaxwellToGL
-} // namespace OpenGL
+} // namespace OpenGL::MaxwellToGL
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 21159e498..9d2acd4d9 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -29,9 +29,7 @@
#include "video_core/textures/decoders.h"
namespace OpenGL {
-
namespace {
-
constexpr GLint PositionLocation = 0;
constexpr GLint TexCoordLocation = 1;
constexpr GLint ModelViewMatrixLocation = 0;
@@ -124,7 +122,6 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit
break;
}
}
-
} // Anonymous namespace
RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
@@ -132,7 +129,17 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
std::unique_ptr<Core::Frontend::GraphicsContext> context_)
: RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_},
- emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, program_manager{device} {}
+ emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, state_tracker{gpu},
+ program_manager{device},
+ rasterizer(emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker) {
+ if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
+ glEnable(GL_DEBUG_OUTPUT);
+ glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
+ glDebugMessageCallback(DebugHandler, nullptr);
+ }
+ AddTelemetryFields();
+ InitOpenGLObjects();
+}
RendererOpenGL::~RendererOpenGL() = default;
@@ -148,7 +155,7 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
++m_current_frame;
- rasterizer->TickFrame();
+ rasterizer.TickFrame();
context->SwapBuffers();
render_window.OnFrameDisplayed();
@@ -179,7 +186,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
framebuffer_crop_rect = framebuffer.crop_rect;
const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset};
- if (rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) {
+ if (rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) {
return;
}
@@ -267,6 +274,7 @@ void RendererOpenGL::InitOpenGLObjects() {
// Enable unified vertex attributes and query vertex buffer address when the driver supports it
if (device.HasVertexBufferUnifiedMemory()) {
glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
+ glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
@@ -289,14 +297,6 @@ void RendererOpenGL::AddTelemetryFields() {
telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string(gl_version));
}
-void RendererOpenGL::CreateRasterizer() {
- if (rasterizer) {
- return;
- }
- rasterizer = std::make_unique<RasterizerOpenGL>(emu_window, gpu, cpu_memory, device,
- screen_info, program_manager, state_tracker);
-}
-
void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
const Tegra::FramebufferConfig& framebuffer) {
texture.width = framebuffer.width;
@@ -407,6 +407,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
program_manager.BindHostPipeline(pipeline.handle);
+ state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
glEnable(GL_CULL_FACE);
if (screen_info.display_srgb) {
glEnable(GL_FRAMEBUFFER_SRGB);
@@ -425,7 +426,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
glCullFace(GL_BACK);
glFrontFace(GL_CW);
glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
- glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width),
static_cast<GLfloat>(layout.height));
glDepthRangeIndexed(0, 0.0, 0.0);
@@ -497,25 +497,4 @@ void RendererOpenGL::RenderScreenshot() {
renderer_settings.screenshot_requested = false;
}
-bool RendererOpenGL::Init() {
- if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
- glEnable(GL_DEBUG_OUTPUT);
- glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
- glDebugMessageCallback(DebugHandler, nullptr);
- }
-
- AddTelemetryFields();
-
- if (!GLAD_GL_VERSION_4_6) {
- return false;
- }
-
- InitOpenGLObjects();
- CreateRasterizer();
-
- return true;
-}
-
-void RendererOpenGL::ShutDown() {}
-
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 44e109794..cc19a110f 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -10,6 +10,7 @@
#include "common/math_util.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/gl_device.h"
+#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
@@ -63,18 +64,18 @@ public:
std::unique_ptr<Core::Frontend::GraphicsContext> context_);
~RendererOpenGL() override;
- bool Init() override;
- void ShutDown() override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
+ VideoCore::RasterizerInterface* ReadRasterizer() override {
+ return &rasterizer;
+ }
+
private:
/// Initializes the OpenGL state and creates persistent objects.
void InitOpenGLObjects();
void AddTelemetryFields();
- void CreateRasterizer();
-
void ConfigureFramebufferTexture(TextureInfo& texture,
const Tegra::FramebufferConfig& framebuffer);
@@ -98,8 +99,10 @@ private:
Core::Memory::Memory& cpu_memory;
Tegra::GPU& gpu;
- const Device device;
- StateTracker state_tracker{gpu};
+ Device device;
+ StateTracker state_tracker;
+ ProgramManager program_manager;
+ RasterizerOpenGL rasterizer;
// OpenGL object IDs
OGLSampler present_sampler;
@@ -115,9 +118,6 @@ private:
/// Display information for Switch screen
ScreenInfo screen_info;
- /// Global dummy shader pipeline
- ProgramManager program_manager;
-
/// OpenGL framebuffer data
std::vector<u8> gl_framebuffer_data;
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index eb849cbf2..31ec68505 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -63,7 +63,7 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
UtilShaders::~UtilShaders() = default;
-void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
std::span<const SwizzleParameters> swizzles) {
static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
@@ -71,13 +71,13 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle);
- glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
+ glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
for (const SwizzleParameters& swizzle : swizzles) {
const Extent3D num_tiles = swizzle.num_tiles;
- const size_t input_offset = swizzle.buffer_offset + buffer_offset;
+ const size_t input_offset = swizzle.buffer_offset + map.offset;
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
@@ -91,16 +91,16 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s
glUniform1ui(5, params.x_shift);
glUniform1ui(6, params.block_height);
glUniform1ui(7, params.block_height_mask);
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
- input_offset, image.guest_size_bytes - swizzle.buffer_offset);
- glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
+ image.guest_size_bytes - swizzle.buffer_offset);
+ glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0,
GL_WRITE_ONLY, store_format);
glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
}
program_manager.RestoreGuestCompute();
}
-void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
std::span<const SwizzleParameters> swizzles) {
static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8};
@@ -108,14 +108,14 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
- glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
+ glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
for (const SwizzleParameters& swizzle : swizzles) {
const Extent3D num_tiles = swizzle.num_tiles;
- const size_t input_offset = swizzle.buffer_offset + buffer_offset;
+ const size_t input_offset = swizzle.buffer_offset + map.offset;
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
@@ -132,16 +132,16 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s
glUniform1ui(7, params.block_height_mask);
glUniform1ui(8, params.block_depth);
glUniform1ui(9, params.block_depth_mask);
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
- input_offset, image.guest_size_bytes - swizzle.buffer_offset);
- glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
+ image.guest_size_bytes - swizzle.buffer_offset);
+ glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0,
GL_WRITE_ONLY, store_format);
glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z);
}
program_manager.RestoreGuestCompute();
}
-void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map,
std::span<const SwizzleParameters> swizzles) {
static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
static constexpr GLuint BINDING_INPUT_BUFFER = 0;
@@ -159,21 +159,22 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t bu
"Non-power of two images are not implemented");
program_manager.BindHostCompute(pitch_unswizzle_program.handle);
- glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
+ glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
glUniform2ui(LOC_ORIGIN, 0, 0);
glUniform2i(LOC_DESTINATION, 0, 0);
glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block);
glUniform1ui(LOC_PITCH, pitch);
- glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format);
+ glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), 0, GL_FALSE, 0, GL_WRITE_ONLY,
+ format);
for (const SwizzleParameters& swizzle : swizzles) {
const Extent3D num_tiles = swizzle.num_tiles;
- const size_t input_offset = swizzle.buffer_offset + buffer_offset;
+ const size_t input_offset = swizzle.buffer_offset + map.offset;
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
- input_offset, image.guest_size_bytes - swizzle.buffer_offset);
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
+ image.guest_size_bytes - swizzle.buffer_offset);
glDispatchCompute(num_dispatches_x, num_dispatches_y, 1);
}
program_manager.RestoreGuestCompute();
@@ -195,9 +196,9 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im
glUniform3ui(LOC_SRC_OFFSET, copy.src_offset.x, copy.src_offset.y, copy.src_offset.z);
glUniform3ui(LOC_DST_OFFSET, copy.dst_offset.x, copy.dst_offset.y, copy.dst_offset.z);
- glBindImageTexture(BINDING_INPUT_IMAGE, src_image.Handle(), copy.src_subresource.base_level,
- GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI);
- glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.Handle(),
+ glBindImageTexture(BINDING_INPUT_IMAGE, src_image.StorageHandle(),
+ copy.src_subresource.base_level, GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI);
+ glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.StorageHandle(),
copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI);
glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth);
}
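The dispatch counts round the tile grid up to whole workgroups; Common::DivCeil is plain ceiling division. A minimal equivalent plus a worked case (u32 being yuzu's alias for std::uint32_t):

    constexpr u32 DivCeil(u32 numerator, u32 denominator) {
        return (numerator + denominator - 1) / denominator;
    }
    static_assert(DivCeil(100u, 32u) == 4); // 100 tiles wide / 32-wide workgroups -> 4 groups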
diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h
index 359997255..7b1d16b09 100644
--- a/src/video_core/renderer_opengl/util_shaders.h
+++ b/src/video_core/renderer_opengl/util_shaders.h
@@ -15,21 +15,22 @@
namespace OpenGL {
class Image;
-class ImageBufferMap;
class ProgramManager;
+struct ImageBufferMap;
+
class UtilShaders {
public:
explicit UtilShaders(ProgramManager& program_manager);
~UtilShaders();
- void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+ void BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
- void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+ void BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
- void PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+ void PitchUpload(Image& image, const ImageBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles);
void CopyBC4(Image& dst_image, Image& src_image,
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index 5be6dabd9..362278f01 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -12,14 +12,15 @@
#include "common/cityhash.h"
#include "common/common_types.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
+#include "video_core/renderer_vulkan/vk_state_tracker.h"
namespace Vulkan {
namespace {
-constexpr std::size_t POINT = 0;
-constexpr std::size_t LINE = 1;
-constexpr std::size_t POLYGON = 2;
+constexpr size_t POINT = 0;
+constexpr size_t LINE = 1;
+constexpr size_t POLYGON = 2;
constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
POINT, // Points
LINE, // Lines
@@ -40,10 +41,14 @@ constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
} // Anonymous namespace
-void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_state) {
- const std::array enabled_lut = {regs.polygon_offset_point_enable,
- regs.polygon_offset_line_enable,
- regs.polygon_offset_fill_enable};
+void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
+ bool has_extended_dynamic_state) {
+ const Maxwell& regs = maxwell3d.regs;
+ const std::array enabled_lut{
+ regs.polygon_offset_point_enable,
+ regs.polygon_offset_line_enable,
+ regs.polygon_offset_fill_enable,
+ };
const u32 topology_index = static_cast<u32>(regs.draw.topology.Value());
raw1 = 0;
@@ -64,45 +69,53 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta
raw2 = 0;
const auto test_func =
- regs.alpha_test_enabled == 1 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always;
+ regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always;
alpha_test_func.Assign(PackComparisonOp(test_func));
early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0);
alpha_test_ref = Common::BitCast<u32>(regs.alpha_test_ref);
point_size = Common::BitCast<u32>(regs.point_size);
- for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
- binding_divisors[index] =
- regs.instanced_arrays.IsInstancingEnabled(index) ? regs.vertex_array[index].divisor : 0;
+ if (maxwell3d.dirty.flags[Dirty::InstanceDivisors]) {
+ maxwell3d.dirty.flags[Dirty::InstanceDivisors] = false;
+ for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
+ const bool is_enabled = regs.instanced_arrays.IsInstancingEnabled(index);
+ binding_divisors[index] = is_enabled ? regs.vertex_array[index].divisor : 0;
+ }
}
-
- for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
- const auto& input = regs.vertex_attrib_format[index];
- auto& attribute = attributes[index];
- attribute.raw = 0;
- attribute.enabled.Assign(input.IsConstant() ? 0 : 1);
- attribute.buffer.Assign(input.buffer);
- attribute.offset.Assign(input.offset);
- attribute.type.Assign(static_cast<u32>(input.type.Value()));
- attribute.size.Assign(static_cast<u32>(input.size.Value()));
- attribute.binding_index_enabled.Assign(regs.vertex_array[index].IsEnabled() ? 1 : 0);
+ if (maxwell3d.dirty.flags[Dirty::VertexAttributes]) {
+ maxwell3d.dirty.flags[Dirty::VertexAttributes] = false;
+ for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
+ const auto& input = regs.vertex_attrib_format[index];
+ auto& attribute = attributes[index];
+ attribute.raw = 0;
+ attribute.enabled.Assign(input.IsConstant() ? 0 : 1);
+ attribute.buffer.Assign(input.buffer);
+ attribute.offset.Assign(input.offset);
+ attribute.type.Assign(static_cast<u32>(input.type.Value()));
+ attribute.size.Assign(static_cast<u32>(input.size.Value()));
+ }
}
-
- for (std::size_t index = 0; index < std::size(attachments); ++index) {
- attachments[index].Fill(regs, index);
+ if (maxwell3d.dirty.flags[Dirty::Blending]) {
+ maxwell3d.dirty.flags[Dirty::Blending] = false;
+ for (size_t index = 0; index < attachments.size(); ++index) {
+ attachments[index].Refresh(regs, index);
+ }
+ }
+ if (maxwell3d.dirty.flags[Dirty::ViewportSwizzles]) {
+ maxwell3d.dirty.flags[Dirty::ViewportSwizzles] = false;
+ const auto& transform = regs.viewport_transform;
+ std::ranges::transform(transform, viewport_swizzles.begin(), [](const auto& viewport) {
+ return static_cast<u16>(viewport.swizzle.raw);
+ });
}
-
- const auto& transform = regs.viewport_transform;
- std::transform(transform.begin(), transform.end(), viewport_swizzles.begin(),
- [](const auto& viewport) { return static_cast<u16>(viewport.swizzle.raw); });
-
if (!has_extended_dynamic_state) {
no_extended_dynamic_state.Assign(1);
- dynamic_state.Fill(regs);
+ dynamic_state.Refresh(regs);
}
}
-void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size_t index) {
+void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t index) {
const auto& mask = regs.color_mask[regs.color_mask_common ? 0 : index];
raw = 0;
@@ -141,7 +154,7 @@ void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size
enable.Assign(1);
}
-void FixedPipelineState::DynamicState::Fill(const Maxwell& regs) {
+void FixedPipelineState::DynamicState::Refresh(const Maxwell& regs) {
u32 packed_front_face = PackFrontFace(regs.front_face);
if (regs.screen_y_control.triangle_rast_flip != 0) {
// Flip front face
@@ -178,9 +191,9 @@ void FixedPipelineState::DynamicState::Fill(const Maxwell& regs) {
});
}
-std::size_t FixedPipelineState::Hash() const noexcept {
+size_t FixedPipelineState::Hash() const noexcept {
const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size());
- return static_cast<std::size_t>(hash);
+ return static_cast<size_t>(hash);
}
bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcept {
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
index 465a55fdb..a0eb83a68 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -58,7 +58,7 @@ struct FixedPipelineState {
BitField<30, 1, u32> enable;
};
- void Fill(const Maxwell& regs, std::size_t index);
+ void Refresh(const Maxwell& regs, size_t index);
constexpr std::array<bool, 4> Mask() const noexcept {
return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0};
@@ -96,8 +96,6 @@ struct FixedPipelineState {
BitField<6, 14, u32> offset;
BitField<20, 3, u32> type;
BitField<23, 6, u32> size;
- // Not really an element of a vertex attribute, but it can be packed here
- BitField<29, 1, u32> binding_index_enabled;
constexpr Maxwell::VertexAttribute::Type Type() const noexcept {
return static_cast<Maxwell::VertexAttribute::Type>(type.Value());
@@ -108,7 +106,7 @@ struct FixedPipelineState {
}
};
- template <std::size_t Position>
+ template <size_t Position>
union StencilFace {
BitField<Position + 0, 3, u32> action_stencil_fail;
BitField<Position + 3, 3, u32> action_depth_fail;
@@ -152,7 +150,7 @@ struct FixedPipelineState {
// Vertex stride is a 12 bits value, we have 4 bits to spare per element
std::array<u16, Maxwell::NumVertexArrays> vertex_strides;
- void Fill(const Maxwell& regs);
+ void Refresh(const Maxwell& regs);
Maxwell::ComparisonOp DepthTestFunc() const noexcept {
return UnpackComparisonOp(depth_test_func);
@@ -199,9 +197,9 @@ struct FixedPipelineState {
std::array<u16, Maxwell::NumViewports> viewport_swizzles;
DynamicState dynamic_state;
- void Fill(const Maxwell& regs, bool has_extended_dynamic_state);
+ void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state);
- std::size_t Hash() const noexcept;
+ size_t Hash() const noexcept;
bool operator==(const FixedPipelineState& rhs) const noexcept;
@@ -209,8 +207,8 @@ struct FixedPipelineState {
return !operator==(rhs);
}
- std::size_t Size() const noexcept {
- const std::size_t total_size = sizeof *this;
+ size_t Size() const noexcept {
+ const size_t total_size = sizeof *this;
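+ // DynamicState must be the last member: when the device supports extended dynamic
+ // state it is set at draw time instead, so it is excluded from the bytes that get
+ // hashed and compared.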
return total_size - (no_extended_dynamic_state != 0 ? 0 : sizeof(DynamicState));
}
};
@@ -224,7 +222,7 @@ namespace std {
template <>
struct hash<Vulkan::FixedPipelineState> {
- std::size_t operator()(const Vulkan::FixedPipelineState& k) const noexcept {
+ size_t operator()(const Vulkan::FixedPipelineState& k) const noexcept {
return k.Hash();
}
};
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 85121d9fd..19aaf034f 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -531,13 +531,9 @@ VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison) {
return {};
}
-VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format) {
+VkIndexType IndexFormat(Maxwell::IndexFormat index_format) {
switch (index_format) {
case Maxwell::IndexFormat::UnsignedByte:
- if (!device.IsExtIndexTypeUint8Supported()) {
- UNIMPLEMENTED_MSG("Native uint8 indices are not supported on this device");
- return VK_INDEX_TYPE_UINT16;
- }
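+ // When VK_EXT_index_type_uint8 is unsupported, the fallback now lives in
+ // BufferCacheRuntime::BindIndexBuffer, which converts the indices with Uint8Pass.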
return VK_INDEX_TYPE_UINT8_EXT;
case Maxwell::IndexFormat::UnsignedShort:
return VK_INDEX_TYPE_UINT16;
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index 7c34b47dc..e3e06ba38 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -53,7 +53,7 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison);
-VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format);
+VkIndexType IndexFormat(Maxwell::IndexFormat index_format);
VkStencilOp StencilOp(Maxwell::StencilOp stencil_op);
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 61796e33a..1cc720ddd 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -80,17 +80,50 @@ std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_ext
return separated_extensions;
}
+Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld,
+ VkSurfaceKHR surface) {
+ const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices();
+ const s32 device_index = Settings::values.vulkan_device.GetValue();
+ if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) {
+ LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index);
+ throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
+ }
+ const vk::PhysicalDevice physical_device(devices[device_index], dld);
+ return Device(*instance, physical_device, surface, dld);
+}
} // Anonymous namespace
RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
Core::Frontend::EmuWindow& emu_window,
Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
- std::unique_ptr<Core::Frontend::GraphicsContext> context_)
- : RendererBase{emu_window, std::move(context_)}, telemetry_session{telemetry_session_},
- cpu_memory{cpu_memory_}, gpu{gpu_} {}
+ std::unique_ptr<Core::Frontend::GraphicsContext> context_) try
+ : RendererBase(emu_window, std::move(context_)),
+ telemetry_session(telemetry_session_),
+ cpu_memory(cpu_memory_),
+ gpu(gpu_),
+ library(OpenLibrary()),
+ instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
+ true, Settings::values.renderer_debug)),
+ debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
+ surface(CreateSurface(instance, render_window)),
+ device(CreateDevice(instance, dld, *surface)),
+ memory_allocator(device, false),
+ state_tracker(gpu),
+ scheduler(device, state_tracker),
+ swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width,
+ render_window.GetFramebufferLayout().height, false),
+ blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler,
+ screen_info),
+ rasterizer(render_window, gpu, gpu.MemoryManager(), cpu_memory, screen_info, device,
+ memory_allocator, state_tracker, scheduler) {
+ Report();
+} catch (const vk::Exception& exception) {
+ LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
+ throw std::runtime_error{fmt::format("Vulkan initialization error {}", exception.what())};
+}
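+// Note that the constructor above is a function-try-block: any vk::Exception thrown
+// while initializing members (e.g. creating the Device) is caught and rethrown as a
+// std::runtime_error.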
RendererVulkan::~RendererVulkan() {
- ShutDown();
+ void(device.GetLogical().WaitIdle());
}
void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
@@ -101,101 +134,38 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) {
const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
const bool use_accelerated =
- rasterizer->AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
+ rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
const bool is_srgb = use_accelerated && screen_info.is_srgb;
- if (swapchain->HasFramebufferChanged(layout) || swapchain->GetSrgbState() != is_srgb) {
- swapchain->Create(layout.width, layout.height, is_srgb);
- blit_screen->Recreate();
+ if (swapchain.HasFramebufferChanged(layout) || swapchain.GetSrgbState() != is_srgb) {
+ swapchain.Create(layout.width, layout.height, is_srgb);
+ blit_screen.Recreate();
}
- scheduler->WaitWorker();
+ scheduler.WaitWorker();
- swapchain->AcquireNextImage();
- const VkSemaphore render_semaphore = blit_screen->Draw(*framebuffer, use_accelerated);
+ swapchain.AcquireNextImage();
+ const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated);
- scheduler->Flush(render_semaphore);
+ scheduler.Flush(render_semaphore);
- if (swapchain->Present(render_semaphore)) {
- blit_screen->Recreate();
+ if (swapchain.Present(render_semaphore)) {
+ blit_screen.Recreate();
}
-
- rasterizer->TickFrame();
+ rasterizer.TickFrame();
}
render_window.OnFrameDisplayed();
}
-bool RendererVulkan::Init() try {
- library = OpenLibrary();
- instance = CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
- true, Settings::values.renderer_debug);
- if (Settings::values.renderer_debug) {
- debug_callback = CreateDebugCallback(instance);
- }
- surface = CreateSurface(instance, render_window);
-
- InitializeDevice();
- Report();
-
- memory_allocator = std::make_unique<MemoryAllocator>(*device);
-
- state_tracker = std::make_unique<StateTracker>(gpu);
-
- scheduler = std::make_unique<VKScheduler>(*device, *state_tracker);
-
- const auto& framebuffer = render_window.GetFramebufferLayout();
- swapchain = std::make_unique<VKSwapchain>(*surface, *device, *scheduler);
- swapchain->Create(framebuffer.width, framebuffer.height, false);
-
- rasterizer = std::make_unique<RasterizerVulkan>(render_window, gpu, gpu.MemoryManager(),
- cpu_memory, screen_info, *device,
- *memory_allocator, *state_tracker, *scheduler);
-
- blit_screen =
- std::make_unique<VKBlitScreen>(cpu_memory, render_window, *rasterizer, *device,
- *memory_allocator, *swapchain, *scheduler, screen_info);
- return true;
-
-} catch (const vk::Exception& exception) {
- LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
- return false;
-}
-
-void RendererVulkan::ShutDown() {
- if (!device) {
- return;
- }
- if (const auto& dev = device->GetLogical()) {
- dev.WaitIdle();
- }
- rasterizer.reset();
- blit_screen.reset();
- scheduler.reset();
- swapchain.reset();
- memory_allocator.reset();
- device.reset();
-}
-
-void RendererVulkan::InitializeDevice() {
- const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices();
- const s32 device_index = Settings::values.vulkan_device.GetValue();
- if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) {
- LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index);
- throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
- }
- const vk::PhysicalDevice physical_device(devices[static_cast<size_t>(device_index)], dld);
- device = std::make_unique<Device>(*instance, physical_device, *surface, dld);
-}
-
void RendererVulkan::Report() const {
- const std::string vendor_name{device->GetVendorName()};
- const std::string model_name{device->GetModelName()};
- const std::string driver_version = GetDriverVersion(*device);
+ const std::string vendor_name{device.GetVendorName()};
+ const std::string model_name{device.GetModelName()};
+ const std::string driver_version = GetDriverVersion(device);
const std::string driver_name = fmt::format("{} {}", vendor_name, driver_version);
- const std::string api_version = GetReadableVersion(device->ApiVersion());
+ const std::string api_version = GetReadableVersion(device.ApiVersion());
- const std::string extensions = BuildCommaSeparatedExtensions(device->GetAvailableExtensions());
+ const std::string extensions = BuildCommaSeparatedExtensions(device.GetAvailableExtensions());
LOG_INFO(Render_Vulkan, "Driver: {}", driver_name);
LOG_INFO(Render_Vulkan, "Device: {}", model_name);
@@ -209,21 +179,4 @@ void RendererVulkan::Report() const {
telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions);
}
-std::vector<std::string> RendererVulkan::EnumerateDevices() try {
- vk::InstanceDispatch dld;
- const Common::DynamicLibrary library = OpenLibrary();
- const vk::Instance instance = CreateInstance(library, dld, VK_API_VERSION_1_0);
- const std::vector<VkPhysicalDevice> physical_devices = instance.EnumeratePhysicalDevices();
- std::vector<std::string> names;
- names.reserve(physical_devices.size());
- for (const VkPhysicalDevice device : physical_devices) {
- names.push_back(vk::PhysicalDevice(device, dld).GetProperties().deviceName);
- }
- return names;
-
-} catch (const vk::Exception& exception) {
- LOG_ERROR(Render_Vulkan, "Failed to enumerate devices with error: {}", exception.what());
- return {};
-}
-
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index daf55b9b4..72071316c 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -9,8 +9,14 @@
#include <vector>
#include "common/dynamic_library.h"
-
#include "video_core/renderer_base.h"
+#include "video_core/renderer_vulkan/vk_blit_screen.h"
+#include "video_core/renderer_vulkan/vk_rasterizer.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_state_tracker.h"
+#include "video_core/renderer_vulkan/vk_swapchain.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Core {
@@ -27,20 +33,6 @@ class GPU;
namespace Vulkan {
-class Device;
-class StateTracker;
-class MemoryAllocator;
-class VKBlitScreen;
-class VKSwapchain;
-class VKScheduler;
-
-struct VKScreenInfo {
- VkImageView image_view{};
- u32 width{};
- u32 height{};
- bool is_srgb{};
-};
-
class RendererVulkan final : public VideoCore::RendererBase {
public:
explicit RendererVulkan(Core::TelemetrySession& telemetry_session_,
@@ -49,15 +41,13 @@ public:
std::unique_ptr<Core::Frontend::GraphicsContext> context_);
~RendererVulkan() override;
- bool Init() override;
- void ShutDown() override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
- static std::vector<std::string> EnumerateDevices();
+ VideoCore::RasterizerInterface* ReadRasterizer() override {
+ return &rasterizer;
+ }
private:
- void InitializeDevice();
-
void Report() const;
Core::TelemetrySession& telemetry_session;
@@ -68,18 +58,18 @@ private:
vk::InstanceDispatch dld;
vk::Instance instance;
-
+ vk::DebugUtilsMessenger debug_callback;
vk::SurfaceKHR surface;
VKScreenInfo screen_info;
- vk::DebugUtilsMessenger debug_callback;
- std::unique_ptr<Device> device;
- std::unique_ptr<MemoryAllocator> memory_allocator;
- std::unique_ptr<StateTracker> state_tracker;
- std::unique_ptr<VKScheduler> scheduler;
- std::unique_ptr<VKSwapchain> swapchain;
- std::unique_ptr<VKBlitScreen> blit_screen;
+ Device device;
+ MemoryAllocator memory_allocator;
+ StateTracker state_tracker;
+ VKScheduler scheduler;
+ VKSwapchain swapchain;
+ VKBlitScreen blit_screen;
+ RasterizerVulkan rasterizer;
};
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index 3e3b895e0..a1a32aabe 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -18,7 +18,6 @@
#include "video_core/gpu.h"
#include "video_core/host_shaders/vulkan_present_frag_spv.h"
#include "video_core/host_shaders/vulkan_present_vert_spv.h"
-#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_blit_screen.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
@@ -113,13 +112,12 @@ struct VKBlitScreen::BufferData {
};
VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_,
- Core::Frontend::EmuWindow& render_window_,
- VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
+ Core::Frontend::EmuWindow& render_window_, const Device& device_,
MemoryAllocator& memory_allocator_, VKSwapchain& swapchain_,
VKScheduler& scheduler_, const VKScreenInfo& screen_info_)
- : cpu_memory{cpu_memory_}, render_window{render_window_}, rasterizer{rasterizer_},
- device{device_}, memory_allocator{memory_allocator_}, swapchain{swapchain_},
- scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_} {
+ : cpu_memory{cpu_memory_}, render_window{render_window_}, device{device_},
+ memory_allocator{memory_allocator_}, swapchain{swapchain_}, scheduler{scheduler_},
+ image_count{swapchain.GetImageCount()}, screen_info{screen_info_} {
resource_ticks.resize(image_count);
CreateStaticResources();
@@ -150,8 +148,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
SetUniformData(data, framebuffer);
SetVertexData(data, framebuffer);
- const std::span<u8> map = buffer_commit.Map();
- std::memcpy(map.data(), &data, sizeof(data));
+ const std::span<u8> mapped_span = buffer_commit.Map();
+ std::memcpy(mapped_span.data(), &data, sizeof(data));
if (!use_accelerated) {
const u64 image_offset = GetRawImageOffset(framebuffer, image_index);
@@ -159,14 +157,13 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr);
const size_t size_bytes = GetSizeInBytes(framebuffer);
- rasterizer.FlushRegion(ToCacheAddr(host_ptr), size_bytes);
// TODO(Rodrigo): Read this from HLE
constexpr u32 block_height_log2 = 4;
const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer);
Tegra::Texture::UnswizzleTexture(
- map.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes), bytes_per_pixel,
- framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
+ mapped_span.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes),
+ bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
const VkBufferImageCopy copy{
.bufferOffset = image_offset,
@@ -266,7 +263,6 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
cmdbuf.Draw(4, 1, 0, 0);
cmdbuf.EndRenderPass();
});
-
return *semaphores[image_index];
}
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h
index b52576957..5e3177685 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.h
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.h
@@ -38,12 +38,18 @@ class RasterizerVulkan;
class VKScheduler;
class VKSwapchain;
-class VKBlitScreen final {
+struct VKScreenInfo {
+ VkImageView image_view{};
+ u32 width{};
+ u32 height{};
+ bool is_srgb{};
+};
+
+class VKBlitScreen {
public:
explicit VKBlitScreen(Core::Memory::Memory& cpu_memory,
- Core::Frontend::EmuWindow& render_window,
- VideoCore::RasterizerInterface& rasterizer, const Device& device,
- MemoryAllocator& memory_allocator, VKSwapchain& swapchain,
+ Core::Frontend::EmuWindow& render_window, const Device& device,
+ MemoryAllocator& memory_allocator, VKSwapchain& swapchain,
VKScheduler& scheduler, const VKScreenInfo& screen_info);
~VKBlitScreen();
@@ -84,7 +90,6 @@ private:
Core::Memory::Memory& cpu_memory;
Core::Frontend::EmuWindow& render_window;
- VideoCore::RasterizerInterface& rasterizer;
const Device& device;
MemoryAllocator& memory_allocator;
VKSwapchain& swapchain;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index d8ad40a0f..668633e7b 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -3,188 +3,304 @@
// Refer to the license.txt file included.
#include <algorithm>
+#include <array>
#include <cstring>
-#include <memory>
+#include <span>
+#include <vector>
-#include "core/core.h"
#include "video_core/buffer_cache/buffer_cache.h"
+#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
-#include "video_core/renderer_vulkan/vk_stream_buffer.h"
+#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
+#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
-
namespace {
+VkBufferCopy MakeBufferCopy(const VideoCommon::BufferCopy& copy) {
+ return VkBufferCopy{
+ .srcOffset = copy.src_offset,
+ .dstOffset = copy.dst_offset,
+ .size = copy.size,
+ };
+}
-constexpr VkBufferUsageFlags BUFFER_USAGE =
- VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
- VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
-
-constexpr VkPipelineStageFlags UPLOAD_PIPELINE_STAGE =
- VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
- VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
- VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
-
-constexpr VkAccessFlags UPLOAD_ACCESS_BARRIERS =
- VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT |
- VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT;
+VkIndexType IndexTypeFromNumElements(const Device& device, u32 num_elements) {
+ if (num_elements <= 0xff && device.IsExtIndexTypeUint8Supported()) {
+ return VK_INDEX_TYPE_UINT8_EXT;
+ }
+ if (num_elements <= 0xffff) {
+ return VK_INDEX_TYPE_UINT16;
+ }
+ return VK_INDEX_TYPE_UINT32;
+}
-constexpr VkAccessFlags TRANSFORM_FEEDBACK_WRITE_ACCESS =
- VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;
+size_t BytesPerIndex(VkIndexType index_type) {
+ switch (index_type) {
+ case VK_INDEX_TYPE_UINT8_EXT:
+ return 1;
+ case VK_INDEX_TYPE_UINT16:
+ return 2;
+ case VK_INDEX_TYPE_UINT32:
+ return 4;
+ default:
+ UNREACHABLE_MSG("Invalid index type={}", index_type);
+ return 1;
+ }
+}
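+// Builds the six triangle-list indices for one quad. As an illustration,
+// MakeQuadIndices<u16>(1, 0) yields {4, 5, 6, 4, 6, 7}.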
+template <typename T>
+std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) {
+ std::array<T, 6> indices{0, 1, 2, 0, 2, 3};
+ std::ranges::transform(indices, indices.begin(),
+ [quad, first](u32 index) { return first + index + quad * 4; });
+ return indices;
+}
} // Anonymous namespace
-Buffer::Buffer(const Device& device_, MemoryAllocator& memory_allocator, VKScheduler& scheduler_,
- StagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_)
- : BufferBlock{cpu_addr_, size_}, device{device_}, scheduler{scheduler_}, staging_pool{
- staging_pool_} {
- buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
+Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
+ : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
+
+Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
+ VAddr cpu_addr_, u64 size_bytes_)
+ : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) {
+ buffer = runtime.device.GetLogical().CreateBuffer(VkBufferCreateInfo{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
- .size = static_cast<VkDeviceSize>(size_),
- .usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+ .size = SizeBytes(),
+ .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
+ VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
+ VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
+ VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
+ VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
});
- commit = memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
+ if (runtime.device.HasDebuggingToolAttached()) {
+ buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
+ }
+ commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
}
-Buffer::~Buffer() = default;
+BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_,
+ VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
+ VKUpdateDescriptorQueue& update_descriptor_queue_,
+ VKDescriptorPool& descriptor_pool)
+ : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
+ staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_},
+ uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
+ quad_index_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue) {}
-void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) {
- const auto& staging = staging_pool.Request(data_size, MemoryUsage::Upload);
- std::memcpy(staging.mapped_span.data(), data, data_size);
+StagingBufferRef BufferCacheRuntime::UploadStagingBuffer(size_t size) {
+ return staging_pool.Request(size, MemoryUsage::Upload);
+}
- scheduler.RequestOutsideRenderPassOperationContext();
+StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size) {
+ return staging_pool.Request(size, MemoryUsage::Download);
+}
- const VkBuffer handle = Handle();
- scheduler.Record([staging = staging.buffer, handle, offset, data_size,
- &device = device](vk::CommandBuffer cmdbuf) {
- const VkBufferMemoryBarrier read_barrier{
- .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
- .pNext = nullptr,
- .srcAccessMask =
- VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT |
- VK_ACCESS_HOST_WRITE_BIT |
- (device.IsExtTransformFeedbackSupported() ? TRANSFORM_FEEDBACK_WRITE_ACCESS : 0),
- .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .buffer = handle,
- .offset = offset,
- .size = data_size,
- };
- const VkBufferMemoryBarrier write_barrier{
- .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
- .pNext = nullptr,
- .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
- .dstAccessMask = UPLOAD_ACCESS_BARRIERS,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .buffer = handle,
- .offset = offset,
- .size = data_size,
- };
+void BufferCacheRuntime::Finish() {
+ scheduler.Finish();
+}
+
+void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
+ std::span<const VideoCommon::BufferCopy> copies) {
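+ // Global VkMemoryBarriers cover every buffer at once, replacing the per-buffer
+ // VkBufferMemoryBarriers that the old upload/download paths recorded.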
+ static constexpr VkMemoryBarrier READ_BARRIER{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
+ };
+ static constexpr VkMemoryBarrier WRITE_BARRIER{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
+ };
+ // Measured in a popular game, this count never exceeds the inline capacity once data is warmed up
+ boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size());
+ std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
+ scheduler.RequestOutsideRenderPassOperationContext();
+ scheduler.Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
- 0, read_barrier);
- cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size});
- cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0,
- write_barrier);
+ 0, READ_BARRIER);
+ cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ 0, WRITE_BARRIER);
});
}
-void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) {
- auto staging = staging_pool.Request(data_size, MemoryUsage::Download);
- scheduler.RequestOutsideRenderPassOperationContext();
+void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format,
+ u32 base_vertex, u32 num_indices, VkBuffer buffer,
+ u32 offset, [[maybe_unused]] u32 size) {
+ VkIndexType vk_index_type = MaxwellToVK::IndexFormat(index_format);
+ VkDeviceSize vk_offset = offset;
+ VkBuffer vk_buffer = buffer;
+ if (topology == PrimitiveTopology::Quads) {
+ vk_index_type = VK_INDEX_TYPE_UINT32;
+ std::tie(vk_buffer, vk_offset) =
+ quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset);
+ } else if (vk_index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) {
+ vk_index_type = VK_INDEX_TYPE_UINT16;
+ std::tie(vk_buffer, vk_offset) = uint8_pass.Assemble(num_indices, buffer, offset);
+ }
+ if (vk_buffer == VK_NULL_HANDLE) {
+ // Vulkan doesn't support null index buffers. Replace it with our own null buffer.
+ ReserveNullIndexBuffer();
+ vk_buffer = *null_index_buffer;
+ }
+ scheduler.Record([vk_buffer, vk_offset, vk_index_type](vk::CommandBuffer cmdbuf) {
+ cmdbuf.BindIndexBuffer(vk_buffer, vk_offset, vk_index_type);
+ });
+}
- const VkBuffer handle = Handle();
- scheduler.Record(
- [staging = staging.buffer, handle, offset, data_size](vk::CommandBuffer cmdbuf) {
- const VkBufferMemoryBarrier barrier{
- .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
- .pNext = nullptr,
- .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
- .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .buffer = handle,
- .offset = offset,
- .size = data_size,
- };
-
- cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
- VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
- VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
- VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {});
- cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, data_size});
- });
- scheduler.Finish();
+void BufferCacheRuntime::BindQuadArrayIndexBuffer(u32 first, u32 count) {
+ ReserveQuadArrayLUT(first + count, true);
- std::memcpy(data, staging.mapped_span.data(), data_size);
+ // The LUT stores four copies of the quad index pattern, one per possible 'first'
+ // offset modulo 4, so selecting a copy by the modulus applies that offset for free.
+ const VkIndexType index_type = quad_array_lut_index_type;
+ const size_t sub_first_offset = static_cast<size_t>(first % 4) * (current_num_indices / 4);
+ const size_t offset = (sub_first_offset + first / 4) * 6ULL * BytesPerIndex(index_type);
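+ // e.g. with current_num_indices = 16 (four quads per copy) and first = 6:
+ // sub_first_offset = (6 % 4) * 4 = 8 and offset = (8 + 1) * 6 * BytesPerIndex(index_type),
+ // which lands on the copy for first % 4 == 2, one quad past its start.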
+ scheduler.Record([buffer = *quad_array_lut, index_type, offset](vk::CommandBuffer cmdbuf) {
+ cmdbuf.BindIndexBuffer(buffer, offset, index_type);
+ });
}
-void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
- std::size_t copy_size) {
- scheduler.RequestOutsideRenderPassOperationContext();
+void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size,
+ u32 stride) {
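+ // With VK_EXT_extended_dynamic_state the stride is passed at bind time through
+ // vkCmdBindVertexBuffers2EXT instead of being baked into the pipeline.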
+ if (device.IsExtExtendedDynamicStateSupported()) {
+ scheduler.Record([index, buffer, offset, size, stride](vk::CommandBuffer cmdbuf) {
+ const VkDeviceSize vk_offset = offset;
+ const VkDeviceSize vk_size = buffer != VK_NULL_HANDLE ? size : VK_WHOLE_SIZE;
+ const VkDeviceSize vk_stride = stride;
+ cmdbuf.BindVertexBuffers2EXT(index, 1, &buffer, &vk_offset, &vk_size, &vk_stride);
+ });
+ } else {
+ scheduler.Record([index, buffer, offset](vk::CommandBuffer cmdbuf) {
+ cmdbuf.BindVertexBuffer(index, buffer, offset);
+ });
+ }
+}
- const VkBuffer dst_buffer = Handle();
- scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset,
- copy_size](vk::CommandBuffer cmdbuf) {
- cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, copy_size});
-
- std::array<VkBufferMemoryBarrier, 2> barriers;
- barriers[0].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
- barriers[0].pNext = nullptr;
- barriers[0].srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
- barriers[0].dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
- barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barriers[0].buffer = src_buffer;
- barriers[0].offset = src_offset;
- barriers[0].size = copy_size;
- barriers[1].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
- barriers[1].pNext = nullptr;
- barriers[1].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
- barriers[1].dstAccessMask = UPLOAD_ACCESS_BARRIERS;
- barriers[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barriers[1].buffer = dst_buffer;
- barriers[1].offset = dst_offset;
- barriers[1].size = copy_size;
- cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {},
- barriers, {});
+void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset,
+ u32 size) {
+ if (!device.IsExtTransformFeedbackSupported()) {
+ // Already logged in the rasterizer
+ return;
+ }
+ scheduler.Record([index, buffer, offset, size](vk::CommandBuffer cmdbuf) {
+ const VkDeviceSize vk_offset = offset;
+ const VkDeviceSize vk_size = size;
+ cmdbuf.BindTransformFeedbackBuffersEXT(index, 1, &buffer, &vk_offset, &vk_size);
});
}
-VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer_,
- Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
- const Device& device_, MemoryAllocator& memory_allocator_,
- VKScheduler& scheduler_, VKStreamBuffer& stream_buffer_,
- StagingBufferPool& staging_pool_)
- : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer_, gpu_memory_,
- cpu_memory_, stream_buffer_},
- device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
- staging_pool{staging_pool_} {}
-
-VKBufferCache::~VKBufferCache() = default;
-
-std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
- return std::make_shared<Buffer>(device, memory_allocator, scheduler, staging_pool, cpu_addr,
- size);
+void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle) {
+ if (num_indices <= current_num_indices) {
+ return;
+ }
+ if (wait_for_idle) {
+ scheduler.Finish();
+ }
+ current_num_indices = num_indices;
+ quad_array_lut_index_type = IndexTypeFromNumElements(device, num_indices);
+
+ const u32 num_quads = num_indices / 4;
+ const u32 num_triangle_indices = num_quads * 6;
+ const u32 num_first_offset_copies = 4;
+ const size_t bytes_per_index = BytesPerIndex(quad_array_lut_index_type);
+ const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies;
+ quad_array_lut = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .size = size_bytes,
+ .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = 0,
+ .pQueueFamilyIndices = nullptr,
+ });
+ if (device.HasDebuggingToolAttached()) {
+ quad_array_lut.SetObjectNameEXT("Quad LUT");
+ }
+ quad_array_lut_commit = memory_allocator.Commit(quad_array_lut, MemoryUsage::DeviceLocal);
+
+ const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload);
+ u8* staging_data = staging.mapped_span.data();
+ const size_t quad_size = bytes_per_index * 6;
+ for (u32 first = 0; first < num_first_offset_copies; ++first) {
+ for (u32 quad = 0; quad < num_quads; ++quad) {
+ switch (quad_array_lut_index_type) {
+ case VK_INDEX_TYPE_UINT8_EXT:
+ std::memcpy(staging_data, MakeQuadIndices<u8>(quad, first).data(), quad_size);
+ break;
+ case VK_INDEX_TYPE_UINT16:
+ std::memcpy(staging_data, MakeQuadIndices<u16>(quad, first).data(), quad_size);
+ break;
+ case VK_INDEX_TYPE_UINT32:
+ std::memcpy(staging_data, MakeQuadIndices<u32>(quad, first).data(), quad_size);
+ break;
+ default:
+ UNREACHABLE();
+ break;
+ }
+ staging_data += quad_size;
+ }
+ }
+ scheduler.RequestOutsideRenderPassOperationContext();
+ scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset,
+ dst_buffer = *quad_array_lut, size_bytes](vk::CommandBuffer cmdbuf) {
+ const VkBufferCopy copy{
+ .srcOffset = src_offset,
+ .dstOffset = 0,
+ .size = size_bytes,
+ };
+ const VkBufferMemoryBarrier write_barrier{
+ .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .buffer = dst_buffer,
+ .offset = 0,
+ .size = size_bytes,
+ };
+ cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy);
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
+ 0, write_barrier);
+ });
}
-VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) {
- size = std::max(size, std::size_t(4));
- const auto& empty = staging_pool.Request(size, MemoryUsage::DeviceLocal);
+void BufferCacheRuntime::ReserveNullIndexBuffer() {
+ if (null_index_buffer) {
+ return;
+ }
+ null_index_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .size = 4,
+ .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = 0,
+ .pQueueFamilyIndices = nullptr,
+ });
+ if (device.HasDebuggingToolAttached()) {
+ null_index_buffer.SetObjectNameEXT("Null index buffer");
+ }
+ null_index_buffer_commit = memory_allocator.Commit(null_index_buffer, MemoryUsage::DeviceLocal);
+
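+ // Clear the buffer below so that any draw bound to it reads index 0.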
scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([size, buffer = empty.buffer](vk::CommandBuffer cmdbuf) {
- cmdbuf.FillBuffer(buffer, 0, size, 0);
+ scheduler.Record([buffer = *null_index_buffer](vk::CommandBuffer cmdbuf) {
+ cmdbuf.FillBuffer(buffer, 0, VK_WHOLE_SIZE, 0);
});
- return {empty.buffer, 0, 0};
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 41d577510..982e92191 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -4,69 +4,126 @@
#pragma once
-#include <memory>
-
-#include "common/common_types.h"
#include "video_core/buffer_cache/buffer_cache.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
-#include "video_core/renderer_vulkan/vk_stream_buffer.h"
+#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
class Device;
+class VKDescriptorPool;
class VKScheduler;
-class Buffer final : public VideoCommon::BufferBlock {
-public:
- explicit Buffer(const Device& device, MemoryAllocator& memory_allocator, VKScheduler& scheduler,
- StagingBufferPool& staging_pool, VAddr cpu_addr_, std::size_t size_);
- ~Buffer();
-
- void Upload(std::size_t offset, std::size_t data_size, const u8* data);
+class BufferCacheRuntime;
- void Download(std::size_t offset, std::size_t data_size, u8* data);
-
- void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
- std::size_t copy_size);
+class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> {
+public:
+ explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params);
+ explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
+ VAddr cpu_addr_, u64 size_bytes_);
- VkBuffer Handle() const {
+ [[nodiscard]] VkBuffer Handle() const noexcept {
return *buffer;
}
- u64 Address() const {
- return 0;
+ operator VkBuffer() const noexcept {
+ return *buffer;
}
private:
- const Device& device;
- VKScheduler& scheduler;
- StagingBufferPool& staging_pool;
-
vk::Buffer buffer;
MemoryCommit commit;
};
-class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> {
+class BufferCacheRuntime {
+ friend Buffer;
+
+ using PrimitiveTopology = Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology;
+ using IndexFormat = Tegra::Engines::Maxwell3D::Regs::IndexFormat;
+
public:
- explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer,
- Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
- const Device& device, MemoryAllocator& memory_allocator,
- VKScheduler& scheduler, VKStreamBuffer& stream_buffer,
- StagingBufferPool& staging_pool);
- ~VKBufferCache();
+ explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_,
+ VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
+ VKUpdateDescriptorQueue& update_descriptor_queue_,
+ VKDescriptorPool& descriptor_pool);
+
+ void Finish();
+
+ [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
+
+ [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
+
+ void CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
+ std::span<const VideoCommon::BufferCopy> copies);
+
+ void BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, u32 base_vertex,
+ u32 num_indices, VkBuffer buffer, u32 offset, u32 size);
+
+ void BindQuadArrayIndexBuffer(u32 first, u32 count);
- BufferInfo GetEmptyBuffer(std::size_t size) override;
+ void BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, u32 stride);
-protected:
- std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
+ void BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size);
+
+ std::span<u8> BindMappedUniformBuffer([[maybe_unused]] size_t stage,
+ [[maybe_unused]] u32 binding_index, u32 size) {
+ const StagingBufferRef ref = staging_pool.Request(size, MemoryUsage::Upload);
+ BindBuffer(ref.buffer, static_cast<u32>(ref.offset), size);
+ return ref.mapped_span;
+ }
+
+ void BindUniformBuffer(VkBuffer buffer, u32 offset, u32 size) {
+ BindBuffer(buffer, offset, size);
+ }
+
+ void BindStorageBuffer(VkBuffer buffer, u32 offset, u32 size,
+ [[maybe_unused]] bool is_written) {
+ BindBuffer(buffer, offset, size);
+ }
private:
+ void BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
+ update_descriptor_queue.AddBuffer(buffer, offset, size);
+ }
+
+ void ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle);
+
+ void ReserveNullIndexBuffer();
+
const Device& device;
MemoryAllocator& memory_allocator;
VKScheduler& scheduler;
StagingBufferPool& staging_pool;
+ VKUpdateDescriptorQueue& update_descriptor_queue;
+
+ vk::Buffer quad_array_lut;
+ MemoryCommit quad_array_lut_commit;
+ VkIndexType quad_array_lut_index_type{};
+ u32 current_num_indices = 0;
+
+ vk::Buffer null_index_buffer;
+ MemoryCommit null_index_buffer_commit;
+
+ Uint8Pass uint8_pass;
+ QuadIndexedPass quad_index_pass;
};
+struct BufferCacheParams {
+ using Runtime = Vulkan::BufferCacheRuntime;
+ using Buffer = Vulkan::Buffer;
+
+ static constexpr bool IS_OPENGL = false;
+ static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false;
+ static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = false;
+ static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false;
+ static constexpr bool NEEDS_BIND_STORAGE_INDEX = false;
+ static constexpr bool USE_MEMORY_MAPS = true;
+};
+
+using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
+
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 5eb6a54be..2f9a7b028 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -10,7 +10,7 @@
#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
-#include "video_core/host_shaders/vulkan_quad_array_comp_spv.h"
+#include "common/div_ceil.h"
#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h"
#include "video_core/host_shaders/vulkan_uint8_comp_spv.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h"
@@ -22,30 +22,7 @@
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
-
namespace {
-
-VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() {
- return {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = nullptr,
- };
-}
-
-VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEntry() {
- return {
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .offset = 0,
- .stride = sizeof(DescriptorUpdateEntry),
- };
-}
-
VkPushConstantRange BuildComputePushConstantRange(std::size_t size) {
return {
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
@@ -162,55 +139,6 @@ VkDescriptorSet VKComputePass::CommitDescriptorSet(
return set;
}
-QuadArrayPass::QuadArrayPass(const Device& device_, VKScheduler& scheduler_,
- VKDescriptorPool& descriptor_pool_,
- StagingBufferPool& staging_buffer_pool_,
- VKUpdateDescriptorQueue& update_descriptor_queue_)
- : VKComputePass(device_, descriptor_pool_, BuildQuadArrayPassDescriptorSetLayoutBinding(),
- BuildQuadArrayPassDescriptorUpdateTemplateEntry(),
- BuildComputePushConstantRange(sizeof(u32)), VULKAN_QUAD_ARRAY_COMP_SPV),
- scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
- update_descriptor_queue{update_descriptor_queue_} {}
-
-QuadArrayPass::~QuadArrayPass() = default;
-
-std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) {
- const u32 num_triangle_vertices = (num_vertices / 4) * 6;
- const std::size_t staging_size = num_triangle_vertices * sizeof(u32);
- const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
-
- update_descriptor_queue.Acquire();
- update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size);
- const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
-
- scheduler.RequestOutsideRenderPassOperationContext();
-
- ASSERT(num_vertices % 4 == 0);
- const u32 num_quads = num_vertices / 4;
- scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer,
- num_quads, first, set](vk::CommandBuffer cmdbuf) {
- constexpr u32 dispatch_size = 1024;
- cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
- cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
- cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(first), &first);
- cmdbuf.Dispatch(Common::AlignUp(num_quads, dispatch_size) / dispatch_size, 1, 1);
-
- VkBufferMemoryBarrier barrier;
- barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
- barrier.pNext = nullptr;
- barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
- barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
- barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barrier.buffer = buffer;
- barrier.offset = 0;
- barrier.size = static_cast<VkDeviceSize>(num_quads) * 6 * sizeof(u32);
- cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
- VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, {barrier}, {});
- });
- return {staging_ref.buffer, 0};
-}
-
Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_,
VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_)
@@ -221,38 +149,33 @@ Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_,
Uint8Pass::~Uint8Pass() = default;
-std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
- u64 src_offset) {
+std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
+ u32 src_offset) {
const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16));
- const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
+ const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
- update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size);
+ update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, set,
+ scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set,
num_vertices](vk::CommandBuffer cmdbuf) {
- constexpr u32 dispatch_size = 1024;
+ static constexpr u32 DISPATCH_SIZE = 1024;
+ static constexpr VkMemoryBarrier WRITE_BARRIER{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
+ };
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
- cmdbuf.Dispatch(Common::AlignUp(num_vertices, dispatch_size) / dispatch_size, 1, 1);
-
- VkBufferMemoryBarrier barrier;
- barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
- barrier.pNext = nullptr;
- barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
- barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
- barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barrier.buffer = buffer;
- barrier.offset = 0;
- barrier.size = static_cast<VkDeviceSize>(num_vertices * sizeof(u16));
+ cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
- VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {});
+ VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER);
});
- return {staging_ref.buffer, 0};
+ return {staging.buffer, staging.offset};
}
QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
@@ -267,9 +190,9 @@ QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
QuadIndexedPass::~QuadIndexedPass() = default;
-std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
+std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex,
- VkBuffer src_buffer, u64 src_offset) {
+ VkBuffer src_buffer, u32 src_offset) {
const u32 index_shift = [index_format] {
switch (index_format) {
case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte:
@@ -286,38 +209,33 @@ std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
const u32 num_tri_vertices = (num_vertices / 4) * 6;
const std::size_t staging_size = num_tri_vertices * sizeof(u32);
- const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
+ const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
- update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size);
+ update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, set,
+ scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set,
num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) {
- static constexpr u32 dispatch_size = 1024;
+ static constexpr u32 DISPATCH_SIZE = 1024;
+ static constexpr VkMemoryBarrier WRITE_BARRIER{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
+ };
const std::array push_constants = {base_vertex, index_shift};
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
&push_constants);
- cmdbuf.Dispatch(Common::AlignUp(num_tri_vertices, dispatch_size) / dispatch_size, 1, 1);
-
- VkBufferMemoryBarrier barrier;
- barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
- barrier.pNext = nullptr;
- barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
- barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
- barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barrier.buffer = buffer;
- barrier.offset = 0;
- barrier.size = static_cast<VkDeviceSize>(num_tri_vertices * sizeof(u32));
+ cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
- VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {});
+ VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER);
});
- return {staging_ref.buffer, 0};
+ return {staging.buffer, staging.offset};
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index f5c6f5f17..17d781d99 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -41,22 +41,6 @@ private:
vk::ShaderModule module;
};
-class QuadArrayPass final : public VKComputePass {
-public:
- explicit QuadArrayPass(const Device& device_, VKScheduler& scheduler_,
- VKDescriptorPool& descriptor_pool_,
- StagingBufferPool& staging_buffer_pool_,
- VKUpdateDescriptorQueue& update_descriptor_queue_);
- ~QuadArrayPass();
-
- std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, u32 first);
-
-private:
- VKScheduler& scheduler;
- StagingBufferPool& staging_buffer_pool;
- VKUpdateDescriptorQueue& update_descriptor_queue;
-};
-
class Uint8Pass final : public VKComputePass {
public:
explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_,
@@ -64,7 +48,10 @@ public:
VKUpdateDescriptorQueue& update_descriptor_queue_);
~Uint8Pass();
- std::pair<VkBuffer, u64> Assemble(u32 num_vertices, VkBuffer src_buffer, u64 src_offset);
+ /// Assemble uint8 indices into a uint16 index buffer
+ /// Returns a pair with the staging buffer, and the offset where the assembled data is
+ std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, VkBuffer src_buffer,
+ u32 src_offset);
private:
VKScheduler& scheduler;
@@ -80,9 +67,9 @@ public:
VKUpdateDescriptorQueue& update_descriptor_queue_);
~QuadIndexedPass();
- std::pair<VkBuffer, u64> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format,
- u32 num_vertices, u32 base_vertex, VkBuffer src_buffer,
- u64 src_offset);
+ std::pair<VkBuffer, VkDeviceSize> Assemble(
+ Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices,
+ u32 base_vertex, VkBuffer src_buffer, u32 src_offset);
private:
VKScheduler& scheduler;
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
index 6cd00884d..3bec48d14 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
@@ -45,8 +45,8 @@ void InnerFence::Wait() {
}
VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
- Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_,
- VKBufferCache& buffer_cache_, VKQueryCache& query_cache_,
+ TextureCache& texture_cache_, BufferCache& buffer_cache_,
+ VKQueryCache& query_cache_, const Device& device_,
VKScheduler& scheduler_)
: GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_},
scheduler{scheduler_} {}
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
index 9c5e5aa8f..2f8322d29 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -22,7 +22,6 @@ class RasterizerInterface;
namespace Vulkan {
class Device;
-class VKBufferCache;
class VKQueryCache;
class VKScheduler;
@@ -45,14 +44,14 @@ private:
using Fence = std::shared_ptr<InnerFence>;
using GenericFenceManager =
- VideoCommon::FenceManager<Fence, TextureCache, VKBufferCache, VKQueryCache>;
+ VideoCommon::FenceManager<Fence, TextureCache, BufferCache, VKQueryCache>;
class VKFenceManager final : public GenericFenceManager {
public:
- explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
- Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_,
- VKBufferCache& buffer_cache_, VKQueryCache& query_cache_,
- VKScheduler& scheduler_);
+ explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
+ TextureCache& texture_cache, BufferCache& buffer_cache,
+ VKQueryCache& query_cache, const Device& device,
+ VKScheduler& scheduler);
protected:
Fence CreateFence(u32 value, bool is_stubbed) override;
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index d50dca604..fc6dd83eb 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -221,9 +221,6 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
std::vector<VkVertexInputBindingDescription> vertex_bindings;
std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors;
for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
- if (state.attributes[index].binding_index_enabled == 0) {
- continue;
- }
const bool instanced = state.binding_divisors[index] != 0;
const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
vertex_bindings.push_back({
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h
index f336f1862..2c7ed654d 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.h
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h
@@ -21,7 +21,12 @@ public:
/// Returns the current logical tick.
[[nodiscard]] u64 CurrentTick() const noexcept {
- return current_tick;
+ return current_tick.load(std::memory_order_relaxed);
+ }
+
+ /// Returns the last known GPU tick.
+ [[nodiscard]] u64 KnownGpuTick() const noexcept {
+ return gpu_tick.load(std::memory_order_relaxed);
}
/// Returns the timeline semaphore handle.
@@ -31,7 +36,7 @@ public:
/// Returns true when a tick has been hit by the GPU.
[[nodiscard]] bool IsFree(u64 tick) {
- return gpu_tick >= tick;
+ return gpu_tick.load(std::memory_order_relaxed) >= tick;
}
/// Advance to the logical tick.
@@ -41,7 +46,7 @@ public:
/// Refresh the known GPU tick
void Refresh() {
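+ // A relaxed store is sufficient: gpu_tick only grows, and a stale read
+ // merely postpones resource reuse until the next refresh.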
- gpu_tick = semaphore.GetCounter();
+ gpu_tick.store(semaphore.GetCounter(), std::memory_order_relaxed);
}
/// Waits for a tick to be hit on the GPU
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index f0a111829..dfd38f575 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -8,8 +8,6 @@
#include <mutex>
#include <vector>
-#include <boost/container/static_vector.hpp>
-
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
@@ -24,7 +22,6 @@
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
-#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
@@ -50,15 +47,16 @@ MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(25
MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128));
MICROPROFILE_DEFINE(Vulkan_Compute, "Vulkan", "Record compute", MP_RGB(192, 128, 128));
MICROPROFILE_DEFINE(Vulkan_Clearing, "Vulkan", "Record clearing", MP_RGB(192, 128, 128));
-MICROPROFILE_DEFINE(Vulkan_Geometry, "Vulkan", "Setup geometry", MP_RGB(192, 128, 128));
-MICROPROFILE_DEFINE(Vulkan_ConstBuffers, "Vulkan", "Setup constant buffers", MP_RGB(192, 128, 128));
-MICROPROFILE_DEFINE(Vulkan_GlobalBuffers, "Vulkan", "Setup global buffers", MP_RGB(192, 128, 128));
-MICROPROFILE_DEFINE(Vulkan_RenderTargets, "Vulkan", "Setup render targets", MP_RGB(192, 128, 128));
-MICROPROFILE_DEFINE(Vulkan_Textures, "Vulkan", "Setup textures", MP_RGB(192, 128, 128));
-MICROPROFILE_DEFINE(Vulkan_Images, "Vulkan", "Setup images", MP_RGB(192, 128, 128));
MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192, 128, 128));
namespace {
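+// Plain draw arguments, captured by value into the command buffer lambda.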
+struct DrawParams {
+ u32 base_instance;
+ u32 num_instances;
+ u32 base_vertex;
+ u32 num_vertices;
+ bool is_indexed;
+};
constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute);
@@ -67,7 +65,6 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in
const float width = src.scale_x * 2.0f;
const float height = src.scale_y * 2.0f;
const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f;
-
VkViewport viewport{
.x = src.translate_x - src.scale_x,
.y = src.translate_y - src.scale_y,
@@ -76,12 +73,10 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in
.minDepth = src.translate_z - src.scale_z * reduce_z,
.maxDepth = src.translate_z + src.scale_z,
};
-
if (!device.IsExtDepthRangeUnrestrictedSupported()) {
viewport.minDepth = std::clamp(viewport.minDepth, 0.0f, 1.0f);
viewport.maxDepth = std::clamp(viewport.maxDepth, 0.0f, 1.0f);
}
-
return viewport;
}
@@ -146,13 +141,6 @@ TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const
return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
}
-template <size_t N>
-std::array<VkDeviceSize, N> ExpandStrides(const std::array<u16, N>& strides) {
- std::array<VkDeviceSize, N> expanded;
- std::copy(strides.begin(), strides.end(), expanded.begin());
- return expanded;
-}
-
ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
if (entry.is_buffer) {
return ImageViewType::e2D;
@@ -221,190 +209,25 @@ void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_ca
}
}
-} // Anonymous namespace
-
-class BufferBindings final {
-public:
- void AddVertexBinding(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, u32 stride) {
- vertex.buffers[vertex.num_buffers] = buffer;
- vertex.offsets[vertex.num_buffers] = offset;
- vertex.sizes[vertex.num_buffers] = size;
- vertex.strides[vertex.num_buffers] = static_cast<u16>(stride);
- ++vertex.num_buffers;
- }
-
- void SetIndexBinding(VkBuffer buffer, VkDeviceSize offset, VkIndexType type) {
- index.buffer = buffer;
- index.offset = offset;
- index.type = type;
- }
-
- void Bind(const Device& device, VKScheduler& scheduler) const {
- // Use this large switch case to avoid dispatching more memory in the record lambda than
- // what we need. It looks horrible, but it's the best we can do on standard C++.
- switch (vertex.num_buffers) {
- case 0:
- return BindStatic<0>(device, scheduler);
- case 1:
- return BindStatic<1>(device, scheduler);
- case 2:
- return BindStatic<2>(device, scheduler);
- case 3:
- return BindStatic<3>(device, scheduler);
- case 4:
- return BindStatic<4>(device, scheduler);
- case 5:
- return BindStatic<5>(device, scheduler);
- case 6:
- return BindStatic<6>(device, scheduler);
- case 7:
- return BindStatic<7>(device, scheduler);
- case 8:
- return BindStatic<8>(device, scheduler);
- case 9:
- return BindStatic<9>(device, scheduler);
- case 10:
- return BindStatic<10>(device, scheduler);
- case 11:
- return BindStatic<11>(device, scheduler);
- case 12:
- return BindStatic<12>(device, scheduler);
- case 13:
- return BindStatic<13>(device, scheduler);
- case 14:
- return BindStatic<14>(device, scheduler);
- case 15:
- return BindStatic<15>(device, scheduler);
- case 16:
- return BindStatic<16>(device, scheduler);
- case 17:
- return BindStatic<17>(device, scheduler);
- case 18:
- return BindStatic<18>(device, scheduler);
- case 19:
- return BindStatic<19>(device, scheduler);
- case 20:
- return BindStatic<20>(device, scheduler);
- case 21:
- return BindStatic<21>(device, scheduler);
- case 22:
- return BindStatic<22>(device, scheduler);
- case 23:
- return BindStatic<23>(device, scheduler);
- case 24:
- return BindStatic<24>(device, scheduler);
- case 25:
- return BindStatic<25>(device, scheduler);
- case 26:
- return BindStatic<26>(device, scheduler);
- case 27:
- return BindStatic<27>(device, scheduler);
- case 28:
- return BindStatic<28>(device, scheduler);
- case 29:
- return BindStatic<29>(device, scheduler);
- case 30:
- return BindStatic<30>(device, scheduler);
- case 31:
- return BindStatic<31>(device, scheduler);
- case 32:
- return BindStatic<32>(device, scheduler);
- }
- UNREACHABLE();
- }
-
-private:
- // Some of these fields are intentionally left uninitialized to avoid initializing them twice.
- struct {
- size_t num_buffers = 0;
- std::array<VkBuffer, Maxwell::NumVertexArrays> buffers;
- std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets;
- std::array<VkDeviceSize, Maxwell::NumVertexArrays> sizes;
- std::array<u16, Maxwell::NumVertexArrays> strides;
- } vertex;
-
- struct {
- VkBuffer buffer = nullptr;
- VkDeviceSize offset;
- VkIndexType type;
- } index;
-
- template <size_t N>
- void BindStatic(const Device& device, VKScheduler& scheduler) const {
- if (device.IsExtExtendedDynamicStateSupported()) {
- if (index.buffer) {
- BindStatic<N, true, true>(scheduler);
- } else {
- BindStatic<N, false, true>(scheduler);
- }
- } else {
- if (index.buffer) {
- BindStatic<N, true, false>(scheduler);
- } else {
- BindStatic<N, false, false>(scheduler);
- }
- }
- }
-
- template <size_t N, bool is_indexed, bool has_extended_dynamic_state>
- void BindStatic(VKScheduler& scheduler) const {
- static_assert(N <= Maxwell::NumVertexArrays);
- if constexpr (N == 0) {
- return;
- }
-
- std::array<VkBuffer, N> buffers;
- std::array<VkDeviceSize, N> offsets;
- std::copy(vertex.buffers.begin(), vertex.buffers.begin() + N, buffers.begin());
- std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin());
-
- if constexpr (has_extended_dynamic_state) {
- // With extended dynamic states we can specify the length and stride of a vertex buffer
- std::array<VkDeviceSize, N> sizes;
- std::array<u16, N> strides;
- std::copy(vertex.sizes.begin(), vertex.sizes.begin() + N, sizes.begin());
- std::copy(vertex.strides.begin(), vertex.strides.begin() + N, strides.begin());
-
- if constexpr (is_indexed) {
- scheduler.Record(
- [buffers, offsets, sizes, strides, index = index](vk::CommandBuffer cmdbuf) {
- cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type);
- cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(),
- offsets.data(), sizes.data(),
- ExpandStrides(strides).data());
- });
- } else {
- scheduler.Record([buffers, offsets, sizes, strides](vk::CommandBuffer cmdbuf) {
- cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(),
- offsets.data(), sizes.data(),
- ExpandStrides(strides).data());
- });
- }
- return;
- }
-
- if constexpr (is_indexed) {
- // Indexed draw
- scheduler.Record([buffers, offsets, index = index](vk::CommandBuffer cmdbuf) {
- cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type);
- cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data());
- });
- } else {
- // Array draw
- scheduler.Record([buffers, offsets](vk::CommandBuffer cmdbuf) {
- cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data());
- });
- }
- }
-};
-
-void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf) const {
- if (is_indexed) {
- cmdbuf.DrawIndexed(num_vertices, num_instances, 0, base_vertex, base_instance);
- } else {
- cmdbuf.Draw(num_vertices, num_instances, base_vertex, base_instance);
+DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced,
+ bool is_indexed) {
+ DrawParams params{
+ .base_instance = regs.vb_base_instance,
+ .num_instances = is_instanced ? num_instances : 1,
+ .base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first,
+ .num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count,
+ .is_indexed = is_indexed,
+ };
+ if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
+ // 6 triangle vertices per quad, base vertex is part of the index
+ // See BindQuadArrayIndexBuffer for more details
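+ // e.g. 24 quad vertices become (24 / 4) * 6 = 36 triangle vertices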
+ params.num_vertices = (params.num_vertices / 4) * 6;
+ params.base_vertex = 0;
+ params.is_indexed = true;
}
+ return params;
}
+} // Anonymous namespace
RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
Tegra::MemoryManager& gpu_memory_,
@@ -414,21 +237,19 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
: RasterizerAccelerated{cpu_memory_}, gpu{gpu_},
gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()},
screen_info{screen_info_}, device{device_}, memory_allocator{memory_allocator_},
- state_tracker{state_tracker_}, scheduler{scheduler_}, stream_buffer(device, scheduler),
+ state_tracker{state_tracker_}, scheduler{scheduler_},
staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler),
update_descriptor_queue(device, scheduler),
blit_image(device, scheduler, state_tracker, descriptor_pool),
- quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
- quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
- uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
texture_cache_runtime{device, scheduler, memory_allocator, staging_pool, blit_image},
texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
+ buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
+ update_descriptor_queue, descriptor_pool),
+ buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
descriptor_pool, update_descriptor_queue),
- buffer_cache(*this, gpu_memory, cpu_memory_, device, memory_allocator, scheduler,
- stream_buffer, staging_pool),
query_cache{*this, maxwell3d, gpu_memory, device, scheduler},
- fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, scheduler),
+ fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) {
scheduler.SetQueryCache(query_cache);
if (device.UseAsynchronousShaders()) {
@@ -446,52 +267,51 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
query_cache.UpdateCounters();
- GraphicsPipelineCacheKey key;
- key.fixed_state.Fill(maxwell3d.regs, device.IsExtExtendedDynamicStateSupported());
-
- buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed));
+ graphics_key.fixed_state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported());
- BufferBindings buffer_bindings;
- const DrawParameters draw_params =
- SetupGeometry(key.fixed_state, buffer_bindings, is_indexed, is_instanced);
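+ // Lock both caches for the duration of the draw; std::scoped_lock acquires
+ // multiple mutexes with built-in deadlock avoidance.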
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
- auto lock = texture_cache.AcquireLock();
texture_cache.SynchronizeGraphicsDescriptors();
-
texture_cache.UpdateRenderTargets(false);
const auto shaders = pipeline_cache.GetShaders();
- key.shaders = GetShaderAddresses(shaders);
- SetupShaderDescriptors(shaders);
+ graphics_key.shaders = GetShaderAddresses(shaders);
- buffer_cache.Unmap();
+ SetupShaderDescriptors(shaders, is_indexed);
const Framebuffer* const framebuffer = texture_cache.GetFramebuffer();
- key.renderpass = framebuffer->RenderPass();
+ graphics_key.renderpass = framebuffer->RenderPass();
- auto* const pipeline =
- pipeline_cache.GetGraphicsPipeline(key, framebuffer->NumColorBuffers(), async_shaders);
+ VKGraphicsPipeline* const pipeline = pipeline_cache.GetGraphicsPipeline(
+ graphics_key, framebuffer->NumColorBuffers(), async_shaders);
if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) {
// Async graphics pipeline was not ready.
return;
}
- buffer_bindings.Bind(device, scheduler);
-
BeginTransformFeedback();
scheduler.RequestRenderpass(framebuffer);
scheduler.BindGraphicsPipeline(pipeline->GetHandle());
UpdateDynamicStates();
- const auto pipeline_layout = pipeline->GetLayout();
- const auto descriptor_set = pipeline->CommitDescriptorSet();
+ const auto& regs = maxwell3d.regs;
+ const u32 num_instances = maxwell3d.mme_draw.instance_count;
+ const DrawParams draw_params = MakeDrawParams(regs, num_instances, is_instanced, is_indexed);
+ const VkPipelineLayout pipeline_layout = pipeline->GetLayout();
+ const VkDescriptorSet descriptor_set = pipeline->CommitDescriptorSet();
scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) {
if (descriptor_set) {
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout,
- DESCRIPTOR_SET, descriptor_set, {});
+ DESCRIPTOR_SET, descriptor_set, nullptr);
+ }
+ if (draw_params.is_indexed) {
+ cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0,
+ draw_params.base_vertex, draw_params.base_instance);
+ } else {
+ cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances,
+ draw_params.base_vertex, draw_params.base_instance);
}
- draw_params.Draw(cmdbuf);
});
EndTransformFeedback();
@@ -515,7 +335,7 @@ void RasterizerVulkan::Clear() {
return;
}
- auto lock = texture_cache.AcquireLock();
+ std::scoped_lock lock{texture_cache.mutex};
texture_cache.UpdateRenderTargets(true);
const Framebuffer* const framebuffer = texture_cache.GetFramebuffer();
const VkExtent2D render_area = framebuffer->RenderArea();
@@ -559,7 +379,6 @@ void RasterizerVulkan::Clear() {
if (use_stencil) {
aspect_flags |= VK_IMAGE_ASPECT_STENCIL_BIT;
}
-
scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil,
clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) {
VkClearAttachment attachment;
@@ -580,12 +399,11 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
auto& pipeline = pipeline_cache.GetComputePipeline({
.shader = code_addr,
.shared_memory_size = launch_desc.shared_alloc,
- .workgroup_size =
- {
- launch_desc.block_dim_x,
- launch_desc.block_dim_y,
- launch_desc.block_dim_z,
- },
+ .workgroup_size{
+ launch_desc.block_dim_x,
+ launch_desc.block_dim_y,
+ launch_desc.block_dim_z,
+ },
});
// Compute dispatches can't be executed inside a renderpass
@@ -594,10 +412,21 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
image_view_indices.clear();
sampler_handles.clear();
- auto lock = texture_cache.AcquireLock();
- texture_cache.SynchronizeComputeDescriptors();
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
const auto& entries = pipeline.GetEntries();
+ buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers);
+ buffer_cache.UnbindComputeStorageBuffers();
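+ // Bind each global buffer entry to consecutive compute storage buffer slots.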
+ u32 ssbo_index = 0;
+ for (const auto& buffer : entries.global_buffers) {
+ buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
+ buffer.is_written);
+ ++ssbo_index;
+ }
+ buffer_cache.UpdateComputeBuffers();
+
+ texture_cache.SynchronizeComputeDescriptors();
+
SetupComputeUniformTexels(entries);
SetupComputeTextures(entries);
SetupComputeStorageTexels(entries);
@@ -606,20 +435,15 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
const std::span indices_span(image_view_indices.data(), image_view_indices.size());
texture_cache.FillComputeImageViews(indices_span, image_view_ids);
- buffer_cache.Map(CalculateComputeStreamBufferSize());
-
update_descriptor_queue.Acquire();
- SetupComputeConstBuffers(entries);
- SetupComputeGlobalBuffers(entries);
+ buffer_cache.BindHostComputeBuffers();
ImageViewId* image_view_id_ptr = image_view_ids.data();
VkSampler* sampler_ptr = sampler_handles.data();
PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr,
sampler_ptr);
- buffer_cache.Unmap();
-
const VkPipeline pipeline_handle = pipeline.GetHandle();
const VkPipelineLayout pipeline_layout = pipeline.GetLayout();
const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet();
@@ -644,6 +468,11 @@ void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
query_cache.Query(gpu_addr, type, timestamp);
}
+void RasterizerVulkan::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
+ u32 size) {
+ buffer_cache.BindGraphicsUniformBuffer(stage, index, gpu_addr, size);
+}
+
void RasterizerVulkan::FlushAll() {}
void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
@@ -651,19 +480,23 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
return;
}
{
- auto lock = texture_cache.AcquireLock();
+ std::scoped_lock lock{texture_cache.mutex};
texture_cache.DownloadMemory(addr, size);
}
- buffer_cache.FlushRegion(addr, size);
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.DownloadMemory(addr, size);
+ }
query_cache.FlushRegion(addr, size);
}
bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) {
+ std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex};
if (!Settings::IsGPULevelHigh()) {
- return buffer_cache.MustFlushRegion(addr, size);
+ return buffer_cache.IsRegionGpuModified(addr, size);
}
return texture_cache.IsRegionGpuModified(addr, size) ||
- buffer_cache.MustFlushRegion(addr, size);
+ buffer_cache.IsRegionGpuModified(addr, size);
}
void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
@@ -671,11 +504,14 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
return;
}
{
- auto lock = texture_cache.AcquireLock();
+ std::scoped_lock lock{texture_cache.mutex};
texture_cache.WriteMemory(addr, size);
}
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.WriteMemory(addr, size);
+ }
pipeline_cache.InvalidateRegion(addr, size);
- buffer_cache.InvalidateRegion(addr, size);
query_cache.InvalidateRegion(addr, size);
}
@@ -683,25 +519,34 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
+ pipeline_cache.OnCPUWrite(addr, size);
{
- auto lock = texture_cache.AcquireLock();
+ std::scoped_lock lock{texture_cache.mutex};
texture_cache.WriteMemory(addr, size);
}
- pipeline_cache.OnCPUWrite(addr, size);
- buffer_cache.OnCPUWrite(addr, size);
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.CachedWriteMemory(addr, size);
+ }
}
void RasterizerVulkan::SyncGuestHost() {
- buffer_cache.SyncGuestHost();
pipeline_cache.SyncGuestHost();
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.FlushCachedWrites();
+ }
}
void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
{
- auto lock = texture_cache.AcquireLock();
+ std::scoped_lock lock{texture_cache.mutex};
texture_cache.UnmapMemory(addr, size);
}
- buffer_cache.OnCPUWrite(addr, size);
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.WriteMemory(addr, size);
+ }
pipeline_cache.OnCPUWrite(addr, size);
}
@@ -774,18 +619,21 @@ void RasterizerVulkan::TickFrame() {
draw_counter = 0;
update_descriptor_queue.TickFrame();
fence_manager.TickFrame();
- buffer_cache.TickFrame();
staging_pool.TickFrame();
{
- auto lock = texture_cache.AcquireLock();
+ std::scoped_lock lock{texture_cache.mutex};
texture_cache.TickFrame();
}
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.TickFrame();
+ }
}
bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) {
- auto lock = texture_cache.AcquireLock();
+ std::scoped_lock lock{texture_cache.mutex};
texture_cache.BlitImage(dst, src, copy_config);
return true;
}
@@ -795,13 +643,11 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
if (!framebuffer_addr) {
return false;
}
-
- auto lock = texture_cache.AcquireLock();
+ std::scoped_lock lock{texture_cache.mutex};
ImageView* const image_view = texture_cache.TryFindFramebufferImageView(framebuffer_addr);
if (!image_view) {
return false;
}
-
screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D);
screen_info.width = image_view->size.width;
screen_info.height = image_view->size.height;
@@ -830,29 +676,8 @@ void RasterizerVulkan::FlushWork() {
draw_counter = 0;
}
-RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state,
- BufferBindings& buffer_bindings,
- bool is_indexed,
- bool is_instanced) {
- MICROPROFILE_SCOPE(Vulkan_Geometry);
-
- const auto& regs = maxwell3d.regs;
-
- SetupVertexArrays(buffer_bindings);
-
- const u32 base_instance = regs.vb_base_instance;
- const u32 num_instances = is_instanced ? maxwell3d.mme_draw.instance_count : 1;
- const u32 base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first;
- const u32 num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count;
-
- DrawParameters params{base_instance, num_instances, base_vertex, num_vertices, is_indexed};
- SetupIndexBuffer(buffer_bindings, params, is_indexed);
-
- return params;
-}
-
void RasterizerVulkan::SetupShaderDescriptors(
- const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
+ const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, bool is_indexed) {
image_view_indices.clear();
sampler_handles.clear();
for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
@@ -860,15 +685,27 @@ void RasterizerVulkan::SetupShaderDescriptors(
if (!shader) {
continue;
}
- const auto& entries = shader->GetEntries();
+ const ShaderEntries& entries = shader->GetEntries();
SetupGraphicsUniformTexels(entries, stage);
SetupGraphicsTextures(entries, stage);
SetupGraphicsStorageTexels(entries, stage);
SetupGraphicsImages(entries, stage);
+
+ buffer_cache.SetEnabledUniformBuffers(stage, entries.enabled_uniform_buffers);
+ buffer_cache.UnbindGraphicsStorageBuffers(stage);
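+ // Bind this stage's global buffers to consecutive storage buffer slots.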
+ u32 ssbo_index = 0;
+ for (const auto& buffer : entries.global_buffers) {
+ buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
+ buffer.cbuf_offset, buffer.is_written);
+ ++ssbo_index;
+ }
}
const std::span indices_span(image_view_indices.data(), image_view_indices.size());
+ buffer_cache.UpdateGraphicsBuffers(is_indexed);
texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
+ buffer_cache.BindHostGeometryBuffers(is_indexed);
+
update_descriptor_queue.Acquire();
ImageViewId* image_view_id_ptr = image_view_ids.data();
@@ -879,11 +716,9 @@ void RasterizerVulkan::SetupShaderDescriptors(
if (!shader) {
continue;
}
- const auto& entries = shader->GetEntries();
- SetupGraphicsConstBuffers(entries, stage);
- SetupGraphicsGlobalBuffers(entries, stage);
- PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr,
- sampler_ptr);
+ buffer_cache.BindHostStageBuffers(stage);
+ PushImageDescriptors(shader->GetEntries(), texture_cache, update_descriptor_queue,
+ image_view_id_ptr, sampler_ptr);
}
}
@@ -916,27 +751,11 @@ void RasterizerVulkan::BeginTransformFeedback() {
LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported");
return;
}
-
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
-
- UNIMPLEMENTED_IF(regs.tfb_bindings[1].buffer_enable);
- UNIMPLEMENTED_IF(regs.tfb_bindings[2].buffer_enable);
- UNIMPLEMENTED_IF(regs.tfb_bindings[3].buffer_enable);
-
- const auto& binding = regs.tfb_bindings[0];
- UNIMPLEMENTED_IF(binding.buffer_enable == 0);
- UNIMPLEMENTED_IF(binding.buffer_offset != 0);
-
- const GPUVAddr gpu_addr = binding.Address();
- const VkDeviceSize size = static_cast<VkDeviceSize>(binding.buffer_size);
- const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
-
- scheduler.Record([buffer = info.handle, offset = info.offset, size](vk::CommandBuffer cmdbuf) {
- cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size);
- cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr);
- });
+ scheduler.Record(
+ [](vk::CommandBuffer cmdbuf) { cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); });
}
void RasterizerVulkan::EndTransformFeedback() {
@@ -947,104 +766,11 @@ void RasterizerVulkan::EndTransformFeedback() {
if (!device.IsExtTransformFeedbackSupported()) {
return;
}
-
scheduler.Record(
[](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); });
}
-void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) {
- const auto& regs = maxwell3d.regs;
-
- for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
- const auto& vertex_array = regs.vertex_array[index];
- if (!vertex_array.IsEnabled()) {
- continue;
- }
- const GPUVAddr start{vertex_array.StartAddress()};
- const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
-
- ASSERT(end >= start);
- const size_t size = end - start;
- if (size == 0) {
- buffer_bindings.AddVertexBinding(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE, 0);
- continue;
- }
- const auto info = buffer_cache.UploadMemory(start, size);
- buffer_bindings.AddVertexBinding(info.handle, info.offset, size, vertex_array.stride);
- }
-}
-
-void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params,
- bool is_indexed) {
- if (params.num_vertices == 0) {
- return;
- }
- const auto& regs = maxwell3d.regs;
- switch (regs.draw.topology) {
- case Maxwell::PrimitiveTopology::Quads: {
- if (!params.is_indexed) {
- const auto [buffer, offset] =
- quad_array_pass.Assemble(params.num_vertices, params.base_vertex);
- buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32);
- params.base_vertex = 0;
- params.num_vertices = params.num_vertices * 6 / 4;
- params.is_indexed = true;
- break;
- }
- const GPUVAddr gpu_addr = regs.index_array.IndexStart();
- const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
- VkBuffer buffer = info.handle;
- u64 offset = info.offset;
- std::tie(buffer, offset) = quad_indexed_pass.Assemble(
- regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset);
-
- buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32);
- params.num_vertices = (params.num_vertices / 4) * 6;
- params.base_vertex = 0;
- break;
- }
- default: {
- if (!is_indexed) {
- break;
- }
- const GPUVAddr gpu_addr = regs.index_array.IndexStart();
- const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
- VkBuffer buffer = info.handle;
- u64 offset = info.offset;
-
- auto format = regs.index_array.format;
- const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte;
- if (is_uint8 && !device.IsExtIndexTypeUint8Supported()) {
- std::tie(buffer, offset) = uint8_pass.Assemble(params.num_vertices, buffer, offset);
- format = Maxwell::IndexFormat::UnsignedShort;
- }
-
- buffer_bindings.SetIndexBinding(buffer, offset, MaxwellToVK::IndexFormat(device, format));
- break;
- }
- }
-}
-
-void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, size_t stage) {
- MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
- const auto& shader_stage = maxwell3d.state.shader_stages[stage];
- for (const auto& entry : entries.const_buffers) {
- SetupConstBuffer(entry, shader_stage.const_buffers[entry.GetIndex()]);
- }
-}
-
-void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, size_t stage) {
- MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
- const auto& cbufs{maxwell3d.state.shader_stages[stage]};
-
- for (const auto& entry : entries.global_buffers) {
- const auto addr = cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset();
- SetupGlobalBuffer(entry, addr);
- }
-}
-
void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) {
- MICROPROFILE_SCOPE(Vulkan_Textures);
const auto& regs = maxwell3d.regs;
const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
for (const auto& entry : entries.uniform_texels) {
@@ -1054,7 +780,6 @@ void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries,
}
void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) {
- MICROPROFILE_SCOPE(Vulkan_Textures);
const auto& regs = maxwell3d.regs;
const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
for (const auto& entry : entries.samplers) {
@@ -1070,7 +795,6 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_
}
void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) {
- MICROPROFILE_SCOPE(Vulkan_Textures);
const auto& regs = maxwell3d.regs;
const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
for (const auto& entry : entries.storage_texels) {
@@ -1080,7 +804,6 @@ void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries,
}
void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) {
- MICROPROFILE_SCOPE(Vulkan_Images);
const auto& regs = maxwell3d.regs;
const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
for (const auto& entry : entries.images) {
@@ -1089,32 +812,7 @@ void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t
}
}
-void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) {
- MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
- const auto& launch_desc = kepler_compute.launch_description;
- for (const auto& entry : entries.const_buffers) {
- const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
- const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
- const Tegra::Engines::ConstBufferInfo info{
- .address = config.Address(),
- .size = config.size,
- .enabled = mask[entry.GetIndex()],
- };
- SetupConstBuffer(entry, info);
- }
-}
-
-void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) {
- MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
- const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
- for (const auto& entry : entries.global_buffers) {
- const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
- SetupGlobalBuffer(entry, addr);
- }
-}
-
void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
- MICROPROFILE_SCOPE(Vulkan_Textures);
const bool via_header_index = kepler_compute.launch_description.linked_tsc;
for (const auto& entry : entries.uniform_texels) {
const TextureHandle handle =
@@ -1124,7 +822,6 @@ void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
}
void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
- MICROPROFILE_SCOPE(Vulkan_Textures);
const bool via_header_index = kepler_compute.launch_description.linked_tsc;
for (const auto& entry : entries.samplers) {
for (size_t index = 0; index < entry.size; ++index) {
@@ -1139,7 +836,6 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
}
void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
- MICROPROFILE_SCOPE(Vulkan_Textures);
const bool via_header_index = kepler_compute.launch_description.linked_tsc;
for (const auto& entry : entries.storage_texels) {
const TextureHandle handle =
@@ -1149,7 +845,6 @@ void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
}
void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
- MICROPROFILE_SCOPE(Vulkan_Images);
const bool via_header_index = kepler_compute.launch_description.linked_tsc;
for (const auto& entry : entries.images) {
const TextureHandle handle =
@@ -1158,42 +853,6 @@ void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
}
}
-void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
- const Tegra::Engines::ConstBufferInfo& buffer) {
- if (!buffer.enabled) {
- // Set values to zero to unbind buffers
- update_descriptor_queue.AddBuffer(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE);
- return;
- }
- // Align the size to avoid bad std140 interactions
- const size_t size = Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
- ASSERT(size <= MaxConstbufferSize);
-
- const u64 alignment = device.GetUniformBufferAlignment();
- const auto info = buffer_cache.UploadMemory(buffer.address, size, alignment);
- update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
-}
-
-void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) {
- const u64 actual_addr = gpu_memory.Read<u64>(address);
- const u32 size = gpu_memory.Read<u32>(address + 8);
-
- if (size == 0) {
- // Sometimes global memory pointers don't have a proper size. Upload a dummy entry
- // because Vulkan doesn't like empty buffers.
- // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the
- // default buffer.
- static constexpr size_t dummy_size = 4;
- const auto info = buffer_cache.GetEmptyBuffer(dummy_size);
- update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size);
- return;
- }
-
- const auto info = buffer_cache.UploadMemory(
- actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten());
- update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
-}
-
void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchViewports()) {
return;
@@ -1206,7 +865,8 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg
GetViewportState(device, regs, 8), GetViewportState(device, regs, 9),
GetViewportState(device, regs, 10), GetViewportState(device, regs, 11),
GetViewportState(device, regs, 12), GetViewportState(device, regs, 13),
- GetViewportState(device, regs, 14), GetViewportState(device, regs, 15)};
+ GetViewportState(device, regs, 14), GetViewportState(device, regs, 15),
+ };
scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); });
}
@@ -1214,13 +874,14 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs
if (!state_tracker.TouchScissors()) {
return;
}
- const std::array scissors = {
+ const std::array scissors{
GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2),
GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5),
GetScissorState(regs, 6), GetScissorState(regs, 7), GetScissorState(regs, 8),
GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11),
GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14),
- GetScissorState(regs, 15)};
+ GetScissorState(regs, 15),
+ };
scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); });
}
@@ -1385,73 +1046,4 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs&
});
}
-size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const {
- size_t size = CalculateVertexArraysSize();
- if (is_indexed) {
- size = Common::AlignUp(size, 4) + CalculateIndexBufferSize();
- }
- size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment());
- return size;
-}
-
-size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const {
- return Tegra::Engines::KeplerCompute::NumConstBuffers *
- (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
-}
-
-size_t RasterizerVulkan::CalculateVertexArraysSize() const {
- const auto& regs = maxwell3d.regs;
-
- size_t size = 0;
- for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
- // This implementation assumes that all attributes are used in the shader.
- const GPUVAddr start{regs.vertex_array[index].StartAddress()};
- const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
- DEBUG_ASSERT(end >= start);
-
- size += (end - start) * regs.vertex_array[index].enable;
- }
- return size;
-}
-
-size_t RasterizerVulkan::CalculateIndexBufferSize() const {
- return static_cast<size_t>(maxwell3d.regs.index_array.count) *
- static_cast<size_t>(maxwell3d.regs.index_array.FormatSizeInBytes());
-}
-
-size_t RasterizerVulkan::CalculateConstBufferSize(
- const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) const {
- if (entry.IsIndirect()) {
- // Buffer is accessed indirectly, so upload the entire thing
- return buffer.size;
- } else {
- // Buffer is accessed directly, upload just what we use
- return entry.GetSize();
- }
-}
-
-VkBuffer RasterizerVulkan::DefaultBuffer() {
- if (default_buffer) {
- return *default_buffer;
- }
- default_buffer = device.GetLogical().CreateBuffer({
- .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .size = DEFAULT_BUFFER_SIZE,
- .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
- VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
- .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
- .queueFamilyIndexCount = 0,
- .pQueueFamilyIndices = nullptr,
- });
- default_buffer_commit = memory_allocator.Commit(default_buffer, MemoryUsage::DeviceLocal);
-
- scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([buffer = *default_buffer](vk::CommandBuffer cmdbuf) {
- cmdbuf.FillBuffer(buffer, 0, DEFAULT_BUFFER_SIZE, 0);
- });
- return *default_buffer;
-}
-
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 8e261b9bd..acea1ba2d 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -18,14 +18,13 @@
#include "video_core/renderer_vulkan/blit_image.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
-#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_fence_manager.h"
+#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
-#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/shader/async_shaders.h"
@@ -49,7 +48,6 @@ namespace Vulkan {
struct VKScreenInfo;
class StateTracker;
-class BufferBindings;
class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
public:
@@ -65,6 +63,7 @@ public:
void DispatchCompute(GPUVAddr code_addr) override;
void ResetCounter(VideoCore::QueryType type) override;
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
+ void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
void FlushAll() override;
void FlushRegion(VAddr addr, u64 size) override;
bool MustFlushRegion(VAddr addr, u64 size) override;
@@ -107,24 +106,11 @@ private:
static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float);
- struct DrawParameters {
- void Draw(vk::CommandBuffer cmdbuf) const;
-
- u32 base_instance = 0;
- u32 num_instances = 0;
- u32 base_vertex = 0;
- u32 num_vertices = 0;
- bool is_indexed = 0;
- };
-
void FlushWork();
- /// Setups geometry buffers and state.
- DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings,
- bool is_indexed, bool is_instanced);
-
/// Setup descriptors in the graphics pipeline.
- void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders);
+ void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders,
+ bool is_indexed);
void UpdateDynamicStates();
@@ -132,16 +118,6 @@ private:
void EndTransformFeedback();
- void SetupVertexArrays(BufferBindings& buffer_bindings);
-
- void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed);
-
- /// Setup constant buffers in the graphics pipeline.
- void SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage);
-
- /// Setup global buffers in the graphics pipeline.
- void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage);
-
/// Setup uniform texels in the graphics pipeline.
void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage);
@@ -154,12 +130,6 @@ private:
/// Setup images in the graphics pipeline.
void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage);
- /// Setup constant buffers in the compute pipeline.
- void SetupComputeConstBuffers(const ShaderEntries& entries);
-
- /// Setup global buffers in the compute pipeline.
- void SetupComputeGlobalBuffers(const ShaderEntries& entries);
-
/// Setup texel buffers in the compute pipeline.
void SetupComputeUniformTexels(const ShaderEntries& entries);
@@ -172,11 +142,6 @@ private:
/// Setup images in the compute pipeline.
void SetupComputeImages(const ShaderEntries& entries);
- void SetupConstBuffer(const ConstBufferEntry& entry,
- const Tegra::Engines::ConstBufferInfo& buffer);
-
- void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address);
-
void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs);
@@ -193,19 +158,6 @@ private:
void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
- size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const;
-
- size_t CalculateComputeStreamBufferSize() const;
-
- size_t CalculateVertexArraysSize() const;
-
- size_t CalculateIndexBufferSize() const;
-
- size_t CalculateConstBufferSize(const ConstBufferEntry& entry,
- const Tegra::Engines::ConstBufferInfo& buffer) const;
-
- VkBuffer DefaultBuffer();
-
Tegra::GPU& gpu;
Tegra::MemoryManager& gpu_memory;
Tegra::Engines::Maxwell3D& maxwell3d;
@@ -217,24 +169,21 @@ private:
StateTracker& state_tracker;
VKScheduler& scheduler;
- VKStreamBuffer stream_buffer;
StagingBufferPool staging_pool;
VKDescriptorPool descriptor_pool;
VKUpdateDescriptorQueue update_descriptor_queue;
BlitImageHelper blit_image;
- QuadArrayPass quad_array_pass;
- QuadIndexedPass quad_indexed_pass;
- Uint8Pass uint8_pass;
+
+ GraphicsPipelineCacheKey graphics_key;
TextureCacheRuntime texture_cache_runtime;
TextureCache texture_cache;
+ BufferCacheRuntime buffer_cache_runtime;
+ BufferCache buffer_cache;
VKPipelineCache pipeline_cache;
- VKBufferCache buffer_cache;
VKQueryCache query_cache;
VKFenceManager fence_manager;
- vk::Buffer default_buffer;
- MemoryCommit default_buffer_commit;
vk::Event wfi_event;
VideoCommon::Shader::AsyncShaders async_shaders;
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
index ee274ac59..a8bf7bda8 100644
--- a/src/video_core/renderer_vulkan/vk_resource_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
@@ -17,21 +17,21 @@ ResourcePool::~ResourcePool() = default;
size_t ResourcePool::CommitResource() {
// Refresh semaphore to query updated results
master_semaphore.Refresh();
-
- const auto search = [this](size_t begin, size_t end) -> std::optional<size_t> {
+ const u64 gpu_tick = master_semaphore.KnownGpuTick();
+ const auto search = [this, gpu_tick](size_t begin, size_t end) -> std::optional<size_t> {
for (size_t iterator = begin; iterator < end; ++iterator) {
- if (master_semaphore.IsFree(ticks[iterator])) {
+ if (gpu_tick >= ticks[iterator]) {
ticks[iterator] = master_semaphore.CurrentTick();
return iterator;
}
}
- return {};
+ return std::nullopt;
};
// Try to find a free resource from the hinted position to the end.
- auto found = search(free_iterator, ticks.size());
+ std::optional<size_t> found = search(hint_iterator, ticks.size());
if (!found) {
// Search from beginning to the hinted position.
- found = search(0, free_iterator);
+ found = search(0, hint_iterator);
if (!found) {
// Both searches failed, the pool is full; handle it.
const size_t free_resource = ManageOverflow();
@@ -41,7 +41,7 @@ size_t ResourcePool::CommitResource() {
}
}
// Free iterator is hinted to the resource after the one that's been commited.
- free_iterator = (*found + 1) % ticks.size();
+ hint_iterator = (*found + 1) % ticks.size();
return *found;
}
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h
index a018c7ec2..9d0bb3b4d 100644
--- a/src/video_core/renderer_vulkan/vk_resource_pool.h
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.h
@@ -36,7 +36,7 @@ private:
MasterSemaphore& master_semaphore;
size_t grow_step = 0; ///< Number of new resources created after an overflow
- size_t free_iterator = 0; ///< Hint to where the next free resources is likely to be found
+ size_t hint_iterator = 0; ///< Hint to where the next free resource is likely to be found
std::vector<u64> ticks; ///< Ticks for each resource
};
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 66004f9c0..f35c120b0 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -52,18 +52,6 @@ VKScheduler::~VKScheduler() {
worker_thread.join();
}
-u64 VKScheduler::CurrentTick() const noexcept {
- return master_semaphore->CurrentTick();
-}
-
-bool VKScheduler::IsFree(u64 tick) const noexcept {
- return master_semaphore->IsFree(tick);
-}
-
-void VKScheduler::Wait(u64 tick) {
- master_semaphore->Wait(tick);
-}
-
void VKScheduler::Flush(VkSemaphore semaphore) {
SubmitExecution(semaphore);
AllocateNewContext();
@@ -269,7 +257,7 @@ void VKScheduler::EndRenderPass() {
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
- VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, nullptr, nullptr,
+ VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, nullptr, nullptr,
vk::Span(barriers.data(), num_images));
});
state.renderpass = nullptr;
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 15f2987eb..3ce48e9d2 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -14,6 +14,7 @@
#include "common/alignment.h"
#include "common/common_types.h"
#include "common/threadsafe_queue.h"
+#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
@@ -21,7 +22,6 @@ namespace Vulkan {
class CommandPool;
class Device;
class Framebuffer;
-class MasterSemaphore;
class StateTracker;
class VKQueryCache;
@@ -32,15 +32,6 @@ public:
explicit VKScheduler(const Device& device, StateTracker& state_tracker);
~VKScheduler();
- /// Returns the current command buffer tick.
- [[nodiscard]] u64 CurrentTick() const noexcept;
-
- /// Returns true when a tick has been triggered by the GPU.
- [[nodiscard]] bool IsFree(u64 tick) const noexcept;
-
- /// Waits for the given tick to trigger on the GPU.
- void Wait(u64 tick);
-
/// Sends the current execution context to the GPU.
void Flush(VkSemaphore semaphore = nullptr);
@@ -82,6 +73,21 @@ public:
(void)chunk->Record(command);
}
+ /// Returns the current command buffer tick.
+ [[nodiscard]] u64 CurrentTick() const noexcept {
+ return master_semaphore->CurrentTick();
+ }
+
+ /// Returns true when a tick has been triggered by the GPU.
+ [[nodiscard]] bool IsFree(u64 tick) const noexcept {
+ return master_semaphore->IsFree(tick);
+ }
+
+ /// Waits for the given tick to trigger on the GPU.
+ void Wait(u64 tick) {
+ master_semaphore->Wait(tick);
+ }
+
/// Returns the master timeline semaphore.
[[nodiscard]] MasterSemaphore& GetMasterSemaphore() const noexcept {
return *master_semaphore;
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 61d52b961..c6846d886 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -1845,13 +1845,21 @@ private:
Expression TextureGather(Operation operation) {
const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- UNIMPLEMENTED_IF(!meta.aoffi.empty());
const Id coords = GetCoordinates(operation, Type::Float);
+
+ spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone;
+ std::vector<Id> operands;
Id texture{};
+
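+ // Forward the texel offset (AOFFI) through SPIR-V image operands instead
+ // of leaving it unimplemented.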
+ if (!meta.aoffi.empty()) {
+ mask = mask | spv::ImageOperandsMask::Offset;
+ operands.push_back(GetOffsetCoordinates(operation));
+ }
+
if (meta.sampler.is_shadow) {
texture = OpImageDrefGather(t_float4, GetTextureSampler(operation), coords,
- AsFloat(Visit(meta.depth_compare)));
+ AsFloat(Visit(meta.depth_compare)), mask, operands);
} else {
u32 component_value = 0;
if (meta.component) {
@@ -1860,7 +1868,7 @@ private:
component_value = component->GetValue();
}
texture = OpImageGather(t_float4, GetTextureSampler(operation), coords,
- Constant(t_uint, component_value));
+ Constant(t_uint, component_value), mask, operands);
}
return GetTextureElement(operation, texture, Type::Float);
}
@@ -1928,13 +1936,22 @@ private:
const Id image = GetTextureImage(operation);
const Id coords = GetCoordinates(operation, Type::Int);
+
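+ // Accumulate the optional image operands (LOD, texel offset) and emit a
+ // single OpImageFetch with the combined mask.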
+ spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone;
+ std::vector<Id> operands;
Id fetch;
+
if (meta.lod && !meta.sampler.is_buffer) {
- fetch = OpImageFetch(t_float4, image, coords, spv::ImageOperandsMask::Lod,
- AsInt(Visit(meta.lod)));
- } else {
- fetch = OpImageFetch(t_float4, image, coords);
+ mask = mask | spv::ImageOperandsMask::Lod;
+ operands.push_back(AsInt(Visit(meta.lod)));
+ }
+
+ if (!meta.aoffi.empty()) {
+ mask = mask | spv::ImageOperandsMask::Offset;
+ operands.push_back(GetOffsetCoordinates(operation));
}
+
+ fetch = OpImageFetch(t_float4, image, coords, mask, operands);
return GetTextureElement(operation, fetch, Type::Float);
}
@@ -3106,7 +3123,11 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
entries.const_buffers.emplace_back(cbuf.second, cbuf.first);
}
for (const auto& [base, usage] : ir.GetGlobalMemory()) {
- entries.global_buffers.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_written);
+ entries.global_buffers.emplace_back(GlobalBufferEntry{
+ .cbuf_index = base.cbuf_index,
+ .cbuf_offset = base.cbuf_offset,
+ .is_written = usage.is_written,
+ });
}
for (const auto& sampler : ir.GetSamplers()) {
if (sampler.is_buffer) {
@@ -3127,6 +3148,9 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
entries.attributes.insert(GetGenericAttributeLocation(attribute));
}
}
+ for (const auto& buffer : entries.const_buffers) {
+ entries.enabled_uniform_buffers |= 1U << buffer.GetIndex();
+ }
entries.clip_distances = ir.GetClipDistances();
entries.shader_length = ir.GetLength();
entries.uses_warps = ir.UsesWarps();
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
index 26381e444..5d94132a5 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -39,24 +39,7 @@ private:
u32 index{};
};
-class GlobalBufferEntry {
-public:
- constexpr explicit GlobalBufferEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_written_)
- : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_written{is_written_} {}
-
- constexpr u32 GetCbufIndex() const {
- return cbuf_index;
- }
-
- constexpr u32 GetCbufOffset() const {
- return cbuf_offset;
- }
-
- constexpr bool IsWritten() const {
- return is_written;
- }
-
-private:
+struct GlobalBufferEntry {
u32 cbuf_index{};
u32 cbuf_offset{};
bool is_written{};
@@ -78,6 +61,7 @@ struct ShaderEntries {
std::set<u32> attributes;
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
std::size_t shader_length{};
+ u32 enabled_uniform_buffers{};
bool uses_warps{};
};
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 97fd41cc1..7a1232497 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -8,6 +8,7 @@
#include <fmt/format.h>
+#include "common/alignment.h"
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/common_types.h"
@@ -17,18 +18,119 @@
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
+namespace {
+// Maximum potential alignment of a Vulkan buffer
+constexpr VkDeviceSize MAX_ALIGNMENT = 256;
+// Maximum size to put elements in the stream buffer
+constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8 * 1024 * 1024;
+// Stream buffer size in bytes
+constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
+constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS;
+
+constexpr VkMemoryPropertyFlags HOST_FLAGS =
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS;
+
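+// Reject heaps where the stream buffer would occupy two thirds or more of the space;
+// with STREAM_BUFFER_SIZE = 128 MiB this filters out heaps of roughly 192 MiB or less.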
+bool IsStreamHeap(VkMemoryHeap heap) noexcept {
+ return STREAM_BUFFER_SIZE < (heap.size * 2) / 3;
+}
+
+std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask,
+ VkMemoryPropertyFlags flags) noexcept {
+ for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
+ if (((type_mask >> type_index) & 1) == 0) {
+ // Memory type is incompatible
+ continue;
+ }
+ const VkMemoryType& memory_type = props.memoryTypes[type_index];
+ if ((memory_type.propertyFlags & flags) != flags) {
+ // Memory type doesn't have the flags we want
+ continue;
+ }
+ if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex])) {
+ // Memory heap is not suitable for streaming
+ continue;
+ }
+ // Success!
+ return type_index;
+ }
+ return std::nullopt;
+}
+
+u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask) {
+ // Try to find a DEVICE_LOCAL_BIT type; Nvidia and AMD have a dedicated heap for this
+ std::optional<u32> type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS);
+ if (type) {
+ return *type;
+ }
+ // Otherwise try without the DEVICE_LOCAL_BIT
+ type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS);
+ if (type) {
+ return *type;
+ }
+ // This should never happen, and in case it does, signal it as an out of memory situation
+ throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY);
+}
+
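+// Maps a byte offset in the stream buffer to its sync region: with 128 MiB split into
+// NUM_SYNCS = 16 regions, REGION_SIZE is 8 MiB, so an offset of 24 MiB is region 3.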
+size_t Region(size_t iterator) noexcept {
+ return iterator / REGION_SIZE;
+}
+} // Anonymous namespace
StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
VKScheduler& scheduler_)
- : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {}
+ : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {
+ const vk::Device& dev = device.GetLogical();
+ stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .size = STREAM_BUFFER_SIZE,
+ .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
+ VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = 0,
+ .pQueueFamilyIndices = nullptr,
+ });
+ if (device.HasDebuggingToolAttached()) {
+ stream_buffer.SetObjectNameEXT("Stream Buffer");
+ }
+ VkMemoryDedicatedRequirements dedicated_reqs{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
+ .pNext = nullptr,
+ .prefersDedicatedAllocation = VK_FALSE,
+ .requiresDedicatedAllocation = VK_FALSE,
+ };
+ const auto requirements = dev.GetBufferMemoryRequirements(*stream_buffer, &dedicated_reqs);
+ const bool make_dedicated = dedicated_reqs.prefersDedicatedAllocation == VK_TRUE ||
+ dedicated_reqs.requiresDedicatedAllocation == VK_TRUE;
+ const VkMemoryDedicatedAllocateInfo dedicated_info{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
+ .pNext = nullptr,
+ .image = nullptr,
+ .buffer = *stream_buffer,
+ };
+ const auto memory_properties = device.GetPhysical().GetMemoryProperties();
+ stream_memory = dev.AllocateMemory(VkMemoryAllocateInfo{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+ .pNext = make_dedicated ? &dedicated_info : nullptr,
+ .allocationSize = requirements.size,
+ .memoryTypeIndex = FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits),
+ });
+ if (device.HasDebuggingToolAttached()) {
+ stream_memory.SetObjectNameEXT("Stream Buffer Memory");
+ }
+ stream_buffer.BindMemory(*stream_memory, 0);
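+ // Keep the whole buffer persistently mapped; stream requests return subspans of
+ // this mapping instead of mapping and unmapping per allocation.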
+ stream_pointer = stream_memory.Map(0, STREAM_BUFFER_SIZE);
+}
StagingBufferPool::~StagingBufferPool() = default;
StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage) {
- if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) {
- return *ref;
+ if (usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) {
+ return GetStreamBuffer(size);
}
- return CreateStagingBuffer(size, usage);
+ return GetStagingBuffer(size, usage);
}
void StagingBufferPool::TickFrame() {
@@ -39,6 +141,52 @@ void StagingBufferPool::TickFrame() {
ReleaseCache(MemoryUsage::Download);
}
+StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
+ if (AreRegionsActive(Region(free_iterator) + 1,
+ std::min(Region(iterator + size) + 1, NUM_SYNCS))) {
+ // Avoid waiting for the previous usages to be free
+ return GetStagingBuffer(size, MemoryUsage::Upload);
+ }
+ const u64 current_tick = scheduler.CurrentTick();
+ std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + Region(iterator),
+ current_tick);
+ used_iterator = iterator;
+ free_iterator = std::max(free_iterator, iterator + size);
+
+ if (iterator + size >= STREAM_BUFFER_SIZE) {
+ std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS,
+ current_tick);
+ used_iterator = 0;
+ iterator = 0;
+ free_iterator = size;
+
+ if (AreRegionsActive(0, Region(size) + 1)) {
+ // Avoid waiting for the previous usages to be free
+ return GetStagingBuffer(size, MemoryUsage::Upload);
+ }
+ }
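+ // Suballocate and bump the cursor to the next 256-byte boundary; e.g. a 100-byte
+ // request at offset 300 returns offset 300 and moves iterator to AlignUp(400, 256) = 512.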
+ const size_t offset = iterator;
+ iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
+ return StagingBufferRef{
+ .buffer = *stream_buffer,
+ .offset = static_cast<VkDeviceSize>(offset),
+ .mapped_span = std::span<u8>(stream_pointer + offset, size),
+ };
+}
+
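+// A region stays active while the GPU's last known tick is behind a tick recorded for
+// it, i.e. in-flight command buffers may still be reading that part of the buffer.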
+bool StagingBufferPool::AreRegionsActive(size_t region_begin, size_t region_end) const {
+ const u64 gpu_tick = scheduler.GetMasterSemaphore().KnownGpuTick();
+ return std::any_of(sync_ticks.begin() + region_begin, sync_ticks.begin() + region_end,
+ [gpu_tick](u64 sync_tick) { return gpu_tick < sync_tick; });
+}
+
+StagingBufferRef StagingBufferPool::GetStagingBuffer(size_t size, MemoryUsage usage) {
+ if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) {
+ return *ref;
+ }
+ return CreateStagingBuffer(size, usage);
+}
+
std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t size,
MemoryUsage usage) {
StagingBuffers& cache_level = GetCache(usage)[Common::Log2Ceil64(size)];
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index d42918a47..69f7618de 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -19,11 +19,14 @@ class VKScheduler;
struct StagingBufferRef {
VkBuffer buffer;
+ VkDeviceSize offset;
std::span<u8> mapped_span;
};
class StagingBufferPool {
public:
+ static constexpr size_t NUM_SYNCS = 16;
+
explicit StagingBufferPool(const Device& device, MemoryAllocator& memory_allocator,
VKScheduler& scheduler);
~StagingBufferPool();
@@ -33,6 +36,11 @@ public:
void TickFrame();
private:
+ struct StreamBufferCommit {
+ size_t upper_bound;
+ u64 tick;
+ };
+
struct StagingBuffer {
vk::Buffer buffer;
MemoryCommit commit;
@@ -42,6 +50,7 @@ private:
StagingBufferRef Ref() const noexcept {
return {
.buffer = *buffer,
+ .offset = 0,
.mapped_span = mapped_span,
};
}
@@ -56,6 +65,12 @@ private:
static constexpr size_t NUM_LEVELS = sizeof(size_t) * CHAR_BIT;
using StagingBuffersCache = std::array<StagingBuffers, NUM_LEVELS>;
+ StagingBufferRef GetStreamBuffer(size_t size);
+
+ bool AreRegionsActive(size_t region_begin, size_t region_end) const;
+
+ StagingBufferRef GetStagingBuffer(size_t size, MemoryUsage usage);
+
std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, MemoryUsage usage);
StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage);
@@ -70,6 +85,15 @@ private:
MemoryAllocator& memory_allocator;
VKScheduler& scheduler;
+ vk::Buffer stream_buffer;
+ vk::DeviceMemory stream_memory;
+ u8* stream_pointer = nullptr;
+
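+ // Stream buffer cursors: 'iterator' is the next write offset, 'used_iterator' marks
+ // data whose sync tick has not been recorded yet, and 'free_iterator' is the furthest
+ // byte previous requests may still be using.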
+ size_t iterator = 0;
+ size_t used_iterator = 0;
+ size_t free_iterator = 0;
+ std::array<u64, NUM_SYNCS> sync_ticks{};
+
StagingBuffersCache device_local_cache;
StagingBuffersCache upload_cache;
StagingBuffersCache download_cache;
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
index 1779a2e30..956f86845 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -18,9 +18,7 @@
#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / (sizeof(u32)))
namespace Vulkan {
-
namespace {
-
using namespace Dirty;
using namespace VideoCommon::Dirty;
using Tegra::Engines::Maxwell3D;
@@ -30,15 +28,18 @@ using Table = Maxwell3D::DirtyState::Table;
using Flags = Maxwell3D::DirtyState::Flags;
Flags MakeInvalidationFlags() {
- static constexpr std::array INVALIDATION_FLAGS{
+ static constexpr int INVALIDATION_FLAGS[]{
Viewports, Scissors, DepthBias, BlendConstants, DepthBounds,
StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable,
- DepthCompareOp, FrontFace, StencilOp, StencilTestEnable,
+ DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, VertexBuffers,
};
Flags flags{};
for (const int flag : INVALIDATION_FLAGS) {
flags[flag] = true;
}
+ for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) {
+ flags[index] = true;
+ }
return flags;
}
@@ -125,12 +126,40 @@ void SetupDirtyStencilTestEnable(Tables& tables) {
tables[0][OFF(stencil_enable)] = StencilTestEnable;
}
+void SetupDirtyBlending(Tables& tables) {
+ tables[0][OFF(color_mask_common)] = Blending;
+ tables[0][OFF(independent_blend_enable)] = Blending;
+ FillBlock(tables[0], OFF(color_mask), NUM(color_mask), Blending);
+ FillBlock(tables[0], OFF(blend), NUM(blend), Blending);
+ FillBlock(tables[0], OFF(independent_blend), NUM(independent_blend), Blending);
+}
+
+void SetupDirtyInstanceDivisors(Tables& tables) {
+ static constexpr size_t divisor_offset = 3;
+ for (size_t index = 0; index < Regs::NumVertexArrays; ++index) {
+ tables[0][OFF(instanced_arrays) + index] = InstanceDivisors;
+ tables[0][OFF(vertex_array) + index * NUM(vertex_array[0]) + divisor_offset] =
+ InstanceDivisors;
+ }
+}
+
+void SetupDirtyVertexAttributes(Tables& tables) {
+ FillBlock(tables[0], OFF(vertex_attrib_format), NUM(vertex_attrib_format), VertexAttributes);
+}
+
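+// NUM() (defined above) counts u32 words in a register field, so swizzle_offset = 6
+// picks the swizzle dword inside each viewport_transform entry; the instance divisor
+// table above uses the same trick with divisor_offset = 3.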
+void SetupDirtyViewportSwizzles(Tables& tables) {
+ static constexpr size_t swizzle_offset = 6;
+ for (size_t index = 0; index < Regs::NumViewports; ++index) {
+ tables[0][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] =
+ ViewportSwizzles;
+ }
+}
} // Anonymous namespace
StateTracker::StateTracker(Tegra::GPU& gpu)
: flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} {
auto& tables = gpu.Maxwell3D().dirty.tables;
- SetupDirtyRenderTargets(tables);
+ SetupDirtyFlags(tables);
SetupDirtyViewports(tables);
SetupDirtyScissors(tables);
SetupDirtyDepthBias(tables);
@@ -145,6 +174,10 @@ StateTracker::StateTracker(Tegra::GPU& gpu)
SetupDirtyFrontFace(tables);
SetupDirtyStencilOp(tables);
SetupDirtyStencilTestEnable(tables);
+ SetupDirtyBlending(tables);
+ SetupDirtyInstanceDivisors(tables);
+ SetupDirtyVertexAttributes(tables);
+ SetupDirtyViewportSwizzles(tables);
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h
index c335d2bdf..84e918a71 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.h
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.h
@@ -35,6 +35,11 @@ enum : u8 {
StencilOp,
StencilTestEnable,
+ Blending,
+ InstanceDivisors,
+ VertexAttributes,
+ ViewportSwizzles,
+
Last
};
static_assert(Last <= std::numeric_limits<u8>::max());
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
index 725a2a05d..0b63bd6c8 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -56,8 +56,11 @@ VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 wi
} // Anonymous namespace
-VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKScheduler& scheduler_)
- : surface{surface_}, device{device_}, scheduler{scheduler_} {}
+VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKScheduler& scheduler_,
+ u32 width, u32 height, bool srgb)
+ : surface{surface_}, device{device_}, scheduler{scheduler_} {
+ Create(width, height, srgb);
+}
VKSwapchain::~VKSwapchain() = default;
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h
index 2eadd62b3..a728511e0 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.h
+++ b/src/video_core/renderer_vulkan/vk_swapchain.h
@@ -20,7 +20,8 @@ class VKScheduler;
class VKSwapchain {
public:
- explicit VKSwapchain(VkSurfaceKHR surface, const Device& device, VKScheduler& scheduler);
+ explicit VKSwapchain(VkSurfaceKHR surface, const Device& device, VKScheduler& scheduler,
+ u32 width, u32 height, bool srgb);
~VKSwapchain();
/// Creates (or recreates) the swapchain with a given size.
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index aa7c5d7c6..22a1014a9 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -426,46 +426,47 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image,
VkImageAspectFlags aspect_mask, bool is_initialized,
std::span<const VkBufferImageCopy> copies) {
- static constexpr VkAccessFlags ACCESS_FLAGS = VK_ACCESS_SHADER_WRITE_BIT |
- VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
- VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+ static constexpr VkAccessFlags WRITE_ACCESS_FLAGS =
+ VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
+ VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+ static constexpr VkAccessFlags READ_ACCESS_FLAGS = VK_ACCESS_SHADER_READ_BIT |
+ VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
+ VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
const VkImageMemoryBarrier read_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
- .srcAccessMask = ACCESS_FLAGS,
+ .srcAccessMask = WRITE_ACCESS_FLAGS,
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
- .subresourceRange =
- {
- .aspectMask = aspect_mask,
- .baseMipLevel = 0,
- .levelCount = VK_REMAINING_MIP_LEVELS,
- .baseArrayLayer = 0,
- .layerCount = VK_REMAINING_ARRAY_LAYERS,
- },
+ .subresourceRange{
+ .aspectMask = aspect_mask,
+ .baseMipLevel = 0,
+ .levelCount = VK_REMAINING_MIP_LEVELS,
+ .baseArrayLayer = 0,
+ .layerCount = VK_REMAINING_ARRAY_LAYERS,
+ },
};
const VkImageMemoryBarrier write_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
- .dstAccessMask = ACCESS_FLAGS,
+ .dstAccessMask = WRITE_ACCESS_FLAGS | READ_ACCESS_FLAGS,
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
- .subresourceRange =
- {
- .aspectMask = aspect_mask,
- .baseMipLevel = 0,
- .levelCount = VK_REMAINING_MIP_LEVELS,
- .baseArrayLayer = 0,
- .layerCount = VK_REMAINING_ARRAY_LAYERS,
- },
+ .subresourceRange{
+ .aspectMask = aspect_mask,
+ .baseMipLevel = 0,
+ .levelCount = VK_REMAINING_MIP_LEVELS,
+ .baseArrayLayer = 0,
+ .layerCount = VK_REMAINING_ARRAY_LAYERS,
+ },
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
read_barrier);
@@ -569,20 +570,12 @@ void TextureCacheRuntime::Finish() {
scheduler.Finish();
}
-ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) {
- const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Upload);
- return {
- .handle = staging_ref.buffer,
- .span = staging_ref.mapped_span,
- };
+StagingBufferRef TextureCacheRuntime::UploadStagingBuffer(size_t size) {
+ return staging_buffer_pool.Request(size, MemoryUsage::Upload);
}
-ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) {
- const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Download);
- return {
- .handle = staging_ref.buffer,
- .span = staging_ref.mapped_span,
- };
+StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
+ return staging_buffer_pool.Request(size, MemoryUsage::Download);
}
void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
@@ -754,7 +747,7 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
VK_ACCESS_TRANSFER_WRITE_BIT,
- .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
@@ -765,12 +758,9 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
- .srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
- VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
- VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
- VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
+ .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
- VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
+ VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
@@ -828,12 +818,11 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_
}
}
-void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
- std::span<const BufferImageCopy> copies) {
+void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
// TODO: Move this to another API
scheduler->RequestOutsideRenderPassOperationContext();
- std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask);
- const VkBuffer src_buffer = map.handle;
+ std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
+ const VkBuffer src_buffer = map.buffer;
const VkImage vk_image = *image;
const VkImageAspectFlags vk_aspect_mask = aspect_mask;
const bool is_initialized = std::exchange(initialized, true);
@@ -843,12 +832,12 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
});
}
-void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
+void Image::UploadMemory(const StagingBufferRef& map,
std::span<const VideoCommon::BufferCopy> copies) {
// TODO: Move this to another API
scheduler->RequestOutsideRenderPassOperationContext();
- std::vector vk_copies = TransformBufferCopies(copies, buffer_offset);
- const VkBuffer src_buffer = map.handle;
+ std::vector vk_copies = TransformBufferCopies(copies, map.offset);
+ const VkBuffer src_buffer = map.buffer;
const VkBuffer dst_buffer = *buffer;
scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
// TODO: Barriers
@@ -856,13 +845,57 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
});
}
-void Image::DownloadMemory(const ImageBufferMap& map, size_t buffer_offset,
- std::span<const BufferImageCopy> copies) {
- std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask);
- scheduler->Record([buffer = map.handle, image = *image, aspect_mask = aspect_mask,
+void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
+ std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
+ scheduler->Record([buffer = map.buffer, image = *image, aspect_mask = aspect_mask,
vk_copies](vk::CommandBuffer cmdbuf) {
- // TODO: Barriers
- cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_GENERAL, buffer, vk_copies);
+ const VkImageMemoryBarrier read_barrier{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
+ .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = image,
+ .subresourceRange{
+ .aspectMask = aspect_mask,
+ .baseMipLevel = 0,
+ .levelCount = VK_REMAINING_MIP_LEVELS,
+ .baseArrayLayer = 0,
+ .layerCount = VK_REMAINING_ARRAY_LAYERS,
+ },
+ };
+ const VkImageMemoryBarrier image_write_barrier{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = 0,
+ .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
+ .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+ .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = image,
+ .subresourceRange{
+ .aspectMask = aspect_mask,
+ .baseMipLevel = 0,
+ .levelCount = VK_REMAINING_MIP_LEVELS,
+ .baseArrayLayer = 0,
+ .layerCount = VK_REMAINING_ARRAY_LAYERS,
+ },
+ };
+ const VkMemoryBarrier memory_write_barrier{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
+ };
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+ 0, read_barrier);
+ cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, vk_copies);
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ 0, memory_write_barrier, nullptr, image_write_barrier);
});
}
@@ -1127,7 +1160,7 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
.pAttachments = attachments.data(),
.width = key.size.width,
.height = key.size.height,
- .layers = static_cast<u32>(num_layers),
+ .layers = static_cast<u32>(std::max(num_layers, 1)),
});
if (runtime.device.HasDebuggingToolAttached()) {
framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str());
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 8d29361a1..b08c23459 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -7,6 +7,7 @@
#include <compare>
#include <span>
+#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -53,19 +54,6 @@ struct hash<Vulkan::RenderPassKey> {
namespace Vulkan {
-struct ImageBufferMap {
- [[nodiscard]] VkBuffer Handle() const noexcept {
- return handle;
- }
-
- [[nodiscard]] std::span<u8> Span() const noexcept {
- return span;
- }
-
- VkBuffer handle;
- std::span<u8> span;
-};
-
struct TextureCacheRuntime {
const Device& device;
VKScheduler& scheduler;
@@ -76,9 +64,9 @@ struct TextureCacheRuntime {
void Finish();
- [[nodiscard]] ImageBufferMap MapUploadBuffer(size_t size);
+ [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
- [[nodiscard]] ImageBufferMap MapDownloadBuffer(size_t size);
+ [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
const std::array<Offset2D, 2>& dst_region,
@@ -94,7 +82,7 @@ struct TextureCacheRuntime {
return false;
}
- void AccelerateImageUpload(Image&, const ImageBufferMap&, size_t,
+ void AccelerateImageUpload(Image&, const StagingBufferRef&,
std::span<const VideoCommon::SwizzleParameters>) {
UNREACHABLE();
}
@@ -112,13 +100,12 @@ public:
explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
VAddr cpu_addr);
- void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
+ void UploadMemory(const StagingBufferRef& map,
std::span<const VideoCommon::BufferImageCopy> copies);
- void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
- std::span<const VideoCommon::BufferCopy> copies);
+ void UploadMemory(const StagingBufferRef& map, std::span<const VideoCommon::BufferCopy> copies);
- void DownloadMemory(const ImageBufferMap& map, size_t buffer_offset,
+ void DownloadMemory(const StagingBufferRef& map,
std::span<const VideoCommon::BufferImageCopy> copies);
[[nodiscard]] VkImage Handle() const noexcept {
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
index f99273c6a..dc45fdcb1 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
@@ -20,20 +20,20 @@ VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const Device& device_, VKSchedu
VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default;
void VKUpdateDescriptorQueue::TickFrame() {
- payload.clear();
+ payload_cursor = payload.data();
}
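
// The payload is now a fixed array walked by payload_cursor: TickFrame() rewinds the
// cursor instead of clearing a container, and Acquire() records in upload_start where
// the entries consumed by the following Send() begin.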
void VKUpdateDescriptorQueue::Acquire() {
// Minimum number of entries required.
// This is the maximum number of entries a single draw call might use.
- static constexpr std::size_t MIN_ENTRIES = 0x400;
+ static constexpr size_t MIN_ENTRIES = 0x400;
- if (payload.size() + MIN_ENTRIES >= payload.max_size()) {
+ if (std::distance(payload.data(), payload_cursor) + MIN_ENTRIES >= payload.max_size()) {
LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
scheduler.WaitWorker();
- payload.clear();
+ payload_cursor = payload.data();
}
- upload_start = &*payload.end();
+ upload_start = payload_cursor;
}
void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template,
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h
index e214f7195..d35e77c44 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.h
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h
@@ -4,8 +4,7 @@
#pragma once
-#include <variant>
-#include <boost/container/static_vector.hpp>
+#include <array>
#include "common/common_types.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -16,13 +15,15 @@ class Device;
class VKScheduler;
struct DescriptorUpdateEntry {
- DescriptorUpdateEntry(VkDescriptorImageInfo image_) : image{image_} {}
+ struct Empty {};
+ DescriptorUpdateEntry() = default;
+ DescriptorUpdateEntry(VkDescriptorImageInfo image_) : image{image_} {}
DescriptorUpdateEntry(VkDescriptorBufferInfo buffer_) : buffer{buffer_} {}
-
DescriptorUpdateEntry(VkBufferView texel_buffer_) : texel_buffer{texel_buffer_} {}
union {
+ Empty empty{};
VkDescriptorImageInfo image;
VkDescriptorBufferInfo buffer;
VkBufferView texel_buffer;
@@ -41,39 +42,40 @@ public:
void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set);
void AddSampledImage(VkImageView image_view, VkSampler sampler) {
- payload.emplace_back(VkDescriptorImageInfo{
+ *(payload_cursor++) = VkDescriptorImageInfo{
.sampler = sampler,
.imageView = image_view,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- });
+ };
}
void AddImage(VkImageView image_view) {
- payload.emplace_back(VkDescriptorImageInfo{
+ *(payload_cursor++) = VkDescriptorImageInfo{
.sampler = VK_NULL_HANDLE,
.imageView = image_view,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- });
+ };
}
- void AddBuffer(VkBuffer buffer, u64 offset, size_t size) {
- payload.emplace_back(VkDescriptorBufferInfo{
+ void AddBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size) {
+ *(payload_cursor++) = VkDescriptorBufferInfo{
.buffer = buffer,
.offset = offset,
.range = size,
- });
+ };
}
void AddTexelBuffer(VkBufferView texel_buffer) {
- payload.emplace_back(texel_buffer);
+ *(payload_cursor++) = texel_buffer;
}
private:
const Device& device;
VKScheduler& scheduler;
+ DescriptorUpdateEntry* payload_cursor = nullptr;
const DescriptorUpdateEntry* upload_start = nullptr;
- boost::container::static_vector<DescriptorUpdateEntry, 0x10000> payload;
+ std::array<DescriptorUpdateEntry, 0x10000> payload;
};
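
// Illustrative usage sketch (assumed caller pattern, not taken from this change):
//   queue.Acquire();                        // reserve room, remember upload_start
//   queue.AddBuffer(buffer, offset, size);  // appends through payload_cursor
//   queue.Send(update_template, set);       // flushes the recorded entries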
} // namespace Vulkan
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
index 3b40db9bc..02adcf9c7 100644
--- a/src/video_core/shader/async_shaders.cpp
+++ b/src/video_core/shader/async_shaders.cpp
@@ -64,6 +64,7 @@ void AsyncShaders::FreeWorkers() {
void AsyncShaders::KillWorkers() {
is_thread_exiting.store(true);
+ cv.notify_all();
for (auto& thread : worker_threads) {
thread.detach();
}
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h
index 0dbb1a31f..7fdff6e56 100644
--- a/src/video_core/shader/async_shaders.h
+++ b/src/video_core/shader/async_shaders.h
@@ -9,16 +9,7 @@
#include <shared_mutex>
#include <thread>
-// This header includes both Vulkan and OpenGL headers, this has to be fixed
-// Unfortunately, including OpenGL will include Windows.h that defines macros that can cause issues.
-// Forcefully include glad early and undefine macros
#include <glad/glad.h>
-#ifdef CreateEvent
-#undef CreateEvent
-#endif
-#ifdef CreateSemaphore
-#undef CreateSemaphore
-#endif
#include "common/common_types.h"
#include "video_core/renderer_opengl/gl_device.h"
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 50f4e7d35..7728f600e 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -330,6 +330,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
case StoreType::Bits32:
(this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0));
break;
+ case StoreType::Unsigned16:
case StoreType::Signed16: {
Node address = GetAddress(0);
Node memory = (this->*get_memory)(address);
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index d3ea07aac..5f88537bc 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -76,6 +76,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
case SystemVariable::InvocationId:
return Operation(OperationCode::InvocationId);
case SystemVariable::Ydirection:
+ uses_y_negate = true;
return Operation(OperationCode::YNegate);
case SystemVariable::InvocationInfo:
LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete");
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 833fa2a39..c69681e8d 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -806,6 +806,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
const std::size_t type_coord_count = GetCoordCount(texture_type);
const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
+ const bool aoffi_enabled = instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI);
// If enabled, the array index is always stored in the gpr8 field
const u64 array_register = instr.gpr8.Value();
@@ -820,17 +821,23 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
std::vector<Node> coords;
for (std::size_t i = 0; i < type_coord_count; ++i) {
const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
- coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
+ coords.push_back(
+ GetRegister(last && !aoffi_enabled ? last_coord_register : coord_register + i));
}
const Node array = is_array ? GetRegister(array_register) : nullptr;
// When lod is used, it is always in gpr20
const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
+ std::vector<Node> aoffi;
+ if (aoffi_enabled) {
+ aoffi = GetAoffiCoordinates(GetRegister(instr.gpr20), type_coord_count, false);
+ }
+
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
- MetaTexture meta{*sampler, array, {}, {}, {}, {}, {}, lod, {}, element, {}};
+ MetaTexture meta{*sampler, array, {}, aoffi, {}, {}, {}, lod, {}, element, {}};
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
}
return values;
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 0c6ab0f07..1cd7c14d7 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -139,6 +139,10 @@ public:
return uses_legacy_varyings;
}
+ bool UsesYNegate() const {
+ return uses_y_negate;
+ }
+
bool UsesWarps() const {
return uses_warps;
}
@@ -465,6 +469,7 @@ private:
bool uses_instance_id{};
bool uses_vertex_id{};
bool uses_legacy_varyings{};
+ bool uses_y_negate{};
bool uses_warps{};
bool uses_indexed_samplers{};
diff --git a/src/video_core/shader_notify.cpp b/src/video_core/shader_notify.cpp
index c3c71657d..693e47158 100644
--- a/src/video_core/shader_notify.cpp
+++ b/src/video_core/shader_notify.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <mutex>
#include "video_core/shader_notify.h"
using namespace std::chrono_literals;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index d1080300f..b1da69971 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -103,9 +103,6 @@ public:
/// Notify the cache that a new frame has been queued
void TickFrame();
- /// Return an unique mutually exclusive lock for the cache
- [[nodiscard]] std::unique_lock<std::mutex> AcquireLock();
-
/// Return a constant reference to the given image view id
[[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept;
@@ -179,6 +176,8 @@ public:
/// Return true when a CPU region is modified from the GPU
[[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
+ std::mutex mutex;
+
private:
/// Iterate over all page indices in a range
template <typename Func>
@@ -212,8 +211,8 @@ private:
void RefreshContents(Image& image);
/// Upload data from guest to an image
- template <typename MapBuffer>
- void UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset);
+ template <typename StagingBuffer>
+ void UploadImageContents(Image& image, StagingBuffer& staging_buffer);
/// Find or create an image view from a guest descriptor
[[nodiscard]] ImageViewId FindImageView(const TICEntry& config);
@@ -325,8 +324,6 @@ private:
RenderTargets render_targets;
- std::mutex mutex;
-
std::unordered_map<TICEntry, ImageViewId> image_views;
std::unordered_map<TSCEntry, SamplerId> samplers;
std::unordered_map<RenderTargets, FramebufferId> framebuffers;
@@ -386,11 +383,6 @@ void TextureCache<P>::TickFrame() {
}
template <class P>
-std::unique_lock<std::mutex> TextureCache<P>::AcquireLock() {
- return std::unique_lock{mutex};
-}
-
-template <class P>
const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept {
return slot_image_views[id];
}
@@ -598,11 +590,11 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
});
for (const ImageId image_id : images) {
Image& image = slot_images[image_id];
- auto map = runtime.MapDownloadBuffer(image.unswizzled_size_bytes);
+ auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
const auto copies = FullDownloadCopies(image.info);
- image.DownloadMemory(map, 0, copies);
+ image.DownloadMemory(map, copies);
runtime.Finish();
- SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.Span());
+ SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
}
}
@@ -757,25 +749,25 @@ void TextureCache<P>::PopAsyncFlushes() {
for (const ImageId image_id : download_ids) {
total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
}
- auto download_map = runtime.MapDownloadBuffer(total_size_bytes);
- size_t buffer_offset = 0;
+ auto download_map = runtime.DownloadStagingBuffer(total_size_bytes);
+ const size_t original_offset = download_map.offset;
for (const ImageId image_id : download_ids) {
Image& image = slot_images[image_id];
const auto copies = FullDownloadCopies(image.info);
- image.DownloadMemory(download_map, buffer_offset, copies);
- buffer_offset += image.unswizzled_size_bytes;
+ image.DownloadMemory(download_map, copies);
+ download_map.offset += image.unswizzled_size_bytes;
}
// Wait for downloads to finish
runtime.Finish();
- buffer_offset = 0;
- const std::span<u8> download_span = download_map.Span();
+ download_map.offset = original_offset;
+ std::span<u8> download_span = download_map.mapped_span;
for (const ImageId image_id : download_ids) {
const ImageBase& image = slot_images[image_id];
const auto copies = FullDownloadCopies(image.info);
- const std::span<u8> image_download_span = download_span.subspan(buffer_offset);
- SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, image_download_span);
- buffer_offset += image.unswizzled_size_bytes;
+ SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span);
+ download_map.offset += image.unswizzled_size_bytes;
+ download_span = download_span.subspan(image.unswizzled_size_bytes);
}
committed_downloads.pop();
}
@@ -806,32 +798,32 @@ void TextureCache<P>::RefreshContents(Image& image) {
LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
return;
}
- auto map = runtime.MapUploadBuffer(MapSizeBytes(image));
- UploadImageContents(image, map, 0);
+ auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image));
+ UploadImageContents(image, staging);
runtime.InsertUploadMemoryBarrier();
}
template <class P>
-template <typename MapBuffer>
-void TextureCache<P>::UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset) {
- const std::span<u8> mapped_span = map.Span().subspan(buffer_offset);
+template <typename StagingBuffer>
+void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) {
+ const std::span<u8> mapped_span = staging.mapped_span;
const GPUVAddr gpu_addr = image.gpu_addr;
if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
const auto uploads = FullUploadSwizzles(image.info);
- runtime.AccelerateImageUpload(image, map, buffer_offset, uploads);
+ runtime.AccelerateImageUpload(image, staging, uploads);
} else if (True(image.flags & ImageFlagBits::Converted)) {
std::vector<u8> unswizzled_data(image.unswizzled_size_bytes);
auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data);
ConvertImage(unswizzled_data, image.info, mapped_span, copies);
- image.UploadMemory(map, buffer_offset, copies);
+ image.UploadMemory(staging, copies);
} else if (image.info.type == ImageType::Buffer) {
const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)};
- image.UploadMemory(map, buffer_offset, copies);
+ image.UploadMemory(staging, copies);
} else {
const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span);
- image.UploadMemory(map, buffer_offset, copies);
+ image.UploadMemory(staging, copies);
}
}
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index bb2cdef81..a0bc1f7b6 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -169,40 +169,6 @@ template <u32 GOB_EXTENT>
return Common::DivCeil(AdjustMipSize(size, level), block_size);
}
-[[nodiscard]] constexpr u32 LayerSize(const TICEntry& config, PixelFormat format) {
- return config.Width() * config.Height() * BytesPerBlock(format);
-}
-
-[[nodiscard]] constexpr bool HasTwoDimsPerLayer(TextureType type) {
- switch (type) {
- case TextureType::Texture2D:
- case TextureType::Texture2DArray:
- case TextureType::Texture2DNoMipmap:
- case TextureType::Texture3D:
- case TextureType::TextureCubeArray:
- case TextureType::TextureCubemap:
- return true;
- case TextureType::Texture1D:
- case TextureType::Texture1DArray:
- case TextureType::Texture1DBuffer:
- return false;
- }
- return false;
-}
-
-[[nodiscard]] constexpr bool HasTwoDimsPerLayer(ImageType type) {
- switch (type) {
- case ImageType::e2D:
- case ImageType::e3D:
- case ImageType::Linear:
- return true;
- case ImageType::e1D:
- case ImageType::Buffer:
- return false;
- }
- UNREACHABLE_MSG("Invalid image type={}", static_cast<int>(type));
-}
-
[[nodiscard]] constexpr std::pair<int, int> Samples(int num_samples) {
switch (num_samples) {
case 1:
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 53444e945..e1b38c6ac 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -38,19 +38,18 @@ namespace VideoCore {
std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) {
const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue();
- std::unique_ptr<Tegra::GPU> gpu = std::make_unique<Tegra::GPU>(
- system, Settings::values.use_asynchronous_gpu_emulation.GetValue(), use_nvdec);
-
+ const bool use_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
+ auto gpu = std::make_unique<Tegra::GPU>(system, use_async, use_nvdec);
auto context = emu_window.CreateSharedContext();
- const auto scope = context->Acquire();
-
- auto renderer = CreateRenderer(system, emu_window, *gpu, std::move(context));
- if (!renderer->Init()) {
+ auto scope = context->Acquire();
+ try {
+ auto renderer = CreateRenderer(system, emu_window, *gpu, std::move(context));
+ gpu->BindRenderer(std::move(renderer));
+ return gpu;
+ } catch (const std::runtime_error& exception) {
+ LOG_ERROR(HW_GPU, "Failed to initialize GPU: {}", exception.what());
return nullptr;
}
-
- gpu->BindRenderer(std::move(renderer));
- return gpu;
}
u16 GetResolutionScaleFactor(const RendererBase& renderer) {
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 51f53bc39..34d396434 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -18,27 +18,22 @@
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
-
namespace {
-
namespace Alternatives {
-
-constexpr std::array Depth24UnormS8_UINT{
+constexpr std::array DEPTH24_UNORM_STENCIL8_UINT{
VK_FORMAT_D32_SFLOAT_S8_UINT,
VK_FORMAT_D16_UNORM_S8_UINT,
- VkFormat{},
+ VK_FORMAT_UNDEFINED,
};
-constexpr std::array Depth16UnormS8_UINT{
+constexpr std::array DEPTH16_UNORM_STENCIL8_UINT{
VK_FORMAT_D24_UNORM_S8_UINT,
VK_FORMAT_D32_SFLOAT_S8_UINT,
- VkFormat{},
+ VK_FORMAT_UNDEFINED,
};
-
} // namespace Alternatives
constexpr std::array REQUIRED_EXTENSIONS{
- VK_KHR_SWAPCHAIN_EXTENSION_NAME,
VK_KHR_MAINTENANCE1_EXTENSION_NAME,
VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME,
VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME,
@@ -51,7 +46,14 @@ constexpr std::array REQUIRED_EXTENSIONS{
VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
+ VK_EXT_ROBUSTNESS_2_EXTENSION_NAME,
VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
+#ifdef _WIN32
+ VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME,
+#endif
+#ifdef __linux__
+ VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,
+#endif
};
template <typename T>
@@ -63,9 +65,9 @@ void SetNext(void**& next, T& data) {
constexpr const VkFormat* GetFormatAlternatives(VkFormat format) {
switch (format) {
case VK_FORMAT_D24_UNORM_S8_UINT:
- return Alternatives::Depth24UnormS8_UINT.data();
+ return Alternatives::DEPTH24_UNORM_STENCIL8_UINT.data();
case VK_FORMAT_D16_UNORM_S8_UINT:
- return Alternatives::Depth16UnormS8_UINT.data();
+ return Alternatives::DEPTH16_UNORM_STENCIL8_UINT.data();
default:
return nullptr;
}
@@ -195,78 +197,77 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
const vk::InstanceDispatch& dld_)
: instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()},
format_properties{GetFormatProperties(physical)} {
- CheckSuitability();
+ CheckSuitability(surface != nullptr);
SetupFamilies(surface);
SetupFeatures();
const auto queue_cis = GetDeviceQueueCreateInfos();
- const std::vector extensions = LoadExtensions();
+ const std::vector extensions = LoadExtensions(surface != nullptr);
VkPhysicalDeviceFeatures2 features2{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
.pNext = nullptr,
- .features{},
+ .features{
+ .robustBufferAccess = true,
+ .fullDrawIndexUint32 = false,
+ .imageCubeArray = true,
+ .independentBlend = true,
+ .geometryShader = true,
+ .tessellationShader = true,
+ .sampleRateShading = false,
+ .dualSrcBlend = false,
+ .logicOp = false,
+ .multiDrawIndirect = false,
+ .drawIndirectFirstInstance = false,
+ .depthClamp = true,
+ .depthBiasClamp = true,
+ .fillModeNonSolid = false,
+ .depthBounds = false,
+ .wideLines = false,
+ .largePoints = true,
+ .alphaToOne = false,
+ .multiViewport = true,
+ .samplerAnisotropy = true,
+ .textureCompressionETC2 = false,
+ .textureCompressionASTC_LDR = is_optimal_astc_supported,
+ .textureCompressionBC = false,
+ .occlusionQueryPrecise = true,
+ .pipelineStatisticsQuery = false,
+ .vertexPipelineStoresAndAtomics = true,
+ .fragmentStoresAndAtomics = true,
+ .shaderTessellationAndGeometryPointSize = false,
+ .shaderImageGatherExtended = true,
+ .shaderStorageImageExtendedFormats = false,
+ .shaderStorageImageMultisample = is_shader_storage_image_multisample,
+ .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported,
+ .shaderStorageImageWriteWithoutFormat = true,
+ .shaderUniformBufferArrayDynamicIndexing = false,
+ .shaderSampledImageArrayDynamicIndexing = false,
+ .shaderStorageBufferArrayDynamicIndexing = false,
+ .shaderStorageImageArrayDynamicIndexing = false,
+ .shaderClipDistance = false,
+ .shaderCullDistance = false,
+ .shaderFloat64 = false,
+ .shaderInt64 = false,
+ .shaderInt16 = false,
+ .shaderResourceResidency = false,
+ .shaderResourceMinLod = false,
+ .sparseBinding = false,
+ .sparseResidencyBuffer = false,
+ .sparseResidencyImage2D = false,
+ .sparseResidencyImage3D = false,
+ .sparseResidency2Samples = false,
+ .sparseResidency4Samples = false,
+ .sparseResidency8Samples = false,
+ .sparseResidency16Samples = false,
+ .sparseResidencyAliased = false,
+ .variableMultisampleRate = false,
+ .inheritedQueries = false,
+ },
};
const void* first_next = &features2;
void** next = &features2.pNext;
- features2.features = {
- .robustBufferAccess = false,
- .fullDrawIndexUint32 = false,
- .imageCubeArray = true,
- .independentBlend = true,
- .geometryShader = true,
- .tessellationShader = true,
- .sampleRateShading = false,
- .dualSrcBlend = false,
- .logicOp = false,
- .multiDrawIndirect = false,
- .drawIndirectFirstInstance = false,
- .depthClamp = true,
- .depthBiasClamp = true,
- .fillModeNonSolid = false,
- .depthBounds = false,
- .wideLines = false,
- .largePoints = true,
- .alphaToOne = false,
- .multiViewport = true,
- .samplerAnisotropy = true,
- .textureCompressionETC2 = false,
- .textureCompressionASTC_LDR = is_optimal_astc_supported,
- .textureCompressionBC = false,
- .occlusionQueryPrecise = true,
- .pipelineStatisticsQuery = false,
- .vertexPipelineStoresAndAtomics = true,
- .fragmentStoresAndAtomics = true,
- .shaderTessellationAndGeometryPointSize = false,
- .shaderImageGatherExtended = true,
- .shaderStorageImageExtendedFormats = false,
- .shaderStorageImageMultisample = is_shader_storage_image_multisample,
- .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported,
- .shaderStorageImageWriteWithoutFormat = true,
- .shaderUniformBufferArrayDynamicIndexing = false,
- .shaderSampledImageArrayDynamicIndexing = false,
- .shaderStorageBufferArrayDynamicIndexing = false,
- .shaderStorageImageArrayDynamicIndexing = false,
- .shaderClipDistance = false,
- .shaderCullDistance = false,
- .shaderFloat64 = false,
- .shaderInt64 = false,
- .shaderInt16 = false,
- .shaderResourceResidency = false,
- .shaderResourceMinLod = false,
- .sparseBinding = false,
- .sparseResidencyBuffer = false,
- .sparseResidencyImage2D = false,
- .sparseResidencyImage3D = false,
- .sparseResidency2Samples = false,
- .sparseResidency4Samples = false,
- .sparseResidency8Samples = false,
- .sparseResidency16Samples = false,
- .sparseResidencyAliased = false,
- .variableMultisampleRate = false,
- .inheritedQueries = false,
- };
VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR,
.pNext = nullptr,
@@ -379,20 +380,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state");
}
- VkPhysicalDeviceRobustness2FeaturesEXT robustness2;
- if (ext_robustness2) {
- robustness2 = {
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT,
- .pNext = nullptr,
- .robustBufferAccess2 = false,
- .robustImageAccess2 = true,
- .nullDescriptor = true,
- };
- SetNext(next, robustness2);
- } else {
- LOG_INFO(Render_Vulkan, "Device doesn't support robustness2");
- }
-
if (!ext_depth_range_unrestricted) {
LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
}
@@ -535,16 +522,18 @@ bool Device::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags want
return (supported_usage & wanted_usage) == wanted_usage;
}
-void Device::CheckSuitability() const {
+void Device::CheckSuitability(bool requires_swapchain) const {
std::bitset<REQUIRED_EXTENSIONS.size()> available_extensions;
+ bool has_swapchain = false;
for (const VkExtensionProperties& property : physical.EnumerateDeviceExtensionProperties()) {
- for (std::size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) {
+ const std::string_view name{property.extensionName};
+ for (size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) {
if (available_extensions[i]) {
continue;
}
- const std::string_view name{property.extensionName};
available_extensions[i] = name == REQUIRED_EXTENSIONS[i];
}
+ has_swapchain = has_swapchain || name == VK_KHR_SWAPCHAIN_EXTENSION_NAME;
}
for (size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) {
if (available_extensions[i]) {
@@ -553,6 +542,11 @@ void Device::CheckSuitability() const {
LOG_ERROR(Render_Vulkan, "Missing required extension: {}", REQUIRED_EXTENSIONS[i]);
throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT);
}
+ if (requires_swapchain && !has_swapchain) {
+ LOG_ERROR(Render_Vulkan, "Missing required extension: VK_KHR_swapchain");
+ throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT);
+ }
+
struct LimitTuple {
u32 minimum;
u32 value;
@@ -572,9 +566,20 @@ void Device::CheckSuitability() const {
throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT);
}
}
- const VkPhysicalDeviceFeatures features{physical.GetFeatures()};
+ VkPhysicalDeviceRobustness2FeaturesEXT robustness2{};
+ robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT;
+
+ VkPhysicalDeviceFeatures2 features2{};
+ features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
+ features2.pNext = &robustness2;
+
+ physical.GetFeatures2KHR(features2);
+
+ const VkPhysicalDeviceFeatures& features{features2.features};
const std::array feature_report{
+ std::make_pair(features.robustBufferAccess, "robustBufferAccess"),
std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"),
std::make_pair(features.imageCubeArray, "imageCubeArray"),
std::make_pair(features.independentBlend, "independentBlend"),
std::make_pair(features.depthClamp, "depthClamp"),
@@ -589,6 +594,9 @@ void Device::CheckSuitability() const {
std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
std::make_pair(features.shaderStorageImageWriteWithoutFormat,
"shaderStorageImageWriteWithoutFormat"),
+ std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"),
+ std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"),
+ std::make_pair(robustness2.nullDescriptor, "nullDescriptor"),
};
for (const auto& [is_supported, name] : feature_report) {
if (is_supported) {
@@ -599,17 +607,19 @@ void Device::CheckSuitability() const {
}
}
-std::vector<const char*> Device::LoadExtensions() {
+std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
std::vector<const char*> extensions;
- extensions.reserve(7 + REQUIRED_EXTENSIONS.size());
+ extensions.reserve(8 + REQUIRED_EXTENSIONS.size());
extensions.insert(extensions.begin(), REQUIRED_EXTENSIONS.begin(), REQUIRED_EXTENSIONS.end());
+ if (requires_surface) {
+ extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
+ }
bool has_khr_shader_float16_int8{};
bool has_ext_subgroup_size_control{};
bool has_ext_transform_feedback{};
bool has_ext_custom_border_color{};
bool has_ext_extended_dynamic_state{};
- bool has_ext_robustness2{};
for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) {
const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name,
bool push) {
@@ -637,14 +647,12 @@ std::vector<const char*> Device::LoadExtensions() {
test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false);
test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false);
test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false);
- test(has_ext_robustness2, VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, false);
test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false);
if (Settings::values.renderer_debug) {
test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME,
true);
}
}
-
VkPhysicalDeviceFeatures2KHR features;
features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR;
@@ -661,7 +669,6 @@ std::vector<const char*> Device::LoadExtensions() {
is_float16_supported = float16_int8_features.shaderFloat16;
extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
}
-
if (has_ext_subgroup_size_control) {
VkPhysicalDeviceSubgroupSizeControlFeaturesEXT subgroup_features;
subgroup_features.sType =
@@ -688,7 +695,6 @@ std::vector<const char*> Device::LoadExtensions() {
} else {
is_warp_potentially_bigger = true;
}
-
if (has_ext_transform_feedback) {
VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features;
tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT;
@@ -710,7 +716,6 @@ std::vector<const char*> Device::LoadExtensions() {
ext_transform_feedback = true;
}
}
-
if (has_ext_custom_border_color) {
VkPhysicalDeviceCustomBorderColorFeaturesEXT border_features;
border_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT;
@@ -723,7 +728,6 @@ std::vector<const char*> Device::LoadExtensions() {
ext_custom_border_color = true;
}
}
-
if (has_ext_extended_dynamic_state) {
VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state;
dynamic_state.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT;
@@ -736,19 +740,6 @@ std::vector<const char*> Device::LoadExtensions() {
ext_extended_dynamic_state = true;
}
}
-
- if (has_ext_robustness2) {
- VkPhysicalDeviceRobustness2FeaturesEXT robustness2;
- robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT;
- robustness2.pNext = nullptr;
- features.pNext = &robustness2;
- physical.GetFeatures2KHR(features);
- if (robustness2.nullDescriptor && robustness2.robustImageAccess2) {
- extensions.push_back(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
- ext_robustness2 = true;
- }
- }
-
return extensions;
}
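
Taken together, the hunks above change how VK_EXT_robustness2 is handled: instead of being probed as an optional extension inside LoadExtensions(), its features are now validated up front in CheckSuitability() through a VkPhysicalDeviceFeatures2 pNext chain, and VK_KHR_swapchain is only demanded (and requested) when a surface is actually in use. A minimal sketch of the chained feature query, assuming a valid VkPhysicalDevice and that vkGetPhysicalDeviceFeatures2 (core 1.1, or the KHR alias the wrapper dispatches to) is loaded:

    #include <vulkan/vulkan.h>

    VkBool32 QueryNullDescriptor(VkPhysicalDevice physical_device) {
        VkPhysicalDeviceRobustness2FeaturesEXT robustness2{};
        robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT;

        VkPhysicalDeviceFeatures2 features2{};
        features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
        features2.pNext = &robustness2; // chain the extension struct onto the query

        vkGetPhysicalDeviceFeatures2(physical_device, &features2);
        // Core features land in features2.features; the driver has filled in
        // robustness2.robustBufferAccess2 / robustImageAccess2 / nullDescriptor.
        return robustness2.nullDescriptor;
    }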
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 4b66dba7a..67d70cd22 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -23,7 +23,7 @@ enum class FormatType { Linear, Optimal, Buffer };
const u32 GuestWarpSize = 32;
/// Handles data specific to a physical device.
-class Device final {
+class Device {
public:
explicit Device(VkInstance instance, vk::PhysicalDevice physical, VkSurfaceKHR surface,
const vk::InstanceDispatch& dld);
@@ -227,10 +227,10 @@ public:
private:
/// Checks if the physical device is suitable.
- void CheckSuitability() const;
+ void CheckSuitability(bool requires_swapchain) const;
/// Loads extensions into a vector and stores available ones in this object.
- std::vector<const char*> LoadExtensions();
+ std::vector<const char*> LoadExtensions(bool requires_surface);
/// Sets up queue families.
void SetupFamilies(VkSurfaceKHR surface);
@@ -285,7 +285,6 @@ private:
bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color.
bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state.
- bool ext_robustness2{}; ///< Support for VK_EXT_robustness2.
bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export.
bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
bool has_renderdoc{}; ///< Has RenderDoc attached
diff --git a/src/video_core/vulkan_common/vulkan_instance.cpp b/src/video_core/vulkan_common/vulkan_instance.cpp
index 889ecda0c..bfd6e6add 100644
--- a/src/video_core/vulkan_common/vulkan_instance.cpp
+++ b/src/video_core/vulkan_common/vulkan_instance.cpp
@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <algorithm>
+#include <future>
#include <optional>
#include <span>
#include <utility>
@@ -140,7 +141,10 @@ vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceD
VK_VERSION_MAJOR(required_version), VK_VERSION_MINOR(required_version));
throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER);
}
- vk::Instance instance = vk::Instance::Create(required_version, layers, extensions, dld);
+ vk::Instance instance =
+ std::async([&] {
+ return vk::Instance::Create(required_version, layers, extensions, dld);
+ }).get();
if (!vk::Load(*instance, dld)) {
LOG_ERROR(Render_Vulkan, "Failed to load Vulkan instance function pointers");
throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
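
Instance creation above is routed through std::async(...).get(), which runs vk::Instance::Create on a transient worker thread while keeping the call site synchronous. The diff gives no rationale; moving driver initialization off the calling thread is typically a workaround for thread-affinity or TLS quirks in specific ICDs, so treat the motive as unstated. One detail worth knowing about the pattern, shown in a self-contained sketch:

    #include <future>

    int HeavyInit() {
        return 42; // stand-in for expensive driver initialization
    }

    int main() {
        // std::async's default policy is std::launch::async | std::launch::deferred,
        // so the runtime may still execute the task on the calling thread at .get().
        // Passing std::launch::async explicitly guarantees a separate thread.
        const int value = std::async(std::launch::async, HeavyInit).get();
        return value == 42 ? 0 : 1;
    }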
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
index d6eb3af31..2a8b7a907 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
@@ -7,6 +7,8 @@
#include <optional>
#include <vector>
+#include <glad/glad.h>
+
#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
@@ -55,10 +57,24 @@ struct Range {
class MemoryAllocation {
public:
- explicit MemoryAllocation(const Device& device_, vk::DeviceMemory memory_,
- VkMemoryPropertyFlags properties, u64 allocation_size_, u32 type)
- : device{device_}, memory{std::move(memory_)}, allocation_size{allocation_size_},
- property_flags{properties}, shifted_memory_type{1U << type} {}
+ explicit MemoryAllocation(vk::DeviceMemory memory_, VkMemoryPropertyFlags properties,
+ u64 allocation_size_, u32 type)
+ : memory{std::move(memory_)}, allocation_size{allocation_size_}, property_flags{properties},
+ shifted_memory_type{1U << type} {}
+
+#if defined(_WIN32) || defined(__linux__)
+ ~MemoryAllocation() {
+ if (owning_opengl_handle != 0) {
+ glDeleteMemoryObjectsEXT(1, &owning_opengl_handle);
+ }
+ }
+#endif
+
+ MemoryAllocation& operator=(const MemoryAllocation&) = delete;
+ MemoryAllocation(const MemoryAllocation&) = delete;
+
+ MemoryAllocation& operator=(MemoryAllocation&&) = delete;
+ MemoryAllocation(MemoryAllocation&&) = delete;
[[nodiscard]] std::optional<MemoryCommit> Commit(VkDeviceSize size, VkDeviceSize alignment) {
const std::optional<u64> alloc = FindFreeRegion(size, alignment);
@@ -88,6 +104,31 @@ public:
return memory_mapped_span;
}
+#ifdef _WIN32
+ [[nodiscard]] u32 ExportOpenGLHandle() {
+ if (!owning_opengl_handle) {
+ glCreateMemoryObjectsEXT(1, &owning_opengl_handle);
+ glImportMemoryWin32HandleEXT(owning_opengl_handle, allocation_size,
+ GL_HANDLE_TYPE_OPAQUE_WIN32_EXT,
+ memory.GetMemoryWin32HandleKHR());
+ }
+ return owning_opengl_handle;
+ }
+#elif __linux__
+ [[nodiscard]] u32 ExportOpenGLHandle() {
+ if (!owning_opengl_handle) {
+ glCreateMemoryObjectsEXT(1, &owning_opengl_handle);
+ glImportMemoryFdEXT(owning_opengl_handle, allocation_size, GL_HANDLE_TYPE_OPAQUE_FD_EXT,
+ memory.GetMemoryFdKHR());
+ }
+ return owning_opengl_handle;
+ }
+#else
+ [[nodiscard]] u32 ExportOpenGLHandle() {
+ return 0;
+ }
+#endif
+
/// Returns whether this allocation is compatible with the arguments.
[[nodiscard]] bool IsCompatible(VkMemoryPropertyFlags flags, u32 type_mask) const {
return (flags & property_flags) && (type_mask & shifted_memory_type) != 0;
@@ -118,13 +159,15 @@ private:
return candidate;
}
- const Device& device; ///< Vulkan device.
const vk::DeviceMemory memory; ///< Vulkan memory allocation handler.
const u64 allocation_size; ///< Size of this allocation.
const VkMemoryPropertyFlags property_flags; ///< Vulkan memory property flags.
const u32 shifted_memory_type; ///< Shifted Vulkan memory type.
std::vector<Range> commits; ///< All commit ranges done from this allocation.
std::span<u8> memory_mapped_span; ///< Memory mapped span. Empty if not queried before.
+#if defined(_WIN32) || defined(__linux__)
+ u32 owning_opengl_handle{}; ///< Owning OpenGL memory object handle.
+#endif
};
MemoryCommit::MemoryCommit(MemoryAllocation* allocation_, VkDeviceMemory memory_, u64 begin_,
@@ -156,14 +199,19 @@ std::span<u8> MemoryCommit::Map() {
return span;
}
+u32 MemoryCommit::ExportOpenGLHandle() const {
+ return allocation->ExportOpenGLHandle();
+}
+
void MemoryCommit::Release() {
if (allocation) {
allocation->Free(begin);
}
}
-MemoryAllocator::MemoryAllocator(const Device& device_)
- : device{device_}, properties{device_.GetPhysical().GetMemoryProperties()} {}
+MemoryAllocator::MemoryAllocator(const Device& device_, bool export_allocations_)
+ : device{device_}, properties{device_.GetPhysical().GetMemoryProperties()},
+ export_allocations{export_allocations_} {}
MemoryAllocator::~MemoryAllocator() = default;
@@ -196,14 +244,24 @@ MemoryCommit MemoryAllocator::Commit(const vk::Image& image, MemoryUsage usage)
void MemoryAllocator::AllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size) {
const u32 type = FindType(flags, type_mask).value();
+ const VkExportMemoryAllocateInfo export_allocate_info{
+ .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO,
+ .pNext = nullptr,
+#ifdef _WIN32
+ .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT,
+#elif __linux__
+ .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
+#else
+ .handleTypes = 0,
+#endif
+ };
vk::DeviceMemory memory = device.GetLogical().AllocateMemory({
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
- .pNext = nullptr,
+ .pNext = export_allocations ? &export_allocate_info : nullptr,
.allocationSize = size,
.memoryTypeIndex = type,
});
- allocations.push_back(
- std::make_unique<MemoryAllocation>(device, std::move(memory), flags, size, type));
+ allocations.push_back(std::make_unique<MemoryAllocation>(std::move(memory), flags, size, type));
}
std::optional<MemoryCommit> MemoryAllocator::TryCommit(const VkMemoryRequirements& requirements,
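
The allocator changes above implement Vulkan-to-OpenGL memory interop: when export_allocations is set, each VkMemoryAllocateInfo is chained with a VkExportMemoryAllocateInfo, and ExportOpenGLHandle() later imports the exported native handle as a GL memory object via GL_EXT_memory_object. A condensed sketch of the Linux fd path, assuming VK_KHR_external_memory_fd and GL_EXT_memory_object_fd are available and the entry points are loaded (glad, as included above):

    #include <glad/glad.h>
    #include <vulkan/vulkan.h>

    GLuint ImportAsGLMemoryObject(VkDevice device, VkDeviceMemory memory, GLuint64 size) {
        VkMemoryGetFdInfoKHR get_fd_info{};
        get_fd_info.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR;
        get_fd_info.memory = memory;
        get_fd_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;

        int fd = -1;
        vkGetMemoryFdKHR(device, &get_fd_info, &fd); // the caller receives ownership of fd

        GLuint memory_object = 0;
        glCreateMemoryObjectsEXT(1, &memory_object);
        // On successful import, GL takes ownership of the fd; do not close it afterwards.
        glImportMemoryFdEXT(memory_object, size, GL_HANDLE_TYPE_OPAQUE_FD_EXT, fd);
        return memory_object;
    }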
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.h b/src/video_core/vulkan_common/vulkan_memory_allocator.h
index 9e6cfabf9..d1ce29450 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.h
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.h
@@ -43,6 +43,9 @@ public:
/// It will map the backing allocation if it hasn't been mapped before.
std::span<u8> Map();
+ /// Returns a non-owning OpenGL handle, creating one if it doesn't exist.
+ u32 ExportOpenGLHandle() const;
+
/// Returns the Vulkan memory handler.
VkDeviceMemory Memory() const {
return memory;
@@ -67,7 +70,15 @@ private:
/// Allocates and releases memory allocations on demand.
class MemoryAllocator {
public:
- explicit MemoryAllocator(const Device& device_);
+ /**
+ * Construct memory allocator
+ *
+ * @param device_ Device to allocate from
+ * @param export_allocations_ True when allocations have to be exported
+ *
+ * @throw vk::Exception on failure
+ */
+ explicit MemoryAllocator(const Device& device_, bool export_allocations_);
~MemoryAllocator();
MemoryAllocator& operator=(const MemoryAllocator&) = delete;
@@ -106,8 +117,9 @@ private:
/// Returns index to the fastest memory type compatible with the passed requirements.
std::optional<u32> FindType(VkMemoryPropertyFlags flags, u32 type_mask) const;
- const Device& device; ///< Device handle.
- const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties.
+ const Device& device; ///< Device handle.
+ const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties.
+ const bool export_allocations; ///< True when memory allocations have to be exported.
std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations.
};
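
A consequence of the new constructor flag is that exporting and non-exporting allocators can coexist in one process. Hypothetical call sites (a fragment, not runnable on its own; the flag name comes from this diff, the variable names are illustrative):

    // An OpenGL-interop path pays the export cost; a pure-Vulkan path does not.
    MemoryAllocator vulkan_only_allocator(device, /*export_allocations_=*/false);
    MemoryAllocator gl_interop_allocator(device, /*export_allocations_=*/true);

    // A commit made from the exporting allocator can then be mirrored in GL:
    // const u32 gl_memory_object = commit.ExportOpenGLHandle();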
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp
index 5e15ad607..2aa0ffbe6 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.cpp
+++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp
@@ -168,11 +168,15 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkFreeCommandBuffers);
X(vkFreeDescriptorSets);
X(vkFreeMemory);
- X(vkGetBufferMemoryRequirements);
+ X(vkGetBufferMemoryRequirements2);
X(vkGetDeviceQueue);
X(vkGetEventStatus);
X(vkGetFenceStatus);
X(vkGetImageMemoryRequirements);
+ X(vkGetMemoryFdKHR);
+#ifdef _WIN32
+ X(vkGetMemoryWin32HandleKHR);
+#endif
X(vkGetQueryPoolResults);
X(vkGetSemaphoreCounterValueKHR);
X(vkMapMemory);
@@ -505,6 +509,32 @@ void ImageView::SetObjectNameEXT(const char* name) const {
SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE_VIEW, name);
}
+int DeviceMemory::GetMemoryFdKHR() const {
+ const VkMemoryGetFdInfoKHR get_fd_info{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
+ .pNext = nullptr,
+ .memory = handle,
+ .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
+ };
+ int fd;
+ Check(dld->vkGetMemoryFdKHR(owner, &get_fd_info, &fd));
+ return fd;
+}
+
+#ifdef _WIN32
+HANDLE DeviceMemory::GetMemoryWin32HandleKHR() const {
+ const VkMemoryGetWin32HandleInfoKHR get_win32_handle_info{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR,
+ .pNext = nullptr,
+ .memory = handle,
+ .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR,
+ };
+ HANDLE win32_handle;
+ Check(dld->vkGetMemoryWin32HandleKHR(owner, &get_win32_handle_info, &win32_handle));
+ return win32_handle;
+}
+#endif
+
void DeviceMemory::SetObjectNameEXT(const char* name) const {
SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_DEVICE_MEMORY, name);
}
@@ -756,10 +786,20 @@ DeviceMemory Device::AllocateMemory(const VkMemoryAllocateInfo& ai) const {
return DeviceMemory(memory, handle, *dld);
}
-VkMemoryRequirements Device::GetBufferMemoryRequirements(VkBuffer buffer) const noexcept {
- VkMemoryRequirements requirements;
- dld->vkGetBufferMemoryRequirements(handle, buffer, &requirements);
- return requirements;
+VkMemoryRequirements Device::GetBufferMemoryRequirements(VkBuffer buffer,
+ void* pnext) const noexcept {
+ const VkBufferMemoryRequirementsInfo2 info{
+ .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
+ .pNext = nullptr,
+ .buffer = buffer,
+ };
+ VkMemoryRequirements2 requirements{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
+ .pNext = pnext,
+ .memoryRequirements{},
+ };
+ dld->vkGetBufferMemoryRequirements2(handle, &info, &requirements);
+ return requirements.memoryRequirements;
}
VkMemoryRequirements Device::GetImageMemoryRequirements(VkImage image) const noexcept {
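
GetBufferMemoryRequirements() now forwards to vkGetBufferMemoryRequirements2 and exposes the output struct's pNext chain; the diff itself passes nullptr by default. One plausible use of the new parameter — a sketch, not something this diff does — is chaining VkMemoryDedicatedRequirements to detect buffers the driver would prefer in a dedicated allocation:

    #include <vulkan/vulkan.h>

    // 'device' is the wrapper's vk::Device; 'buffer' is a live VkBuffer.
    void CheckDedicated(const Vulkan::vk::Device& device, VkBuffer buffer) {
        VkMemoryDedicatedRequirements dedicated{};
        dedicated.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS;

        // The pnext argument is chained onto VkMemoryRequirements2, so the
        // driver fills 'dedicated' during the same query.
        const VkMemoryRequirements requirements =
            device.GetBufferMemoryRequirements(buffer, &dedicated);
        if (dedicated.prefersDedicatedAllocation != VK_FALSE) {
            // Give this buffer its own VkDeviceMemory instead of suballocating.
        }
        (void)requirements;
    }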
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h
index 9689de0cb..3e36d356a 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.h
+++ b/src/video_core/vulkan_common/vulkan_wrapper.h
@@ -15,8 +15,19 @@
#include <vector>
#define VK_NO_PROTOTYPES
+#ifdef _WIN32
+#define VK_USE_PLATFORM_WIN32_KHR
+#endif
#include <vulkan/vulkan.h>
+// Sanitize macros
+#ifdef CreateEvent
+#undef CreateEvent
+#endif
+#ifdef CreateSemaphore
+#undef CreateSemaphore
+#endif
+
#include "common/common_types.h"
#ifdef _MSC_VER
@@ -174,7 +185,7 @@ struct InstanceDispatch {
};
/// Table holding Vulkan device function pointers.
-struct DeviceDispatch : public InstanceDispatch {
+struct DeviceDispatch : InstanceDispatch {
PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR{};
PFN_vkAllocateCommandBuffers vkAllocateCommandBuffers{};
PFN_vkAllocateDescriptorSets vkAllocateDescriptorSets{};
@@ -272,11 +283,15 @@ struct DeviceDispatch : public InstanceDispatch {
PFN_vkFreeCommandBuffers vkFreeCommandBuffers{};
PFN_vkFreeDescriptorSets vkFreeDescriptorSets{};
PFN_vkFreeMemory vkFreeMemory{};
- PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements{};
+ PFN_vkGetBufferMemoryRequirements2 vkGetBufferMemoryRequirements2{};
PFN_vkGetDeviceQueue vkGetDeviceQueue{};
PFN_vkGetEventStatus vkGetEventStatus{};
PFN_vkGetFenceStatus vkGetFenceStatus{};
PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements{};
+ PFN_vkGetMemoryFdKHR vkGetMemoryFdKHR{};
+#ifdef _WIN32
+ PFN_vkGetMemoryWin32HandleKHR vkGetMemoryWin32HandleKHR{};
+#endif
PFN_vkGetQueryPoolResults vkGetQueryPoolResults{};
PFN_vkGetSemaphoreCounterValueKHR vkGetSemaphoreCounterValueKHR{};
PFN_vkMapMemory vkMapMemory{};
@@ -344,6 +359,9 @@ public:
/// Construct an empty handle.
Handle() = default;
+ /// Construct an empty handle.
+ Handle(std::nullptr_t) {}
+
/// Copying Vulkan objects is not supported and will never be.
Handle(const Handle&) = delete;
Handle& operator=(const Handle&) = delete;
@@ -659,6 +677,12 @@ class DeviceMemory : public Handle<VkDeviceMemory, VkDevice, DeviceDispatch> {
using Handle<VkDeviceMemory, VkDevice, DeviceDispatch>::Handle;
public:
+ int GetMemoryFdKHR() const;
+
+#ifdef _WIN32
+ HANDLE GetMemoryWin32HandleKHR() const;
+#endif
+
/// Set object name.
void SetObjectNameEXT(const char* name) const;
@@ -847,7 +871,8 @@ public:
DeviceMemory AllocateMemory(const VkMemoryAllocateInfo& ai) const;
- VkMemoryRequirements GetBufferMemoryRequirements(VkBuffer buffer) const noexcept;
+ VkMemoryRequirements GetBufferMemoryRequirements(VkBuffer buffer,
+ void* pnext = nullptr) const noexcept;
VkMemoryRequirements GetImageMemoryRequirements(VkImage image) const noexcept;
@@ -1033,6 +1058,12 @@ public:
void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
VkDependencyFlags dependency_flags,
+ const VkMemoryBarrier& memory_barrier) const noexcept {
+ PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, memory_barrier, {}, {});
+ }
+
+ void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
+ VkDependencyFlags dependency_flags,
const VkBufferMemoryBarrier& buffer_barrier) const noexcept {
PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, buffer_barrier, {});
}
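
The header hunks above opt into the Win32 platform symbols before including vulkan.h, then #undef two windows.h function-like macros. The reason: CreateEvent and CreateSemaphore are macros that expand to their W/A variants, which silently rewrites any C++ method of the same name, and the wrapper declares such methods. A tiny self-contained illustration of the collision:

    #define CreateEvent CreateEventW // what <windows.h> effectively does

    struct Timeline {
        // Without an #undef, the preprocessor turns this declaration
        // into 'void CreateEventW();'.
        void CreateEvent();
    };

    #undef CreateEvent // restore the name, as the header above does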
diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt
index fb9967c8f..b025ced1c 100644
--- a/src/yuzu/CMakeLists.txt
+++ b/src/yuzu/CMakeLists.txt
@@ -151,6 +151,7 @@ add_executable(yuzu
util/util.h
compatdb.cpp
compatdb.h
+ yuzu.qrc
yuzu.rc
)
diff --git a/src/yuzu/applets/controller.cpp b/src/yuzu/applets/controller.cpp
index c680fd2c2..b92cd6886 100644
--- a/src/yuzu/applets/controller.cpp
+++ b/src/yuzu/applets/controller.cpp
@@ -67,6 +67,8 @@ bool IsControllerCompatible(Settings::ControllerType controller_type,
return parameters.allow_right_joycon;
case Settings::ControllerType::Handheld:
return parameters.enable_single_mode && parameters.allow_handheld;
+ case Settings::ControllerType::GameCube:
+ return parameters.allow_gamecube_controller;
default:
return false;
}
@@ -370,7 +372,7 @@ void QtControllerSelectorDialog::SetSupportedControllers() {
QStringLiteral("image: url(:/controller/applet_joycon_right%0_disabled); ").arg(theme));
}
- if (parameters.allow_pro_controller) {
+ if (parameters.allow_pro_controller || parameters.allow_gamecube_controller) {
ui->controllerSupported5->setStyleSheet(
QStringLiteral("image: url(:/controller/applet_pro_controller%0); ").arg(theme));
} else {
@@ -420,6 +422,10 @@ void QtControllerSelectorDialog::SetEmulatedControllers(std::size_t player_index
Settings::ControllerType::Handheld);
emulated_controllers[player_index]->addItem(tr("Handheld"));
}
+
+ pairs.emplace_back(emulated_controllers[player_index]->count(),
+ Settings::ControllerType::GameCube);
+ emulated_controllers[player_index]->addItem(tr("GameCube Controller"));
}
Settings::ControllerType QtControllerSelectorDialog::GetControllerTypeFromIndex(
@@ -461,6 +467,7 @@ void QtControllerSelectorDialog::UpdateControllerIcon(std::size_t player_index)
switch (GetControllerTypeFromIndex(emulated_controllers[player_index]->currentIndex(),
player_index)) {
case Settings::ControllerType::ProController:
+ case Settings::ControllerType::GameCube:
return QStringLiteral("image: url(:/controller/applet_pro_controller%0); ");
case Settings::ControllerType::DualJoyconDetached:
return QStringLiteral("image: url(:/controller/applet_dual_joycon%0); ");
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index ffdf34a4a..1c61d419d 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -64,7 +64,7 @@ void EmuThread::run() {
emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0);
- system.Renderer().Rasterizer().LoadDiskResources(
+ system.Renderer().ReadRasterizer()->LoadDiskResources(
system.CurrentProcess()->GetTitleID(), stop_run,
[this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) {
emit LoadProgress(stage, value, total);
@@ -405,12 +405,17 @@ void GRenderWindow::mouseMoveEvent(QMouseEvent* event) {
if (event->source() == Qt::MouseEventSynthesizedBySystem) {
return;
}
-
auto pos = event->pos();
const auto [x, y] = ScaleTouch(pos);
- input_subsystem->GetMouse()->MouseMove(x, y);
+ const int center_x = width() / 2;
+ const int center_y = height() / 2;
+ input_subsystem->GetMouse()->MouseMove(x, y, center_x, center_y);
this->TouchMoved(x, y, 0);
+ if (Settings::values.mouse_panning) {
+ QCursor::setPos(mapToGlobal({center_x, center_y}));
+ }
+
emit MouseActivity();
}
@@ -714,6 +719,11 @@ void GRenderWindow::showEvent(QShowEvent* event) {
bool GRenderWindow::eventFilter(QObject* object, QEvent* event) {
if (event->type() == QEvent::HoverMove) {
+ if (Settings::values.mouse_panning) {
+ auto* hover_event = static_cast<QMouseEvent*>(event);
+ mouseMoveEvent(hover_event);
+ return false;
+ }
emit MouseActivity();
}
return false;
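
The mouse-panning flow above works by measuring each move event against the widget center, feeding that delta to the input subsystem, and then warping the cursor back to the center so the next event yields a fresh relative delta — effectively unbounded relative motion from an absolute-position API. A stripped-down Qt sketch of the pattern (the class and ApplyPan are illustrative, not names from this diff):

    #include <QCursor>
    #include <QMouseEvent>
    #include <QWidget>

    class PanningWidget : public QWidget {
    protected:
        void mouseMoveEvent(QMouseEvent* event) override {
            const QPoint center{width() / 2, height() / 2};
            const QPoint delta = event->pos() - center; // motion since the last warp
            ApplyPan(delta.x(), delta.y());             // hypothetical consumer
            QCursor::setPos(mapToGlobal(center));       // recenter for the next event
        }

    private:
        void ApplyPan(int dx, int dy) {} // stub; the real code feeds the mouse device
    };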
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index 8d85a1986..3d6f64300 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -220,7 +220,7 @@ const std::array<int, Settings::NativeKeyboard::NumKeyboardMods> Config::default
// This must be in alphabetical order according to action name as it must have the same order as
// UISetting::values.shortcuts, which is alphabetically ordered.
// clang-format off
-const std::array<UISettings::Shortcut, 16> Config::default_hotkeys{{
+const std::array<UISettings::Shortcut, 17> Config::default_hotkeys{{
{QStringLiteral("Capture Screenshot"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+P"), Qt::WidgetWithChildrenShortcut}},
{QStringLiteral("Change Docked Mode"), QStringLiteral("Main Window"), {QStringLiteral("F10"), Qt::ApplicationShortcut}},
{QStringLiteral("Continue/Pause Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F4"), Qt::WindowShortcut}},
@@ -235,6 +235,7 @@ const std::array<UISettings::Shortcut, 16> Config::default_hotkeys{{
{QStringLiteral("Restart Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F6"), Qt::WindowShortcut}},
{QStringLiteral("Stop Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F5"), Qt::WindowShortcut}},
{QStringLiteral("Toggle Filter Bar"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+F"), Qt::WindowShortcut}},
+ {QStringLiteral("Toggle Mouse Panning"), QStringLiteral("Main Window"), {QStringLiteral("F9"), Qt::ApplicationShortcut}},
{QStringLiteral("Toggle Speed Limit"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+Z"), Qt::ApplicationShortcut}},
{QStringLiteral("Toggle Status Bar"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+S"), Qt::WindowShortcut}},
}};
@@ -507,6 +508,9 @@ void Config::ReadControlValues() {
Settings::values.emulate_analog_keyboard =
ReadSetting(QStringLiteral("emulate_analog_keyboard"), false).toBool();
+ Settings::values.mouse_panning = ReadSetting(QStringLiteral("mouse_panning"), false).toBool();
+ Settings::values.mouse_panning_sensitivity =
+ ReadSetting(QStringLiteral("mouse_panning_sensitivity"), 1).toFloat();
ReadSettingGlobal(Settings::values.use_docked_mode, QStringLiteral("use_docked_mode"), true);
ReadSettingGlobal(Settings::values.vibration_enabled, QStringLiteral("vibration_enabled"),
@@ -610,12 +614,6 @@ void Config::ReadDataStorageValues() {
QString::fromStdString(FS::GetUserPath(FS::UserPath::DumpDir)))
.toString()
.toStdString());
- FS::GetUserPath(FS::UserPath::CacheDir,
- qt_config
- ->value(QStringLiteral("cache_directory"),
- QString::fromStdString(FS::GetUserPath(FS::UserPath::CacheDir)))
- .toString()
- .toStdString());
Settings::values.gamecard_inserted =
ReadSetting(QStringLiteral("gamecard_inserted"), false).toBool();
Settings::values.gamecard_current_game =
@@ -778,14 +776,14 @@ void Config::ReadRendererValues() {
ReadSettingGlobal(Settings::values.frame_limit, QStringLiteral("frame_limit"), 100);
ReadSettingGlobal(Settings::values.use_disk_shader_cache,
QStringLiteral("use_disk_shader_cache"), true);
- ReadSettingGlobal(Settings::values.gpu_accuracy, QStringLiteral("gpu_accuracy"), 0);
+ ReadSettingGlobal(Settings::values.gpu_accuracy, QStringLiteral("gpu_accuracy"), 1);
ReadSettingGlobal(Settings::values.use_asynchronous_gpu_emulation,
QStringLiteral("use_asynchronous_gpu_emulation"), true);
ReadSettingGlobal(Settings::values.use_nvdec_emulation, QStringLiteral("use_nvdec_emulation"),
true);
ReadSettingGlobal(Settings::values.use_vsync, QStringLiteral("use_vsync"), true);
ReadSettingGlobal(Settings::values.use_assembly_shaders, QStringLiteral("use_assembly_shaders"),
- true);
+ false);
ReadSettingGlobal(Settings::values.use_asynchronous_shaders,
QStringLiteral("use_asynchronous_shaders"), false);
ReadSettingGlobal(Settings::values.use_fast_gpu_time, QStringLiteral("use_fast_gpu_time"),
@@ -1184,7 +1182,9 @@ void Config::SaveControlValues() {
WriteSetting(QStringLiteral("keyboard_enabled"), Settings::values.keyboard_enabled, false);
WriteSetting(QStringLiteral("emulate_analog_keyboard"),
Settings::values.emulate_analog_keyboard, false);
-
+ WriteSetting(QStringLiteral("mouse_panning"), Settings::values.mouse_panning, false);
+ WriteSetting(QStringLiteral("mouse_panning_sensitivity"),
+ Settings::values.mouse_panning_sensitivity, 1.0f);
qt_config->endGroup();
}
@@ -1212,9 +1212,6 @@ void Config::SaveDataStorageValues() {
WriteSetting(QStringLiteral("dump_directory"),
QString::fromStdString(FS::GetUserPath(FS::UserPath::DumpDir)),
QString::fromStdString(FS::GetUserPath(FS::UserPath::DumpDir)));
- WriteSetting(QStringLiteral("cache_directory"),
- QString::fromStdString(FS::GetUserPath(FS::UserPath::CacheDir)),
- QString::fromStdString(FS::GetUserPath(FS::UserPath::CacheDir)));
WriteSetting(QStringLiteral("gamecard_inserted"), Settings::values.gamecard_inserted, false);
WriteSetting(QStringLiteral("gamecard_current_game"), Settings::values.gamecard_current_game,
false);
@@ -1345,14 +1342,14 @@ void Config::SaveRendererValues() {
Settings::values.use_disk_shader_cache, true);
WriteSettingGlobal(QStringLiteral("gpu_accuracy"),
static_cast<int>(Settings::values.gpu_accuracy.GetValue(global)),
- Settings::values.gpu_accuracy.UsingGlobal(), 0);
+ Settings::values.gpu_accuracy.UsingGlobal(), 1);
WriteSettingGlobal(QStringLiteral("use_asynchronous_gpu_emulation"),
Settings::values.use_asynchronous_gpu_emulation, true);
WriteSettingGlobal(QStringLiteral("use_nvdec_emulation"), Settings::values.use_nvdec_emulation,
true);
WriteSettingGlobal(QStringLiteral("use_vsync"), Settings::values.use_vsync, true);
WriteSettingGlobal(QStringLiteral("use_assembly_shaders"),
- Settings::values.use_assembly_shaders, true);
+ Settings::values.use_assembly_shaders, false);
WriteSettingGlobal(QStringLiteral("use_asynchronous_shaders"),
Settings::values.use_asynchronous_shaders, false);
WriteSettingGlobal(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time,
diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h
index 8a600e19d..949c4eb13 100644
--- a/src/yuzu/configuration/config.h
+++ b/src/yuzu/configuration/config.h
@@ -42,7 +42,7 @@ public:
default_mouse_buttons;
static const std::array<int, Settings::NativeKeyboard::NumKeyboardKeys> default_keyboard_keys;
static const std::array<int, Settings::NativeKeyboard::NumKeyboardMods> default_keyboard_mods;
- static const std::array<UISettings::Shortcut, 16> default_hotkeys;
+ static const std::array<UISettings::Shortcut, 17> default_hotkeys;
private:
void Initialize(const std::string& config_name);
diff --git a/src/yuzu/configuration/configure_filesystem.cpp b/src/yuzu/configuration/configure_filesystem.cpp
index 7ab4a80f7..bde2d4620 100644
--- a/src/yuzu/configuration/configure_filesystem.cpp
+++ b/src/yuzu/configuration/configure_filesystem.cpp
@@ -26,8 +26,6 @@ ConfigureFilesystem::ConfigureFilesystem(QWidget* parent)
[this] { SetDirectory(DirectoryTarget::Dump, ui->dump_path_edit); });
connect(ui->load_path_button, &QToolButton::pressed, this,
[this] { SetDirectory(DirectoryTarget::Load, ui->load_path_edit); });
- connect(ui->cache_directory_button, &QToolButton::pressed, this,
- [this] { SetDirectory(DirectoryTarget::Cache, ui->cache_directory_edit); });
connect(ui->reset_game_list_cache, &QPushButton::pressed, this,
&ConfigureFilesystem::ResetMetadata);
@@ -50,8 +48,6 @@ void ConfigureFilesystem::setConfiguration() {
QString::fromStdString(Common::FS::GetUserPath(Common::FS::UserPath::DumpDir)));
ui->load_path_edit->setText(
QString::fromStdString(Common::FS::GetUserPath(Common::FS::UserPath::LoadDir)));
- ui->cache_directory_edit->setText(
- QString::fromStdString(Common::FS::GetUserPath(Common::FS::UserPath::CacheDir)));
ui->gamecard_inserted->setChecked(Settings::values.gamecard_inserted);
ui->gamecard_current_game->setChecked(Settings::values.gamecard_current_game);
@@ -72,9 +68,6 @@ void ConfigureFilesystem::applyConfiguration() {
ui->dump_path_edit->text().toStdString());
Common::FS::GetUserPath(Common::FS::UserPath::LoadDir,
ui->load_path_edit->text().toStdString());
- Common::FS::GetUserPath(Common::FS::UserPath::CacheDir,
- ui->cache_directory_edit->text().toStdString());
-
Settings::values.gamecard_path = ui->gamecard_path_edit->text().toStdString();
Settings::values.gamecard_inserted = ui->gamecard_inserted->isChecked();
Settings::values.gamecard_current_game = ui->gamecard_current_game->isChecked();
@@ -103,9 +96,6 @@ void ConfigureFilesystem::SetDirectory(DirectoryTarget target, QLineEdit* edit)
case DirectoryTarget::Load:
caption = tr("Select Mod Load Directory...");
break;
- case DirectoryTarget::Cache:
- caption = tr("Select Cache Directory...");
- break;
}
QString str;
diff --git a/src/yuzu/configuration/configure_filesystem.h b/src/yuzu/configuration/configure_filesystem.h
index a79303760..2147cd405 100644
--- a/src/yuzu/configuration/configure_filesystem.h
+++ b/src/yuzu/configuration/configure_filesystem.h
@@ -32,7 +32,6 @@ private:
Gamecard,
Dump,
Load,
- Cache,
};
void SetDirectory(DirectoryTarget target, QLineEdit* edit);
diff --git a/src/yuzu/configuration/configure_filesystem.ui b/src/yuzu/configuration/configure_filesystem.ui
index 84bea0600..62b9abc7a 100644
--- a/src/yuzu/configuration/configure_filesystem.ui
+++ b/src/yuzu/configuration/configure_filesystem.ui
@@ -198,40 +198,7 @@
<string>Caching</string>
</property>
<layout class="QGridLayout" name="gridLayout_5">
- <item row="0" column="0">
- <widget class="QLabel" name="label_10">
- <property name="text">
- <string>Cache Directory</string>
- </property>
- </widget>
- </item>
- <item row="0" column="1">
- <spacer name="horizontalSpacer_3">
- <property name="orientation">
- <enum>Qt::Horizontal</enum>
- </property>
- <property name="sizeType">
- <enum>QSizePolicy::Fixed</enum>
- </property>
- <property name="sizeHint" stdset="0">
- <size>
- <width>40</width>
- <height>20</height>
- </size>
- </property>
- </spacer>
- </item>
- <item row="0" column="2">
- <widget class="QLineEdit" name="cache_directory_edit"/>
- </item>
- <item row="0" column="3">
- <widget class="QToolButton" name="cache_directory_button">
- <property name="text">
- <string>...</string>
- </property>
- </widget>
- </item>
- <item row="1" column="0" colspan="4">
+ <item row="0" column="0" colspan="2">
<layout class="QHBoxLayout" name="horizontalLayout_2">
<item>
<widget class="QCheckBox" name="cache_game_list">
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index b78a5dff0..9ff32aec4 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -2,6 +2,9 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+// Include this early to ensure the Vulkan headers are included the way we want
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
#include <QColorDialog>
#include <QComboBox>
#include <QVulkanInstance>
@@ -11,7 +14,8 @@
#include "core/core.h"
#include "core/settings.h"
#include "ui_configure_graphics.h"
-#include "video_core/renderer_vulkan/renderer_vulkan.h"
+#include "video_core/vulkan_common/vulkan_instance.h"
+#include "video_core/vulkan_common/vulkan_library.h"
#include "yuzu/configuration/configuration_shared.h"
#include "yuzu/configuration/configure_graphics.h"
@@ -212,11 +216,23 @@ void ConfigureGraphics::UpdateDeviceComboBox() {
ui->device->setEnabled(enabled && !Core::System::GetInstance().IsPoweredOn());
}
-void ConfigureGraphics::RetrieveVulkanDevices() {
+void ConfigureGraphics::RetrieveVulkanDevices() try {
+ using namespace Vulkan;
+
+ vk::InstanceDispatch dld;
+ const Common::DynamicLibrary library = OpenLibrary();
+ const vk::Instance instance = CreateInstance(library, dld, VK_API_VERSION_1_0);
+ const std::vector<VkPhysicalDevice> physical_devices = instance.EnumeratePhysicalDevices();
+
vulkan_devices.clear();
- for (const auto& name : Vulkan::RendererVulkan::EnumerateDevices()) {
+ vulkan_devices.reserve(physical_devices.size());
+ for (const VkPhysicalDevice device : physical_devices) {
+ const char* const name = vk::PhysicalDevice(device, dld).GetProperties().deviceName;
vulkan_devices.push_back(QString::fromStdString(name));
}
+
+} catch (const Vulkan::vk::Exception& exception) {
+ LOG_ERROR(Frontend, "Failed to enumerate devices with error: {}", exception.what());
}
Settings::RendererBackend ConfigureGraphics::GetCurrentGraphicsBackend() const {
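
RetrieveVulkanDevices() now enumerates adapters by creating a short-lived instance directly, and it wraps the whole body in a function-try-block so a missing loader or failed instance creation degrades to a logged error and an empty device list rather than an uncaught exception. For reference, the syntax attaches the handler to the entire function body; a minimal runnable sketch (names illustrative):

    #include <cstdio>
    #include <stdexcept>

    void EnumerateSomething() try {
        throw std::runtime_error("no Vulkan loader"); // stand-in for the real work
    } catch (const std::exception& e) {
        // Runs instead of propagating; the void function then returns normally.
        std::fprintf(stderr, "enumeration failed: %s\n", e.what());
    }

    int main() {
        EnumerateSomething(); // does not terminate the program
    }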
diff --git a/src/yuzu/configuration/configure_input_advanced.cpp b/src/yuzu/configuration/configure_input_advanced.cpp
index 4e557bc6f..a1a0eb676 100644
--- a/src/yuzu/configuration/configure_input_advanced.cpp
+++ b/src/yuzu/configuration/configure_input_advanced.cpp
@@ -122,6 +122,9 @@ void ConfigureInputAdvanced::ApplyConfiguration() {
Settings::values.mouse_enabled = ui->mouse_enabled->isChecked();
Settings::values.keyboard_enabled = ui->keyboard_enabled->isChecked();
Settings::values.emulate_analog_keyboard = ui->emulate_analog_keyboard->isChecked();
+ Settings::values.mouse_panning = ui->mouse_panning->isChecked();
+ Settings::values.mouse_panning_sensitivity =
+ static_cast<float>(ui->mouse_panning_sensitivity->value());
Settings::values.touchscreen.enabled = ui->touchscreen_enabled->isChecked();
}
@@ -149,6 +152,8 @@ void ConfigureInputAdvanced::LoadConfiguration() {
ui->mouse_enabled->setChecked(Settings::values.mouse_enabled);
ui->keyboard_enabled->setChecked(Settings::values.keyboard_enabled);
ui->emulate_analog_keyboard->setChecked(Settings::values.emulate_analog_keyboard);
+ ui->mouse_panning->setChecked(Settings::values.mouse_panning);
+ ui->mouse_panning_sensitivity->setValue(Settings::values.mouse_panning_sensitivity);
ui->touchscreen_enabled->setChecked(Settings::values.touchscreen.enabled);
UpdateUIEnabled();
diff --git a/src/yuzu/configuration/configure_input_advanced.ui b/src/yuzu/configuration/configure_input_advanced.ui
index f207e5d3b..173130d8d 100644
--- a/src/yuzu/configuration/configure_input_advanced.ui
+++ b/src/yuzu/configuration/configure_input_advanced.ui
@@ -2546,27 +2546,65 @@
</property>
</widget>
</item>
- <item row="1" column="0">
- <widget class="QCheckBox" name="emulate_analog_keyboard">
- <property name="minimumSize">
- <size>
- <width>0</width>
- <height>23</height>
- </size>
- </property>
- <property name="text">
- <string>Emulate Analog with Keyboard Input</string>
- </property>
- </widget>
- </item>
- <item row="5" column="2">
+ <item row="1" column="0">
+ <widget class="QCheckBox" name="emulate_analog_keyboard">
+ <property name="minimumSize">
+ <size>
+ <width>0</width>
+ <height>23</height>
+ </size>
+ </property>
+ <property name="text">
+ <string>Emulate Analog with Keyboard Input</string>
+ </property>
+ </widget>
+ </item>
+ <item row="2" column="0">
+ <widget class="QCheckBox" name="mouse_panning">
+ <property name="minimumSize">
+ <size>
+ <width>0</width>
+ <height>23</height>
+ </size>
+ </property>
+ <property name="text">
+ <string>Enable mouse panning</string>
+ </property>
+ </widget>
+ </item>
+ <item row="2" column="2">
+ <widget class="QDoubleSpinBox" name="mouse_panning_sensitivity">
+ <property name="toolTip">
+ <string>Mouse sensitivity</string>
+ </property>
+ <property name="alignment">
+ <set>Qt::AlignCenter</set>
+ </property>
+ <property name="decimals">
+ <number>2</number>
+ </property>
+ <property name="minimum">
+ <double>0.100000000000000</double>
+ </property>
+ <property name="maximum">
+ <double>16.000000000000000</double>
+ </property>
+ <property name="singleStep">
+ <double>0.010000000000000</double>
+ </property>
+ <property name="value">
+ <double>1.000000000000000</double>
+ </property>
+ </widget>
+ </item>
+ <item row="6" column="2">
<widget class="QPushButton" name="touchscreen_advanced">
<property name="text">
<string>Advanced</string>
</property>
</widget>
</item>
- <item row="2" column="1">
+ <item row="3" column="1">
<spacer name="horizontalSpacer_8">
<property name="orientation">
<enum>Qt::Horizontal</enum>
@@ -2582,21 +2620,21 @@
</property>
</spacer>
</item>
- <item row="2" column="2">
+ <item row="3" column="2">
<widget class="QPushButton" name="mouse_advanced">
<property name="text">
<string>Advanced</string>
</property>
</widget>
</item>
- <item row="5" column="0">
+ <item row="6" column="0">
<widget class="QCheckBox" name="touchscreen_enabled">
<property name="text">
<string>Touchscreen</string>
</property>
</widget>
</item>
- <item row="2" column="0">
+ <item row="3" column="0">
<widget class="QCheckBox" name="mouse_enabled">
<property name="minimumSize">
<size>
@@ -2609,28 +2647,28 @@
</property>
</widget>
</item>
- <item row="7" column="0">
+ <item row="8" column="0">
<widget class="QLabel" name="motion_touch">
<property name="text">
<string>Motion / Touch</string>
</property>
</widget>
</item>
- <item row="7" column="2">
+ <item row="8" column="2">
<widget class="QPushButton" name="buttonMotionTouch">
<property name="text">
<string>Configure</string>
</property>
</widget>
</item>
- <item row="6" column="0">
+ <item row="7" column="0">
<widget class="QCheckBox" name="debug_enabled">
<property name="text">
<string>Debug Controller</string>
</property>
</widget>
</item>
- <item row="6" column="2">
+ <item row="7" column="2">
<widget class="QPushButton" name="debug_configure">
<property name="text">
<string>Configure</string>
diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp
index c9d19c948..21d0d3449 100644
--- a/src/yuzu/configuration/configure_input_player.cpp
+++ b/src/yuzu/configuration/configure_input_player.cpp
@@ -467,10 +467,14 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i
UpdateControllerIcon();
UpdateControllerAvailableButtons();
+ UpdateControllerEnabledButtons();
+ UpdateControllerButtonNames();
UpdateMotionButtons();
connect(ui->comboControllerType, qOverload<int>(&QComboBox::currentIndexChanged), [this](int) {
UpdateControllerIcon();
UpdateControllerAvailableButtons();
+ UpdateControllerEnabledButtons();
+ UpdateControllerButtonNames();
UpdateMotionButtons();
});
@@ -558,9 +562,6 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i
&ConfigureInputPlayer::SaveProfile);
LoadConfiguration();
-
- // TODO(wwylele): enable this when we actually emulate it
- ui->buttonHome->setEnabled(false);
ui->controllerFrame->SetPlayerInput(player_index, buttons_param, analogs_param);
ui->controllerFrame->SetConnectedStatus(ui->groupConnectedController->isChecked());
}
@@ -924,6 +925,12 @@ void ConfigureInputPlayer::SetConnectableControllers() {
Settings::ControllerType::Handheld);
ui->comboControllerType->addItem(tr("Handheld"));
}
+
+ if (enable_all || npad_style_set.gamecube == 1) {
+ index_controller_type_pairs.emplace_back(ui->comboControllerType->count(),
+ Settings::ControllerType::GameCube);
+ ui->comboControllerType->addItem(tr("GameCube Controller"));
+ }
};
Core::System& system{Core::System::GetInstance()};
@@ -1014,7 +1021,7 @@ void ConfigureInputPlayer::UpdateControllerAvailableButtons() {
// List of all the widgets that will be hidden by any of the following layouts that need
// "unhidden" after the controller type changes
- const std::array<QWidget*, 9> layout_show = {
+ const std::array<QWidget*, 11> layout_show = {
ui->buttonShoulderButtonsSLSR,
ui->horizontalSpacerShoulderButtonsWidget,
ui->horizontalSpacerShoulderButtonsWidget2,
@@ -1024,6 +1031,8 @@ void ConfigureInputPlayer::UpdateControllerAvailableButtons() {
ui->buttonShoulderButtonsRight,
ui->buttonMiscButtonsPlusHome,
ui->bottomRight,
+ ui->buttonMiscButtonsMinusGroup,
+ ui->buttonMiscButtonsScreenshotGroup,
};
for (auto* widget : layout_show) {
@@ -1056,6 +1065,14 @@ void ConfigureInputPlayer::UpdateControllerAvailableButtons() {
ui->bottomLeft,
};
break;
+ case Settings::ControllerType::GameCube:
+ layout_hidden = {
+ ui->buttonShoulderButtonsSLSR,
+ ui->horizontalSpacerShoulderButtonsWidget2,
+ ui->buttonMiscButtonsMinusGroup,
+ ui->buttonMiscButtonsScreenshotGroup,
+ };
+ break;
}
for (auto* widget : layout_hidden) {
@@ -1063,6 +1080,52 @@ void ConfigureInputPlayer::UpdateControllerAvailableButtons() {
}
}
+void ConfigureInputPlayer::UpdateControllerEnabledButtons() {
+ auto layout = GetControllerTypeFromIndex(ui->comboControllerType->currentIndex());
+ if (debug) {
+ layout = Settings::ControllerType::ProController;
+ }
+
+ // List of all the widgets that will be disabled by any of the following layouts that need
+ // "enabled" after the controller type changes
+ const std::array<QWidget*, 4> layout_enable = {
+ ui->buttonHome,
+ ui->buttonLStickPressedGroup,
+ ui->groupRStickPressed,
+ ui->buttonShoulderButtonsButtonLGroup,
+ };
+
+ for (auto* widget : layout_enable) {
+ widget->setEnabled(true);
+ }
+
+ std::vector<QWidget*> layout_disable;
+ switch (layout) {
+ case Settings::ControllerType::ProController:
+ case Settings::ControllerType::DualJoyconDetached:
+ case Settings::ControllerType::Handheld:
+ case Settings::ControllerType::LeftJoycon:
+ case Settings::ControllerType::RightJoycon:
+ // TODO(wwylele): enable this when we actually emulate it
+ layout_disable = {
+ ui->buttonHome,
+ };
+ break;
+ case Settings::ControllerType::GameCube:
+ layout_disable = {
+ ui->buttonHome,
+ ui->buttonLStickPressedGroup,
+ ui->groupRStickPressed,
+ ui->buttonShoulderButtonsButtonLGroup,
+ };
+ break;
+ }
+
+ for (auto* widget : layout_disable) {
+ widget->setEnabled(false);
+ }
+}
+
void ConfigureInputPlayer::UpdateMotionButtons() {
if (debug) {
// Motion isn't used with the debug controller, hide both groupboxes.
@@ -1085,6 +1148,11 @@ void ConfigureInputPlayer::UpdateMotionButtons() {
ui->buttonMotionLeftGroup->hide();
ui->buttonMotionRightGroup->show();
break;
+ case Settings::ControllerType::GameCube:
+ // Hide both "Motion 1/2".
+ ui->buttonMotionLeftGroup->hide();
+ ui->buttonMotionRightGroup->hide();
+ break;
case Settings::ControllerType::DualJoyconDetached:
default:
// Show both "Motion 1/2".
@@ -1094,6 +1162,36 @@ void ConfigureInputPlayer::UpdateMotionButtons() {
}
}
+void ConfigureInputPlayer::UpdateControllerButtonNames() {
+ auto layout = GetControllerTypeFromIndex(ui->comboControllerType->currentIndex());
+ if (debug) {
+ layout = Settings::ControllerType::ProController;
+ }
+
+ switch (layout) {
+ case Settings::ControllerType::ProController:
+ case Settings::ControllerType::DualJoyconDetached:
+ case Settings::ControllerType::Handheld:
+ case Settings::ControllerType::LeftJoycon:
+ case Settings::ControllerType::RightJoycon:
+ ui->buttonMiscButtonsPlusGroup->setTitle(tr("Plus"));
+ ui->buttonShoulderButtonsButtonZLGroup->setTitle(tr("ZL"));
+ ui->buttonShoulderButtonsZRGroup->setTitle(tr("ZR"));
+ ui->buttonShoulderButtonsRGroup->setTitle(tr("R"));
+ ui->LStick->setTitle(tr("Left Stick"));
+ ui->RStick->setTitle(tr("Right Stick"));
+ break;
+ case Settings::ControllerType::GameCube:
+ ui->buttonMiscButtonsPlusGroup->setTitle(tr("Start / Pause"));
+ ui->buttonShoulderButtonsButtonZLGroup->setTitle(tr("L"));
+ ui->buttonShoulderButtonsZRGroup->setTitle(tr("R"));
+ ui->buttonShoulderButtonsRGroup->setTitle(tr("Z"));
+ ui->LStick->setTitle(tr("Control Stick"));
+ ui->RStick->setTitle(tr("C-Stick"));
+ break;
+ }
+}
+
void ConfigureInputPlayer::UpdateMappingWithDefaults() {
if (ui->comboDevices->currentIndex() == 0) {
return;
diff --git a/src/yuzu/configuration/configure_input_player.h b/src/yuzu/configuration/configure_input_player.h
index da2b89136..efe953fbc 100644
--- a/src/yuzu/configuration/configure_input_player.h
+++ b/src/yuzu/configuration/configure_input_player.h
@@ -143,9 +143,15 @@ private:
/// Hides and disables controller settings based on the current controller type.
void UpdateControllerAvailableButtons();
+ /// Disables controller settings based on the current controller type.
+ void UpdateControllerEnabledButtons();
+
/// Shows or hides motion groupboxes based on the current controller type.
void UpdateMotionButtons();
+ /// Alters the button names based on the current controller type.
+ void UpdateControllerButtonNames();
+
/// Gets the default controller mapping for this device and auto configures the input to match.
void UpdateMappingWithDefaults();
diff --git a/src/yuzu/configuration/configure_input_player_widget.cpp b/src/yuzu/configuration/configure_input_player_widget.cpp
index e3e8bde48..61ba91cef 100644
--- a/src/yuzu/configuration/configure_input_player_widget.cpp
+++ b/src/yuzu/configuration/configure_input_player_widget.cpp
@@ -37,7 +37,8 @@ void PlayerControlPreview::SetPlayerInput(std::size_t index, const ButtonParam&
Input::CreateDevice<Input::AnalogDevice>);
UpdateColors();
}
-void PlayerControlPreview::SetPlayerInputRaw(std::size_t index, const Settings::ButtonsRaw buttons_,
+void PlayerControlPreview::SetPlayerInputRaw(std::size_t index,
+ const Settings::ButtonsRaw& buttons_,
Settings::AnalogsRaw analogs_) {
player_index = index;
std::transform(buttons_.begin() + Settings::NativeButton::BUTTON_HID_BEGIN,
@@ -226,6 +227,9 @@ void PlayerControlPreview::paintEvent(QPaintEvent* event) {
case Settings::ControllerType::RightJoycon:
DrawRightController(p, center);
break;
+ case Settings::ControllerType::GameCube:
+ DrawGCController(p, center);
+ break;
case Settings::ControllerType::ProController:
default:
DrawProController(p, center);
@@ -517,14 +521,15 @@ void PlayerControlPreview::DrawDualController(QPainter& p, const QPointF center)
{
// Draw joysticks
using namespace Settings::NativeAnalog;
- DrawJoystick(p, center + QPointF(-65, -65) + (axis_values[LStick].value * 7), 1.62f,
- button_values[Settings::NativeButton::LStick]);
- DrawJoystick(p, center + QPointF(65, 12) + (axis_values[RStick].value * 7), 1.62f,
- button_values[Settings::NativeButton::RStick]);
- DrawRawJoystick(p, center + QPointF(-180, 90), axis_values[LStick].raw_value,
- axis_values[LStick].properties);
- DrawRawJoystick(p, center + QPointF(180, 90), axis_values[RStick].raw_value,
- axis_values[RStick].properties);
+ const auto& l_stick = axis_values[LStick];
+ const auto l_button = button_values[Settings::NativeButton::LStick];
+ const auto& r_stick = axis_values[RStick];
+ const auto r_button = button_values[Settings::NativeButton::RStick];
+
+ DrawJoystick(p, center + QPointF(-65, -65) + (l_stick.value * 7), 1.62f, l_button);
+ DrawJoystick(p, center + QPointF(65, 12) + (r_stick.value * 7), 1.62f, r_button);
+ DrawRawJoystick(p, center + QPointF(-180, 90), l_stick.raw_value, l_stick.properties);
+ DrawRawJoystick(p, center + QPointF(180, 90), r_stick.raw_value, r_stick.properties);
}
using namespace Settings::NativeButton;
@@ -603,14 +608,15 @@ void PlayerControlPreview::DrawHandheldController(QPainter& p, const QPointF cen
{
// Draw joysticks
using namespace Settings::NativeAnalog;
- DrawJoystick(p, center + QPointF(-171, -41) + (axis_values[LStick].value * 4), 1.0f,
- button_values[Settings::NativeButton::LStick]);
- DrawJoystick(p, center + QPointF(171, 8) + (axis_values[RStick].value * 4), 1.0f,
- button_values[Settings::NativeButton::RStick]);
- DrawRawJoystick(p, center + QPointF(-50, 0), axis_values[LStick].raw_value,
- axis_values[LStick].properties);
- DrawRawJoystick(p, center + QPointF(50, 0), axis_values[RStick].raw_value,
- axis_values[RStick].properties);
+ const auto& l_stick = axis_values[LStick];
+ const auto l_button = button_values[Settings::NativeButton::LStick];
+ const auto& r_stick = axis_values[RStick];
+ const auto r_button = button_values[Settings::NativeButton::RStick];
+
+ DrawJoystick(p, center + QPointF(-171, -41) + (l_stick.value * 4), 1.0f, l_button);
+ DrawJoystick(p, center + QPointF(171, 8) + (r_stick.value * 4), 1.0f, r_button);
+ DrawRawJoystick(p, center + QPointF(-50, 0), l_stick.raw_value, l_stick.properties);
+ DrawRawJoystick(p, center + QPointF(50, 0), r_stick.raw_value, r_stick.properties);
}
using namespace Settings::NativeButton;
@@ -699,9 +705,9 @@ void PlayerControlPreview::DrawProController(QPainter& p, const QPointF center)
{
// Draw joysticks
using namespace Settings::NativeAnalog;
- DrawProJoystick(p, center + QPointF(-111, -55) + (axis_values[LStick].value * 11),
+ DrawProJoystick(p, center + QPointF(-111, -55), axis_values[LStick].value, 11,
button_values[Settings::NativeButton::LStick]);
- DrawProJoystick(p, center + QPointF(51, 0) + (axis_values[RStick].value * 11),
+ DrawProJoystick(p, center + QPointF(51, 0), axis_values[RStick].value, 11,
button_values[Settings::NativeButton::RStick]);
DrawRawJoystick(p, center + QPointF(-50, 105), axis_values[LStick].raw_value,
axis_values[LStick].properties);
@@ -1002,12 +1008,6 @@ constexpr std::array<float, 3 * 2> up_arrow_symbol = {
0.0f, -3.0f, -3.0f, 2.0f, 3.0f, 2.0f,
};
-constexpr std::array<float, 13 * 2> up_arrow = {
- 9.4f, -9.8f, 9.4f, -10.2f, 8.9f, -29.8f, 8.5f, -30.0f, 8.1f,
- -30.1f, 7.7f, -30.1f, -8.6f, -30.0f, -9.0f, -29.8f, -9.3f, -29.5f,
- -9.5f, -29.1f, -9.5f, -28.7f, -9.1f, -9.1f, -8.8f, -8.8f,
-};
-
constexpr std::array<float, 64 * 2> trigger_button = {
5.5f, -12.6f, 5.8f, -12.6f, 6.7f, -12.5f, 8.1f, -12.3f, 8.6f, -12.2f, 9.2f, -12.0f,
9.5f, -11.9f, 9.9f, -11.8f, 10.6f, -11.5f, 11.0f, -11.3f, 11.2f, -11.2f, 11.4f, -11.1f,
@@ -1457,15 +1457,18 @@ void PlayerControlPreview::DrawProBody(QPainter& p, const QPointF center) {
constexpr int radius1 = 32;
for (std::size_t point = 0; point < pro_left_handle.size() / 2; ++point) {
- qleft_handle[point] =
- center + QPointF(pro_left_handle[point * 2], pro_left_handle[point * 2 + 1]);
- qright_handle[point] =
- center + QPointF(-pro_left_handle[point * 2], pro_left_handle[point * 2 + 1]);
+ const float left_x = pro_left_handle[point * 2 + 0];
+ const float left_y = pro_left_handle[point * 2 + 1];
+
+ qleft_handle[point] = center + QPointF(left_x, left_y);
+ qright_handle[point] = center + QPointF(-left_x, left_y);
}
for (std::size_t point = 0; point < pro_body.size() / 2; ++point) {
- qbody[point] = center + QPointF(pro_body[point * 2], pro_body[point * 2 + 1]);
- qbody[pro_body.size() - 1 - point] =
- center + QPointF(-pro_body[point * 2], pro_body[point * 2 + 1]);
+ const float body_x = pro_body[point * 2 + 0];
+ const float body_y = pro_body[point * 2 + 1];
+
+ qbody[point] = center + QPointF(body_x, body_y);
+ qbody[pro_body.size() - 1 - point] = center + QPointF(-body_x, body_y);
}
// Draw left handle body
@@ -1496,21 +1499,25 @@ void PlayerControlPreview::DrawGCBody(QPainter& p, const QPointF center) {
constexpr float angle = 2 * 3.1415f / 8;
for (std::size_t point = 0; point < gc_left_body.size() / 2; ++point) {
- qleft_handle[point] =
- center + QPointF(gc_left_body[point * 2], gc_left_body[point * 2 + 1]);
- qright_handle[point] =
- center + QPointF(-gc_left_body[point * 2], gc_left_body[point * 2 + 1]);
+ const float body_x = gc_left_body[point * 2 + 0];
+ const float body_y = gc_left_body[point * 2 + 1];
+
+ qleft_handle[point] = center + QPointF(body_x, body_y);
+ qright_handle[point] = center + QPointF(-body_x, body_y);
}
for (std::size_t point = 0; point < gc_body.size() / 2; ++point) {
- qbody[point] = center + QPointF(gc_body[point * 2], gc_body[point * 2 + 1]);
- qbody[gc_body.size() - 1 - point] =
- center + QPointF(-gc_body[point * 2], gc_body[point * 2 + 1]);
+ const float body_x = gc_body[point * 2 + 0];
+ const float body_y = gc_body[point * 2 + 1];
+
+ qbody[point] = center + QPointF(body_x, body_y);
+ qbody[gc_body.size() - 1 - point] = center + QPointF(-body_x, body_y);
}
for (std::size_t point = 0; point < 8; ++point) {
- left_hex[point] =
- center + QPointF(34 * std::cos(point * angle) - 111, 34 * std::sin(point * angle) - 44);
- right_hex[point] =
- center + QPointF(26 * std::cos(point * angle) + 61, 26 * std::sin(point * angle) + 37);
+ const float point_cos = std::cos(point * angle);
+ const float point_sin = std::sin(point * angle);
+
+ left_hex[point] = center + QPointF(34 * point_cos - 111, 34 * point_sin - 44);
+ right_hex[point] = center + QPointF(26 * point_cos + 61, 26 * point_sin + 37);
}
// Draw body
@@ -1631,32 +1638,36 @@ void PlayerControlPreview::DrawDualBody(QPainter& p, const QPointF center) {
constexpr float offset = 209.3f;
for (std::size_t point = 0; point < left_joycon_body.size() / 2; ++point) {
- left_joycon[point] = center + QPointF(left_joycon_body[point * 2] * size + offset,
- left_joycon_body[point * 2 + 1] * size - 1);
- right_joycon[point] = center + QPointF(-left_joycon_body[point * 2] * size - offset,
- left_joycon_body[point * 2 + 1] * size - 1);
+ const float body_x = left_joycon_body[point * 2 + 0];
+ const float body_y = left_joycon_body[point * 2 + 1];
+
+ left_joycon[point] = center + QPointF(body_x * size + offset, body_y * size - 1);
+ right_joycon[point] = center + QPointF(-body_x * size - offset, body_y * size - 1);
}
for (std::size_t point = 0; point < left_joycon_slider.size() / 2; ++point) {
- qleft_joycon_slider[point] =
- center + QPointF(left_joycon_slider[point * 2], left_joycon_slider[point * 2 + 1]);
- qright_joycon_slider[point] =
- center + QPointF(-left_joycon_slider[point * 2], left_joycon_slider[point * 2 + 1]);
+ const float slider_x = left_joycon_slider[point * 2 + 0];
+ const float slider_y = left_joycon_slider[point * 2 + 1];
+
+ qleft_joycon_slider[point] = center + QPointF(slider_x, slider_y);
+ qright_joycon_slider[point] = center + QPointF(-slider_x, slider_y);
}
for (std::size_t point = 0; point < left_joycon_topview.size() / 2; ++point) {
+ const float top_view_x = left_joycon_topview[point * 2 + 0];
+ const float top_view_y = left_joycon_topview[point * 2 + 1];
+
qleft_joycon_topview[point] =
- center + QPointF(left_joycon_topview[point * 2] * size2 - 52,
- left_joycon_topview[point * 2 + 1] * size2 - 52);
+ center + QPointF(top_view_x * size2 - 52, top_view_y * size2 - 52);
qright_joycon_topview[point] =
- center + QPointF(-left_joycon_topview[point * 2] * size2 + 52,
- left_joycon_topview[point * 2 + 1] * size2 - 52);
+ center + QPointF(-top_view_x * size2 + 52, top_view_y * size2 - 52);
}
for (std::size_t point = 0; point < left_joycon_slider_topview.size() / 2; ++point) {
+ const float top_view_x = left_joycon_slider_topview[point * 2 + 0];
+ const float top_view_y = left_joycon_slider_topview[point * 2 + 1];
+
qleft_joycon_slider_topview[point] =
- center + QPointF(left_joycon_slider_topview[point * 2] * size2 - 52,
- left_joycon_slider_topview[point * 2 + 1] * size2 - 52);
+ center + QPointF(top_view_x * size2 - 52, top_view_y * size2 - 52);
qright_joycon_slider_topview[point] =
- center + QPointF(-left_joycon_slider_topview[point * 2] * size2 + 52,
- left_joycon_slider_topview[point * 2 + 1] * size2 - 52);
+ center + QPointF(-top_view_x * size2 + 52, top_view_y * size2 - 52);
}
// right joycon body
@@ -1905,18 +1916,19 @@ void PlayerControlPreview::DrawProTriggers(QPainter& p, const QPointF center, bo
std::array<QPointF, pro_body_top.size()> qbody_top;
for (std::size_t point = 0; point < pro_left_trigger.size() / 2; ++point) {
- qleft_trigger[point] =
- center + QPointF(pro_left_trigger[point * 2],
- pro_left_trigger[point * 2 + 1] + (left_pressed ? 2 : 0));
- qright_trigger[point] =
- center + QPointF(-pro_left_trigger[point * 2],
- pro_left_trigger[point * 2 + 1] + (right_pressed ? 2 : 0));
+ const float trigger_x = pro_left_trigger[point * 2 + 0];
+ const float trigger_y = pro_left_trigger[point * 2 + 1];
+
+ qleft_trigger[point] = center + QPointF(trigger_x, trigger_y + (left_pressed ? 2 : 0));
+ qright_trigger[point] = center + QPointF(-trigger_x, trigger_y + (right_pressed ? 2 : 0));
}
for (std::size_t point = 0; point < pro_body_top.size() / 2; ++point) {
- qbody_top[pro_body_top.size() - 1 - point] =
- center + QPointF(-pro_body_top[point * 2], pro_body_top[point * 2 + 1]);
- qbody_top[point] = center + QPointF(pro_body_top[point * 2], pro_body_top[point * 2 + 1]);
+ const float top_x = pro_body_top[point * 2 + 0];
+ const float top_y = pro_body_top[point * 2 + 1];
+
+ qbody_top[pro_body_top.size() - 1 - point] = center + QPointF(-top_x, top_y);
+ qbody_top[point] = center + QPointF(top_x, top_y);
}
// Pro body detail
@@ -1939,12 +1951,11 @@ void PlayerControlPreview::DrawGCTriggers(QPainter& p, const QPointF center, boo
std::array<QPointF, left_gc_trigger.size() / 2> qright_trigger;
for (std::size_t point = 0; point < left_gc_trigger.size() / 2; ++point) {
- qleft_trigger[point] =
- center + QPointF(left_gc_trigger[point * 2],
- left_gc_trigger[point * 2 + 1] + (left_pressed ? 10 : 0));
- qright_trigger[point] =
- center + QPointF(-left_gc_trigger[point * 2],
- left_gc_trigger[point * 2 + 1] + (right_pressed ? 10 : 0));
+ const float trigger_x = left_gc_trigger[point * 2 + 0];
+ const float trigger_y = left_gc_trigger[point * 2 + 1];
+
+ qleft_trigger[point] = center + QPointF(trigger_x, trigger_y + (left_pressed ? 10 : 0));
+ qright_trigger[point] = center + QPointF(-trigger_x, trigger_y + (right_pressed ? 10 : 0));
}
// Left trigger
@@ -1973,12 +1984,13 @@ void PlayerControlPreview::DrawHandheldTriggers(QPainter& p, const QPointF cente
std::array<QPointF, left_joycon_trigger.size() / 2> qright_trigger;
for (std::size_t point = 0; point < left_joycon_trigger.size() / 2; ++point) {
+ const float left_trigger_x = left_joycon_trigger[point * 2 + 0];
+ const float left_trigger_y = left_joycon_trigger[point * 2 + 1];
+
qleft_trigger[point] =
- center + QPointF(left_joycon_trigger[point * 2],
- left_joycon_trigger[point * 2 + 1] + (left_pressed ? 0.5f : 0));
+ center + QPointF(left_trigger_x, left_trigger_y + (left_pressed ? 0.5f : 0));
qright_trigger[point] =
- center + QPointF(-left_joycon_trigger[point * 2],
- left_joycon_trigger[point * 2 + 1] + (right_pressed ? 0.5f : 0));
+ center + QPointF(-left_trigger_x, left_trigger_y + (right_pressed ? 0.5f : 0));
}
// Left trigger
@@ -1998,12 +2010,14 @@ void PlayerControlPreview::DrawDualTriggers(QPainter& p, const QPointF center, b
constexpr float size = 1.62f;
constexpr float offset = 210.6f;
for (std::size_t point = 0; point < left_joycon_trigger.size() / 2; ++point) {
- qleft_trigger[point] =
- center + QPointF(left_joycon_trigger[point * 2] * size + offset,
- left_joycon_trigger[point * 2 + 1] * size + (left_pressed ? 0.5f : 0));
- qright_trigger[point] = center + QPointF(-left_joycon_trigger[point * 2] * size - offset,
- left_joycon_trigger[point * 2 + 1] * size +
- (right_pressed ? 0.5f : 0));
+ const float left_trigger_x = left_joycon_trigger[point * 2 + 0];
+ const float left_trigger_y = left_joycon_trigger[point * 2 + 1];
+
+ qleft_trigger[point] = center + QPointF(left_trigger_x * size + offset,
+ left_trigger_y * size + (left_pressed ? 0.5f : 0));
+ qright_trigger[point] =
+ center + QPointF(-left_trigger_x * size - offset,
+ left_trigger_y * size + (right_pressed ? 0.5f : 0));
}
// Left trigger
@@ -2023,13 +2037,16 @@ void PlayerControlPreview::DrawDualTriggersTopView(QPainter& p, const QPointF ce
constexpr float size = 0.9f;
for (std::size_t point = 0; point < left_joystick_L_topview.size() / 2; ++point) {
- qleft_trigger[point] = center + QPointF(left_joystick_L_topview[point * 2] * size - 50,
- left_joystick_L_topview[point * 2 + 1] * size - 52);
+ const float top_view_x = left_joystick_L_topview[point * 2 + 0];
+ const float top_view_y = left_joystick_L_topview[point * 2 + 1];
+
+ qleft_trigger[point] = center + QPointF(top_view_x * size - 50, top_view_y * size - 52);
}
for (std::size_t point = 0; point < left_joystick_L_topview.size() / 2; ++point) {
- qright_trigger[point] =
- center + QPointF(-left_joystick_L_topview[point * 2] * size + 50,
- left_joystick_L_topview[point * 2 + 1] * size - 52);
+ const float top_view_x = left_joystick_L_topview[point * 2 + 0];
+ const float top_view_y = left_joystick_L_topview[point * 2 + 1];
+
+ qright_trigger[point] = center + QPointF(-top_view_x * size + 50, top_view_y * size - 52);
}
p.setPen(colors.outline);
@@ -2273,15 +2290,39 @@ void PlayerControlPreview::DrawJoystickSideview(QPainter& p, const QPointF cente
p.drawLine(p2.at(32), p2.at(71));
}
-void PlayerControlPreview::DrawProJoystick(QPainter& p, const QPointF center, bool pressed) {
+void PlayerControlPreview::DrawProJoystick(QPainter& p, const QPointF center, const QPointF offset,
+ float offset_scalar, bool pressed) {
+ const float radius1 = 24.0f;
+ const float radius2 = 17.0f;
+
+ const QPointF offset_center = center + offset * offset_scalar;
+
+ const auto amplitude = static_cast<float>(
+ 1.0 - std::sqrt((offset.x() * offset.x()) + (offset.y() * offset.y())) * 0.1f);
+
+ const float rotation =
+ ((offset.x() == 0) ? atan(1) * 2 : atan(offset.y() / offset.x())) * (180 / (atan(1) * 4));
+
+ p.save();
+ p.translate(offset_center);
+ p.rotate(rotation);
+
// Outer circle
p.setPen(colors.outline);
p.setBrush(pressed ? colors.highlight : colors.button);
- DrawCircle(p, center, 24.0f);
+ p.drawEllipse(QPointF(0, 0), radius1 * amplitude, radius1);
// Inner circle
p.setBrush(pressed ? colors.highlight2 : colors.button2);
- DrawCircle(p, center, 17.0f);
+
+ const float inner_offset =
+ (radius1 - radius2) * 0.4f * ((offset.x() == 0 && offset.y() < 0) ? -1.0f : 1.0f);
+ const float offset_factor = (1.0f - amplitude) / 0.1f;
+
+ p.drawEllipse(QPointF((offset.x() < 0) ? -inner_offset : inner_offset, 0) * offset_factor,
+ radius2 * amplitude, radius2);
+
+ p.restore();
}
void PlayerControlPreview::DrawGCJoystick(QPainter& p, const QPointF center, bool pressed) {
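The DrawProJoystick rewrite above fakes a 3D tilt: the outer circle becomes an ellipse whose horizontal radius shrinks with deflection (`amplitude`), the painter is rotated to face the offset direction, and the inner circle slides along that axis by `offset_factor`. A compact restatement of the angle/amplitude math using the patch's own formulas, but with std::atan2, which covers the `offset.x() == 0` branch directly (it returns the full ±180° range rather than atan's half range; the ellipse is symmetric under a 180° turn, so the outer shape is unaffected):

```cpp
#include <cmath>

struct Tilt {
    float rotation_deg; // painter rotation, in degrees
    float amplitude;    // 1.0 when centered, shrinking as the stick deflects
};

// x and y are the normalized stick offset, as passed to DrawProJoystick.
Tilt ComputeTilt(float x, float y) {
    constexpr float pi = 3.14159265358979323846f;
    return {
        std::atan2(y, x) * (180.0f / pi),       // patch: atan(y / x) * (180 / pi)
        1.0f - std::sqrt(x * x + y * y) * 0.1f, // patch: 1.0 - |offset| * 0.1
    };
}
```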
@@ -2299,7 +2340,7 @@ void PlayerControlPreview::DrawGCJoystick(QPainter& p, const QPointF center, boo
}
void PlayerControlPreview::DrawRawJoystick(QPainter& p, const QPointF center, const QPointF value,
- const Input::AnalogProperties properties) {
+ const Input::AnalogProperties& properties) {
constexpr float size = 45.0f;
const float range = size * properties.range;
const float deadzone = size * properties.deadzone;
@@ -2422,17 +2463,16 @@ void PlayerControlPreview::DrawArrowButtonOutline(QPainter& p, const QPointF cen
std::array<QPointF, (arrow_points - 1) * 4> arrow_button_outline;
for (std::size_t point = 0; point < arrow_points - 1; ++point) {
- arrow_button_outline[point] = center + QPointF(up_arrow_button[point * 2] * size,
- up_arrow_button[point * 2 + 1] * size);
+ const float up_arrow_x = up_arrow_button[point * 2 + 0];
+ const float up_arrow_y = up_arrow_button[point * 2 + 1];
+
+ arrow_button_outline[point] = center + QPointF(up_arrow_x * size, up_arrow_y * size);
arrow_button_outline[(arrow_points - 1) * 2 - point - 1] =
- center +
- QPointF(up_arrow_button[point * 2 + 1] * size, up_arrow_button[point * 2] * size);
+ center + QPointF(up_arrow_y * size, up_arrow_x * size);
arrow_button_outline[(arrow_points - 1) * 2 + point] =
- center +
- QPointF(-up_arrow_button[point * 2] * size, -up_arrow_button[point * 2 + 1] * size);
+ center + QPointF(-up_arrow_x * size, -up_arrow_y * size);
arrow_button_outline[(arrow_points - 1) * 4 - point - 1] =
- center +
- QPointF(-up_arrow_button[point * 2 + 1] * size, -up_arrow_button[point * 2] * size);
+ center + QPointF(-up_arrow_y * size, -up_arrow_x * size);
}
// Draw arrow button outline
p.setPen(colors.outline);
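DrawArrowButtonOutline builds the whole D-pad outline from a single quarter: the up-arrow points fill one segment, and the other three come from swapping the coordinates (a reflection across the diagonal) and negating them (a 180° rotation), with reversed indices so the combined polygon keeps one continuous winding. A sketch of that expansion, assuming the quarter is already scaled into QPointF form (ExpandQuarterOutline is our name, not the patch's):

```cpp
#include <QPointF>
#include <array>
#include <cstddef>

// Expand one quarter of a 4-fold symmetric outline into the full closed
// polygon. The reversed indices in segments 1 and 3 keep the winding
// direction continuous, exactly as in the loop above.
template <std::size_t N>
std::array<QPointF, N * 4> ExpandQuarterOutline(const std::array<QPointF, N>& quarter) {
    std::array<QPointF, N * 4> out;
    for (std::size_t i = 0; i < N; ++i) {
        const QPointF p = quarter[i];
        out[i] = p;                                   // ( x,  y)
        out[N * 2 - i - 1] = QPointF(p.y(), p.x());   // ( y,  x), reversed
        out[N * 2 + i] = QPointF(-p.x(), -p.y());     // (-x, -y)
        out[N * 4 - i - 1] = QPointF(-p.y(), -p.x()); // (-y, -x), reversed
    }
    return out;
}
```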
@@ -2446,22 +2486,21 @@ void PlayerControlPreview::DrawArrowButton(QPainter& p, const QPointF center,
QPoint offset;
for (std::size_t point = 0; point < up_arrow_button.size() / 2; ++point) {
+ const float up_arrow_x = up_arrow_button[point * 2 + 0];
+ const float up_arrow_y = up_arrow_button[point * 2 + 1];
+
switch (direction) {
case Direction::Up:
- arrow_button[point] = center + QPointF(up_arrow_button[point * 2] * size,
- up_arrow_button[point * 2 + 1] * size);
+ arrow_button[point] = center + QPointF(up_arrow_x * size, up_arrow_y * size);
break;
case Direction::Left:
- arrow_button[point] = center + QPointF(up_arrow_button[point * 2 + 1] * size,
- up_arrow_button[point * 2] * size);
+ arrow_button[point] = center + QPointF(up_arrow_y * size, up_arrow_x * size);
break;
case Direction::Right:
- arrow_button[point] = center + QPointF(-up_arrow_button[point * 2 + 1] * size,
- up_arrow_button[point * 2] * size);
+ arrow_button[point] = center + QPointF(-up_arrow_y * size, up_arrow_x * size);
break;
case Direction::Down:
- arrow_button[point] = center + QPointF(up_arrow_button[point * 2] * size,
- -up_arrow_button[point * 2 + 1] * size);
+ arrow_button[point] = center + QPointF(up_arrow_x * size, -up_arrow_y * size);
break;
case Direction::None:
break;
@@ -2500,17 +2539,17 @@ void PlayerControlPreview::DrawArrowButton(QPainter& p, const QPointF center,
void PlayerControlPreview::DrawTriggerButton(QPainter& p, const QPointF center,
const Direction direction, bool pressed) {
std::array<QPointF, trigger_button.size() / 2> qtrigger_button;
- QPoint offset;
for (std::size_t point = 0; point < trigger_button.size() / 2; ++point) {
+ const float trigger_button_x = trigger_button[point * 2 + 0];
+ const float trigger_button_y = trigger_button[point * 2 + 1];
+
switch (direction) {
case Direction::Left:
- qtrigger_button[point] =
- center + QPointF(-trigger_button[point * 2], trigger_button[point * 2 + 1]);
+ qtrigger_button[point] = center + QPointF(-trigger_button_x, trigger_button_y);
break;
case Direction::Right:
- qtrigger_button[point] =
- center + QPointF(trigger_button[point * 2], trigger_button[point * 2 + 1]);
+ qtrigger_button[point] = center + QPointF(trigger_button_x, trigger_button_y);
break;
case Direction::Up:
case Direction::Down:
@@ -2633,22 +2672,21 @@ void PlayerControlPreview::DrawArrow(QPainter& p, const QPointF center, const Di
std::array<QPointF, up_arrow_symbol.size() / 2> arrow_symbol;
for (std::size_t point = 0; point < up_arrow_symbol.size() / 2; ++point) {
+ const float up_arrow_x = up_arrow_symbol[point * 2 + 0];
+ const float up_arrow_y = up_arrow_symbol[point * 2 + 1];
+
switch (direction) {
case Direction::Up:
- arrow_symbol[point] = center + QPointF(up_arrow_symbol[point * 2] * size,
- up_arrow_symbol[point * 2 + 1] * size);
+ arrow_symbol[point] = center + QPointF(up_arrow_x * size, up_arrow_y * size);
break;
case Direction::Left:
- arrow_symbol[point] = center + QPointF(up_arrow_symbol[point * 2 + 1] * size,
- up_arrow_symbol[point * 2] * size);
+ arrow_symbol[point] = center + QPointF(up_arrow_y * size, up_arrow_x * size);
break;
case Direction::Right:
- arrow_symbol[point] = center + QPointF(-up_arrow_symbol[point * 2 + 1] * size,
- up_arrow_symbol[point * 2] * size);
+ arrow_symbol[point] = center + QPointF(-up_arrow_y * size, up_arrow_x * size);
break;
case Direction::Down:
- arrow_symbol[point] = center + QPointF(up_arrow_symbol[point * 2] * size,
- -up_arrow_symbol[point * 2 + 1] * size);
+ arrow_symbol[point] = center + QPointF(up_arrow_x * size, -up_arrow_y * size);
break;
case Direction::None:
break;
diff --git a/src/yuzu/configuration/configure_input_player_widget.h b/src/yuzu/configuration/configure_input_player_widget.h
index 39565f795..91c3343f1 100644
--- a/src/yuzu/configuration/configure_input_player_widget.h
+++ b/src/yuzu/configuration/configure_input_player_widget.h
@@ -25,7 +25,7 @@ public:
void SetPlayerInput(std::size_t index, const ButtonParam& buttons_param,
const AnalogParam& analogs_param);
- void SetPlayerInputRaw(std::size_t index, const Settings::ButtonsRaw buttons_,
+ void SetPlayerInputRaw(std::size_t index, const Settings::ButtonsRaw& buttons_,
Settings::AnalogsRaw analogs_);
void SetConnectedStatus(bool checked);
void SetControllerType(Settings::ControllerType type);
@@ -138,9 +138,9 @@ private:
// Draw joystick functions
void DrawJoystick(QPainter& p, QPointF center, float size, bool pressed);
void DrawJoystickSideview(QPainter& p, QPointF center, float angle, float size, bool pressed);
- void DrawRawJoystick(QPainter& p, QPointF center, const QPointF value,
- const Input::AnalogProperties properties);
- void DrawProJoystick(QPainter& p, QPointF center, bool pressed);
+ void DrawRawJoystick(QPainter& p, QPointF center, QPointF value,
+ const Input::AnalogProperties& properties);
+ void DrawProJoystick(QPainter& p, QPointF center, QPointF offset, float scalar, bool pressed);
void DrawGCJoystick(QPainter& p, QPointF center, bool pressed);
// Draw button functions
diff --git a/src/yuzu/debugger/controller.cpp b/src/yuzu/debugger/controller.cpp
index 85724a8f3..2731d948d 100644
--- a/src/yuzu/debugger/controller.cpp
+++ b/src/yuzu/debugger/controller.cpp
@@ -42,7 +42,7 @@ void ControllerDialog::refreshConfiguration() {
QAction* ControllerDialog::toggleViewAction() {
if (toggle_view_action == nullptr) {
- toggle_view_action = new QAction(windowTitle(), this);
+ toggle_view_action = new QAction(tr("&Controller P1"), this);
toggle_view_action->setCheckable(true);
toggle_view_action->setChecked(isVisible());
connect(toggle_view_action, &QAction::toggled, this, &ControllerDialog::setVisible);
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index ef92c25bc..0ba7c07cc 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -850,6 +850,16 @@ void GMainWindow::InitializeHotkeys() {
connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Mute Audio"), this),
&QShortcut::activated, this,
[] { Settings::values.audio_muted = !Settings::values.audio_muted; });
+
+ connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Toggle Mouse Panning"), this),
+ &QShortcut::activated, this, [&] {
+ Settings::values.mouse_panning = !Settings::values.mouse_panning;
+ if (UISettings::values.hide_mouse || Settings::values.mouse_panning) {
+ mouse_hide_timer.start();
+ render_window->installEventFilter(render_window);
+ render_window->setAttribute(Qt::WA_Hover, true);
+ }
+ });
}
void GMainWindow::SetDefaultUIGeometry() {
@@ -1197,7 +1207,7 @@ void GMainWindow::BootGame(const QString& filename, std::size_t program_index) {
multicore_status_button->setDisabled(true);
renderer_status_button->setDisabled(true);
- if (UISettings::values.hide_mouse) {
+ if (UISettings::values.hide_mouse || Settings::values.mouse_panning) {
mouse_hide_timer.start();
render_window->installEventFilter(render_window);
render_window->setAttribute(Qt::WA_Hover, true);
@@ -2359,7 +2369,7 @@ void GMainWindow::OnConfigure() {
config->Save();
- if (UISettings::values.hide_mouse && emulation_running) {
+ if ((UISettings::values.hide_mouse || Settings::values.mouse_panning) && emulation_running) {
render_window->installEventFilter(render_window);
render_window->setAttribute(Qt::WA_Hover, true);
mouse_hide_timer.start();
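The `hide_mouse || mouse_panning` arming sequence is now repeated verbatim in InitializeHotkeys(), BootGame(), and OnConfigure(). A hypothetical free-function sketch of how the three call sites could stay in sync (the name and signature are ours, not the patch's):

```cpp
#include <QTimer>
#include <QWidget>

// Start the cursor-hide timer and hook hover events on the render window
// whenever either cursor-hiding or mouse panning wants the cursor managed.
void ArmMouseHideTimer(QTimer& mouse_hide_timer, QWidget* render_window,
                       bool hide_mouse, bool mouse_panning) {
    if (hide_mouse || mouse_panning) {
        mouse_hide_timer.start();
        render_window->installEventFilter(render_window);
        render_window->setAttribute(Qt::WA_Hover, true);
    }
}
```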
@@ -2480,6 +2490,11 @@ void GMainWindow::OnCaptureScreenshot() {
.arg(title_id, 16, 16, QLatin1Char{'0'})
.arg(date);
+ if (!Common::FS::CreateDir(screenshot_path.toStdString())) {
+ OnStartGame();
+ return;
+ }
+
#ifdef _WIN32
if (UISettings::values.enable_screenshot_save_as) {
filename = QFileDialog::getSaveFileName(this, tr("Capture Screenshot"), filename,
@@ -2600,7 +2615,8 @@ void GMainWindow::UpdateUISettings() {
}
void GMainWindow::HideMouseCursor() {
- if (emu_thread == nullptr || UISettings::values.hide_mouse == false) {
+ if (emu_thread == nullptr ||
+ (!UISettings::values.hide_mouse && !Settings::values.mouse_panning)) {
mouse_hide_timer.stop();
ShowMouseCursor();
return;
@@ -2610,13 +2626,16 @@ void GMainWindow::HideMouseCursor() {
void GMainWindow::ShowMouseCursor() {
render_window->unsetCursor();
- if (emu_thread != nullptr && UISettings::values.hide_mouse) {
+ if (emu_thread != nullptr &&
+ (UISettings::values.hide_mouse || Settings::values.mouse_panning)) {
mouse_hide_timer.start();
}
}
void GMainWindow::OnMouseActivity() {
- ShowMouseCursor();
+ if (!Settings::values.mouse_panning) {
+ ShowMouseCursor();
+ }
}
void GMainWindow::OnCoreError(Core::System::ResultStatus result, std::string details) {
@@ -2751,7 +2770,7 @@ void GMainWindow::OnReinitializeKeys(ReinitializeKeyBehavior behavior) {
.arg(errors));
}
- QProgressDialog prog;
+ QProgressDialog prog(this);
prog.setRange(0, 0);
prog.setLabelText(tr("Deriving keys...\nThis may take up to a minute depending \non your "
"system's performance."));
@@ -2933,7 +2952,7 @@ void GMainWindow::filterBarSetChecked(bool state) {
}
void GMainWindow::UpdateUITheme() {
- const QString default_icons = QStringLiteral(":/icons/default");
+ const QString default_icons = QStringLiteral("default");
const QString& current_theme = UISettings::values.theme;
const bool is_default_theme = current_theme == QString::fromUtf8(UISettings::themes[0].second);
QStringList theme_paths(default_theme_paths);
@@ -2949,7 +2968,6 @@ void GMainWindow::UpdateUITheme() {
qApp->setStyleSheet({});
setStyleSheet({});
}
- theme_paths.append(default_icons);
QIcon::setThemeName(default_icons);
} else {
const QString theme_uri(QLatin1Char{':'} + current_theme + QStringLiteral("/style.qss"));
@@ -2961,10 +2979,7 @@ void GMainWindow::UpdateUITheme() {
} else {
LOG_ERROR(Frontend, "Unable to set style, stylesheet file not found");
}
-
- const QString theme_name = QStringLiteral(":/icons/") + current_theme;
- theme_paths.append({default_icons, theme_name});
- QIcon::setThemeName(theme_name);
+ QIcon::setThemeName(current_theme);
}
QIcon::setThemeSearchPaths(theme_paths);
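These theme hunks stop registering full `:/icons/<theme>` resource paths as theme names and use Qt's icon-theme model as intended: QIcon::setThemeSearchPaths() lists the roots under which each subdirectory is a theme carrying its own index.theme, and QIcon::setThemeName() then selects a theme by directory name. A minimal sketch with a hypothetical root path and icon name:

```cpp
#include <QIcon>
#include <QStringList>

QIcon LookUpThemedIcon() {
    // Roots under which each subdirectory is a theme with an index.theme:
    QIcon::setThemeSearchPaths(QStringList{QStringLiteral(":/icons")});
    // Select the theme by directory name rather than by full resource path:
    QIcon::setThemeName(QStringLiteral("default"));
    // Icons are then resolved against the active theme by name:
    return QIcon::fromTheme(QStringLiteral("plus"));
}
```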
diff --git a/src/yuzu/main.ui b/src/yuzu/main.ui
index e2ad5baf6..048870687 100644
--- a/src/yuzu/main.ui
+++ b/src/yuzu/main.ui
@@ -14,8 +14,8 @@
<string>yuzu</string>
</property>
<property name="windowIcon">
- <iconset>
- <normaloff>../dist/yuzu.ico</normaloff>../dist/yuzu.ico</iconset>
+ <iconset resource="yuzu.qrc">
+ <normaloff>:/img/yuzu.ico</normaloff>:/img/yuzu.ico</iconset>
</property>
<property name="tabShape">
<enum>QTabWidget::Rounded</enum>
@@ -303,6 +303,8 @@
</property>
</action>
</widget>
- <resources/>
+ <resources>
+ <include location="yuzu.qrc"/>
+ </resources>
<connections/>
</ui>
diff --git a/src/yuzu/yuzu.qrc b/src/yuzu/yuzu.qrc
new file mode 100644
index 000000000..5733cac98
--- /dev/null
+++ b/src/yuzu/yuzu.qrc
@@ -0,0 +1,5 @@
+<RCC>
+ <qresource prefix="/img">
+ <file alias="yuzu.ico">../../dist/yuzu.ico</file>
+ </qresource>
+</RCC>
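The new yuzu.qrc compiles dist/yuzu.ico into the binary: the `/img` prefix plus the `yuzu.ico` alias produce exactly the `:/img/yuzu.ico` path that main.ui now references, so the window icon no longer depends on a relative path into dist/. Loading the same resource from code, for illustration:

```cpp
#include <QIcon>
#include <QString>

QIcon LoadWindowIcon() {
    // ":/<prefix>/<alias>" addresses a file compiled in through a .qrc:
    return QIcon(QStringLiteral(":/img/yuzu.ico"));
}
```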
diff --git a/src/yuzu_cmd/CMakeLists.txt b/src/yuzu_cmd/CMakeLists.txt
index 0b3f2cb54..8461f8896 100644
--- a/src/yuzu_cmd/CMakeLists.txt
+++ b/src/yuzu_cmd/CMakeLists.txt
@@ -1,5 +1,15 @@
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/CMakeModules)
+function(create_resource file output filename)
+ # Read hex data from file
+ file(READ ${file} filedata HEX)
+ # Convert hex data for C compatibility
+ string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," filedata ${filedata})
+ # Write data to output file
+ set(RESOURCES_DIR "${PROJECT_BINARY_DIR}/dist" PARENT_SCOPE)
+ file(WRITE "${PROJECT_BINARY_DIR}/dist/${output}" "const unsigned char ${filename}[] = {${filedata}};\nconst unsigned ${filename}_size = sizeof(${filename});\n")
+endfunction()
+
add_executable(yuzu-cmd
config.cpp
config.h
@@ -24,6 +34,9 @@ if (MSVC)
endif()
target_link_libraries(yuzu-cmd PRIVATE ${PLATFORM_LIBRARIES} SDL2 Threads::Threads)
+create_resource("../../dist/yuzu.bmp" "yuzu_cmd/yuzu_icon.h" "yuzu_icon")
+target_include_directories(yuzu-cmd PRIVATE ${RESOURCES_DIR})
+
target_include_directories(yuzu-cmd PRIVATE ../../externals/Vulkan-Headers/include)
if(UNIX AND NOT APPLE)
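create_resource() is a small CMake trick for embedding a binary file: file(READ ... HEX) reads the whole file as one hex string, the REGEX REPLACE rewrites every byte pair as a `0x..,` literal, and file(WRITE) emits a header declaring the array plus its size. Under that format string, the generated yuzu_icon.h should look roughly like this (bytes abbreviated; the leading 0x42, 0x4d pair is the "BM" magic that opens every BMP file):

```cpp
// ${PROJECT_BINARY_DIR}/dist/yuzu_cmd/yuzu_icon.h, as written by create_resource()
const unsigned char yuzu_icon[] = {
    0x42, 0x4d, /* ...the remaining bytes of dist/yuzu.bmp... */
};
const unsigned yuzu_icon_size = sizeof(yuzu_icon);
```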
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index f76102459..6d8bc5509 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -329,9 +329,6 @@ void Config::ReadValues() {
FS::GetUserPath(
FS::UserPath::DumpDir,
sdl2_config->Get("Data Storage", "dump_directory", FS::GetUserPath(FS::UserPath::DumpDir)));
- FS::GetUserPath(FS::UserPath::CacheDir,
- sdl2_config->Get("Data Storage", "cache_directory",
- FS::GetUserPath(FS::UserPath::CacheDir)));
Settings::values.gamecard_inserted =
sdl2_config->GetBoolean("Data Storage", "gamecard_inserted", false);
Settings::values.gamecard_current_game =
@@ -388,7 +385,7 @@ void Config::ReadValues() {
static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)));
Settings::values.use_disk_shader_cache.SetValue(
sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false));
- const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 0);
+ const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 1);
Settings::values.gpu_accuracy.SetValue(static_cast<Settings::GPUAccuracy>(gpu_accuracy_level));
Settings::values.use_asynchronous_gpu_emulation.SetValue(
sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", true));
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
index 7843d5167..7e391ab89 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
@@ -12,6 +12,7 @@
#include "input_common/mouse/mouse_input.h"
#include "input_common/sdl/sdl.h"
#include "yuzu_cmd/emu_window/emu_window_sdl2.h"
+#include "yuzu_cmd/yuzu_icon.h"
EmuWindow_SDL2::EmuWindow_SDL2(InputCommon::InputSubsystem* input_subsystem_)
: input_subsystem{input_subsystem_} {
@@ -30,7 +31,8 @@ EmuWindow_SDL2::~EmuWindow_SDL2() {
void EmuWindow_SDL2::OnMouseMotion(s32 x, s32 y) {
TouchMoved((unsigned)std::max(x, 0), (unsigned)std::max(y, 0), 0);
- input_subsystem->GetMouse()->MouseMove(x, y);
+
+ input_subsystem->GetMouse()->MouseMove(x, y, 0, 0);
}
void EmuWindow_SDL2::OnMouseButton(u32 button, u8 state, s32 x, s32 y) {
@@ -193,6 +195,22 @@ void EmuWindow_SDL2::WaitEvent() {
}
}
+void EmuWindow_SDL2::SetWindowIcon() {
+ SDL_RWops* const yuzu_icon_stream = SDL_RWFromConstMem((void*)yuzu_icon, yuzu_icon_size);
+ if (yuzu_icon_stream == nullptr) {
+ LOG_WARNING(Frontend, "Failed to create yuzu icon stream.");
+ return;
+ }
+ SDL_Surface* const window_icon = SDL_LoadBMP_RW(yuzu_icon_stream, 1);
+ if (window_icon == nullptr) {
+ LOG_WARNING(Frontend, "Failed to read BMP from stream.");
+ return;
+ }
+ // The window keeps its own copy of the icon, so the surface can be freed here
+ SDL_SetWindowIcon(render_window, window_icon);
+ SDL_FreeSurface(window_icon);
+}
+
void EmuWindow_SDL2::OnMinimalClientAreaChangeRequest(std::pair<unsigned, unsigned> minimal_size) {
SDL_SetWindowMinimumSize(render_window, minimal_size.first, minimal_size.second);
}
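SetWindowIcon() consumes that generated array: it wraps the bytes in an in-memory RWops, parses them as a BMP, and hands the surface to SDL. A self-contained sketch of the same path (ApplyEmbeddedIcon is our name, not the patch's); passing 1 as SDL_LoadBMP_RW's `freesrc` argument makes SDL close the stream itself, and SDL_SetWindowIcon copies the surface, which is why the patch can free it immediately:

```cpp
#include <SDL.h>

// Set a window icon from an embedded BMP byte array, as produced by the
// create_resource() CMake helper above.
bool ApplyEmbeddedIcon(SDL_Window* window, const unsigned char* data, unsigned size) {
    SDL_RWops* const stream = SDL_RWFromConstMem(data, static_cast<int>(size));
    if (stream == nullptr) {
        return false;
    }
    // freesrc = 1: SDL closes the stream after reading, success or failure.
    SDL_Surface* const icon = SDL_LoadBMP_RW(stream, 1);
    if (icon == nullptr) {
        return false;
    }
    SDL_SetWindowIcon(window, icon); // SDL keeps its own copy
    SDL_FreeSurface(icon);
    return true;
}
```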
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.h b/src/yuzu_cmd/emu_window/emu_window_sdl2.h
index a93141240..51a12a6a9 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2.h
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.h
@@ -32,6 +32,9 @@ public:
/// Wait for the next event on the main thread.
void WaitEvent();
+ /// Sets the window icon from yuzu.bmp
+ void SetWindowIcon();
+
protected:
/// Called by WaitEvent when a key is pressed or released.
void OnKeyEvent(int key, u8 state);
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
index deddea9ee..a02485c14 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
@@ -107,6 +107,8 @@ EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(InputCommon::InputSubsystem* input_subsyste
dummy_window = SDL_CreateWindow(NULL, SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, 0, 0,
SDL_WINDOW_HIDDEN | SDL_WINDOW_OPENGL);
+ SetWindowIcon();
+
if (fullscreen) {
Fullscreen();
}
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp
index 3ba657c00..6f9b00461 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp
@@ -35,6 +35,8 @@ EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(InputCommon::InputSubsystem* input_subsyste
std::exit(EXIT_FAILURE);
}
+ SetWindowIcon();
+
switch (wm.subsystem) {
#ifdef SDL_VIDEO_DRIVER_WINDOWS
case SDL_SYSWM_TYPE::SDL_SYSWM_WINDOWS:
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp
index 0e1f3bdb3..982c41785 100644
--- a/src/yuzu_cmd/yuzu.cpp
+++ b/src/yuzu_cmd/yuzu.cpp
@@ -215,7 +215,7 @@ int main(int argc, char** argv) {
// Core is loaded, start the GPU (makes the GPU contexts current to this thread)
system.GPU().Start();
- system.Renderer().Rasterizer().LoadDiskResources(
+ system.Renderer().ReadRasterizer()->LoadDiskResources(
system.CurrentProcess()->GetTitleID(), false,
[](VideoCore::LoadCallbackStage, size_t value, size_t total) {});