summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/android/app/src/main/java/org/yuzu/yuzu_emu/activities/EmulationActivity.kt17
-rw-r--r--src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/HomeSettingsFragment.kt172
-rw-r--r--src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/LongMessageDialogFragment.kt62
-rw-r--r--src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/main/MainActivity.kt118
-rw-r--r--src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/GpuDriverHelper.kt2
-rw-r--r--src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/MemoryUtil.kt59
-rw-r--r--src/android/app/src/main/jni/native.cpp21
-rw-r--r--src/android/app/src/main/res/values/strings.xml24
-rw-r--r--src/audio_core/device/audio_buffers.h8
-rw-r--r--src/audio_core/device/device_session.cpp12
-rw-r--r--src/audio_core/device/device_session.h7
-rw-r--r--src/audio_core/in/audio_in_system.cpp5
-rw-r--r--src/audio_core/out/audio_out_system.cpp4
-rw-r--r--src/audio_core/renderer/adsp/adsp.cpp1
-rw-r--r--src/audio_core/renderer/adsp/audio_renderer.cpp5
-rw-r--r--src/audio_core/renderer/adsp/command_list_processor.cpp1
-rw-r--r--src/audio_core/renderer/command/data_source/decode.cpp23
-rw-r--r--src/audio_core/renderer/command/effect/compressor.cpp8
-rw-r--r--src/audio_core/renderer/command/effect/delay.cpp14
-rw-r--r--src/audio_core/renderer/command/effect/i3dl2_reverb.cpp4
-rw-r--r--src/audio_core/renderer/command/effect/light_limiter.cpp12
-rw-r--r--src/audio_core/renderer/command/effect/reverb.cpp12
-rw-r--r--src/audio_core/renderer/command/performance/performance.cpp15
-rw-r--r--src/audio_core/renderer/command/sink/circular_buffer.cpp4
-rw-r--r--src/audio_core/renderer/command/sink/device.cpp5
-rw-r--r--src/audio_core/renderer/mix/mix_context.cpp6
-rw-r--r--src/audio_core/renderer/nodes/node_states.cpp4
-rw-r--r--src/audio_core/renderer/nodes/node_states.h2
-rw-r--r--src/audio_core/renderer/system.cpp1
-rw-r--r--src/audio_core/sink/null_sink.h2
-rw-r--r--src/audio_core/sink/sink_stream.cpp16
-rw-r--r--src/audio_core/sink/sink_stream.h5
-rw-r--r--src/common/CMakeLists.txt2
-rw-r--r--src/common/fs/fs.cpp8
-rw-r--r--src/common/fs/fs_types.h2
-rw-r--r--src/common/ring_buffer.h3
-rw-r--r--src/common/scratch_buffer.h9
-rw-r--r--src/common/settings.h1
-rw-r--r--src/common/steady_clock.cpp5
-rw-r--r--src/common/wall_clock.cpp77
-rw-r--r--src/common/wall_clock.h89
-rw-r--r--src/common/x64/cpu_detect.cpp3
-rw-r--r--src/common/x64/cpu_wait.cpp20
-rw-r--r--src/common/x64/native_clock.cpp166
-rw-r--r--src/common/x64/native_clock.h59
-rw-r--r--src/common/x64/rdtsc.cpp39
-rw-r--r--src/common/x64/rdtsc.h37
-rw-r--r--src/core/CMakeLists.txt1
-rw-r--r--src/core/core_timing.cpp52
-rw-r--r--src/core/core_timing.h14
-rw-r--r--src/core/core_timing_util.h58
-rw-r--r--src/core/file_sys/patch_manager.cpp9
-rw-r--r--src/core/file_sys/system_archive/time_zone_binary.cpp2
-rw-r--r--src/core/file_sys/vfs_concat.cpp14
-rw-r--r--src/core/file_sys/vfs_real.cpp101
-rw-r--r--src/core/file_sys/vfs_real.h22
-rw-r--r--src/core/hle/kernel/k_scheduler.cpp5
-rw-r--r--src/core/hle/kernel/k_synchronization_object.cpp3
-rw-r--r--src/core/hle/kernel/k_thread.cpp23
-rw-r--r--src/core/hle/kernel/k_thread.h7
-rw-r--r--src/core/hle/kernel/svc/svc_info.cpp4
-rw-r--r--src/core/hle/kernel/svc/svc_ipc.cpp7
-rw-r--r--src/core/hle/kernel/svc/svc_synchronization.cpp10
-rw-r--r--src/core/hle/kernel/svc/svc_thread.cpp2
-rw-r--r--src/core/hle/kernel/svc/svc_tick.cpp10
-rw-r--r--src/core/hle/service/audio/audin_u.cpp16
-rw-r--r--src/core/hle/service/audio/audout_u.cpp15
-rw-r--r--src/core/hle/service/audio/audren_u.cpp22
-rw-r--r--src/core/hle/service/audio/audren_u.h1
-rw-r--r--src/core/hle/service/audio/hwopus.cpp9
-rw-r--r--src/core/hle/service/hid/hidbus.cpp1
-rw-r--r--src/core/hle/service/nvdrv/devices/nvdevice.h6
-rw-r--r--src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp8
-rw-r--r--src/core/hle/service/nvdrv/devices/nvdisp_disp0.h8
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp31
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h30
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp19
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl.h21
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp32
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h38
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp59
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_gpu.h36
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp6
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_nvdec.h8
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp15
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h12
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp8
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h10
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_vic.cpp6
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_vic.h8
-rw-r--r--src/core/hle/service/nvdrv/devices/nvmap.cpp20
-rw-r--r--src/core/hle/service/nvdrv/devices/nvmap.h20
-rw-r--r--src/core/hle/service/nvdrv/nvdrv.cpp8
-rw-r--r--src/core/hle/service/nvdrv/nvdrv.h8
-rw-r--r--src/core/hle/service/nvdrv/nvdrv_interface.cpp24
-rw-r--r--src/core/hle/service/nvdrv/nvdrv_interface.h3
-rw-r--r--src/core/hle/service/nvnflinger/nvnflinger.cpp3
-rw-r--r--src/core/hle/service/nvnflinger/parcel.h7
-rw-r--r--src/core/hle/service/time/clock_types.h13
-rw-r--r--src/core/hle/service/time/standard_steady_clock_core.cpp2
-rw-r--r--src/core/hle/service/time/tick_based_steady_clock_core.cpp2
-rw-r--r--src/core/hle/service/time/time.cpp4
-rw-r--r--src/core/hle/service/time/time_sharedmemory.cpp5
-rw-r--r--src/core/hle/service/time/time_zone_manager.cpp11
-rw-r--r--src/core/hle/service/time/time_zone_service.cpp10
-rw-r--r--src/shader_recompiler/backend/glsl/glsl_emit_context.cpp2
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv.cpp2
-rw-r--r--src/shader_recompiler/backend/spirv/spirv_emit_context.cpp2
-rw-r--r--src/shader_recompiler/runtime_info.h3
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h13
-rw-r--r--src/video_core/buffer_cache/buffer_cache_base.h4
-rw-r--r--src/video_core/cdma_pusher.h1
-rw-r--r--src/video_core/dma_pusher.h8
-rw-r--r--src/video_core/engines/draw_manager.cpp10
-rw-r--r--src/video_core/engines/maxwell_dma.cpp35
-rw-r--r--src/video_core/gpu.cpp13
-rw-r--r--src/video_core/host1x/codecs/h264.cpp4
-rw-r--r--src/video_core/memory_manager.cpp17
-rw-r--r--src/video_core/memory_manager.h15
-rw-r--r--src/video_core/renderer_opengl/gl_compute_pipeline.cpp23
-rw-r--r--src/video_core/renderer_opengl/gl_graphics_pipeline.cpp27
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp72
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h11
-rw-r--r--src/video_core/renderer_vulkan/pipeline_helper.h11
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp22
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pipeline.cpp8
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp8
-rw-r--r--src/video_core/renderer_vulkan/vk_master_semaphore.cpp1
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp15
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp78
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h9
-rw-r--r--src/video_core/shader_cache.cpp4
-rw-r--r--src/video_core/texture_cache/image_base.h5
-rw-r--r--src/video_core/texture_cache/image_view_base.cpp52
-rw-r--r--src/video_core/texture_cache/image_view_base.h2
-rw-r--r--src/video_core/texture_cache/texture_cache.h70
-rw-r--r--src/video_core/texture_cache/texture_cache_base.h26
-rw-r--r--src/video_core/texture_cache/util.cpp48
-rw-r--r--src/video_core/texture_cache/util.h31
-rw-r--r--src/video_core/textures/texture.cpp7
-rw-r--r--src/video_core/transform_feedback.cpp8
-rw-r--r--src/video_core/transform_feedback.h2
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp12
-rw-r--r--src/video_core/vulkan_common/vulkan_device.h34
-rw-r--r--src/yuzu/CMakeLists.txt2
-rw-r--r--src/yuzu/configuration/config.cpp2
-rw-r--r--src/yuzu/configuration/configure_dialog.cpp5
-rw-r--r--src/yuzu/configuration/configure_dialog.h7
-rw-r--r--src/yuzu/configuration/configure_graphics.cpp55
-rw-r--r--src/yuzu/configuration/configure_graphics.h3
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.cpp10
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.h1
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.ui10
-rw-r--r--src/yuzu/configuration/configure_per_game.cpp5
-rw-r--r--src/yuzu/configuration/configure_per_game.h3
-rw-r--r--src/yuzu/main.cpp8
-rw-r--r--src/yuzu/main.h6
-rw-r--r--src/yuzu/vk_device_info.cpp61
-rw-r--r--src/yuzu/vk_device_info.h36
160 files changed, 1750 insertions, 1263 deletions
diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/activities/EmulationActivity.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/activities/EmulationActivity.kt
index f0a6753a9..b1771b424 100644
--- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/activities/EmulationActivity.kt
+++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/activities/EmulationActivity.kt
@@ -27,13 +27,13 @@ import android.view.MotionEvent
import android.view.Surface
import android.view.View
import android.view.inputmethod.InputMethodManager
+import android.widget.Toast
import androidx.activity.viewModels
import androidx.appcompat.app.AppCompatActivity
import androidx.core.view.WindowCompat
import androidx.core.view.WindowInsetsCompat
import androidx.core.view.WindowInsetsControllerCompat
import androidx.navigation.fragment.NavHostFragment
-import kotlin.math.roundToInt
import org.yuzu.yuzu_emu.NativeLibrary
import org.yuzu.yuzu_emu.R
import org.yuzu.yuzu_emu.databinding.ActivityEmulationBinding
@@ -44,8 +44,10 @@ import org.yuzu.yuzu_emu.model.Game
import org.yuzu.yuzu_emu.utils.ControllerMappingHelper
import org.yuzu.yuzu_emu.utils.ForegroundService
import org.yuzu.yuzu_emu.utils.InputHandler
+import org.yuzu.yuzu_emu.utils.MemoryUtil
import org.yuzu.yuzu_emu.utils.NfcReader
import org.yuzu.yuzu_emu.utils.ThemeHelper
+import kotlin.math.roundToInt
class EmulationActivity : AppCompatActivity(), SensorEventListener {
private lateinit var binding: ActivityEmulationBinding
@@ -102,6 +104,19 @@ class EmulationActivity : AppCompatActivity(), SensorEventListener {
inputHandler = InputHandler()
inputHandler.initialize()
+ val memoryUtil = MemoryUtil(this)
+ if (memoryUtil.isLessThan(8, MemoryUtil.Gb)) {
+ Toast.makeText(
+ this,
+ getString(
+ R.string.device_memory_inadequate,
+ memoryUtil.getDeviceRAM(),
+ "8 ${getString(R.string.memory_gigabyte)}"
+ ),
+ Toast.LENGTH_LONG
+ ).show()
+ }
+
// Start a foreground service to prevent the app from getting killed in the background
val startIntent = Intent(this, ForegroundService::class.java)
startForegroundService(startIntent)
diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/HomeSettingsFragment.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/HomeSettingsFragment.kt
index 6f8adbba5..5a36ffad4 100644
--- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/HomeSettingsFragment.kt
+++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/HomeSettingsFragment.kt
@@ -68,79 +68,109 @@ class HomeSettingsFragment : Fragment() {
override fun onViewCreated(view: View, savedInstanceState: Bundle?) {
mainActivity = requireActivity() as MainActivity
- val optionsList: MutableList<HomeSetting> = mutableListOf(
- HomeSetting(
- R.string.advanced_settings,
- R.string.settings_description,
- R.drawable.ic_settings
- ) { SettingsActivity.launch(requireContext(), SettingsFile.FILE_NAME_CONFIG, "") },
- HomeSetting(
- R.string.open_user_folder,
- R.string.open_user_folder_description,
- R.drawable.ic_folder_open
- ) { openFileManager() },
- HomeSetting(
- R.string.preferences_theme,
- R.string.theme_and_color_description,
- R.drawable.ic_palette
- ) { SettingsActivity.launch(requireContext(), Settings.SECTION_THEME, "") },
- HomeSetting(
- R.string.install_gpu_driver,
- R.string.install_gpu_driver_description,
- R.drawable.ic_exit
- ) { driverInstaller() },
- HomeSetting(
- R.string.install_amiibo_keys,
- R.string.install_amiibo_keys_description,
- R.drawable.ic_nfc
- ) { mainActivity.getAmiiboKey.launch(arrayOf("*/*")) },
- HomeSetting(
- R.string.install_game_content,
- R.string.install_game_content_description,
- R.drawable.ic_system_update_alt
- ) { mainActivity.installGameUpdate.launch(arrayOf("*/*")) },
- HomeSetting(
- R.string.select_games_folder,
- R.string.select_games_folder_description,
- R.drawable.ic_add
- ) {
- mainActivity.getGamesDirectory.launch(Intent(Intent.ACTION_OPEN_DOCUMENT_TREE).data)
- },
- HomeSetting(
- R.string.manage_save_data,
- R.string.import_export_saves_description,
- R.drawable.ic_save
- ) {
- ImportExportSavesFragment().show(
- parentFragmentManager,
- ImportExportSavesFragment.TAG
+ val optionsList: MutableList<HomeSetting> = mutableListOf<HomeSetting>().apply {
+ add(
+ HomeSetting(
+ R.string.advanced_settings,
+ R.string.settings_description,
+ R.drawable.ic_settings
+ ) { SettingsActivity.launch(requireContext(), SettingsFile.FILE_NAME_CONFIG, "") }
+ )
+ add(
+ HomeSetting(
+ R.string.open_user_folder,
+ R.string.open_user_folder_description,
+ R.drawable.ic_folder_open
+ ) { openFileManager() }
+ )
+ add(
+ HomeSetting(
+ R.string.preferences_theme,
+ R.string.theme_and_color_description,
+ R.drawable.ic_palette
+ ) { SettingsActivity.launch(requireContext(), Settings.SECTION_THEME, "") }
+ )
+
+ if (GpuDriverHelper.supportsCustomDriverLoading()) {
+ add(
+ HomeSetting(
+ R.string.install_gpu_driver,
+ R.string.install_gpu_driver_description,
+ R.drawable.ic_exit
+ ) { driverInstaller() }
)
- },
- HomeSetting(
- R.string.install_prod_keys,
- R.string.install_prod_keys_description,
- R.drawable.ic_unlock
- ) { mainActivity.getProdKey.launch(arrayOf("*/*")) },
- HomeSetting(
- R.string.install_firmware,
- R.string.install_firmware_description,
- R.drawable.ic_firmware
- ) { mainActivity.getFirmware.launch(arrayOf("application/zip")) },
- HomeSetting(
- R.string.share_log,
- R.string.share_log_description,
- R.drawable.ic_log
- ) { shareLog() },
- HomeSetting(
- R.string.about,
- R.string.about_description,
- R.drawable.ic_info_outline
- ) {
- exitTransition = MaterialSharedAxis(MaterialSharedAxis.X, true)
- parentFragmentManager.primaryNavigationFragment?.findNavController()
- ?.navigate(R.id.action_homeSettingsFragment_to_aboutFragment)
}
- )
+
+ add(
+ HomeSetting(
+ R.string.install_amiibo_keys,
+ R.string.install_amiibo_keys_description,
+ R.drawable.ic_nfc
+ ) { mainActivity.getAmiiboKey.launch(arrayOf("*/*")) }
+ )
+ add(
+ HomeSetting(
+ R.string.install_game_content,
+ R.string.install_game_content_description,
+ R.drawable.ic_system_update_alt
+ ) { mainActivity.installGameUpdate.launch(arrayOf("*/*")) }
+ )
+ add(
+ HomeSetting(
+ R.string.select_games_folder,
+ R.string.select_games_folder_description,
+ R.drawable.ic_add
+ ) {
+ mainActivity.getGamesDirectory.launch(
+ Intent(Intent.ACTION_OPEN_DOCUMENT_TREE).data
+ )
+ }
+ )
+ add(
+ HomeSetting(
+ R.string.manage_save_data,
+ R.string.import_export_saves_description,
+ R.drawable.ic_save
+ ) {
+ ImportExportSavesFragment().show(
+ parentFragmentManager,
+ ImportExportSavesFragment.TAG
+ )
+ }
+ )
+ add(
+ HomeSetting(
+ R.string.install_prod_keys,
+ R.string.install_prod_keys_description,
+ R.drawable.ic_unlock
+ ) { mainActivity.getProdKey.launch(arrayOf("*/*")) }
+ )
+ add(
+ HomeSetting(
+ R.string.install_firmware,
+ R.string.install_firmware_description,
+ R.drawable.ic_firmware
+ ) { mainActivity.getFirmware.launch(arrayOf("application/zip")) }
+ )
+ add(
+ HomeSetting(
+ R.string.share_log,
+ R.string.share_log_description,
+ R.drawable.ic_log
+ ) { shareLog() }
+ )
+ add(
+ HomeSetting(
+ R.string.about,
+ R.string.about_description,
+ R.drawable.ic_info_outline
+ ) {
+ exitTransition = MaterialSharedAxis(MaterialSharedAxis.X, true)
+ parentFragmentManager.primaryNavigationFragment?.findNavController()
+ ?.navigate(R.id.action_homeSettingsFragment_to_aboutFragment)
+ }
+ )
+ }
if (!BuildConfig.PREMIUM) {
optionsList.add(
diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/LongMessageDialogFragment.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/LongMessageDialogFragment.kt
new file mode 100644
index 000000000..b29b627e9
--- /dev/null
+++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/LongMessageDialogFragment.kt
@@ -0,0 +1,62 @@
+// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package org.yuzu.yuzu_emu.fragments
+
+import android.app.Dialog
+import android.content.Intent
+import android.net.Uri
+import android.os.Bundle
+import androidx.fragment.app.DialogFragment
+import com.google.android.material.dialog.MaterialAlertDialogBuilder
+import org.yuzu.yuzu_emu.R
+
+class LongMessageDialogFragment : DialogFragment() {
+ override fun onCreateDialog(savedInstanceState: Bundle?): Dialog {
+ val titleId = requireArguments().getInt(TITLE)
+ val description = requireArguments().getString(DESCRIPTION)
+ val helpLinkId = requireArguments().getInt(HELP_LINK)
+
+ val dialog = MaterialAlertDialogBuilder(requireContext())
+ .setPositiveButton(R.string.close, null)
+ .setTitle(titleId)
+ .setMessage(description)
+
+ if (helpLinkId != 0) {
+ dialog.setNeutralButton(R.string.learn_more) { _, _ ->
+ openLink(getString(helpLinkId))
+ }
+ }
+
+ return dialog.show()
+ }
+
+ private fun openLink(link: String) {
+ val intent = Intent(Intent.ACTION_VIEW, Uri.parse(link))
+ startActivity(intent)
+ }
+
+ companion object {
+ const val TAG = "LongMessageDialogFragment"
+
+ private const val TITLE = "Title"
+ private const val DESCRIPTION = "Description"
+ private const val HELP_LINK = "Link"
+
+ fun newInstance(
+ titleId: Int,
+ description: String,
+ helpLinkId: Int = 0
+ ): LongMessageDialogFragment {
+ val dialog = LongMessageDialogFragment()
+ val bundle = Bundle()
+ bundle.apply {
+ putInt(TITLE, titleId)
+ putString(DESCRIPTION, description)
+ putInt(HELP_LINK, helpLinkId)
+ }
+ dialog.arguments = bundle
+ return dialog
+ }
+ }
+}
diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/main/MainActivity.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/main/MainActivity.kt
index cc1d87f1b..3086cfad3 100644
--- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/main/MainActivity.kt
+++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/main/MainActivity.kt
@@ -4,6 +4,7 @@
package org.yuzu.yuzu_emu.ui.main
import android.content.Intent
+import android.net.Uri
import android.os.Bundle
import android.view.View
import android.view.ViewGroup.MarginLayoutParams
@@ -42,6 +43,7 @@ import org.yuzu.yuzu_emu.features.settings.model.SettingsViewModel
import org.yuzu.yuzu_emu.features.settings.ui.SettingsActivity
import org.yuzu.yuzu_emu.features.settings.utils.SettingsFile
import org.yuzu.yuzu_emu.fragments.IndeterminateProgressDialogFragment
+import org.yuzu.yuzu_emu.fragments.LongMessageDialogFragment
import org.yuzu.yuzu_emu.fragments.MessageDialogFragment
import org.yuzu.yuzu_emu.model.GamesViewModel
import org.yuzu.yuzu_emu.model.HomeViewModel
@@ -481,62 +483,110 @@ class MainActivity : AppCompatActivity(), ThemeProvider {
}
}
- val installGameUpdate =
- registerForActivityResult(ActivityResultContracts.OpenDocument()) {
- if (it == null) {
- return@registerForActivityResult
- }
-
+ val installGameUpdate = registerForActivityResult(
+ ActivityResultContracts.OpenMultipleDocuments()
+ ) { documents: List<Uri> ->
+ if (documents.isNotEmpty()) {
IndeterminateProgressDialogFragment.newInstance(
this@MainActivity,
R.string.install_game_content
) {
- val result = NativeLibrary.installFileToNand(it.toString())
+ var installSuccess = 0
+ var installOverwrite = 0
+ var errorBaseGame = 0
+ var errorExtension = 0
+ var errorOther = 0
+ var errorTotal = 0
lifecycleScope.launch {
- withContext(Dispatchers.Main) {
- when (result) {
+ documents.forEach {
+ when (NativeLibrary.installFileToNand(it.toString())) {
NativeLibrary.InstallFileToNandResult.Success -> {
- Toast.makeText(
- applicationContext,
- R.string.install_game_content_success,
- Toast.LENGTH_SHORT
- ).show()
+ installSuccess += 1
}
NativeLibrary.InstallFileToNandResult.SuccessFileOverwritten -> {
- Toast.makeText(
- applicationContext,
- R.string.install_game_content_success_overwrite,
- Toast.LENGTH_SHORT
- ).show()
+ installOverwrite += 1
}
NativeLibrary.InstallFileToNandResult.ErrorBaseGame -> {
- MessageDialogFragment.newInstance(
- R.string.install_game_content_failure,
- R.string.install_game_content_failure_base
- ).show(supportFragmentManager, MessageDialogFragment.TAG)
+ errorBaseGame += 1
}
NativeLibrary.InstallFileToNandResult.ErrorFilenameExtension -> {
- MessageDialogFragment.newInstance(
- R.string.install_game_content_failure,
- R.string.install_game_content_failure_file_extension,
- R.string.install_game_content_help_link
- ).show(supportFragmentManager, MessageDialogFragment.TAG)
+ errorExtension += 1
}
else -> {
- MessageDialogFragment.newInstance(
- R.string.install_game_content_failure,
- R.string.install_game_content_failure_description,
- R.string.install_game_content_help_link
- ).show(supportFragmentManager, MessageDialogFragment.TAG)
+ errorOther += 1
}
}
}
+ withContext(Dispatchers.Main) {
+ val separator = System.getProperty("line.separator") ?: "\n"
+ val installResult = StringBuilder()
+ if (installSuccess > 0) {
+ installResult.append(
+ getString(
+ R.string.install_game_content_success_install,
+ installSuccess
+ )
+ )
+ installResult.append(separator)
+ }
+ if (installOverwrite > 0) {
+ installResult.append(
+ getString(
+ R.string.install_game_content_success_overwrite,
+ installOverwrite
+ )
+ )
+ installResult.append(separator)
+ }
+ errorTotal = errorBaseGame + errorExtension + errorOther
+ if (errorTotal > 0) {
+ installResult.append(separator)
+ installResult.append(
+ getString(
+ R.string.install_game_content_failed_count,
+ errorTotal
+ )
+ )
+ installResult.append(separator)
+ if (errorBaseGame > 0) {
+ installResult.append(separator)
+ installResult.append(
+ getString(R.string.install_game_content_failure_base)
+ )
+ installResult.append(separator)
+ }
+ if (errorExtension > 0) {
+ installResult.append(separator)
+ installResult.append(
+ getString(R.string.install_game_content_failure_file_extension)
+ )
+ installResult.append(separator)
+ }
+ if (errorOther > 0) {
+ installResult.append(
+ getString(R.string.install_game_content_failure_description)
+ )
+ installResult.append(separator)
+ }
+ LongMessageDialogFragment.newInstance(
+ R.string.install_game_content_failure,
+ installResult.toString().trim(),
+ R.string.install_game_content_help_link
+ ).show(supportFragmentManager, LongMessageDialogFragment.TAG)
+ } else {
+ LongMessageDialogFragment.newInstance(
+ R.string.install_game_content_success,
+ installResult.toString().trim()
+ ).show(supportFragmentManager, LongMessageDialogFragment.TAG)
+ }
+ }
}
- return@newInstance result
+ return@newInstance installSuccess + installOverwrite + errorTotal
}.show(supportFragmentManager, IndeterminateProgressDialogFragment.TAG)
}
+ }
}
diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/GpuDriverHelper.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/GpuDriverHelper.kt
index dad159481..1d4695a2a 100644
--- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/GpuDriverHelper.kt
+++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/GpuDriverHelper.kt
@@ -113,6 +113,8 @@ object GpuDriverHelper {
initializeDriverParameters(context)
}
+ external fun supportsCustomDriverLoading(): Boolean
+
// Parse the custom driver metadata to retrieve the name.
val customDriverName: String?
get() {
diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/MemoryUtil.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/MemoryUtil.kt
new file mode 100644
index 000000000..18e5fa0b0
--- /dev/null
+++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/MemoryUtil.kt
@@ -0,0 +1,59 @@
+// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package org.yuzu.yuzu_emu.utils
+
+import android.app.ActivityManager
+import android.content.Context
+import org.yuzu.yuzu_emu.R
+import java.util.Locale
+
+class MemoryUtil(val context: Context) {
+
+ private val Long.floatForm: String
+ get() = String.format(Locale.ROOT, "%.2f", this.toDouble())
+
+ private fun bytesToSizeUnit(size: Long): String {
+ return when {
+ size < Kb -> "${size.floatForm} ${context.getString(R.string.memory_byte)}"
+ size < Mb -> "${(size / Kb).floatForm} ${context.getString(R.string.memory_kilobyte)}"
+ size < Gb -> "${(size / Mb).floatForm} ${context.getString(R.string.memory_megabyte)}"
+ size < Tb -> "${(size / Gb).floatForm} ${context.getString(R.string.memory_gigabyte)}"
+ size < Pb -> "${(size / Tb).floatForm} ${context.getString(R.string.memory_terabyte)}"
+ size < Eb -> "${(size / Pb).floatForm} ${context.getString(R.string.memory_petabyte)}"
+ else -> "${(size / Eb).floatForm} ${context.getString(R.string.memory_exabyte)}"
+ }
+ }
+
+ private val totalMemory =
+ with(context.getSystemService(Context.ACTIVITY_SERVICE) as ActivityManager) {
+ val memInfo = ActivityManager.MemoryInfo()
+ getMemoryInfo(memInfo)
+ memInfo.totalMem
+ }
+
+ fun isLessThan(minimum: Int, size: Long): Boolean {
+ return when (size) {
+ Kb -> totalMemory < Mb && totalMemory < minimum
+ Mb -> totalMemory < Gb && (totalMemory / Mb) < minimum
+ Gb -> totalMemory < Tb && (totalMemory / Gb) < minimum
+ Tb -> totalMemory < Pb && (totalMemory / Tb) < minimum
+ Pb -> totalMemory < Eb && (totalMemory / Pb) < minimum
+ Eb -> totalMemory / Eb < minimum
+ else -> totalMemory < Kb && totalMemory < minimum
+ }
+ }
+
+ fun getDeviceRAM(): String {
+ return bytesToSizeUnit(totalMemory)
+ }
+
+ companion object {
+ const val Kb: Long = 1024
+ const val Mb = Kb * 1024
+ const val Gb = Mb * 1024
+ const val Tb = Gb * 1024
+ const val Pb = Tb * 1024
+ const val Eb = Pb * 1024
+ }
+}
diff --git a/src/android/app/src/main/jni/native.cpp b/src/android/app/src/main/jni/native.cpp
index f9617202b..f4fed0886 100644
--- a/src/android/app/src/main/jni/native.cpp
+++ b/src/android/app/src/main/jni/native.cpp
@@ -237,6 +237,7 @@ public:
m_software_keyboard = android_keyboard.get();
m_system.SetShuttingDown(false);
m_system.ApplySettings();
+ Settings::LogSettings();
m_system.HIDCore().ReloadInputDevices();
m_system.SetAppletFrontendSet({
nullptr, // Amiibo Settings
@@ -560,6 +561,26 @@ void JNICALL Java_org_yuzu_yuzu_1emu_NativeLibrary_initializeGpuDriver(
GetJString(env, custom_driver_name), GetJString(env, file_redirect_dir));
}
+[[maybe_unused]] static bool CheckKgslPresent() {
+ constexpr auto KgslPath{"/dev/kgsl-3d0"};
+
+ return access(KgslPath, F_OK) == 0;
+}
+
+[[maybe_unused]] bool SupportsCustomDriver() {
+ return android_get_device_api_level() >= 28 && CheckKgslPresent();
+}
+
+jboolean JNICALL Java_org_yuzu_yuzu_1emu_utils_GpuDriverHelper_supportsCustomDriverLoading(
+ [[maybe_unused]] JNIEnv* env, [[maybe_unused]] jobject instance) {
+#ifdef ARCHITECTURE_arm64
+ // If the KGSL device exists custom drivers can be loaded using adrenotools
+ return SupportsCustomDriver();
+#else
+ return false;
+#endif
+}
+
jboolean Java_org_yuzu_yuzu_1emu_NativeLibrary_reloadKeys(JNIEnv* env,
[[maybe_unused]] jclass clazz) {
Core::Crypto::KeyManager::Instance().ReloadKeys();
diff --git a/src/android/app/src/main/res/values/strings.xml b/src/android/app/src/main/res/values/strings.xml
index cc1d8c39d..21805d274 100644
--- a/src/android/app/src/main/res/values/strings.xml
+++ b/src/android/app/src/main/res/values/strings.xml
@@ -104,12 +104,14 @@
<string name="share_log_missing">No log file found</string>
<string name="install_game_content">Install game content</string>
<string name="install_game_content_description">Install game updates or DLC</string>
- <string name="install_game_content_failure">Error installing file to NAND</string>
- <string name="install_game_content_failure_description">Game content installation failed. Please ensure content is valid and that the prod.keys file is installed.</string>
- <string name="install_game_content_failure_base">Installation of base games isn\'t permitted in order to avoid possible conflicts. Please select an update or DLC instead.</string>
- <string name="install_game_content_failure_file_extension">The selected file type is not supported. Only NSP and XCI content is supported for this action. Please verify the game content is valid.</string>
- <string name="install_game_content_success">Game content installed successfully</string>
- <string name="install_game_content_success_overwrite">Game content was overwritten successfully</string>
+ <string name="install_game_content_failure">Error installing file(s) to NAND</string>
+ <string name="install_game_content_failure_description">Please ensure content(s) are valid and that the prod.keys file is installed.</string>
+ <string name="install_game_content_failure_base">Installation of base games isn\'t permitted in order to avoid possible conflicts.</string>
+ <string name="install_game_content_failure_file_extension">Only NSP and XCI content is supported. Please verify the game content(s) are valid.</string>
+ <string name="install_game_content_failed_count">%1$d installation error(s)</string>
+ <string name="install_game_content_success">Game content(s) installed successfully</string>
+ <string name="install_game_content_success_install">%1$d installed successfully</string>
+ <string name="install_game_content_success_overwrite">%1$d overwritten successfully</string>
<string name="install_game_content_help_link">https://yuzu-emu.org/help/quickstart/#dumping-installed-updates</string>
<!-- About screen strings -->
@@ -270,6 +272,7 @@
<string name="fatal_error">Fatal Error</string>
<string name="fatal_error_message">A fatal error occurred. Check the log for details.\nContinuing emulation may result in crashes and bugs.</string>
<string name="performance_warning">Turning off this setting will significantly reduce emulation performance! For the best experience, it is recommended that you leave this setting enabled.</string>
+ <string name="device_memory_inadequate">Device RAM: %1$s\nRecommended: %2$s</string>
<!-- Region Names -->
<string name="region_japan">Japan</string>
@@ -300,6 +303,15 @@
<string name="language_traditional_chinese">Traditional Chinese (正體中文)</string>
<string name="language_brazilian_portuguese">Brazilian Portuguese (Português do Brasil)</string>
+ <!-- Memory Sizes -->
+ <string name="memory_byte">Byte</string>
+ <string name="memory_kilobyte">KB</string>
+ <string name="memory_megabyte">MB</string>
+ <string name="memory_gigabyte">GB</string>
+ <string name="memory_terabyte">TB</string>
+ <string name="memory_petabyte">PB</string>
+ <string name="memory_exabyte">EB</string>
+
<!-- Renderer APIs -->
<string name="renderer_vulkan">Vulkan</string>
<string name="renderer_none">None</string>
diff --git a/src/audio_core/device/audio_buffers.h b/src/audio_core/device/audio_buffers.h
index 15082f6c6..5d8ed0ef7 100644
--- a/src/audio_core/device/audio_buffers.h
+++ b/src/audio_core/device/audio_buffers.h
@@ -7,6 +7,7 @@
#include <mutex>
#include <span>
#include <vector>
+#include <boost/container/static_vector.hpp>
#include "audio_buffer.h"
#include "audio_core/device/device_session.h"
@@ -48,7 +49,7 @@ public:
*
* @param out_buffers - The buffers which were registered.
*/
- void RegisterBuffers(std::vector<AudioBuffer>& out_buffers) {
+ void RegisterBuffers(boost::container::static_vector<AudioBuffer, N>& out_buffers) {
std::scoped_lock l{lock};
const s32 to_register{std::min(std::min(appended_count, BufferAppendLimit),
BufferAppendLimit - registered_count)};
@@ -162,7 +163,8 @@ public:
* @param max_buffers - Maximum number of buffers to released.
* @return The number of buffers released.
*/
- u32 GetRegisteredAppendedBuffers(std::vector<AudioBuffer>& buffers_flushed, u32 max_buffers) {
+ u32 GetRegisteredAppendedBuffers(
+ boost::container::static_vector<AudioBuffer, N>& buffers_flushed, u32 max_buffers) {
std::scoped_lock l{lock};
if (registered_count + appended_count == 0) {
return 0;
@@ -270,7 +272,7 @@ public:
*/
bool FlushBuffers(u32& buffers_released) {
std::scoped_lock l{lock};
- std::vector<AudioBuffer> buffers_flushed{};
+ boost::container::static_vector<AudioBuffer, N> buffers_flushed{};
buffers_released = GetRegisteredAppendedBuffers(buffers_flushed, append_limit);
diff --git a/src/audio_core/device/device_session.cpp b/src/audio_core/device/device_session.cpp
index b5c0ef0e6..86811fcb8 100644
--- a/src/audio_core/device/device_session.cpp
+++ b/src/audio_core/device/device_session.cpp
@@ -79,7 +79,7 @@ void DeviceSession::ClearBuffers() {
}
}
-void DeviceSession::AppendBuffers(std::span<const AudioBuffer> buffers) const {
+void DeviceSession::AppendBuffers(std::span<const AudioBuffer> buffers) {
for (const auto& buffer : buffers) {
Sink::SinkBuffer new_buffer{
.frames = buffer.size / (channel_count * sizeof(s16)),
@@ -88,13 +88,13 @@ void DeviceSession::AppendBuffers(std::span<const AudioBuffer> buffers) const {
.consumed = false,
};
+ tmp_samples.resize_destructive(buffer.size / sizeof(s16));
if (type == Sink::StreamType::In) {
- std::vector<s16> samples{};
- stream->AppendBuffer(new_buffer, samples);
+ stream->AppendBuffer(new_buffer, tmp_samples);
} else {
- std::vector<s16> samples(buffer.size / sizeof(s16));
- system.ApplicationMemory().ReadBlockUnsafe(buffer.samples, samples.data(), buffer.size);
- stream->AppendBuffer(new_buffer, samples);
+ system.ApplicationMemory().ReadBlockUnsafe(buffer.samples, tmp_samples.data(),
+ buffer.size);
+ stream->AppendBuffer(new_buffer, tmp_samples);
}
}
}
diff --git a/src/audio_core/device/device_session.h b/src/audio_core/device/device_session.h
index 75f766c68..7d52f362d 100644
--- a/src/audio_core/device/device_session.h
+++ b/src/audio_core/device/device_session.h
@@ -10,6 +10,7 @@
#include "audio_core/common/common.h"
#include "audio_core/sink/sink.h"
+#include "common/scratch_buffer.h"
#include "core/hle/service/audio/errors.h"
namespace Core {
@@ -62,7 +63,7 @@ public:
*
* @param buffers - The buffers to play.
*/
- void AppendBuffers(std::span<const AudioBuffer> buffers) const;
+ void AppendBuffers(std::span<const AudioBuffer> buffers);
/**
* (Audio In only) Pop samples from the backend, and write them back to this buffer's address.
@@ -146,8 +147,8 @@ private:
std::shared_ptr<Core::Timing::EventType> thread_event;
/// Is this session initialised?
bool initialized{};
- /// Buffer queue
- std::vector<AudioBuffer> buffer_queue{};
+ /// Temporary sample buffer
+ Common::ScratchBuffer<s16> tmp_samples{};
};
} // namespace AudioCore
diff --git a/src/audio_core/in/audio_in_system.cpp b/src/audio_core/in/audio_in_system.cpp
index e23e51758..579129121 100644
--- a/src/audio_core/in/audio_in_system.cpp
+++ b/src/audio_core/in/audio_in_system.cpp
@@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <mutex>
+
#include "audio_core/audio_event.h"
#include "audio_core/audio_manager.h"
#include "audio_core/in/audio_in_system.h"
@@ -89,7 +90,7 @@ Result System::Start() {
session->Start();
state = State::Started;
- std::vector<AudioBuffer> buffers_to_flush{};
+ boost::container::static_vector<AudioBuffer, BufferCount> buffers_to_flush{};
buffers.RegisterBuffers(buffers_to_flush);
session->AppendBuffers(buffers_to_flush);
session->SetRingSize(static_cast<u32>(buffers_to_flush.size()));
@@ -134,7 +135,7 @@ bool System::AppendBuffer(const AudioInBuffer& buffer, const u64 tag) {
void System::RegisterBuffers() {
if (state == State::Started) {
- std::vector<AudioBuffer> registered_buffers{};
+ boost::container::static_vector<AudioBuffer, BufferCount> registered_buffers{};
buffers.RegisterBuffers(registered_buffers);
session->AppendBuffers(registered_buffers);
}
diff --git a/src/audio_core/out/audio_out_system.cpp b/src/audio_core/out/audio_out_system.cpp
index bd13f7219..0adf64bd3 100644
--- a/src/audio_core/out/audio_out_system.cpp
+++ b/src/audio_core/out/audio_out_system.cpp
@@ -89,7 +89,7 @@ Result System::Start() {
session->Start();
state = State::Started;
- std::vector<AudioBuffer> buffers_to_flush{};
+ boost::container::static_vector<AudioBuffer, BufferCount> buffers_to_flush{};
buffers.RegisterBuffers(buffers_to_flush);
session->AppendBuffers(buffers_to_flush);
session->SetRingSize(static_cast<u32>(buffers_to_flush.size()));
@@ -134,7 +134,7 @@ bool System::AppendBuffer(const AudioOutBuffer& buffer, u64 tag) {
void System::RegisterBuffers() {
if (state == State::Started) {
- std::vector<AudioBuffer> registered_buffers{};
+ boost::container::static_vector<AudioBuffer, BufferCount> registered_buffers{};
buffers.RegisterBuffers(registered_buffers);
session->AppendBuffers(registered_buffers);
}
diff --git a/src/audio_core/renderer/adsp/adsp.cpp b/src/audio_core/renderer/adsp/adsp.cpp
index 74772fc50..b1db31e93 100644
--- a/src/audio_core/renderer/adsp/adsp.cpp
+++ b/src/audio_core/renderer/adsp/adsp.cpp
@@ -7,7 +7,6 @@
#include "common/logging/log.h"
#include "core/core.h"
#include "core/core_timing.h"
-#include "core/core_timing_util.h"
#include "core/memory.h"
namespace AudioCore::AudioRenderer::ADSP {
diff --git a/src/audio_core/renderer/adsp/audio_renderer.cpp b/src/audio_core/renderer/adsp/audio_renderer.cpp
index 8bc39f9f9..9ca716b60 100644
--- a/src/audio_core/renderer/adsp/audio_renderer.cpp
+++ b/src/audio_core/renderer/adsp/audio_renderer.cpp
@@ -13,7 +13,6 @@
#include "common/thread.h"
#include "core/core.h"
#include "core/core_timing.h"
-#include "core/core_timing_util.h"
MICROPROFILE_DEFINE(Audio_Renderer, "Audio", "DSP", MP_RGB(60, 19, 97));
@@ -144,6 +143,7 @@ void AudioRenderer::ThreadFunc(std::stop_token stop_token) {
mailbox->ADSPSendMessage(RenderMessage::AudioRenderer_InitializeOK);
+ // 0.12 seconds (2304000 / 19200000)
constexpr u64 max_process_time{2'304'000ULL};
while (!stop_token.stop_requested()) {
@@ -184,8 +184,7 @@ void AudioRenderer::ThreadFunc(std::stop_token stop_token) {
u64 max_time{max_process_time};
if (index == 1 && command_buffer.applet_resource_user_id ==
mailbox->GetCommandBuffer(0).applet_resource_user_id) {
- max_time = max_process_time -
- Core::Timing::CyclesToNs(render_times_taken[0]).count();
+ max_time = max_process_time - render_times_taken[0];
if (render_times_taken[0] > max_process_time) {
max_time = 0;
}
diff --git a/src/audio_core/renderer/adsp/command_list_processor.cpp b/src/audio_core/renderer/adsp/command_list_processor.cpp
index 7a300d216..3a0f1ae38 100644
--- a/src/audio_core/renderer/adsp/command_list_processor.cpp
+++ b/src/audio_core/renderer/adsp/command_list_processor.cpp
@@ -9,7 +9,6 @@
#include "common/settings.h"
#include "core/core.h"
#include "core/core_timing.h"
-#include "core/core_timing_util.h"
#include "core/memory.h"
namespace AudioCore::AudioRenderer::ADSP {
diff --git a/src/audio_core/renderer/command/data_source/decode.cpp b/src/audio_core/renderer/command/data_source/decode.cpp
index ff5d31bd6..f45933203 100644
--- a/src/audio_core/renderer/command/data_source/decode.cpp
+++ b/src/audio_core/renderer/command/data_source/decode.cpp
@@ -8,6 +8,7 @@
#include "audio_core/renderer/command/resample/resample.h"
#include "common/fixed_point.h"
#include "common/logging/log.h"
+#include "common/scratch_buffer.h"
#include "core/memory.h"
namespace AudioCore::AudioRenderer {
@@ -27,6 +28,7 @@ constexpr std::array<u8, 3> PitchBySrcQuality = {4, 8, 4};
template <typename T>
static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer,
const DecodeArg& req) {
+ std::array<T, TempBufferSize> tmp_samples{};
constexpr s32 min{std::numeric_limits<s16>::min()};
constexpr s32 max{std::numeric_limits<s16>::max()};
@@ -49,18 +51,17 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer,
const u64 size{channel_count * samples_to_decode};
const u64 size_bytes{size * sizeof(T)};
- std::vector<T> samples(size);
- memory.ReadBlockUnsafe(source, samples.data(), size_bytes);
+ memory.ReadBlockUnsafe(source, tmp_samples.data(), size_bytes);
if constexpr (std::is_floating_point_v<T>) {
for (u32 i = 0; i < samples_to_decode; i++) {
- auto sample{static_cast<s32>(samples[i * channel_count + req.target_channel] *
+ auto sample{static_cast<s32>(tmp_samples[i * channel_count + req.target_channel] *
std::numeric_limits<s16>::max())};
out_buffer[i] = static_cast<s16>(std::clamp(sample, min, max));
}
} else {
for (u32 i = 0; i < samples_to_decode; i++) {
- out_buffer[i] = samples[i * channel_count + req.target_channel];
+ out_buffer[i] = tmp_samples[i * channel_count + req.target_channel];
}
}
} break;
@@ -73,17 +74,16 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer,
}
const VAddr source{req.buffer + ((req.start_offset + req.offset) * sizeof(T))};
- std::vector<T> samples(samples_to_decode);
- memory.ReadBlockUnsafe(source, samples.data(), samples_to_decode * sizeof(T));
+ memory.ReadBlockUnsafe(source, tmp_samples.data(), samples_to_decode * sizeof(T));
if constexpr (std::is_floating_point_v<T>) {
for (u32 i = 0; i < samples_to_decode; i++) {
- auto sample{static_cast<s32>(samples[i * channel_count + req.target_channel] *
+ auto sample{static_cast<s32>(tmp_samples[i * channel_count + req.target_channel] *
std::numeric_limits<s16>::max())};
out_buffer[i] = static_cast<s16>(std::clamp(sample, min, max));
}
} else {
- std::memcpy(out_buffer.data(), samples.data(), samples_to_decode * sizeof(s16));
+ std::memcpy(out_buffer.data(), tmp_samples.data(), samples_to_decode * sizeof(s16));
}
break;
}
@@ -101,6 +101,7 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer,
*/
static u32 DecodeAdpcm(Core::Memory::Memory& memory, std::span<s16> out_buffer,
const DecodeArg& req) {
+ std::array<u8, TempBufferSize> wavebuffer{};
constexpr u32 SamplesPerFrame{14};
constexpr u32 NibblesPerFrame{16};
@@ -138,9 +139,7 @@ static u32 DecodeAdpcm(Core::Memory::Memory& memory, std::span<s16> out_buffer,
}
const auto size{std::max((samples_to_process / 8U) * SamplesPerFrame, 8U)};
- std::vector<u8> wavebuffer(size);
- memory.ReadBlockUnsafe(req.buffer + position_in_frame / 2, wavebuffer.data(),
- wavebuffer.size());
+ memory.ReadBlockUnsafe(req.buffer + position_in_frame / 2, wavebuffer.data(), size);
auto context{req.adpcm_context};
auto header{context->header};
@@ -258,7 +257,7 @@ void DecodeFromWaveBuffers(Core::Memory::Memory& memory, const DecodeFromWaveBuf
u32 offset{voice_state.offset};
auto output_buffer{args.output};
- std::vector<s16> temp_buffer(TempBufferSize, 0);
+ std::array<s16, TempBufferSize> temp_buffer{};
while (remaining_sample_count > 0) {
const auto samples_to_write{std::min(remaining_sample_count, max_remaining_sample_count)};
diff --git a/src/audio_core/renderer/command/effect/compressor.cpp b/src/audio_core/renderer/command/effect/compressor.cpp
index 7229618e8..ee9b68d5b 100644
--- a/src/audio_core/renderer/command/effect/compressor.cpp
+++ b/src/audio_core/renderer/command/effect/compressor.cpp
@@ -44,8 +44,8 @@ static void InitializeCompressorEffect(const CompressorInfo::ParameterVersion2&
static void ApplyCompressorEffect(const CompressorInfo::ParameterVersion2& params,
CompressorInfo::State& state, bool enabled,
- std::vector<std::span<const s32>> input_buffers,
- std::vector<std::span<s32>> output_buffers, u32 sample_count) {
+ std::span<std::span<const s32>> input_buffers,
+ std::span<std::span<s32>> output_buffers, u32 sample_count) {
if (enabled) {
auto state_00{state.unk_00};
auto state_04{state.unk_04};
@@ -124,8 +124,8 @@ void CompressorCommand::Dump([[maybe_unused]] const ADSP::CommandListProcessor&
}
void CompressorCommand::Process(const ADSP::CommandListProcessor& processor) {
- std::vector<std::span<const s32>> input_buffers(parameter.channel_count);
- std::vector<std::span<s32>> output_buffers(parameter.channel_count);
+ std::array<std::span<const s32>, MaxChannels> input_buffers{};
+ std::array<std::span<s32>, MaxChannels> output_buffers{};
for (s16 i = 0; i < parameter.channel_count; i++) {
input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count,
diff --git a/src/audio_core/renderer/command/effect/delay.cpp b/src/audio_core/renderer/command/effect/delay.cpp
index a4e408d40..e536cbb1e 100644
--- a/src/audio_core/renderer/command/effect/delay.cpp
+++ b/src/audio_core/renderer/command/effect/delay.cpp
@@ -51,7 +51,7 @@ static void InitializeDelayEffect(const DelayInfo::ParameterVersion1& params,
state.delay_lines[channel].sample_count_max = sample_count_max.to_int_floor();
state.delay_lines[channel].sample_count = sample_count.to_int_floor();
state.delay_lines[channel].buffer.resize(state.delay_lines[channel].sample_count, 0);
- if (state.delay_lines[channel].buffer.size() == 0) {
+ if (state.delay_lines[channel].sample_count == 0) {
state.delay_lines[channel].buffer.push_back(0);
}
state.delay_lines[channel].buffer_pos = 0;
@@ -74,8 +74,8 @@ static void InitializeDelayEffect(const DelayInfo::ParameterVersion1& params,
*/
template <size_t NumChannels>
static void ApplyDelay(const DelayInfo::ParameterVersion1& params, DelayInfo::State& state,
- std::vector<std::span<const s32>>& inputs,
- std::vector<std::span<s32>>& outputs, const u32 sample_count) {
+ std::span<std::span<const s32>> inputs, std::span<std::span<s32>> outputs,
+ const u32 sample_count) {
for (u32 sample_index = 0; sample_index < sample_count; sample_index++) {
std::array<Common::FixedPoint<50, 14>, NumChannels> input_samples{};
for (u32 channel = 0; channel < NumChannels; channel++) {
@@ -153,8 +153,8 @@ static void ApplyDelay(const DelayInfo::ParameterVersion1& params, DelayInfo::St
* @param sample_count - Number of samples to process.
*/
static void ApplyDelayEffect(const DelayInfo::ParameterVersion1& params, DelayInfo::State& state,
- const bool enabled, std::vector<std::span<const s32>>& inputs,
- std::vector<std::span<s32>>& outputs, const u32 sample_count) {
+ const bool enabled, std::span<std::span<const s32>> inputs,
+ std::span<std::span<s32>> outputs, const u32 sample_count) {
if (!IsChannelCountValid(params.channel_count)) {
LOG_ERROR(Service_Audio, "Invalid delay channels {}", params.channel_count);
@@ -208,8 +208,8 @@ void DelayCommand::Dump([[maybe_unused]] const ADSP::CommandListProcessor& proce
}
void DelayCommand::Process(const ADSP::CommandListProcessor& processor) {
- std::vector<std::span<const s32>> input_buffers(parameter.channel_count);
- std::vector<std::span<s32>> output_buffers(parameter.channel_count);
+ std::array<std::span<const s32>, MaxChannels> input_buffers{};
+ std::array<std::span<s32>, MaxChannels> output_buffers{};
for (s16 i = 0; i < parameter.channel_count; i++) {
input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count,
diff --git a/src/audio_core/renderer/command/effect/i3dl2_reverb.cpp b/src/audio_core/renderer/command/effect/i3dl2_reverb.cpp
index 27d8b9844..d2bfb67cc 100644
--- a/src/audio_core/renderer/command/effect/i3dl2_reverb.cpp
+++ b/src/audio_core/renderer/command/effect/i3dl2_reverb.cpp
@@ -408,8 +408,8 @@ void I3dl2ReverbCommand::Dump([[maybe_unused]] const ADSP::CommandListProcessor&
}
void I3dl2ReverbCommand::Process(const ADSP::CommandListProcessor& processor) {
- std::vector<std::span<const s32>> input_buffers(parameter.channel_count);
- std::vector<std::span<s32>> output_buffers(parameter.channel_count);
+ std::array<std::span<const s32>, MaxChannels> input_buffers{};
+ std::array<std::span<s32>, MaxChannels> output_buffers{};
for (u32 i = 0; i < parameter.channel_count; i++) {
input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count,
diff --git a/src/audio_core/renderer/command/effect/light_limiter.cpp b/src/audio_core/renderer/command/effect/light_limiter.cpp
index e8fb0e2fc..4161a9821 100644
--- a/src/audio_core/renderer/command/effect/light_limiter.cpp
+++ b/src/audio_core/renderer/command/effect/light_limiter.cpp
@@ -47,8 +47,8 @@ static void InitializeLightLimiterEffect(const LightLimiterInfo::ParameterVersio
*/
static void ApplyLightLimiterEffect(const LightLimiterInfo::ParameterVersion2& params,
LightLimiterInfo::State& state, const bool enabled,
- std::vector<std::span<const s32>>& inputs,
- std::vector<std::span<s32>>& outputs, const u32 sample_count,
+ std::span<std::span<const s32>> inputs,
+ std::span<std::span<s32>> outputs, const u32 sample_count,
LightLimiterInfo::StatisticsInternal* statistics) {
constexpr s64 min{std::numeric_limits<s32>::min()};
constexpr s64 max{std::numeric_limits<s32>::max()};
@@ -147,8 +147,8 @@ void LightLimiterVersion1Command::Dump([[maybe_unused]] const ADSP::CommandListP
}
void LightLimiterVersion1Command::Process(const ADSP::CommandListProcessor& processor) {
- std::vector<std::span<const s32>> input_buffers(parameter.channel_count);
- std::vector<std::span<s32>> output_buffers(parameter.channel_count);
+ std::array<std::span<const s32>, MaxChannels> input_buffers{};
+ std::array<std::span<s32>, MaxChannels> output_buffers{};
for (u32 i = 0; i < parameter.channel_count; i++) {
input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count,
@@ -190,8 +190,8 @@ void LightLimiterVersion2Command::Dump([[maybe_unused]] const ADSP::CommandListP
}
void LightLimiterVersion2Command::Process(const ADSP::CommandListProcessor& processor) {
- std::vector<std::span<const s32>> input_buffers(parameter.channel_count);
- std::vector<std::span<s32>> output_buffers(parameter.channel_count);
+ std::array<std::span<const s32>, MaxChannels> input_buffers{};
+ std::array<std::span<s32>, MaxChannels> output_buffers{};
for (u32 i = 0; i < parameter.channel_count; i++) {
input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count,
diff --git a/src/audio_core/renderer/command/effect/reverb.cpp b/src/audio_core/renderer/command/effect/reverb.cpp
index 8b9b65214..fc2f15a5e 100644
--- a/src/audio_core/renderer/command/effect/reverb.cpp
+++ b/src/audio_core/renderer/command/effect/reverb.cpp
@@ -250,8 +250,8 @@ static Common::FixedPoint<50, 14> Axfx2AllPassTick(ReverbInfo::ReverbDelayLine&
*/
template <size_t NumChannels>
static void ApplyReverbEffect(const ReverbInfo::ParameterVersion2& params, ReverbInfo::State& state,
- std::vector<std::span<const s32>>& inputs,
- std::vector<std::span<s32>>& outputs, const u32 sample_count) {
+ std::span<std::span<const s32>> inputs,
+ std::span<std::span<s32>> outputs, const u32 sample_count) {
static constexpr std::array<u8, ReverbInfo::MaxDelayTaps> OutTapIndexes1Ch{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
@@ -369,8 +369,8 @@ static void ApplyReverbEffect(const ReverbInfo::ParameterVersion2& params, Rever
* @param sample_count - Number of samples to process.
*/
static void ApplyReverbEffect(const ReverbInfo::ParameterVersion2& params, ReverbInfo::State& state,
- const bool enabled, std::vector<std::span<const s32>>& inputs,
- std::vector<std::span<s32>>& outputs, const u32 sample_count) {
+ const bool enabled, std::span<std::span<const s32>> inputs,
+ std::span<std::span<s32>> outputs, const u32 sample_count) {
if (enabled) {
switch (params.channel_count) {
case 0:
@@ -412,8 +412,8 @@ void ReverbCommand::Dump([[maybe_unused]] const ADSP::CommandListProcessor& proc
}
void ReverbCommand::Process(const ADSP::CommandListProcessor& processor) {
- std::vector<std::span<const s32>> input_buffers(parameter.channel_count);
- std::vector<std::span<s32>> output_buffers(parameter.channel_count);
+ std::array<std::span<const s32>, MaxChannels> input_buffers{};
+ std::array<std::span<s32>, MaxChannels> output_buffers{};
for (u32 i = 0; i < parameter.channel_count; i++) {
input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count,
diff --git a/src/audio_core/renderer/command/performance/performance.cpp b/src/audio_core/renderer/command/performance/performance.cpp
index 985958b03..4a881547f 100644
--- a/src/audio_core/renderer/command/performance/performance.cpp
+++ b/src/audio_core/renderer/command/performance/performance.cpp
@@ -5,7 +5,6 @@
#include "audio_core/renderer/command/performance/performance.h"
#include "core/core.h"
#include "core/core_timing.h"
-#include "core/core_timing_util.h"
namespace AudioCore::AudioRenderer {
@@ -18,20 +17,18 @@ void PerformanceCommand::Process(const ADSP::CommandListProcessor& processor) {
auto base{entry_address.translated_address};
if (state == PerformanceState::Start) {
auto start_time_ptr{reinterpret_cast<u32*>(base + entry_address.entry_start_time_offset)};
- *start_time_ptr = static_cast<u32>(
- Core::Timing::CyclesToUs(processor.system->CoreTiming().GetClockTicks() -
- processor.start_time - processor.current_processing_time)
- .count());
+ *start_time_ptr =
+ static_cast<u32>(processor.system->CoreTiming().GetClockTicks() - processor.start_time -
+ processor.current_processing_time);
} else if (state == PerformanceState::Stop) {
auto processed_time_ptr{
reinterpret_cast<u32*>(base + entry_address.entry_processed_time_offset)};
auto entry_count_ptr{
reinterpret_cast<u32*>(base + entry_address.header_entry_count_offset)};
- *processed_time_ptr = static_cast<u32>(
- Core::Timing::CyclesToUs(processor.system->CoreTiming().GetClockTicks() -
- processor.start_time - processor.current_processing_time)
- .count());
+ *processed_time_ptr =
+ static_cast<u32>(processor.system->CoreTiming().GetClockTicks() - processor.start_time -
+ processor.current_processing_time);
(*entry_count_ptr)++;
}
}
diff --git a/src/audio_core/renderer/command/sink/circular_buffer.cpp b/src/audio_core/renderer/command/sink/circular_buffer.cpp
index ded5afc94..e2ce59792 100644
--- a/src/audio_core/renderer/command/sink/circular_buffer.cpp
+++ b/src/audio_core/renderer/command/sink/circular_buffer.cpp
@@ -24,7 +24,7 @@ void CircularBufferSinkCommand::Process(const ADSP::CommandListProcessor& proces
constexpr s32 min{std::numeric_limits<s16>::min()};
constexpr s32 max{std::numeric_limits<s16>::max()};
- std::vector<s16> output(processor.sample_count);
+ std::array<s16, TargetSampleCount * MaxChannels> output{};
for (u32 channel = 0; channel < input_count; channel++) {
auto input{processor.mix_buffers.subspan(inputs[channel] * processor.sample_count,
processor.sample_count)};
@@ -33,7 +33,7 @@ void CircularBufferSinkCommand::Process(const ADSP::CommandListProcessor& proces
}
processor.memory->WriteBlockUnsafe(address + pos, output.data(),
- output.size() * sizeof(s16));
+ processor.sample_count * sizeof(s16));
pos += static_cast<u32>(processor.sample_count * sizeof(s16));
if (pos >= size) {
pos = 0;
diff --git a/src/audio_core/renderer/command/sink/device.cpp b/src/audio_core/renderer/command/sink/device.cpp
index e88372a75..5f74dd7ad 100644
--- a/src/audio_core/renderer/command/sink/device.cpp
+++ b/src/audio_core/renderer/command/sink/device.cpp
@@ -33,8 +33,7 @@ void DeviceSinkCommand::Process(const ADSP::CommandListProcessor& processor) {
.consumed{false},
};
- std::vector<s16> samples(out_buffer.frames * input_count);
-
+ std::array<s16, TargetSampleCount * MaxChannels> samples{};
for (u32 channel = 0; channel < input_count; channel++) {
const auto offset{inputs[channel] * out_buffer.frames};
@@ -45,7 +44,7 @@ void DeviceSinkCommand::Process(const ADSP::CommandListProcessor& processor) {
}
out_buffer.tag = reinterpret_cast<u64>(samples.data());
- stream->AppendBuffer(out_buffer, samples);
+ stream->AppendBuffer(out_buffer, {samples.data(), out_buffer.frames * input_count});
if (stream->IsPaused()) {
stream->Start();
diff --git a/src/audio_core/renderer/mix/mix_context.cpp b/src/audio_core/renderer/mix/mix_context.cpp
index 35b748ede..3a18ae7c2 100644
--- a/src/audio_core/renderer/mix/mix_context.cpp
+++ b/src/audio_core/renderer/mix/mix_context.cpp
@@ -125,10 +125,10 @@ bool MixContext::TSortInfo(const SplitterContext& splitter_context) {
return false;
}
- std::vector<s32> sorted_results{node_states.GetSortedResuls()};
- const auto result_size{std::min(count, static_cast<s32>(sorted_results.size()))};
+ auto sorted_results{node_states.GetSortedResuls()};
+ const auto result_size{std::min(count, static_cast<s32>(sorted_results.second))};
for (s32 i = 0; i < result_size; i++) {
- sorted_mix_infos[i] = &mix_infos[sorted_results[i]];
+ sorted_mix_infos[i] = &mix_infos[sorted_results.first[i]];
}
CalcMixBufferOffset();
diff --git a/src/audio_core/renderer/nodes/node_states.cpp b/src/audio_core/renderer/nodes/node_states.cpp
index 1821a51e6..b7a44a54c 100644
--- a/src/audio_core/renderer/nodes/node_states.cpp
+++ b/src/audio_core/renderer/nodes/node_states.cpp
@@ -134,8 +134,8 @@ u32 NodeStates::GetNodeCount() const {
return node_count;
}
-std::vector<s32> NodeStates::GetSortedResuls() const {
- return {results.rbegin(), results.rbegin() + result_pos};
+std::pair<std::span<u32>::reverse_iterator, size_t> NodeStates::GetSortedResuls() const {
+ return {results.rbegin(), result_pos};
}
} // namespace AudioCore::AudioRenderer
diff --git a/src/audio_core/renderer/nodes/node_states.h b/src/audio_core/renderer/nodes/node_states.h
index 94b1d1254..e768cd4b5 100644
--- a/src/audio_core/renderer/nodes/node_states.h
+++ b/src/audio_core/renderer/nodes/node_states.h
@@ -175,7 +175,7 @@ public:
*
* @return Vector of nodes in reverse order.
*/
- std::vector<s32> GetSortedResuls() const;
+ std::pair<std::span<u32>::reverse_iterator, size_t> GetSortedResuls() const;
private:
/// Number of nodes in the graph
diff --git a/src/audio_core/renderer/system.cpp b/src/audio_core/renderer/system.cpp
index 53b258c4f..a23627472 100644
--- a/src/audio_core/renderer/system.cpp
+++ b/src/audio_core/renderer/system.cpp
@@ -444,6 +444,7 @@ Result System::Update(std::span<const u8> input, std::span<u8> performance, std:
std::scoped_lock l{lock};
const auto start_time{core.CoreTiming().GetClockTicks()};
+ std::memset(output.data(), 0, output.size());
InfoUpdater info_updater(input, output, process_handle, behavior);
diff --git a/src/audio_core/sink/null_sink.h b/src/audio_core/sink/null_sink.h
index 1215d3cd2..b6b43c93e 100644
--- a/src/audio_core/sink/null_sink.h
+++ b/src/audio_core/sink/null_sink.h
@@ -20,7 +20,7 @@ public:
explicit NullSinkStreamImpl(Core::System& system_, StreamType type_)
: SinkStream{system_, type_} {}
~NullSinkStreamImpl() override {}
- void AppendBuffer(SinkBuffer&, std::vector<s16>&) override {}
+ void AppendBuffer(SinkBuffer&, std::span<s16>) override {}
std::vector<s16> ReleaseBuffer(u64) override {
return {};
}
diff --git a/src/audio_core/sink/sink_stream.cpp b/src/audio_core/sink/sink_stream.cpp
index f44fedfd5..404dcd0e9 100644
--- a/src/audio_core/sink/sink_stream.cpp
+++ b/src/audio_core/sink/sink_stream.cpp
@@ -15,11 +15,10 @@
#include "common/settings.h"
#include "core/core.h"
#include "core/core_timing.h"
-#include "core/core_timing_util.h"
namespace AudioCore::Sink {
-void SinkStream::AppendBuffer(SinkBuffer& buffer, std::vector<s16>& samples) {
+void SinkStream::AppendBuffer(SinkBuffer& buffer, std::span<s16> samples) {
if (type == StreamType::In) {
queue.enqueue(buffer);
queued_buffers++;
@@ -67,15 +66,16 @@ void SinkStream::AppendBuffer(SinkBuffer& buffer, std::vector<s16>& samples) {
static_cast<s16>(std::clamp(right_sample, min, max));
}
- samples.resize(samples.size() / system_channels * device_channels);
+ samples = samples.subspan(0, samples.size() / system_channels * device_channels);
} else if (system_channels == 2 && device_channels == 6) {
// We need moar samples! Not all games will provide 6 channel audio.
// TODO: Implement some upmixing here. Currently just passthrough, with other
// channels left as silence.
- std::vector<s16> new_samples(samples.size() / system_channels * device_channels, 0);
+ auto new_size = samples.size() / system_channels * device_channels;
+ tmp_samples.resize_destructive(new_size);
- for (u32 read_index = 0, write_index = 0; read_index < samples.size();
+ for (u32 read_index = 0, write_index = 0; read_index < new_size;
read_index += system_channels, write_index += device_channels) {
const auto left_sample{static_cast<s16>(std::clamp(
static_cast<s32>(
@@ -83,7 +83,7 @@ void SinkStream::AppendBuffer(SinkBuffer& buffer, std::vector<s16>& samples) {
volume),
min, max))};
- new_samples[write_index + static_cast<u32>(Channels::FrontLeft)] = left_sample;
+ tmp_samples[write_index + static_cast<u32>(Channels::FrontLeft)] = left_sample;
const auto right_sample{static_cast<s16>(std::clamp(
static_cast<s32>(
@@ -91,9 +91,9 @@ void SinkStream::AppendBuffer(SinkBuffer& buffer, std::vector<s16>& samples) {
volume),
min, max))};
- new_samples[write_index + static_cast<u32>(Channels::FrontRight)] = right_sample;
+ tmp_samples[write_index + static_cast<u32>(Channels::FrontRight)] = right_sample;
}
- samples = std::move(new_samples);
+ samples = std::span<s16>(tmp_samples);
} else if (volume != 1.0f) {
for (u32 i = 0; i < samples.size(); i++) {
diff --git a/src/audio_core/sink/sink_stream.h b/src/audio_core/sink/sink_stream.h
index 41cbadc9c..98d72ace1 100644
--- a/src/audio_core/sink/sink_stream.h
+++ b/src/audio_core/sink/sink_stream.h
@@ -16,6 +16,7 @@
#include "common/polyfill_thread.h"
#include "common/reader_writer_queue.h"
#include "common/ring_buffer.h"
+#include "common/scratch_buffer.h"
#include "common/thread.h"
namespace Core {
@@ -170,7 +171,7 @@ public:
* @param buffer - Audio buffer information to be queued.
* @param samples - The s16 samples to be queue for playback.
*/
- virtual void AppendBuffer(SinkBuffer& buffer, std::vector<s16>& samples);
+ virtual void AppendBuffer(SinkBuffer& buffer, std::span<s16> samples);
/**
* Release a buffer. Audio In only, will fill a buffer with recorded samples.
@@ -255,6 +256,8 @@ private:
/// Signalled when ring buffer entries are consumed
std::condition_variable_any release_cv;
std::mutex release_mutex;
+ /// Temporary buffer for appending samples when upmixing
+ Common::ScratchBuffer<s16> tmp_samples{};
};
using SinkStreamPtr = std::unique_ptr<SinkStream>;
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index efc4a9fe9..3adf13a3f 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -172,6 +172,8 @@ if(ARCHITECTURE_x86_64)
x64/cpu_wait.h
x64/native_clock.cpp
x64/native_clock.h
+ x64/rdtsc.cpp
+ x64/rdtsc.h
x64/xbyak_abi.h
x64/xbyak_util.h
)
diff --git a/src/common/fs/fs.cpp b/src/common/fs/fs.cpp
index 6d66c926d..1baf6d746 100644
--- a/src/common/fs/fs.cpp
+++ b/src/common/fs/fs.cpp
@@ -436,7 +436,7 @@ void IterateDirEntries(const std::filesystem::path& path, const DirEntryCallable
if (True(filter & DirEntryFilter::File) &&
entry.status().type() == fs::file_type::regular) {
- if (!callback(entry.path())) {
+ if (!callback(entry)) {
callback_error = true;
break;
}
@@ -444,7 +444,7 @@ void IterateDirEntries(const std::filesystem::path& path, const DirEntryCallable
if (True(filter & DirEntryFilter::Directory) &&
entry.status().type() == fs::file_type::directory) {
- if (!callback(entry.path())) {
+ if (!callback(entry)) {
callback_error = true;
break;
}
@@ -493,7 +493,7 @@ void IterateDirEntriesRecursively(const std::filesystem::path& path,
if (True(filter & DirEntryFilter::File) &&
entry.status().type() == fs::file_type::regular) {
- if (!callback(entry.path())) {
+ if (!callback(entry)) {
callback_error = true;
break;
}
@@ -501,7 +501,7 @@ void IterateDirEntriesRecursively(const std::filesystem::path& path,
if (True(filter & DirEntryFilter::Directory) &&
entry.status().type() == fs::file_type::directory) {
- if (!callback(entry.path())) {
+ if (!callback(entry)) {
callback_error = true;
break;
}
diff --git a/src/common/fs/fs_types.h b/src/common/fs/fs_types.h
index 5a4090c19..900f85d24 100644
--- a/src/common/fs/fs_types.h
+++ b/src/common/fs/fs_types.h
@@ -66,6 +66,6 @@ DECLARE_ENUM_FLAG_OPERATORS(DirEntryFilter);
* @returns A boolean value.
* Return true to indicate whether the callback is successful, false otherwise.
*/
-using DirEntryCallable = std::function<bool(const std::filesystem::path& path)>;
+using DirEntryCallable = std::function<bool(const std::filesystem::directory_entry& entry)>;
} // namespace Common::FS
diff --git a/src/common/ring_buffer.h b/src/common/ring_buffer.h
index 4c328ab44..416680d44 100644
--- a/src/common/ring_buffer.h
+++ b/src/common/ring_buffer.h
@@ -9,6 +9,7 @@
#include <cstddef>
#include <cstring>
#include <new>
+#include <span>
#include <type_traits>
#include <vector>
@@ -53,7 +54,7 @@ public:
return push_count;
}
- std::size_t Push(const std::vector<T>& input) {
+ std::size_t Push(const std::span<T> input) {
return Push(input.data(), input.size());
}
diff --git a/src/common/scratch_buffer.h b/src/common/scratch_buffer.h
index a69a5a7af..6fe907953 100644
--- a/src/common/scratch_buffer.h
+++ b/src/common/scratch_buffer.h
@@ -3,6 +3,9 @@
#pragma once
+#include <iterator>
+
+#include "common/concepts.h"
#include "common/make_unique_for_overwrite.h"
namespace Common {
@@ -16,6 +19,12 @@ namespace Common {
template <typename T>
class ScratchBuffer {
public:
+ using iterator = T*;
+ using const_iterator = const T*;
+ using value_type = T;
+ using element_type = T;
+ using iterator_category = std::contiguous_iterator_tag;
+
ScratchBuffer() = default;
explicit ScratchBuffer(size_t initial_capacity)
diff --git a/src/common/settings.h b/src/common/settings.h
index 9682281b0..3aedf3850 100644
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -483,6 +483,7 @@ struct Values {
AstcRecompression::Uncompressed, AstcRecompression::Uncompressed, AstcRecompression::Bc3,
"astc_recompression"};
SwitchableSetting<bool> use_video_framerate{false, "use_video_framerate"};
+ SwitchableSetting<bool> barrier_feedback_loops{true, "barrier_feedback_loops"};
SwitchableSetting<u8> bg_red{0, "bg_red"};
SwitchableSetting<u8> bg_green{0, "bg_green"};
diff --git a/src/common/steady_clock.cpp b/src/common/steady_clock.cpp
index 782859196..9415eed29 100644
--- a/src/common/steady_clock.cpp
+++ b/src/common/steady_clock.cpp
@@ -28,13 +28,12 @@ static s64 GetSystemTimeNS() {
// GetSystemTimePreciseAsFileTime returns the file time in 100ns units.
static constexpr s64 Multiplier = 100;
// Convert Windows epoch to Unix epoch.
- static constexpr s64 WindowsEpochToUnixEpochNS = 0x19DB1DED53E8000LL;
+ static constexpr s64 WindowsEpochToUnixEpoch = 0x19DB1DED53E8000LL;
FILETIME filetime;
GetSystemTimePreciseAsFileTime(&filetime);
return Multiplier * ((static_cast<s64>(filetime.dwHighDateTime) << 32) +
- static_cast<s64>(filetime.dwLowDateTime)) -
- WindowsEpochToUnixEpochNS;
+ static_cast<s64>(filetime.dwLowDateTime) - WindowsEpochToUnixEpoch);
}
#endif
diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp
index 817e71d52..dc0dcbd68 100644
--- a/src/common/wall_clock.cpp
+++ b/src/common/wall_clock.cpp
@@ -2,88 +2,75 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/steady_clock.h"
-#include "common/uint128.h"
#include "common/wall_clock.h"
#ifdef ARCHITECTURE_x86_64
#include "common/x64/cpu_detect.h"
#include "common/x64/native_clock.h"
+#include "common/x64/rdtsc.h"
#endif
namespace Common {
class StandardWallClock final : public WallClock {
public:
- explicit StandardWallClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_)
- : WallClock{emulated_cpu_frequency_, emulated_clock_frequency_, false},
- start_time{SteadyClock::Now()} {}
+ explicit StandardWallClock() : start_time{SteadyClock::Now()} {}
- std::chrono::nanoseconds GetTimeNS() override {
+ std::chrono::nanoseconds GetTimeNS() const override {
return SteadyClock::Now() - start_time;
}
- std::chrono::microseconds GetTimeUS() override {
- return std::chrono::duration_cast<std::chrono::microseconds>(GetTimeNS());
+ std::chrono::microseconds GetTimeUS() const override {
+ return static_cast<std::chrono::microseconds>(GetHostTicksElapsed() / NsToUsRatio::den);
}
- std::chrono::milliseconds GetTimeMS() override {
- return std::chrono::duration_cast<std::chrono::milliseconds>(GetTimeNS());
+ std::chrono::milliseconds GetTimeMS() const override {
+ return static_cast<std::chrono::milliseconds>(GetHostTicksElapsed() / NsToMsRatio::den);
}
- u64 GetClockCycles() override {
- const u128 temp = Common::Multiply64Into128(GetTimeNS().count(), emulated_clock_frequency);
- return Common::Divide128On32(temp, NS_RATIO).first;
+ u64 GetCNTPCT() const override {
+ return GetHostTicksElapsed() * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den;
}
- u64 GetCPUCycles() override {
- const u128 temp = Common::Multiply64Into128(GetTimeNS().count(), emulated_cpu_frequency);
- return Common::Divide128On32(temp, NS_RATIO).first;
+ u64 GetGPUTick() const override {
+ return GetHostTicksElapsed() * NsToGPUTickRatio::num / NsToGPUTickRatio::den;
}
- void Pause([[maybe_unused]] bool is_paused) override {
- // Do nothing in this clock type.
+ u64 GetHostTicksNow() const override {
+ return static_cast<u64>(SteadyClock::Now().time_since_epoch().count());
+ }
+
+ u64 GetHostTicksElapsed() const override {
+ return static_cast<u64>(GetTimeNS().count());
+ }
+
+ bool IsNative() const override {
+ return false;
}
private:
SteadyClock::time_point start_time;
};
+std::unique_ptr<WallClock> CreateOptimalClock() {
#ifdef ARCHITECTURE_x86_64
-
-std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency,
- u64 emulated_clock_frequency) {
const auto& caps = GetCPUCaps();
- u64 rtsc_frequency = 0;
- if (caps.invariant_tsc) {
- rtsc_frequency = caps.tsc_frequency ? caps.tsc_frequency : EstimateRDTSCFrequency();
- }
- // Fallback to StandardWallClock if the hardware TSC does not have the precision greater than:
- // - A nanosecond
- // - The emulated CPU frequency
- // - The emulated clock counter frequency (CNTFRQ)
- if (rtsc_frequency <= WallClock::NS_RATIO || rtsc_frequency <= emulated_cpu_frequency ||
- rtsc_frequency <= emulated_clock_frequency) {
- return std::make_unique<StandardWallClock>(emulated_cpu_frequency,
- emulated_clock_frequency);
+ if (caps.invariant_tsc && caps.tsc_frequency >= WallClock::GPUTickFreq) {
+ return std::make_unique<X64::NativeClock>(caps.tsc_frequency);
} else {
- return std::make_unique<X64::NativeClock>(emulated_cpu_frequency, emulated_clock_frequency,
- rtsc_frequency);
+ // Fallback to StandardWallClock if the hardware TSC
+ // - Is not invariant
+ // - Is not more precise than GPUTickFreq
+ return std::make_unique<StandardWallClock>();
}
-}
-
#else
-
-std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency,
- u64 emulated_clock_frequency) {
- return std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency);
-}
-
+ return std::make_unique<StandardWallClock>();
#endif
+}
-std::unique_ptr<WallClock> CreateStandardWallClock(u64 emulated_cpu_frequency,
- u64 emulated_clock_frequency) {
- return std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency);
+std::unique_ptr<WallClock> CreateStandardWallClock() {
+ return std::make_unique<StandardWallClock>();
}
} // namespace Common
diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h
index 157ec5eae..f45d3d8c5 100644
--- a/src/common/wall_clock.h
+++ b/src/common/wall_clock.h
@@ -5,6 +5,7 @@
#include <chrono>
#include <memory>
+#include <ratio>
#include "common/common_types.h"
@@ -12,50 +13,82 @@ namespace Common {
class WallClock {
public:
- static constexpr u64 NS_RATIO = 1'000'000'000;
- static constexpr u64 US_RATIO = 1'000'000;
- static constexpr u64 MS_RATIO = 1'000;
+ static constexpr u64 CNTFRQ = 19'200'000; // CNTPCT_EL0 Frequency = 19.2 MHz
+ static constexpr u64 GPUTickFreq = 614'400'000; // GM20B GPU Tick Frequency = 614.4 MHz
+ static constexpr u64 CPUTickFreq = 1'020'000'000; // T210/4 A57 CPU Tick Frequency = 1020.0 MHz
virtual ~WallClock() = default;
- /// Returns current wall time in nanoseconds
- [[nodiscard]] virtual std::chrono::nanoseconds GetTimeNS() = 0;
+ /// @returns The time in nanoseconds since the construction of this clock.
+ virtual std::chrono::nanoseconds GetTimeNS() const = 0;
- /// Returns current wall time in microseconds
- [[nodiscard]] virtual std::chrono::microseconds GetTimeUS() = 0;
+ /// @returns The time in microseconds since the construction of this clock.
+ virtual std::chrono::microseconds GetTimeUS() const = 0;
- /// Returns current wall time in milliseconds
- [[nodiscard]] virtual std::chrono::milliseconds GetTimeMS() = 0;
+ /// @returns The time in milliseconds since the construction of this clock.
+ virtual std::chrono::milliseconds GetTimeMS() const = 0;
- /// Returns current wall time in emulated clock cycles
- [[nodiscard]] virtual u64 GetClockCycles() = 0;
+ /// @returns The guest CNTPCT ticks since the construction of this clock.
+ virtual u64 GetCNTPCT() const = 0;
- /// Returns current wall time in emulated cpu cycles
- [[nodiscard]] virtual u64 GetCPUCycles() = 0;
+ /// @returns The guest GPU ticks since the construction of this clock.
+ virtual u64 GetGPUTick() const = 0;
- virtual void Pause(bool is_paused) = 0;
+ /// @returns The raw host timer ticks since an indeterminate epoch.
+ virtual u64 GetHostTicksNow() const = 0;
- /// Tells if the wall clock, uses the host CPU's hardware clock
- [[nodiscard]] bool IsNative() const {
- return is_native;
+ /// @returns The raw host timer ticks since the construction of this clock.
+ virtual u64 GetHostTicksElapsed() const = 0;
+
+ /// @returns Whether the clock directly uses the host's hardware clock.
+ virtual bool IsNative() const = 0;
+
+ static inline u64 NSToCNTPCT(u64 ns) {
+ return ns * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den;
+ }
+
+ static inline u64 NSToGPUTick(u64 ns) {
+ return ns * NsToGPUTickRatio::num / NsToGPUTickRatio::den;
+ }
+
+ // Cycle Timing
+
+ static inline u64 CPUTickToNS(u64 cpu_tick) {
+ return cpu_tick * CPUTickToNsRatio::num / CPUTickToNsRatio::den;
+ }
+
+ static inline u64 CPUTickToUS(u64 cpu_tick) {
+ return cpu_tick * CPUTickToUsRatio::num / CPUTickToUsRatio::den;
+ }
+
+ static inline u64 CPUTickToCNTPCT(u64 cpu_tick) {
+ return cpu_tick * CPUTickToCNTPCTRatio::num / CPUTickToCNTPCTRatio::den;
+ }
+
+ static inline u64 CPUTickToGPUTick(u64 cpu_tick) {
+ return cpu_tick * CPUTickToGPUTickRatio::num / CPUTickToGPUTickRatio::den;
}
protected:
- explicit WallClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_, bool is_native_)
- : emulated_cpu_frequency{emulated_cpu_frequency_},
- emulated_clock_frequency{emulated_clock_frequency_}, is_native{is_native_} {}
+ using NsRatio = std::nano;
+ using UsRatio = std::micro;
+ using MsRatio = std::milli;
+
+ using NsToUsRatio = std::ratio_divide<std::nano, std::micro>;
+ using NsToMsRatio = std::ratio_divide<std::nano, std::milli>;
+ using NsToCNTPCTRatio = std::ratio<CNTFRQ, std::nano::den>;
+ using NsToGPUTickRatio = std::ratio<GPUTickFreq, std::nano::den>;
- u64 emulated_cpu_frequency;
- u64 emulated_clock_frequency;
+ // Cycle Timing
-private:
- bool is_native;
+ using CPUTickToNsRatio = std::ratio<std::nano::den, CPUTickFreq>;
+ using CPUTickToUsRatio = std::ratio<std::micro::den, CPUTickFreq>;
+ using CPUTickToCNTPCTRatio = std::ratio<CNTFRQ, CPUTickFreq>;
+ using CPUTickToGPUTickRatio = std::ratio<GPUTickFreq, CPUTickFreq>;
};
-[[nodiscard]] std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency,
- u64 emulated_clock_frequency);
+std::unique_ptr<WallClock> CreateOptimalClock();
-[[nodiscard]] std::unique_ptr<WallClock> CreateStandardWallClock(u64 emulated_cpu_frequency,
- u64 emulated_clock_frequency);
+std::unique_ptr<WallClock> CreateStandardWallClock();
} // namespace Common
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp
index 72ed6e96c..c998b1197 100644
--- a/src/common/x64/cpu_detect.cpp
+++ b/src/common/x64/cpu_detect.cpp
@@ -14,6 +14,7 @@
#include "common/common_types.h"
#include "common/logging/log.h"
#include "common/x64/cpu_detect.h"
+#include "common/x64/rdtsc.h"
#ifdef _WIN32
#include <windows.h>
@@ -187,6 +188,8 @@ static CPUCaps Detect() {
caps.tsc_frequency = static_cast<u64>(caps.crystal_frequency) *
caps.tsc_crystal_ratio_numerator /
caps.tsc_crystal_ratio_denominator;
+ } else {
+ caps.tsc_frequency = X64::EstimateRDTSCFrequency();
}
}
diff --git a/src/common/x64/cpu_wait.cpp b/src/common/x64/cpu_wait.cpp
index cfeef6a3d..c53dd4945 100644
--- a/src/common/x64/cpu_wait.cpp
+++ b/src/common/x64/cpu_wait.cpp
@@ -9,19 +9,11 @@
#include "common/x64/cpu_detect.h"
#include "common/x64/cpu_wait.h"
+#include "common/x64/rdtsc.h"
namespace Common::X64 {
#ifdef _MSC_VER
-__forceinline static u64 FencedRDTSC() {
- _mm_lfence();
- _ReadWriteBarrier();
- const u64 result = __rdtsc();
- _mm_lfence();
- _ReadWriteBarrier();
- return result;
-}
-
__forceinline static void TPAUSE() {
// 100,000 cycles is a reasonable amount of time to wait to save on CPU resources.
// For reference:
@@ -32,16 +24,6 @@ __forceinline static void TPAUSE() {
_tpause(0, FencedRDTSC() + PauseCycles);
}
#else
-static u64 FencedRDTSC() {
- u64 eax;
- u64 edx;
- asm volatile("lfence\n\t"
- "rdtsc\n\t"
- "lfence\n\t"
- : "=a"(eax), "=d"(edx));
- return (edx << 32) | eax;
-}
-
static void TPAUSE() {
// 100,000 cycles is a reasonable amount of time to wait to save on CPU resources.
// For reference:
diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp
index 277b00662..7d2a26bd9 100644
--- a/src/common/x64/native_clock.cpp
+++ b/src/common/x64/native_clock.cpp
@@ -1,164 +1,50 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
-#include <array>
-#include <chrono>
-#include <thread>
-
-#include "common/atomic_ops.h"
-#include "common/steady_clock.h"
#include "common/uint128.h"
#include "common/x64/native_clock.h"
+#include "common/x64/rdtsc.h"
-#ifdef _MSC_VER
-#include <intrin.h>
-#endif
-
-namespace Common {
+namespace Common::X64 {
-#ifdef _MSC_VER
-__forceinline static u64 FencedRDTSC() {
- _mm_lfence();
- _ReadWriteBarrier();
- const u64 result = __rdtsc();
- _mm_lfence();
- _ReadWriteBarrier();
- return result;
-}
-#else
-static u64 FencedRDTSC() {
- u64 eax;
- u64 edx;
- asm volatile("lfence\n\t"
- "rdtsc\n\t"
- "lfence\n\t"
- : "=a"(eax), "=d"(edx));
- return (edx << 32) | eax;
-}
-#endif
+NativeClock::NativeClock(u64 rdtsc_frequency_)
+ : start_ticks{FencedRDTSC()}, rdtsc_frequency{rdtsc_frequency_},
+ ns_rdtsc_factor{GetFixedPoint64Factor(NsRatio::den, rdtsc_frequency)},
+ us_rdtsc_factor{GetFixedPoint64Factor(UsRatio::den, rdtsc_frequency)},
+ ms_rdtsc_factor{GetFixedPoint64Factor(MsRatio::den, rdtsc_frequency)},
+ cntpct_rdtsc_factor{GetFixedPoint64Factor(CNTFRQ, rdtsc_frequency)},
+ gputick_rdtsc_factor{GetFixedPoint64Factor(GPUTickFreq, rdtsc_frequency)} {}
-template <u64 Nearest>
-static u64 RoundToNearest(u64 value) {
- const auto mod = value % Nearest;
- return mod >= (Nearest / 2) ? (value - mod + Nearest) : (value - mod);
+std::chrono::nanoseconds NativeClock::GetTimeNS() const {
+ return std::chrono::nanoseconds{MultiplyHigh(GetHostTicksElapsed(), ns_rdtsc_factor)};
}
-u64 EstimateRDTSCFrequency() {
- // Discard the first result measuring the rdtsc.
- FencedRDTSC();
- std::this_thread::sleep_for(std::chrono::milliseconds{1});
- FencedRDTSC();
-
- // Get the current time.
- const auto start_time = Common::RealTimeClock::Now();
- const u64 tsc_start = FencedRDTSC();
- // Wait for 250 milliseconds.
- std::this_thread::sleep_for(std::chrono::milliseconds{250});
- const auto end_time = Common::RealTimeClock::Now();
- const u64 tsc_end = FencedRDTSC();
- // Calculate differences.
- const u64 timer_diff = static_cast<u64>(
- std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count());
- const u64 tsc_diff = tsc_end - tsc_start;
- const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff);
- return RoundToNearest<1000>(tsc_freq);
+std::chrono::microseconds NativeClock::GetTimeUS() const {
+ return std::chrono::microseconds{MultiplyHigh(GetHostTicksElapsed(), us_rdtsc_factor)};
}
-namespace X64 {
-NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_,
- u64 rtsc_frequency_)
- : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{
- rtsc_frequency_} {
- // Thread to re-adjust the RDTSC frequency after 10 seconds has elapsed.
- time_sync_thread = std::jthread{[this](std::stop_token token) {
- // Get the current time.
- const auto start_time = Common::RealTimeClock::Now();
- const u64 tsc_start = FencedRDTSC();
- // Wait for 10 seconds.
- if (!Common::StoppableTimedWait(token, std::chrono::seconds{10})) {
- return;
- }
- const auto end_time = Common::RealTimeClock::Now();
- const u64 tsc_end = FencedRDTSC();
- // Calculate differences.
- const u64 timer_diff = static_cast<u64>(
- std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count());
- const u64 tsc_diff = tsc_end - tsc_start;
- const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff);
- rtsc_frequency = tsc_freq;
- CalculateAndSetFactors();
- }};
-
- time_point.inner.last_measure = FencedRDTSC();
- time_point.inner.accumulated_ticks = 0U;
- CalculateAndSetFactors();
+std::chrono::milliseconds NativeClock::GetTimeMS() const {
+ return std::chrono::milliseconds{MultiplyHigh(GetHostTicksElapsed(), ms_rdtsc_factor)};
}
-u64 NativeClock::GetRTSC() {
- TimePoint new_time_point{};
- TimePoint current_time_point{};
-
- current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
- do {
- const u64 current_measure = FencedRDTSC();
- u64 diff = current_measure - current_time_point.inner.last_measure;
- diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0)
- new_time_point.inner.last_measure = current_measure > current_time_point.inner.last_measure
- ? current_measure
- : current_time_point.inner.last_measure;
- new_time_point.inner.accumulated_ticks = current_time_point.inner.accumulated_ticks + diff;
- } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack,
- current_time_point.pack, current_time_point.pack));
- return new_time_point.inner.accumulated_ticks;
+u64 NativeClock::GetCNTPCT() const {
+ return MultiplyHigh(GetHostTicksElapsed(), cntpct_rdtsc_factor);
}
-void NativeClock::Pause(bool is_paused) {
- if (!is_paused) {
- TimePoint current_time_point{};
- TimePoint new_time_point{};
-
- current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
- do {
- new_time_point.pack = current_time_point.pack;
- new_time_point.inner.last_measure = FencedRDTSC();
- } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack,
- current_time_point.pack, current_time_point.pack));
- }
+u64 NativeClock::GetGPUTick() const {
+ return MultiplyHigh(GetHostTicksElapsed(), gputick_rdtsc_factor);
}
-std::chrono::nanoseconds NativeClock::GetTimeNS() {
- const u64 rtsc_value = GetRTSC();
- return std::chrono::nanoseconds{MultiplyHigh(rtsc_value, ns_rtsc_factor)};
+u64 NativeClock::GetHostTicksNow() const {
+ return FencedRDTSC();
}
-std::chrono::microseconds NativeClock::GetTimeUS() {
- const u64 rtsc_value = GetRTSC();
- return std::chrono::microseconds{MultiplyHigh(rtsc_value, us_rtsc_factor)};
+u64 NativeClock::GetHostTicksElapsed() const {
+ return FencedRDTSC() - start_ticks;
}
-std::chrono::milliseconds NativeClock::GetTimeMS() {
- const u64 rtsc_value = GetRTSC();
- return std::chrono::milliseconds{MultiplyHigh(rtsc_value, ms_rtsc_factor)};
+bool NativeClock::IsNative() const {
+ return true;
}
-u64 NativeClock::GetClockCycles() {
- const u64 rtsc_value = GetRTSC();
- return MultiplyHigh(rtsc_value, clock_rtsc_factor);
-}
-
-u64 NativeClock::GetCPUCycles() {
- const u64 rtsc_value = GetRTSC();
- return MultiplyHigh(rtsc_value, cpu_rtsc_factor);
-}
-
-void NativeClock::CalculateAndSetFactors() {
- ns_rtsc_factor = GetFixedPoint64Factor(NS_RATIO, rtsc_frequency);
- us_rtsc_factor = GetFixedPoint64Factor(US_RATIO, rtsc_frequency);
- ms_rtsc_factor = GetFixedPoint64Factor(MS_RATIO, rtsc_frequency);
- clock_rtsc_factor = GetFixedPoint64Factor(emulated_clock_frequency, rtsc_frequency);
- cpu_rtsc_factor = GetFixedPoint64Factor(emulated_cpu_frequency, rtsc_frequency);
-}
-
-} // namespace X64
-
-} // namespace Common
+} // namespace Common::X64
diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h
index 03ca291d8..334415eff 100644
--- a/src/common/x64/native_clock.h
+++ b/src/common/x64/native_clock.h
@@ -3,58 +3,39 @@
#pragma once
-#include "common/polyfill_thread.h"
#include "common/wall_clock.h"
-namespace Common {
+namespace Common::X64 {
-namespace X64 {
class NativeClock final : public WallClock {
public:
- explicit NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_,
- u64 rtsc_frequency_);
+ explicit NativeClock(u64 rdtsc_frequency_);
- std::chrono::nanoseconds GetTimeNS() override;
+ std::chrono::nanoseconds GetTimeNS() const override;
- std::chrono::microseconds GetTimeUS() override;
+ std::chrono::microseconds GetTimeUS() const override;
- std::chrono::milliseconds GetTimeMS() override;
+ std::chrono::milliseconds GetTimeMS() const override;
- u64 GetClockCycles() override;
+ u64 GetCNTPCT() const override;
- u64 GetCPUCycles() override;
+ u64 GetGPUTick() const override;
- void Pause(bool is_paused) override;
+ u64 GetHostTicksNow() const override;
-private:
- u64 GetRTSC();
-
- void CalculateAndSetFactors();
-
- union alignas(16) TimePoint {
- TimePoint() : pack{} {}
- u128 pack{};
- struct Inner {
- u64 last_measure{};
- u64 accumulated_ticks{};
- } inner;
- };
-
- TimePoint time_point;
+ u64 GetHostTicksElapsed() const override;
- // factors
- u64 clock_rtsc_factor{};
- u64 cpu_rtsc_factor{};
- u64 ns_rtsc_factor{};
- u64 us_rtsc_factor{};
- u64 ms_rtsc_factor{};
+ bool IsNative() const override;
- u64 rtsc_frequency;
-
- std::jthread time_sync_thread;
+private:
+ u64 start_ticks;
+ u64 rdtsc_frequency;
+
+ u64 ns_rdtsc_factor;
+ u64 us_rdtsc_factor;
+ u64 ms_rdtsc_factor;
+ u64 cntpct_rdtsc_factor;
+ u64 gputick_rdtsc_factor;
};
-} // namespace X64
-
-u64 EstimateRDTSCFrequency();
-} // namespace Common
+} // namespace Common::X64
diff --git a/src/common/x64/rdtsc.cpp b/src/common/x64/rdtsc.cpp
new file mode 100644
index 000000000..9273274a3
--- /dev/null
+++ b/src/common/x64/rdtsc.cpp
@@ -0,0 +1,39 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <thread>
+
+#include "common/steady_clock.h"
+#include "common/uint128.h"
+#include "common/x64/rdtsc.h"
+
+namespace Common::X64 {
+
+template <u64 Nearest>
+static u64 RoundToNearest(u64 value) {
+ const auto mod = value % Nearest;
+ return mod >= (Nearest / 2) ? (value - mod + Nearest) : (value - mod);
+}
+
+u64 EstimateRDTSCFrequency() {
+ // Discard the first result measuring the rdtsc.
+ FencedRDTSC();
+ std::this_thread::sleep_for(std::chrono::milliseconds{1});
+ FencedRDTSC();
+
+ // Get the current time.
+ const auto start_time = RealTimeClock::Now();
+ const u64 tsc_start = FencedRDTSC();
+ // Wait for 100 milliseconds.
+ std::this_thread::sleep_for(std::chrono::milliseconds{100});
+ const auto end_time = RealTimeClock::Now();
+ const u64 tsc_end = FencedRDTSC();
+ // Calculate differences.
+ const u64 timer_diff = static_cast<u64>(
+ std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count());
+ const u64 tsc_diff = tsc_end - tsc_start;
+ const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff);
+ return RoundToNearest<100'000>(tsc_freq);
+}
+
+} // namespace Common::X64
diff --git a/src/common/x64/rdtsc.h b/src/common/x64/rdtsc.h
new file mode 100644
index 000000000..0ec4f52f9
--- /dev/null
+++ b/src/common/x64/rdtsc.h
@@ -0,0 +1,37 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+
+#include "common/common_types.h"
+
+namespace Common::X64 {
+
+#ifdef _MSC_VER
+__forceinline static u64 FencedRDTSC() {
+ _mm_lfence();
+ _ReadWriteBarrier();
+ const u64 result = __rdtsc();
+ _mm_lfence();
+ _ReadWriteBarrier();
+ return result;
+}
+#else
+static inline u64 FencedRDTSC() {
+ u64 eax;
+ u64 edx;
+ asm volatile("lfence\n\t"
+ "rdtsc\n\t"
+ "lfence\n\t"
+ : "=a"(eax), "=d"(edx));
+ return (edx << 32) | eax;
+}
+#endif
+
+u64 EstimateRDTSCFrequency();
+
+} // namespace Common::X64
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 227c431bc..3655b8478 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -14,7 +14,6 @@ add_library(core STATIC
core.h
core_timing.cpp
core_timing.h
- core_timing_util.h
cpu_manager.cpp
cpu_manager.h
crypto/aes_util.cpp
diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index 4f2692b05..4f0a3f8ea 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -16,12 +16,11 @@
#include "common/microprofile.h"
#include "core/core_timing.h"
-#include "core/core_timing_util.h"
#include "core/hardware_properties.h"
namespace Core::Timing {
-constexpr s64 MAX_SLICE_LENGTH = 4000;
+constexpr s64 MAX_SLICE_LENGTH = 10000;
std::shared_ptr<EventType> CreateEvent(std::string name, TimedCallback&& callback) {
return std::make_shared<EventType>(std::move(callback), std::move(name));
@@ -45,9 +44,7 @@ struct CoreTiming::Event {
}
};
-CoreTiming::CoreTiming()
- : cpu_clock{Common::CreateBestMatchingClock(Hardware::BASE_CLOCK_RATE, Hardware::CNTFREQ)},
- event_clock{Common::CreateStandardWallClock(Hardware::BASE_CLOCK_RATE, Hardware::CNTFREQ)} {}
+CoreTiming::CoreTiming() : clock{Common::CreateOptimalClock()} {}
CoreTiming::~CoreTiming() {
Reset();
@@ -68,7 +65,7 @@ void CoreTiming::Initialize(std::function<void()>&& on_thread_init_) {
on_thread_init = std::move(on_thread_init_);
event_fifo_id = 0;
shutting_down = false;
- ticks = 0;
+ cpu_ticks = 0;
const auto empty_timed_callback = [](std::uintptr_t, u64, std::chrono::nanoseconds)
-> std::optional<std::chrono::nanoseconds> { return std::nullopt; };
ev_lost = CreateEvent("_lost_event", empty_timed_callback);
@@ -173,38 +170,30 @@ void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type,
}
void CoreTiming::AddTicks(u64 ticks_to_add) {
- ticks += ticks_to_add;
- downcount -= static_cast<s64>(ticks);
+ cpu_ticks += ticks_to_add;
+ downcount -= static_cast<s64>(cpu_ticks);
}
void CoreTiming::Idle() {
- if (!event_queue.empty()) {
- const u64 next_event_time = event_queue.front().time;
- const u64 next_ticks = nsToCycles(std::chrono::nanoseconds(next_event_time)) + 10U;
- if (next_ticks > ticks) {
- ticks = next_ticks;
- }
- return;
- }
- ticks += 1000U;
+ cpu_ticks += 1000U;
}
void CoreTiming::ResetTicks() {
downcount = MAX_SLICE_LENGTH;
}
-u64 CoreTiming::GetCPUTicks() const {
+u64 CoreTiming::GetClockTicks() const {
if (is_multicore) [[likely]] {
- return cpu_clock->GetCPUCycles();
+ return clock->GetCNTPCT();
}
- return ticks;
+ return Common::WallClock::CPUTickToCNTPCT(cpu_ticks);
}
-u64 CoreTiming::GetClockTicks() const {
+u64 CoreTiming::GetGPUTicks() const {
if (is_multicore) [[likely]] {
- return cpu_clock->GetClockCycles();
+ return clock->GetGPUTick();
}
- return CpuCyclesToClockCycles(ticks);
+ return Common::WallClock::CPUTickToGPUTick(cpu_ticks);
}
std::optional<s64> CoreTiming::Advance() {
@@ -297,9 +286,7 @@ void CoreTiming::ThreadLoop() {
}
paused_set = true;
- event_clock->Pause(true);
pause_event.Wait();
- event_clock->Pause(false);
}
}
@@ -315,25 +302,18 @@ void CoreTiming::Reset() {
has_started = false;
}
-std::chrono::nanoseconds CoreTiming::GetCPUTimeNs() const {
- if (is_multicore) [[likely]] {
- return cpu_clock->GetTimeNS();
- }
- return CyclesToNs(ticks);
-}
-
std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const {
if (is_multicore) [[likely]] {
- return event_clock->GetTimeNS();
+ return clock->GetTimeNS();
}
- return CyclesToNs(ticks);
+ return std::chrono::nanoseconds{Common::WallClock::CPUTickToNS(cpu_ticks)};
}
std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
if (is_multicore) [[likely]] {
- return event_clock->GetTimeUS();
+ return clock->GetTimeUS();
}
- return CyclesToUs(ticks);
+ return std::chrono::microseconds{Common::WallClock::CPUTickToUS(cpu_ticks)};
}
} // namespace Core::Timing
diff --git a/src/core/core_timing.h b/src/core/core_timing.h
index e7c4a949f..10db1de55 100644
--- a/src/core/core_timing.h
+++ b/src/core/core_timing.h
@@ -116,14 +116,11 @@ public:
return downcount;
}
- /// Returns current time in emulated CPU cycles
- u64 GetCPUTicks() const;
-
- /// Returns current time in emulated in Clock cycles
+ /// Returns the current CNTPCT tick value.
u64 GetClockTicks() const;
- /// Returns current time in nanoseconds.
- std::chrono::nanoseconds GetCPUTimeNs() const;
+ /// Returns the current GPU tick value.
+ u64 GetGPUTicks() const;
/// Returns current time in microseconds.
std::chrono::microseconds GetGlobalTimeUs() const;
@@ -142,8 +139,7 @@ private:
void Reset();
- std::unique_ptr<Common::WallClock> cpu_clock;
- std::unique_ptr<Common::WallClock> event_clock;
+ std::unique_ptr<Common::WallClock> clock;
s64 global_timer = 0;
@@ -171,7 +167,7 @@ private:
s64 pause_end_time{};
/// Cycle timing
- u64 ticks{};
+ u64 cpu_ticks{};
s64 downcount{};
};
diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h
deleted file mode 100644
index fe5aaefc7..000000000
--- a/src/core/core_timing_util.h
+++ /dev/null
@@ -1,58 +0,0 @@
-// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#pragma once
-
-#include <chrono>
-
-#include "common/common_types.h"
-#include "core/hardware_properties.h"
-
-namespace Core::Timing {
-
-namespace detail {
-constexpr u64 CNTFREQ_ADJUSTED = Hardware::CNTFREQ / 1000;
-constexpr u64 BASE_CLOCK_RATE_ADJUSTED = Hardware::BASE_CLOCK_RATE / 1000;
-} // namespace detail
-
-[[nodiscard]] constexpr s64 msToCycles(std::chrono::milliseconds ms) {
- return ms.count() * detail::BASE_CLOCK_RATE_ADJUSTED;
-}
-
-[[nodiscard]] constexpr s64 usToCycles(std::chrono::microseconds us) {
- return us.count() * detail::BASE_CLOCK_RATE_ADJUSTED / 1000;
-}
-
-[[nodiscard]] constexpr s64 nsToCycles(std::chrono::nanoseconds ns) {
- return ns.count() * detail::BASE_CLOCK_RATE_ADJUSTED / 1000000;
-}
-
-[[nodiscard]] constexpr u64 msToClockCycles(std::chrono::milliseconds ms) {
- return static_cast<u64>(ms.count()) * detail::CNTFREQ_ADJUSTED;
-}
-
-[[nodiscard]] constexpr u64 usToClockCycles(std::chrono::microseconds us) {
- return us.count() * detail::CNTFREQ_ADJUSTED / 1000;
-}
-
-[[nodiscard]] constexpr u64 nsToClockCycles(std::chrono::nanoseconds ns) {
- return ns.count() * detail::CNTFREQ_ADJUSTED / 1000000;
-}
-
-[[nodiscard]] constexpr u64 CpuCyclesToClockCycles(u64 ticks) {
- return ticks * detail::CNTFREQ_ADJUSTED / detail::BASE_CLOCK_RATE_ADJUSTED;
-}
-
-[[nodiscard]] constexpr std::chrono::milliseconds CyclesToMs(s64 cycles) {
- return std::chrono::milliseconds(cycles / detail::BASE_CLOCK_RATE_ADJUSTED);
-}
-
-[[nodiscard]] constexpr std::chrono::nanoseconds CyclesToNs(s64 cycles) {
- return std::chrono::nanoseconds(cycles * 1000000 / detail::BASE_CLOCK_RATE_ADJUSTED);
-}
-
-[[nodiscard]] constexpr std::chrono::microseconds CyclesToUs(s64 cycles) {
- return std::chrono::microseconds(cycles * 1000 / detail::BASE_CLOCK_RATE_ADJUSTED);
-}
-
-} // namespace Core::Timing
diff --git a/src/core/file_sys/patch_manager.cpp b/src/core/file_sys/patch_manager.cpp
index 4e61d4335..d3286b352 100644
--- a/src/core/file_sys/patch_manager.cpp
+++ b/src/core/file_sys/patch_manager.cpp
@@ -153,7 +153,7 @@ VirtualDir PatchManager::PatchExeFS(VirtualDir exefs) const {
const auto sdmc_load_dir = fs_controller.GetSDMCModificationLoadRoot(title_id);
std::vector<VirtualDir> patch_dirs = {sdmc_load_dir};
- if (load_dir != nullptr && load_dir->GetSize() > 0) {
+ if (load_dir != nullptr) {
const auto load_patch_dirs = load_dir->GetSubdirectories();
patch_dirs.insert(patch_dirs.end(), load_patch_dirs.begin(), load_patch_dirs.end());
}
@@ -354,8 +354,7 @@ static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType t
const auto load_dir = fs_controller.GetModificationLoadRoot(title_id);
const auto sdmc_load_dir = fs_controller.GetSDMCModificationLoadRoot(title_id);
if ((type != ContentRecordType::Program && type != ContentRecordType::Data) ||
- ((load_dir == nullptr || load_dir->GetSize() <= 0) &&
- (sdmc_load_dir == nullptr || sdmc_load_dir->GetSize() <= 0))) {
+ (load_dir == nullptr && sdmc_load_dir == nullptr)) {
return;
}
@@ -496,7 +495,7 @@ PatchManager::PatchVersionNames PatchManager::GetPatchVersionNames(VirtualFile u
// General Mods (LayeredFS and IPS)
const auto mod_dir = fs_controller.GetModificationLoadRoot(title_id);
- if (mod_dir != nullptr && mod_dir->GetSize() > 0) {
+ if (mod_dir != nullptr) {
for (const auto& mod : mod_dir->GetSubdirectories()) {
std::string types;
@@ -540,7 +539,7 @@ PatchManager::PatchVersionNames PatchManager::GetPatchVersionNames(VirtualFile u
// SDMC mod directory (RomFS LayeredFS)
const auto sdmc_mod_dir = fs_controller.GetSDMCModificationLoadRoot(title_id);
- if (sdmc_mod_dir != nullptr && sdmc_mod_dir->GetSize() > 0) {
+ if (sdmc_mod_dir != nullptr) {
std::string types;
if (IsDirValidAndNonEmpty(FindSubdirectoryCaseless(sdmc_mod_dir, "exefs"))) {
AppendCommaIfNotEmpty(types, "LayeredExeFS");
diff --git a/src/core/file_sys/system_archive/time_zone_binary.cpp b/src/core/file_sys/system_archive/time_zone_binary.cpp
index ceb0b41c6..7c17bbefa 100644
--- a/src/core/file_sys/system_archive/time_zone_binary.cpp
+++ b/src/core/file_sys/system_archive/time_zone_binary.cpp
@@ -15,7 +15,7 @@ namespace FileSys::SystemArchive {
const static std::map<std::string, const std::map<const char*, const std::vector<u8>>&>
tzdb_zoneinfo_dirs = {{"Africa", NxTzdb::africa},
{"America", NxTzdb::america},
- {"Antartica", NxTzdb::antartica},
+ {"Antarctica", NxTzdb::antarctica},
{"Arctic", NxTzdb::arctic},
{"Asia", NxTzdb::asia},
{"Atlantic", NxTzdb::atlantic},
diff --git a/src/core/file_sys/vfs_concat.cpp b/src/core/file_sys/vfs_concat.cpp
index 853b893a1..311a59e5f 100644
--- a/src/core/file_sys/vfs_concat.cpp
+++ b/src/core/file_sys/vfs_concat.cpp
@@ -150,23 +150,29 @@ std::size_t ConcatenatedVfsFile::Read(u8* data, std::size_t length, std::size_t
while (cur_length > 0 && it != concatenation_map.end()) {
// Check if we can read the file at this position.
const auto& file = it->file;
- const u64 file_offset = it->offset;
+ const u64 map_offset = it->offset;
const u64 file_size = file->GetSize();
- if (cur_offset >= file_offset + file_size) {
+ if (cur_offset > map_offset + file_size) {
// Entirely out of bounds read.
break;
}
// Read the file at this position.
- const u64 intended_read_size = std::min<u64>(cur_length, file_size);
+ const u64 file_seek = cur_offset - map_offset;
+ const u64 intended_read_size = std::min<u64>(cur_length, file_size - file_seek);
const u64 actual_read_size =
- file->Read(data + (cur_offset - offset), intended_read_size, cur_offset - file_offset);
+ file->Read(data + (cur_offset - offset), intended_read_size, file_seek);
// Update tracking.
cur_offset += actual_read_size;
cur_length -= actual_read_size;
it++;
+
+ // If we encountered a short read, we're done.
+ if (actual_read_size < intended_read_size) {
+ break;
+ }
}
return cur_offset - offset;
diff --git a/src/core/file_sys/vfs_real.cpp b/src/core/file_sys/vfs_real.cpp
index 7a15d8438..b0515ec05 100644
--- a/src/core/file_sys/vfs_real.cpp
+++ b/src/core/file_sys/vfs_real.cpp
@@ -10,6 +10,7 @@
#include "common/fs/fs.h"
#include "common/fs/path_util.h"
#include "common/logging/log.h"
+#include "core/file_sys/vfs.h"
#include "core/file_sys/vfs_real.h"
// For FileTimeStampRaw
@@ -72,8 +73,10 @@ VfsEntryType RealVfsFilesystem::GetEntryType(std::string_view path_) const {
return VfsEntryType::File;
}
-VirtualFile RealVfsFilesystem::OpenFile(std::string_view path_, Mode perms) {
+VirtualFile RealVfsFilesystem::OpenFileFromEntry(std::string_view path_, std::optional<u64> size,
+ Mode perms) {
const auto path = FS::SanitizePath(path_, FS::DirectorySeparator::PlatformDefault);
+ std::scoped_lock lk{list_lock};
if (auto it = cache.find(path); it != cache.end()) {
if (auto file = it->second.lock(); file) {
@@ -81,23 +84,30 @@ VirtualFile RealVfsFilesystem::OpenFile(std::string_view path_, Mode perms) {
}
}
- if (!FS::Exists(path) || !FS::IsFile(path)) {
+ if (!size && !FS::IsFile(path)) {
return nullptr;
}
auto reference = std::make_unique<FileReference>();
- this->InsertReferenceIntoList(*reference);
+ this->InsertReferenceIntoListLocked(*reference);
- auto file =
- std::shared_ptr<RealVfsFile>(new RealVfsFile(*this, std::move(reference), path, perms));
+ auto file = std::shared_ptr<RealVfsFile>(
+ new RealVfsFile(*this, std::move(reference), path, perms, size));
cache[path] = file;
return file;
}
+VirtualFile RealVfsFilesystem::OpenFile(std::string_view path_, Mode perms) {
+ return OpenFileFromEntry(path_, {}, perms);
+}
+
VirtualFile RealVfsFilesystem::CreateFile(std::string_view path_, Mode perms) {
const auto path = FS::SanitizePath(path_, FS::DirectorySeparator::PlatformDefault);
- cache.erase(path);
+ {
+ std::scoped_lock lk{list_lock};
+ cache.erase(path);
+ }
// Current usages of CreateFile expect to delete the contents of an existing file.
if (FS::IsFile(path)) {
@@ -127,8 +137,11 @@ VirtualFile RealVfsFilesystem::CopyFile(std::string_view old_path_, std::string_
VirtualFile RealVfsFilesystem::MoveFile(std::string_view old_path_, std::string_view new_path_) {
const auto old_path = FS::SanitizePath(old_path_, FS::DirectorySeparator::PlatformDefault);
const auto new_path = FS::SanitizePath(new_path_, FS::DirectorySeparator::PlatformDefault);
- cache.erase(old_path);
- cache.erase(new_path);
+ {
+ std::scoped_lock lk{list_lock};
+ cache.erase(old_path);
+ cache.erase(new_path);
+ }
if (!FS::RenameFile(old_path, new_path)) {
return nullptr;
}
@@ -137,7 +150,10 @@ VirtualFile RealVfsFilesystem::MoveFile(std::string_view old_path_, std::string_
bool RealVfsFilesystem::DeleteFile(std::string_view path_) {
const auto path = FS::SanitizePath(path_, FS::DirectorySeparator::PlatformDefault);
- cache.erase(path);
+ {
+ std::scoped_lock lk{list_lock};
+ cache.erase(path);
+ }
return FS::RemoveFile(path);
}
@@ -176,14 +192,17 @@ bool RealVfsFilesystem::DeleteDirectory(std::string_view path_) {
return FS::RemoveDirRecursively(path);
}
-void RealVfsFilesystem::RefreshReference(const std::string& path, Mode perms,
- FileReference& reference) {
+std::unique_lock<std::mutex> RealVfsFilesystem::RefreshReference(const std::string& path,
+ Mode perms,
+ FileReference& reference) {
+ std::unique_lock lk{list_lock};
+
// Temporarily remove from list.
- this->RemoveReferenceFromList(reference);
+ this->RemoveReferenceFromListLocked(reference);
// Restore file if needed.
if (!reference.file) {
- this->EvictSingleReference();
+ this->EvictSingleReferenceLocked();
reference.file =
FS::FileOpen(path, ModeFlagsToFileAccessMode(perms), FS::FileType::BinaryFile);
@@ -193,12 +212,16 @@ void RealVfsFilesystem::RefreshReference(const std::string& path, Mode perms,
}
// Reinsert into list.
- this->InsertReferenceIntoList(reference);
+ this->InsertReferenceIntoListLocked(reference);
+
+ return lk;
}
void RealVfsFilesystem::DropReference(std::unique_ptr<FileReference>&& reference) {
+ std::scoped_lock lk{list_lock};
+
// Remove from list.
- this->RemoveReferenceFromList(*reference);
+ this->RemoveReferenceFromListLocked(*reference);
// Close the file.
if (reference->file) {
@@ -207,14 +230,14 @@ void RealVfsFilesystem::DropReference(std::unique_ptr<FileReference>&& reference
}
}
-void RealVfsFilesystem::EvictSingleReference() {
+void RealVfsFilesystem::EvictSingleReferenceLocked() {
if (num_open_files < MaxOpenFiles || open_references.empty()) {
return;
}
// Get and remove from list.
auto& reference = open_references.back();
- this->RemoveReferenceFromList(reference);
+ this->RemoveReferenceFromListLocked(reference);
// Close the file.
if (reference.file) {
@@ -223,10 +246,10 @@ void RealVfsFilesystem::EvictSingleReference() {
}
// Reinsert into closed list.
- this->InsertReferenceIntoList(reference);
+ this->InsertReferenceIntoListLocked(reference);
}
-void RealVfsFilesystem::InsertReferenceIntoList(FileReference& reference) {
+void RealVfsFilesystem::InsertReferenceIntoListLocked(FileReference& reference) {
if (reference.file) {
open_references.push_front(reference);
} else {
@@ -234,7 +257,7 @@ void RealVfsFilesystem::InsertReferenceIntoList(FileReference& reference) {
}
}
-void RealVfsFilesystem::RemoveReferenceFromList(FileReference& reference) {
+void RealVfsFilesystem::RemoveReferenceFromListLocked(FileReference& reference) {
if (reference.file) {
open_references.erase(open_references.iterator_to(reference));
} else {
@@ -243,10 +266,10 @@ void RealVfsFilesystem::RemoveReferenceFromList(FileReference& reference) {
}
RealVfsFile::RealVfsFile(RealVfsFilesystem& base_, std::unique_ptr<FileReference> reference_,
- const std::string& path_, Mode perms_)
+ const std::string& path_, Mode perms_, std::optional<u64> size_)
: base(base_), reference(std::move(reference_)), path(path_),
parent_path(FS::GetParentPath(path_)), path_components(FS::SplitPathComponents(path_)),
- perms(perms_) {}
+ size(size_), perms(perms_) {}
RealVfsFile::~RealVfsFile() {
base.DropReference(std::move(reference));
@@ -257,12 +280,15 @@ std::string RealVfsFile::GetName() const {
}
std::size_t RealVfsFile::GetSize() const {
- base.RefreshReference(path, perms, *reference);
- return reference->file ? reference->file->GetSize() : 0;
+ if (size) {
+ return *size;
+ }
+ return FS::GetSize(path);
}
bool RealVfsFile::Resize(std::size_t new_size) {
- base.RefreshReference(path, perms, *reference);
+ size.reset();
+ auto lk = base.RefreshReference(path, perms, *reference);
return reference->file ? reference->file->SetSize(new_size) : false;
}
@@ -279,7 +305,7 @@ bool RealVfsFile::IsReadable() const {
}
std::size_t RealVfsFile::Read(u8* data, std::size_t length, std::size_t offset) const {
- base.RefreshReference(path, perms, *reference);
+ auto lk = base.RefreshReference(path, perms, *reference);
if (!reference->file || !reference->file->Seek(static_cast<s64>(offset))) {
return 0;
}
@@ -287,7 +313,8 @@ std::size_t RealVfsFile::Read(u8* data, std::size_t length, std::size_t offset)
}
std::size_t RealVfsFile::Write(const u8* data, std::size_t length, std::size_t offset) {
- base.RefreshReference(path, perms, *reference);
+ size.reset();
+ auto lk = base.RefreshReference(path, perms, *reference);
if (!reference->file || !reference->file->Seek(static_cast<s64>(offset))) {
return 0;
}
@@ -309,10 +336,11 @@ std::vector<VirtualFile> RealVfsDirectory::IterateEntries<RealVfsFile, VfsFile>(
std::vector<VirtualFile> out;
- const FS::DirEntryCallable callback = [this, &out](const std::filesystem::path& full_path) {
- const auto full_path_string = FS::PathToUTF8String(full_path);
+ const FS::DirEntryCallable callback = [this,
+ &out](const std::filesystem::directory_entry& entry) {
+ const auto full_path_string = FS::PathToUTF8String(entry.path());
- out.emplace_back(base.OpenFile(full_path_string, perms));
+ out.emplace_back(base.OpenFileFromEntry(full_path_string, entry.file_size(), perms));
return true;
};
@@ -330,8 +358,9 @@ std::vector<VirtualDir> RealVfsDirectory::IterateEntries<RealVfsDirectory, VfsDi
std::vector<VirtualDir> out;
- const FS::DirEntryCallable callback = [this, &out](const std::filesystem::path& full_path) {
- const auto full_path_string = FS::PathToUTF8String(full_path);
+ const FS::DirEntryCallable callback = [this,
+ &out](const std::filesystem::directory_entry& entry) {
+ const auto full_path_string = FS::PathToUTF8String(entry.path());
out.emplace_back(base.OpenDirectory(full_path_string, perms));
@@ -483,12 +512,10 @@ std::map<std::string, VfsEntryType, std::less<>> RealVfsDirectory::GetEntries()
std::map<std::string, VfsEntryType, std::less<>> out;
- const FS::DirEntryCallable callback = [&out](const std::filesystem::path& full_path) {
- const auto filename = FS::PathToUTF8String(full_path.filename());
-
+ const FS::DirEntryCallable callback = [&out](const std::filesystem::directory_entry& entry) {
+ const auto filename = FS::PathToUTF8String(entry.path().filename());
out.insert_or_assign(filename,
- FS::IsDir(full_path) ? VfsEntryType::Directory : VfsEntryType::File);
-
+ entry.is_directory() ? VfsEntryType::Directory : VfsEntryType::File);
return true;
};
diff --git a/src/core/file_sys/vfs_real.h b/src/core/file_sys/vfs_real.h
index d8c900e33..26ea7df62 100644
--- a/src/core/file_sys/vfs_real.h
+++ b/src/core/file_sys/vfs_real.h
@@ -4,6 +4,8 @@
#pragma once
#include <map>
+#include <mutex>
+#include <optional>
#include <string_view>
#include "common/intrusive_list.h"
#include "core/file_sys/mode.h"
@@ -20,6 +22,8 @@ struct FileReference : public Common::IntrusiveListBaseNode<FileReference> {
};
class RealVfsFile;
+class RealVfsDirectory;
+
class RealVfsFilesystem : public VfsFilesystem {
public:
RealVfsFilesystem();
@@ -45,17 +49,24 @@ private:
std::map<std::string, std::weak_ptr<VfsFile>, std::less<>> cache;
ReferenceListType open_references;
ReferenceListType closed_references;
+ std::mutex list_lock;
size_t num_open_files{};
private:
friend class RealVfsFile;
- void RefreshReference(const std::string& path, Mode perms, FileReference& reference);
+ std::unique_lock<std::mutex> RefreshReference(const std::string& path, Mode perms,
+ FileReference& reference);
void DropReference(std::unique_ptr<FileReference>&& reference);
- void EvictSingleReference();
private:
- void InsertReferenceIntoList(FileReference& reference);
- void RemoveReferenceFromList(FileReference& reference);
+ friend class RealVfsDirectory;
+ VirtualFile OpenFileFromEntry(std::string_view path, std::optional<u64> size,
+ Mode perms = Mode::Read);
+
+private:
+ void EvictSingleReferenceLocked();
+ void InsertReferenceIntoListLocked(FileReference& reference);
+ void RemoveReferenceFromListLocked(FileReference& reference);
};
// An implementation of VfsFile that represents a file on the user's computer.
@@ -78,13 +89,14 @@ public:
private:
RealVfsFile(RealVfsFilesystem& base, std::unique_ptr<FileReference> reference,
- const std::string& path, Mode perms = Mode::Read);
+ const std::string& path, Mode perms = Mode::Read, std::optional<u64> size = {});
RealVfsFilesystem& base;
std::unique_ptr<FileReference> reference;
std::string path;
std::string parent_path;
std::vector<std::string> path_components;
+ std::optional<u64> size;
Mode perms;
};
diff --git a/src/core/hle/kernel/k_scheduler.cpp b/src/core/hle/kernel/k_scheduler.cpp
index faa12b4f0..75ce5a23c 100644
--- a/src/core/hle/kernel/k_scheduler.cpp
+++ b/src/core/hle/kernel/k_scheduler.cpp
@@ -184,7 +184,8 @@ u64 KScheduler::UpdateHighestPriorityThread(KThread* highest_thread) {
prev_highest_thread != highest_thread) [[likely]] {
if (prev_highest_thread != nullptr) [[likely]] {
IncrementScheduledCount(prev_highest_thread);
- prev_highest_thread->SetLastScheduledTick(m_kernel.System().CoreTiming().GetCPUTicks());
+ prev_highest_thread->SetLastScheduledTick(
+ m_kernel.System().CoreTiming().GetClockTicks());
}
if (m_state.should_count_idle) {
if (highest_thread != nullptr) [[likely]] {
@@ -351,7 +352,7 @@ void KScheduler::SwitchThread(KThread* next_thread) {
// Update the CPU time tracking variables.
const s64 prev_tick = m_last_context_switch_time;
- const s64 cur_tick = m_kernel.System().CoreTiming().GetCPUTicks();
+ const s64 cur_tick = m_kernel.System().CoreTiming().GetClockTicks();
const s64 tick_diff = cur_tick - prev_tick;
cur_thread->AddCpuTime(m_core_id, tick_diff);
if (cur_process != nullptr) {
diff --git a/src/core/hle/kernel/k_synchronization_object.cpp b/src/core/hle/kernel/k_synchronization_object.cpp
index b7da3eee7..3e5b735b1 100644
--- a/src/core/hle/kernel/k_synchronization_object.cpp
+++ b/src/core/hle/kernel/k_synchronization_object.cpp
@@ -3,6 +3,7 @@
#include "common/assert.h"
#include "common/common_types.h"
+#include "common/scratch_buffer.h"
#include "core/hle/kernel/k_scheduler.h"
#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h"
#include "core/hle/kernel/k_synchronization_object.h"
@@ -75,7 +76,7 @@ Result KSynchronizationObject::Wait(KernelCore& kernel, s32* out_index,
KSynchronizationObject** objects, const s32 num_objects,
s64 timeout) {
// Allocate space on stack for thread nodes.
- std::vector<ThreadListNode> thread_nodes(num_objects);
+ std::array<ThreadListNode, Svc::ArgumentHandleCountMax> thread_nodes;
// Prepare for wait.
KThread* thread = GetCurrentThreadPointer(kernel);
diff --git a/src/core/hle/kernel/k_thread.cpp b/src/core/hle/kernel/k_thread.cpp
index 70480b725..adb6ec581 100644
--- a/src/core/hle/kernel/k_thread.cpp
+++ b/src/core/hle/kernel/k_thread.cpp
@@ -4,6 +4,8 @@
#include <algorithm>
#include <atomic>
#include <cinttypes>
+#include <condition_variable>
+#include <mutex>
#include <optional>
#include <vector>
@@ -907,7 +909,7 @@ Result KThread::SetActivity(Svc::ThreadActivity activity) {
R_SUCCEED();
}
-Result KThread::GetThreadContext3(std::vector<u8>& out) {
+Result KThread::GetThreadContext3(Common::ScratchBuffer<u8>& out) {
// Lock ourselves.
KScopedLightLock lk{m_activity_pause_lock};
@@ -925,15 +927,13 @@ Result KThread::GetThreadContext3(std::vector<u8>& out) {
// Mask away mode bits, interrupt bits, IL bit, and other reserved bits.
auto context = GetContext64();
context.pstate &= 0xFF0FFE20;
-
- out.resize(sizeof(context));
+ out.resize_destructive(sizeof(context));
std::memcpy(out.data(), std::addressof(context), sizeof(context));
} else {
// Mask away mode bits, interrupt bits, IL bit, and other reserved bits.
auto context = GetContext32();
context.cpsr &= 0xFF0FFE20;
-
- out.resize(sizeof(context));
+ out.resize_destructive(sizeof(context));
std::memcpy(out.data(), std::addressof(context), sizeof(context));
}
}
@@ -1313,7 +1313,8 @@ void KThread::RequestDummyThreadWait() {
ASSERT(this->IsDummyThread());
// We will block when the scheduler lock is released.
- m_dummy_thread_runnable.store(false);
+ std::scoped_lock lock{m_dummy_thread_mutex};
+ m_dummy_thread_runnable = false;
}
void KThread::DummyThreadBeginWait() {
@@ -1323,7 +1324,8 @@ void KThread::DummyThreadBeginWait() {
}
// Block until runnable is no longer false.
- m_dummy_thread_runnable.wait(false);
+ std::unique_lock lock{m_dummy_thread_mutex};
+ m_dummy_thread_cv.wait(lock, [this] { return m_dummy_thread_runnable; });
}
void KThread::DummyThreadEndWait() {
@@ -1331,8 +1333,11 @@ void KThread::DummyThreadEndWait() {
ASSERT(this->IsDummyThread());
// Wake up the waiting thread.
- m_dummy_thread_runnable.store(true);
- m_dummy_thread_runnable.notify_one();
+ {
+ std::scoped_lock lock{m_dummy_thread_mutex};
+ m_dummy_thread_runnable = true;
+ }
+ m_dummy_thread_cv.notify_one();
}
void KThread::BeginWait(KThreadQueue* queue) {
diff --git a/src/core/hle/kernel/k_thread.h b/src/core/hle/kernel/k_thread.h
index f9814ac8f..dd662b3f8 100644
--- a/src/core/hle/kernel/k_thread.h
+++ b/src/core/hle/kernel/k_thread.h
@@ -15,6 +15,7 @@
#include "common/intrusive_list.h"
#include "common/intrusive_red_black_tree.h"
+#include "common/scratch_buffer.h"
#include "common/spin_lock.h"
#include "core/arm/arm_interface.h"
#include "core/hle/kernel/k_affinity_mask.h"
@@ -567,7 +568,7 @@ public:
void RemoveWaiter(KThread* thread);
- Result GetThreadContext3(std::vector<u8>& out);
+ Result GetThreadContext3(Common::ScratchBuffer<u8>& out);
KThread* RemoveUserWaiterByKey(bool* out_has_waiters, KProcessAddress key) {
return this->RemoveWaiterByKey(out_has_waiters, key, false);
@@ -892,7 +893,9 @@ private:
std::shared_ptr<Common::Fiber> m_host_context{};
ThreadType m_thread_type{};
StepState m_step_state{};
- std::atomic<bool> m_dummy_thread_runnable{true};
+ bool m_dummy_thread_runnable{true};
+ std::mutex m_dummy_thread_mutex{};
+ std::condition_variable m_dummy_thread_cv{};
// For debugging
std::vector<KSynchronizationObject*> m_wait_objects_for_debugging{};
diff --git a/src/core/hle/kernel/svc/svc_info.cpp b/src/core/hle/kernel/svc/svc_info.cpp
index 2b2c878b5..445cdd87b 100644
--- a/src/core/hle/kernel/svc/svc_info.cpp
+++ b/src/core/hle/kernel/svc/svc_info.cpp
@@ -199,9 +199,9 @@ Result GetInfo(Core::System& system, u64* result, InfoType info_id_type, Handle
if (same_thread && info_sub_id == 0xFFFFFFFFFFFFFFFF) {
const u64 thread_ticks = current_thread->GetCpuTime();
- out_ticks = thread_ticks + (core_timing.GetCPUTicks() - prev_ctx_ticks);
+ out_ticks = thread_ticks + (core_timing.GetClockTicks() - prev_ctx_ticks);
} else if (same_thread && info_sub_id == system.Kernel().CurrentPhysicalCoreIndex()) {
- out_ticks = core_timing.GetCPUTicks() - prev_ctx_ticks;
+ out_ticks = core_timing.GetClockTicks() - prev_ctx_ticks;
}
*result = out_ticks;
diff --git a/src/core/hle/kernel/svc/svc_ipc.cpp b/src/core/hle/kernel/svc/svc_ipc.cpp
index ea03068aa..60247df2e 100644
--- a/src/core/hle/kernel/svc/svc_ipc.cpp
+++ b/src/core/hle/kernel/svc/svc_ipc.cpp
@@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/scope_exit.h"
+#include "common/scratch_buffer.h"
#include "core/core.h"
#include "core/hle/kernel/k_client_session.h"
#include "core/hle/kernel/k_process.h"
@@ -45,11 +46,11 @@ Result ReplyAndReceive(Core::System& system, s32* out_index, uint64_t handles_ad
handles_addr, static_cast<u64>(sizeof(Handle) * num_handles)),
ResultInvalidPointer);
- std::vector<Handle> handles(num_handles);
+ std::array<Handle, Svc::ArgumentHandleCountMax> handles;
GetCurrentMemory(kernel).ReadBlock(handles_addr, handles.data(), sizeof(Handle) * num_handles);
// Convert handle list to object table.
- std::vector<KSynchronizationObject*> objs(num_handles);
+ std::array<KSynchronizationObject*, Svc::ArgumentHandleCountMax> objs;
R_UNLESS(handle_table.GetMultipleObjects<KSynchronizationObject>(objs.data(), handles.data(),
num_handles),
ResultInvalidHandle);
@@ -80,7 +81,7 @@ Result ReplyAndReceive(Core::System& system, s32* out_index, uint64_t handles_ad
// Wait for an object.
s32 index;
Result result = KSynchronizationObject::Wait(kernel, std::addressof(index), objs.data(),
- static_cast<s32>(objs.size()), timeout_ns);
+ num_handles, timeout_ns);
if (result == ResultTimedOut) {
R_RETURN(result);
}
diff --git a/src/core/hle/kernel/svc/svc_synchronization.cpp b/src/core/hle/kernel/svc/svc_synchronization.cpp
index 04d65f0bd..53df5bcd8 100644
--- a/src/core/hle/kernel/svc/svc_synchronization.cpp
+++ b/src/core/hle/kernel/svc/svc_synchronization.cpp
@@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/scope_exit.h"
+#include "common/scratch_buffer.h"
#include "core/core.h"
#include "core/hle/kernel/k_process.h"
#include "core/hle/kernel/k_readable_event.h"
@@ -54,7 +55,7 @@ static Result WaitSynchronization(Core::System& system, int32_t* out_index, cons
// Get the synchronization context.
auto& kernel = system.Kernel();
auto& handle_table = GetCurrentProcess(kernel).GetHandleTable();
- std::vector<KSynchronizationObject*> objs(num_handles);
+ std::array<KSynchronizationObject*, Svc::ArgumentHandleCountMax> objs;
// Copy user handles.
if (num_handles > 0) {
@@ -72,8 +73,8 @@ static Result WaitSynchronization(Core::System& system, int32_t* out_index, cons
});
// Wait on the objects.
- Result res = KSynchronizationObject::Wait(kernel, out_index, objs.data(),
- static_cast<s32>(objs.size()), timeout_ns);
+ Result res =
+ KSynchronizationObject::Wait(kernel, out_index, objs.data(), num_handles, timeout_ns);
R_SUCCEED_IF(res == ResultSessionClosed);
R_RETURN(res);
@@ -87,8 +88,7 @@ Result WaitSynchronization(Core::System& system, int32_t* out_index, u64 user_ha
// Ensure number of handles is valid.
R_UNLESS(0 <= num_handles && num_handles <= Svc::ArgumentHandleCountMax, ResultOutOfRange);
-
- std::vector<Handle> handles(num_handles);
+ std::array<Handle, Svc::ArgumentHandleCountMax> handles;
if (num_handles > 0) {
GetCurrentMemory(system.Kernel())
.ReadBlock(user_handles, handles.data(), num_handles * sizeof(Handle));
diff --git a/src/core/hle/kernel/svc/svc_thread.cpp b/src/core/hle/kernel/svc/svc_thread.cpp
index 37b54079c..36b94e6bf 100644
--- a/src/core/hle/kernel/svc/svc_thread.cpp
+++ b/src/core/hle/kernel/svc/svc_thread.cpp
@@ -174,7 +174,7 @@ Result GetThreadContext3(Core::System& system, u64 out_context, Handle thread_ha
}
// Get the thread context.
- std::vector<u8> context;
+ static thread_local Common::ScratchBuffer<u8> context;
R_TRY(thread->GetThreadContext3(context));
// Copy the thread context to user space.
diff --git a/src/core/hle/kernel/svc/svc_tick.cpp b/src/core/hle/kernel/svc/svc_tick.cpp
index 561336482..7dd7c6e51 100644
--- a/src/core/hle/kernel/svc/svc_tick.cpp
+++ b/src/core/hle/kernel/svc/svc_tick.cpp
@@ -12,16 +12,8 @@ namespace Kernel::Svc {
int64_t GetSystemTick(Core::System& system) {
LOG_TRACE(Kernel_SVC, "called");
- auto& core_timing = system.CoreTiming();
-
// Returns the value of cntpct_el0 (https://switchbrew.org/wiki/SVC#svcGetSystemTick)
- const u64 result{core_timing.GetClockTicks()};
-
- if (!system.Kernel().IsMulticore()) {
- core_timing.AddTicks(400U);
- }
-
- return static_cast<int64_t>(result);
+ return static_cast<int64_t>(system.CoreTiming().GetClockTicks());
}
int64_t GetSystemTick64(Core::System& system) {
diff --git a/src/core/hle/service/audio/audin_u.cpp b/src/core/hle/service/audio/audin_u.cpp
index f0640c64f..c8d574993 100644
--- a/src/core/hle/service/audio/audin_u.cpp
+++ b/src/core/hle/service/audio/audin_u.cpp
@@ -5,6 +5,7 @@
#include "audio_core/renderer/audio_device.h"
#include "common/common_funcs.h"
#include "common/logging/log.h"
+#include "common/settings.h"
#include "common/string_util.h"
#include "core/core.h"
#include "core/hle/kernel/k_event.h"
@@ -123,19 +124,13 @@ private:
void GetReleasedAudioInBuffer(HLERequestContext& ctx) {
const auto write_buffer_size = ctx.GetWriteBufferNumElements<u64>();
- std::vector<u64> released_buffers(write_buffer_size);
+ tmp_buffer.resize_destructive(write_buffer_size);
+ tmp_buffer[0] = 0;
- const auto count = impl->GetReleasedBuffers(released_buffers);
+ const auto count = impl->GetReleasedBuffers(tmp_buffer);
- [[maybe_unused]] std::string tags{};
- for (u32 i = 0; i < count; i++) {
- tags += fmt::format("{:08X}, ", released_buffers[i]);
- }
- [[maybe_unused]] auto sessionid{impl->GetSystem().GetSessionId()};
- LOG_TRACE(Service_Audio, "called. Session {} released {} buffers: {}", sessionid, count,
- tags);
+ ctx.WriteBuffer(tmp_buffer);
- ctx.WriteBuffer(released_buffers);
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(ResultSuccess);
rb.Push(count);
@@ -200,6 +195,7 @@ private:
KernelHelpers::ServiceContext service_context;
Kernel::KEvent* event;
std::shared_ptr<AudioCore::AudioIn::In> impl;
+ Common::ScratchBuffer<u64> tmp_buffer;
};
AudInU::AudInU(Core::System& system_)
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp
index 3e62fa4fc..032c8c11f 100644
--- a/src/core/hle/service/audio/audout_u.cpp
+++ b/src/core/hle/service/audio/audout_u.cpp
@@ -123,19 +123,13 @@ private:
void GetReleasedAudioOutBuffers(HLERequestContext& ctx) {
const auto write_buffer_size = ctx.GetWriteBufferNumElements<u64>();
- std::vector<u64> released_buffers(write_buffer_size);
+ tmp_buffer.resize_destructive(write_buffer_size);
+ tmp_buffer[0] = 0;
- const auto count = impl->GetReleasedBuffers(released_buffers);
+ const auto count = impl->GetReleasedBuffers(tmp_buffer);
- [[maybe_unused]] std::string tags{};
- for (u32 i = 0; i < count; i++) {
- tags += fmt::format("{:08X}, ", released_buffers[i]);
- }
- [[maybe_unused]] const auto sessionid{impl->GetSystem().GetSessionId()};
- LOG_TRACE(Service_Audio, "called. Session {} released {} buffers: {}", sessionid, count,
- tags);
+ ctx.WriteBuffer(tmp_buffer);
- ctx.WriteBuffer(released_buffers);
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(ResultSuccess);
rb.Push(count);
@@ -211,6 +205,7 @@ private:
KernelHelpers::ServiceContext service_context;
Kernel::KEvent* event;
std::shared_ptr<AudioCore::AudioOut::Out> impl;
+ Common::ScratchBuffer<u64> tmp_buffer;
};
AudOutU::AudOutU(Core::System& system_)
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index 7086d4750..12845c23a 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -116,28 +116,26 @@ private:
// These buffers are written manually to avoid an issue with WriteBuffer throwing errors for
// checking size 0. Performance size is 0 for most games.
- std::vector<u8> output{};
- std::vector<u8> performance{};
auto is_buffer_b{ctx.BufferDescriptorB()[0].Size() != 0};
if (is_buffer_b) {
const auto buffersB{ctx.BufferDescriptorB()};
- output.resize(buffersB[0].Size(), 0);
- performance.resize(buffersB[1].Size(), 0);
+ tmp_output.resize_destructive(buffersB[0].Size());
+ tmp_performance.resize_destructive(buffersB[1].Size());
} else {
const auto buffersC{ctx.BufferDescriptorC()};
- output.resize(buffersC[0].Size(), 0);
- performance.resize(buffersC[1].Size(), 0);
+ tmp_output.resize_destructive(buffersC[0].Size());
+ tmp_performance.resize_destructive(buffersC[1].Size());
}
- auto result = impl->RequestUpdate(input, performance, output);
+ auto result = impl->RequestUpdate(input, tmp_performance, tmp_output);
if (result.IsSuccess()) {
if (is_buffer_b) {
- ctx.WriteBufferB(output.data(), output.size(), 0);
- ctx.WriteBufferB(performance.data(), performance.size(), 1);
+ ctx.WriteBufferB(tmp_output.data(), tmp_output.size(), 0);
+ ctx.WriteBufferB(tmp_performance.data(), tmp_performance.size(), 1);
} else {
- ctx.WriteBufferC(output.data(), output.size(), 0);
- ctx.WriteBufferC(performance.data(), performance.size(), 1);
+ ctx.WriteBufferC(tmp_output.data(), tmp_output.size(), 0);
+ ctx.WriteBufferC(tmp_performance.data(), tmp_performance.size(), 1);
}
} else {
LOG_ERROR(Service_Audio, "RequestUpdate failed error 0x{:02X}!", result.description);
@@ -235,6 +233,8 @@ private:
Kernel::KEvent* rendered_event;
Manager& manager;
std::unique_ptr<Renderer> impl;
+ Common::ScratchBuffer<u8> tmp_output;
+ Common::ScratchBuffer<u8> tmp_performance;
};
class IAudioDevice final : public ServiceFramework<IAudioDevice> {
diff --git a/src/core/hle/service/audio/audren_u.h b/src/core/hle/service/audio/audren_u.h
index 24ce37e87..d8e9c8719 100644
--- a/src/core/hle/service/audio/audren_u.h
+++ b/src/core/hle/service/audio/audren_u.h
@@ -4,6 +4,7 @@
#pragma once
#include "audio_core/audio_render_manager.h"
+#include "common/scratch_buffer.h"
#include "core/hle/service/kernel_helpers.h"
#include "core/hle/service/service.h"
diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp
index 451ac224a..c835f6cb7 100644
--- a/src/core/hle/service/audio/hwopus.cpp
+++ b/src/core/hle/service/audio/hwopus.cpp
@@ -68,13 +68,13 @@ private:
ExtraBehavior extra_behavior) {
u32 consumed = 0;
u32 sample_count = 0;
- std::vector<opus_int16> samples(ctx.GetWriteBufferNumElements<opus_int16>());
+ tmp_samples.resize_destructive(ctx.GetWriteBufferNumElements<opus_int16>());
if (extra_behavior == ExtraBehavior::ResetContext) {
ResetDecoderContext();
}
- if (!DecodeOpusData(consumed, sample_count, ctx.ReadBuffer(), samples, performance)) {
+ if (!DecodeOpusData(consumed, sample_count, ctx.ReadBuffer(), tmp_samples, performance)) {
LOG_ERROR(Audio, "Failed to decode opus data");
IPC::ResponseBuilder rb{ctx, 2};
// TODO(ogniK): Use correct error code
@@ -90,11 +90,11 @@ private:
if (performance) {
rb.Push<u64>(*performance);
}
- ctx.WriteBuffer(samples);
+ ctx.WriteBuffer(tmp_samples);
}
bool DecodeOpusData(u32& consumed, u32& sample_count, std::span<const u8> input,
- std::vector<opus_int16>& output, u64* out_performance_time) const {
+ std::span<opus_int16> output, u64* out_performance_time) const {
const auto start_time = std::chrono::steady_clock::now();
const std::size_t raw_output_sz = output.size() * sizeof(opus_int16);
if (sizeof(OpusPacketHeader) > input.size()) {
@@ -154,6 +154,7 @@ private:
OpusDecoderPtr decoder;
u32 sample_rate;
u32 channel_count;
+ Common::ScratchBuffer<opus_int16> tmp_samples;
};
class IHardwareOpusDecoderManager final : public ServiceFramework<IHardwareOpusDecoderManager> {
diff --git a/src/core/hle/service/hid/hidbus.cpp b/src/core/hle/service/hid/hidbus.cpp
index 5604a6fda..80aac221b 100644
--- a/src/core/hle/service/hid/hidbus.cpp
+++ b/src/core/hle/service/hid/hidbus.cpp
@@ -5,7 +5,6 @@
#include "common/settings.h"
#include "core/core.h"
#include "core/core_timing.h"
-#include "core/core_timing_util.h"
#include "core/hid/hid_types.h"
#include "core/hle/kernel/k_event.h"
#include "core/hle/kernel/k_readable_event.h"
diff --git a/src/core/hle/service/nvdrv/devices/nvdevice.h b/src/core/hle/service/nvdrv/devices/nvdevice.h
index ab1f30f9e..a04538d5d 100644
--- a/src/core/hle/service/nvdrv/devices/nvdevice.h
+++ b/src/core/hle/service/nvdrv/devices/nvdevice.h
@@ -34,7 +34,7 @@ public:
* @returns The result code of the ioctl.
*/
virtual NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::vector<u8>& output) = 0;
+ std::span<u8> output) = 0;
/**
* Handles an ioctl2 request.
@@ -45,7 +45,7 @@ public:
* @returns The result code of the ioctl.
*/
virtual NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::span<const u8> inline_input, std::vector<u8>& output) = 0;
+ std::span<const u8> inline_input, std::span<u8> output) = 0;
/**
* Handles an ioctl3 request.
@@ -56,7 +56,7 @@ public:
* @returns The result code of the ioctl.
*/
virtual NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::vector<u8>& output, std::vector<u8>& inline_output) = 0;
+ std::span<u8> output, std::span<u8> inline_output) = 0;
/**
* Called once a device is opened
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
index 5a5b2e305..05a43d8dc 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -18,19 +18,19 @@ nvdisp_disp0::nvdisp_disp0(Core::System& system_, NvCore::Container& core)
nvdisp_disp0::~nvdisp_disp0() = default;
NvResult nvdisp_disp0::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::vector<u8>& output) {
+ std::span<u8> output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
NvResult nvdisp_disp0::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::span<const u8> inline_input, std::vector<u8>& output) {
+ std::span<const u8> inline_input, std::span<u8> output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
NvResult nvdisp_disp0::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::vector<u8>& output, std::vector<u8>& inline_output) {
+ std::span<u8> output, std::span<u8> inline_output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
@@ -51,8 +51,8 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat form
stride, format, transform, crop_rect};
system.GPU().RequestSwapBuffers(&framebuffer, fences, num_fences);
- system.GetPerfStats().EndSystemFrame();
system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs());
+ system.GetPerfStats().EndSystemFrame();
system.GetPerfStats().BeginSystemFrame();
}
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
index bcd0e3ed5..daee05fe8 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
@@ -26,11 +26,11 @@ public:
~nvdisp_disp0() override;
NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::vector<u8>& output) override;
+ std::span<u8> output) override;
NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::span<const u8> inline_input, std::vector<u8>& output) override;
- NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output,
- std::vector<u8>& inline_output) override;
+ std::span<const u8> inline_input, std::span<u8> output) override;
+ NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
+ std::span<u8> inline_output) override;
void OnOpen(DeviceFD fd) override;
void OnClose(DeviceFD fd) override;
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index 681bd0867..07e570a9f 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -28,7 +28,7 @@ nvhost_as_gpu::nvhost_as_gpu(Core::System& system_, Module& module_, NvCore::Con
nvhost_as_gpu::~nvhost_as_gpu() = default;
NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::vector<u8>& output) {
+ std::span<u8> output) {
switch (command.group) {
case 'A':
switch (command.cmd) {
@@ -61,13 +61,13 @@ NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> i
}
NvResult nvhost_as_gpu::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::span<const u8> inline_input, std::vector<u8>& output) {
+ std::span<const u8> inline_input, std::span<u8> output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
NvResult nvhost_as_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::vector<u8>& output, std::vector<u8>& inline_output) {
+ std::span<u8> output, std::span<u8> inline_output) {
switch (command.group) {
case 'A':
switch (command.cmd) {
@@ -87,7 +87,7 @@ NvResult nvhost_as_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> i
void nvhost_as_gpu::OnOpen(DeviceFD fd) {}
void nvhost_as_gpu::OnClose(DeviceFD fd) {}
-NvResult nvhost_as_gpu::AllocAsEx(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_as_gpu::AllocAsEx(std::span<const u8> input, std::span<u8> output) {
IoctlAllocAsEx params{};
std::memcpy(&params, input.data(), input.size());
@@ -141,7 +141,7 @@ NvResult nvhost_as_gpu::AllocAsEx(std::span<const u8> input, std::vector<u8>& ou
return NvResult::Success;
}
-NvResult nvhost_as_gpu::AllocateSpace(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_as_gpu::AllocateSpace(std::span<const u8> input, std::span<u8> output) {
IoctlAllocSpace params{};
std::memcpy(&params, input.data(), input.size());
@@ -220,7 +220,7 @@ void nvhost_as_gpu::FreeMappingLocked(u64 offset) {
mapping_map.erase(offset);
}
-NvResult nvhost_as_gpu::FreeSpace(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_as_gpu::FreeSpace(std::span<const u8> input, std::span<u8> output) {
IoctlFreeSpace params{};
std::memcpy(&params, input.data(), input.size());
@@ -266,15 +266,14 @@ NvResult nvhost_as_gpu::FreeSpace(std::span<const u8> input, std::vector<u8>& ou
return NvResult::Success;
}
-NvResult nvhost_as_gpu::Remap(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_as_gpu::Remap(std::span<const u8> input, std::span<u8> output) {
const auto num_entries = input.size() / sizeof(IoctlRemapEntry);
LOG_DEBUG(Service_NVDRV, "called, num_entries=0x{:X}", num_entries);
- std::vector<IoctlRemapEntry> entries(num_entries);
- std::memcpy(entries.data(), input.data(), input.size());
-
std::scoped_lock lock(mutex);
+ entries.resize_destructive(num_entries);
+ std::memcpy(entries.data(), input.data(), input.size());
if (!vm.initialised) {
return NvResult::BadValue;
@@ -320,7 +319,7 @@ NvResult nvhost_as_gpu::Remap(std::span<const u8> input, std::vector<u8>& output
return NvResult::Success;
}
-NvResult nvhost_as_gpu::MapBufferEx(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_as_gpu::MapBufferEx(std::span<const u8> input, std::span<u8> output) {
IoctlMapBufferEx params{};
std::memcpy(&params, input.data(), input.size());
@@ -424,7 +423,7 @@ NvResult nvhost_as_gpu::MapBufferEx(std::span<const u8> input, std::vector<u8>&
return NvResult::Success;
}
-NvResult nvhost_as_gpu::UnmapBuffer(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_as_gpu::UnmapBuffer(std::span<const u8> input, std::span<u8> output) {
IoctlUnmapBuffer params{};
std::memcpy(&params, input.data(), input.size());
@@ -463,7 +462,7 @@ NvResult nvhost_as_gpu::UnmapBuffer(std::span<const u8> input, std::vector<u8>&
return NvResult::Success;
}
-NvResult nvhost_as_gpu::BindChannel(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_as_gpu::BindChannel(std::span<const u8> input, std::span<u8> output) {
IoctlBindChannel params{};
std::memcpy(&params, input.data(), input.size());
LOG_DEBUG(Service_NVDRV, "called, fd={:X}", params.fd);
@@ -492,7 +491,7 @@ void nvhost_as_gpu::GetVARegionsImpl(IoctlGetVaRegions& params) {
};
}
-NvResult nvhost_as_gpu::GetVARegions(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_as_gpu::GetVARegions(std::span<const u8> input, std::span<u8> output) {
IoctlGetVaRegions params{};
std::memcpy(&params, input.data(), input.size());
@@ -511,8 +510,8 @@ NvResult nvhost_as_gpu::GetVARegions(std::span<const u8> input, std::vector<u8>&
return NvResult::Success;
}
-NvResult nvhost_as_gpu::GetVARegions(std::span<const u8> input, std::vector<u8>& output,
- std::vector<u8>& inline_output) {
+NvResult nvhost_as_gpu::GetVARegions(std::span<const u8> input, std::span<u8> output,
+ std::span<u8> inline_output) {
IoctlGetVaRegions params{};
std::memcpy(&params, input.data(), input.size());
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
index 1aba8d579..2af3e1260 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
@@ -15,6 +15,7 @@
#include "common/address_space.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
+#include "common/scratch_buffer.h"
#include "common/swap.h"
#include "core/hle/service/nvdrv/core/nvmap.h"
#include "core/hle/service/nvdrv/devices/nvdevice.h"
@@ -48,11 +49,11 @@ public:
~nvhost_as_gpu() override;
NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::vector<u8>& output) override;
+ std::span<u8> output) override;
NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::span<const u8> inline_input, std::vector<u8>& output) override;
- NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output,
- std::vector<u8>& inline_output) override;
+ std::span<const u8> inline_input, std::span<u8> output) override;
+ NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
+ std::span<u8> inline_output) override;
void OnOpen(DeviceFD fd) override;
void OnClose(DeviceFD fd) override;
@@ -138,18 +139,18 @@ private:
static_assert(sizeof(IoctlGetVaRegions) == 16 + sizeof(VaRegion) * 2,
"IoctlGetVaRegions is incorrect size");
- NvResult AllocAsEx(std::span<const u8> input, std::vector<u8>& output);
- NvResult AllocateSpace(std::span<const u8> input, std::vector<u8>& output);
- NvResult Remap(std::span<const u8> input, std::vector<u8>& output);
- NvResult MapBufferEx(std::span<const u8> input, std::vector<u8>& output);
- NvResult UnmapBuffer(std::span<const u8> input, std::vector<u8>& output);
- NvResult FreeSpace(std::span<const u8> input, std::vector<u8>& output);
- NvResult BindChannel(std::span<const u8> input, std::vector<u8>& output);
+ NvResult AllocAsEx(std::span<const u8> input, std::span<u8> output);
+ NvResult AllocateSpace(std::span<const u8> input, std::span<u8> output);
+ NvResult Remap(std::span<const u8> input, std::span<u8> output);
+ NvResult MapBufferEx(std::span<const u8> input, std::span<u8> output);
+ NvResult UnmapBuffer(std::span<const u8> input, std::span<u8> output);
+ NvResult FreeSpace(std::span<const u8> input, std::span<u8> output);
+ NvResult BindChannel(std::span<const u8> input, std::span<u8> output);
void GetVARegionsImpl(IoctlGetVaRegions& params);
- NvResult GetVARegions(std::span<const u8> input, std::vector<u8>& output);
- NvResult GetVARegions(std::span<const u8> input, std::vector<u8>& output,
- std::vector<u8>& inline_output);
+ NvResult GetVARegions(std::span<const u8> input, std::span<u8> output);
+ NvResult GetVARegions(std::span<const u8> input, std::span<u8> output,
+ std::span<u8> inline_output);
void FreeMappingLocked(u64 offset);
@@ -212,6 +213,7 @@ private:
bool initialised{};
} vm;
std::shared_ptr<Tegra::MemoryManager> gmmu;
+ Common::ScratchBuffer<IoctlRemapEntry> entries;
// s32 channel{};
// u32 big_page_size{VM::DEFAULT_BIG_PAGE_SIZE};
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
index e12025560..4d55554b4 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
@@ -35,7 +35,7 @@ nvhost_ctrl::~nvhost_ctrl() {
}
NvResult nvhost_ctrl::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::vector<u8>& output) {
+ std::span<u8> output) {
switch (command.group) {
case 0x0:
switch (command.cmd) {
@@ -64,13 +64,13 @@ NvResult nvhost_ctrl::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> inp
}
NvResult nvhost_ctrl::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::span<const u8> inline_input, std::vector<u8>& output) {
+ std::span<const u8> inline_input, std::span<u8> output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
NvResult nvhost_ctrl::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::vector<u8>& output, std::vector<u8>& inline_outpu) {
+ std::span<u8> output, std::span<u8> inline_outpu) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
@@ -79,7 +79,7 @@ void nvhost_ctrl::OnOpen(DeviceFD fd) {}
void nvhost_ctrl::OnClose(DeviceFD fd) {}
-NvResult nvhost_ctrl::NvOsGetConfigU32(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_ctrl::NvOsGetConfigU32(std::span<const u8> input, std::span<u8> output) {
IocGetConfigParams params{};
std::memcpy(&params, input.data(), sizeof(params));
LOG_TRACE(Service_NVDRV, "called, setting={}!{}", params.domain_str.data(),
@@ -87,7 +87,7 @@ NvResult nvhost_ctrl::NvOsGetConfigU32(std::span<const u8> input, std::vector<u8
return NvResult::ConfigVarNotFound; // Returns error on production mode
}
-NvResult nvhost_ctrl::IocCtrlEventWait(std::span<const u8> input, std::vector<u8>& output,
+NvResult nvhost_ctrl::IocCtrlEventWait(std::span<const u8> input, std::span<u8> output,
bool is_allocation) {
IocCtrlEventWaitParams params{};
std::memcpy(&params, input.data(), sizeof(params));
@@ -231,7 +231,7 @@ NvResult nvhost_ctrl::FreeEvent(u32 slot) {
return NvResult::Success;
}
-NvResult nvhost_ctrl::IocCtrlEventRegister(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_ctrl::IocCtrlEventRegister(std::span<const u8> input, std::span<u8> output) {
IocCtrlEventRegisterParams params{};
std::memcpy(&params, input.data(), sizeof(params));
const u32 event_id = params.user_event_id;
@@ -252,7 +252,7 @@ NvResult nvhost_ctrl::IocCtrlEventRegister(std::span<const u8> input, std::vecto
return NvResult::Success;
}
-NvResult nvhost_ctrl::IocCtrlEventUnregister(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_ctrl::IocCtrlEventUnregister(std::span<const u8> input, std::span<u8> output) {
IocCtrlEventUnregisterParams params{};
std::memcpy(&params, input.data(), sizeof(params));
const u32 event_id = params.user_event_id & 0x00FF;
@@ -262,8 +262,7 @@ NvResult nvhost_ctrl::IocCtrlEventUnregister(std::span<const u8> input, std::vec
return FreeEvent(event_id);
}
-NvResult nvhost_ctrl::IocCtrlEventUnregisterBatch(std::span<const u8> input,
- std::vector<u8>& output) {
+NvResult nvhost_ctrl::IocCtrlEventUnregisterBatch(std::span<const u8> input, std::span<u8> output) {
IocCtrlEventUnregisterBatchParams params{};
std::memcpy(&params, input.data(), sizeof(params));
u64 event_mask = params.user_events;
@@ -281,7 +280,7 @@ NvResult nvhost_ctrl::IocCtrlEventUnregisterBatch(std::span<const u8> input,
return NvResult::Success;
}
-NvResult nvhost_ctrl::IocCtrlClearEventWait(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_ctrl::IocCtrlClearEventWait(std::span<const u8> input, std::span<u8> output) {
IocCtrlEventClearParams params{};
std::memcpy(&params, input.data(), sizeof(params));
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
index dd2e7888a..2efed4862 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
@@ -26,11 +26,11 @@ public:
~nvhost_ctrl() override;
NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::vector<u8>& output) override;
+ std::span<u8> output) override;
NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::span<const u8> inline_input, std::vector<u8>& output) override;
- NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output,
- std::vector<u8>& inline_output) override;
+ std::span<const u8> inline_input, std::span<u8> output) override;
+ NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
+ std::span<u8> inline_output) override;
void OnOpen(DeviceFD fd) override;
void OnClose(DeviceFD fd) override;
@@ -186,13 +186,12 @@ private:
static_assert(sizeof(IocCtrlEventUnregisterBatchParams) == 8,
"IocCtrlEventKill is incorrect size");
- NvResult NvOsGetConfigU32(std::span<const u8> input, std::vector<u8>& output);
- NvResult IocCtrlEventWait(std::span<const u8> input, std::vector<u8>& output,
- bool is_allocation);
- NvResult IocCtrlEventRegister(std::span<const u8> input, std::vector<u8>& output);
- NvResult IocCtrlEventUnregister(std::span<const u8> input, std::vector<u8>& output);
- NvResult IocCtrlEventUnregisterBatch(std::span<const u8> input, std::vector<u8>& output);
- NvResult IocCtrlClearEventWait(std::span<const u8> input, std::vector<u8>& output);
+ NvResult NvOsGetConfigU32(std::span<const u8> input, std::span<u8> output);
+ NvResult IocCtrlEventWait(std::span<const u8> input, std::span<u8> output, bool is_allocation);
+ NvResult IocCtrlEventRegister(std::span<const u8> input, std::span<u8> output);
+ NvResult IocCtrlEventUnregister(std::span<const u8> input, std::span<u8> output);
+ NvResult IocCtrlEventUnregisterBatch(std::span<const u8> input, std::span<u8> output);
+ NvResult IocCtrlClearEventWait(std::span<const u8> input, std::span<u8> output);
NvResult FreeEvent(u32 slot);
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
index be3c083db..6081d92e9 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
@@ -22,7 +22,7 @@ nvhost_ctrl_gpu::~nvhost_ctrl_gpu() {
}
NvResult nvhost_ctrl_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::vector<u8>& output) {
+ std::span<u8> output) {
switch (command.group) {
case 'G':
switch (command.cmd) {
@@ -54,13 +54,13 @@ NvResult nvhost_ctrl_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8>
}
NvResult nvhost_ctrl_gpu::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::span<const u8> inline_input, std::vector<u8>& output) {
+ std::span<const u8> inline_input, std::span<u8> output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
NvResult nvhost_ctrl_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::vector<u8>& output, std::vector<u8>& inline_output) {
+ std::span<u8> output, std::span<u8> inline_output) {
switch (command.group) {
case 'G':
switch (command.cmd) {
@@ -82,7 +82,7 @@ NvResult nvhost_ctrl_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8>
void nvhost_ctrl_gpu::OnOpen(DeviceFD fd) {}
void nvhost_ctrl_gpu::OnClose(DeviceFD fd) {}
-NvResult nvhost_ctrl_gpu::GetCharacteristics(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_ctrl_gpu::GetCharacteristics(std::span<const u8> input, std::span<u8> output) {
LOG_DEBUG(Service_NVDRV, "called");
IoctlCharacteristics params{};
std::memcpy(&params, input.data(), input.size());
@@ -127,8 +127,8 @@ NvResult nvhost_ctrl_gpu::GetCharacteristics(std::span<const u8> input, std::vec
return NvResult::Success;
}
-NvResult nvhost_ctrl_gpu::GetCharacteristics(std::span<const u8> input, std::vector<u8>& output,
- std::vector<u8>& inline_output) {
+NvResult nvhost_ctrl_gpu::GetCharacteristics(std::span<const u8> input, std::span<u8> output,
+ std::span<u8> inline_output) {
LOG_DEBUG(Service_NVDRV, "called");
IoctlCharacteristics params{};
std::memcpy(&params, input.data(), input.size());
@@ -175,7 +175,7 @@ NvResult nvhost_ctrl_gpu::GetCharacteristics(std::span<const u8> input, std::vec
return NvResult::Success;
}
-NvResult nvhost_ctrl_gpu::GetTPCMasks(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_ctrl_gpu::GetTPCMasks(std::span<const u8> input, std::span<u8> output) {
IoctlGpuGetTpcMasksArgs params{};
std::memcpy(&params, input.data(), input.size());
LOG_DEBUG(Service_NVDRV, "called, mask_buffer_size=0x{:X}", params.mask_buffer_size);
@@ -186,8 +186,8 @@ NvResult nvhost_ctrl_gpu::GetTPCMasks(std::span<const u8> input, std::vector<u8>
return NvResult::Success;
}
-NvResult nvhost_ctrl_gpu::GetTPCMasks(std::span<const u8> input, std::vector<u8>& output,
- std::vector<u8>& inline_output) {
+NvResult nvhost_ctrl_gpu::GetTPCMasks(std::span<const u8> input, std::span<u8> output,
+ std::span<u8> inline_output) {
IoctlGpuGetTpcMasksArgs params{};
std::memcpy(&params, input.data(), input.size());
LOG_DEBUG(Service_NVDRV, "called, mask_buffer_size=0x{:X}", params.mask_buffer_size);
@@ -199,7 +199,7 @@ NvResult nvhost_ctrl_gpu::GetTPCMasks(std::span<const u8> input, std::vector<u8>
return NvResult::Success;
}
-NvResult nvhost_ctrl_gpu::GetActiveSlotMask(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_ctrl_gpu::GetActiveSlotMask(std::span<const u8> input, std::span<u8> output) {
LOG_DEBUG(Service_NVDRV, "called");
IoctlActiveSlotMask params{};
@@ -212,7 +212,7 @@ NvResult nvhost_ctrl_gpu::GetActiveSlotMask(std::span<const u8> input, std::vect
return NvResult::Success;
}
-NvResult nvhost_ctrl_gpu::ZCullGetCtxSize(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_ctrl_gpu::ZCullGetCtxSize(std::span<const u8> input, std::span<u8> output) {
LOG_DEBUG(Service_NVDRV, "called");
IoctlZcullGetCtxSize params{};
@@ -224,7 +224,7 @@ NvResult nvhost_ctrl_gpu::ZCullGetCtxSize(std::span<const u8> input, std::vector
return NvResult::Success;
}
-NvResult nvhost_ctrl_gpu::ZCullGetInfo(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_ctrl_gpu::ZCullGetInfo(std::span<const u8> input, std::span<u8> output) {
LOG_DEBUG(Service_NVDRV, "called");
IoctlNvgpuGpuZcullGetInfoArgs params{};
@@ -247,7 +247,7 @@ NvResult nvhost_ctrl_gpu::ZCullGetInfo(std::span<const u8> input, std::vector<u8
return NvResult::Success;
}
-NvResult nvhost_ctrl_gpu::ZBCSetTable(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_ctrl_gpu::ZBCSetTable(std::span<const u8> input, std::span<u8> output) {
LOG_WARNING(Service_NVDRV, "(STUBBED) called");
IoctlZbcSetTable params{};
@@ -263,7 +263,7 @@ NvResult nvhost_ctrl_gpu::ZBCSetTable(std::span<const u8> input, std::vector<u8>
return NvResult::Success;
}
-NvResult nvhost_ctrl_gpu::ZBCQueryTable(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_ctrl_gpu::ZBCQueryTable(std::span<const u8> input, std::span<u8> output) {
LOG_WARNING(Service_NVDRV, "(STUBBED) called");
IoctlZbcQueryTable params{};
@@ -273,7 +273,7 @@ NvResult nvhost_ctrl_gpu::ZBCQueryTable(std::span<const u8> input, std::vector<u
return NvResult::Success;
}
-NvResult nvhost_ctrl_gpu::FlushL2(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_ctrl_gpu::FlushL2(std::span<const u8> input, std::span<u8> output) {
LOG_WARNING(Service_NVDRV, "(STUBBED) called");
IoctlFlushL2 params{};
@@ -283,7 +283,7 @@ NvResult nvhost_ctrl_gpu::FlushL2(std::span<const u8> input, std::vector<u8>& ou
return NvResult::Success;
}
-NvResult nvhost_ctrl_gpu::GetGpuTime(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_ctrl_gpu::GetGpuTime(std::span<const u8> input, std::span<u8> output) {
LOG_DEBUG(Service_NVDRV, "called");
IoctlGetGpuTime params{};
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
index b9333d9d3..97995551c 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
@@ -22,11 +22,11 @@ public:
~nvhost_ctrl_gpu() override;
NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::vector<u8>& output) override;
+ std::span<u8> output) override;
NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::span<const u8> inline_input, std::vector<u8>& output) override;
- NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output,
- std::vector<u8>& inline_output) override;
+ std::span<const u8> inline_input, std::span<u8> output) override;
+ NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
+ std::span<u8> inline_output) override;
void OnOpen(DeviceFD fd) override;
void OnClose(DeviceFD fd) override;
@@ -151,21 +151,21 @@ private:
};
static_assert(sizeof(IoctlGetGpuTime) == 0x10, "IoctlGetGpuTime is incorrect size");
- NvResult GetCharacteristics(std::span<const u8> input, std::vector<u8>& output);
- NvResult GetCharacteristics(std::span<const u8> input, std::vector<u8>& output,
- std::vector<u8>& inline_output);
-
- NvResult GetTPCMasks(std::span<const u8> input, std::vector<u8>& output);
- NvResult GetTPCMasks(std::span<const u8> input, std::vector<u8>& output,
- std::vector<u8>& inline_output);
-
- NvResult GetActiveSlotMask(std::span<const u8> input, std::vector<u8>& output);
- NvResult ZCullGetCtxSize(std::span<const u8> input, std::vector<u8>& output);
- NvResult ZCullGetInfo(std::span<const u8> input, std::vector<u8>& output);
- NvResult ZBCSetTable(std::span<const u8> input, std::vector<u8>& output);
- NvResult ZBCQueryTable(std::span<const u8> input, std::vector<u8>& output);
- NvResult FlushL2(std::span<const u8> input, std::vector<u8>& output);
- NvResult GetGpuTime(std::span<const u8> input, std::vector<u8>& output);
+ NvResult GetCharacteristics(std::span<const u8> input, std::span<u8> output);
+ NvResult GetCharacteristics(std::span<const u8> input, std::span<u8> output,
+ std::span<u8> inline_output);
+
+ NvResult GetTPCMasks(std::span<const u8> input, std::span<u8> output);
+ NvResult GetTPCMasks(std::span<const u8> input, std::span<u8> output,
+ std::span<u8> inline_output);
+
+ NvResult GetActiveSlotMask(std::span<const u8> input, std::span<u8> output);
+ NvResult ZCullGetCtxSize(std::span<const u8> input, std::span<u8> output);
+ NvResult ZCullGetInfo(std::span<const u8> input, std::span<u8> output);
+ NvResult ZBCSetTable(std::span<const u8> input, std::span<u8> output);
+ NvResult ZBCQueryTable(std::span<const u8> input, std::span<u8> output);
+ NvResult FlushL2(std::span<const u8> input, std::span<u8> output);
+ NvResult GetGpuTime(std::span<const u8> input, std::span<u8> output);
EventInterface& events_interface;
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index 453a965dc..46a25fcab 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -47,7 +47,7 @@ nvhost_gpu::~nvhost_gpu() {
}
NvResult nvhost_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::vector<u8>& output) {
+ std::span<u8> output) {
switch (command.group) {
case 0x0:
switch (command.cmd) {
@@ -99,7 +99,7 @@ NvResult nvhost_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> inpu
};
NvResult nvhost_gpu::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::span<const u8> inline_input, std::vector<u8>& output) {
+ std::span<const u8> inline_input, std::span<u8> output) {
switch (command.group) {
case 'H':
switch (command.cmd) {
@@ -113,7 +113,7 @@ NvResult nvhost_gpu::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> inpu
}
NvResult nvhost_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::vector<u8>& output, std::vector<u8>& inline_output) {
+ std::span<u8> output, std::span<u8> inline_output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
@@ -121,7 +121,7 @@ NvResult nvhost_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> inpu
void nvhost_gpu::OnOpen(DeviceFD fd) {}
void nvhost_gpu::OnClose(DeviceFD fd) {}
-NvResult nvhost_gpu::SetNVMAPfd(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_gpu::SetNVMAPfd(std::span<const u8> input, std::span<u8> output) {
IoctlSetNvmapFD params{};
std::memcpy(&params, input.data(), input.size());
LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd);
@@ -130,7 +130,7 @@ NvResult nvhost_gpu::SetNVMAPfd(std::span<const u8> input, std::vector<u8>& outp
return NvResult::Success;
}
-NvResult nvhost_gpu::SetClientData(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_gpu::SetClientData(std::span<const u8> input, std::span<u8> output) {
LOG_DEBUG(Service_NVDRV, "called");
IoctlClientData params{};
@@ -139,7 +139,7 @@ NvResult nvhost_gpu::SetClientData(std::span<const u8> input, std::vector<u8>& o
return NvResult::Success;
}
-NvResult nvhost_gpu::GetClientData(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_gpu::GetClientData(std::span<const u8> input, std::span<u8> output) {
LOG_DEBUG(Service_NVDRV, "called");
IoctlClientData params{};
@@ -149,7 +149,7 @@ NvResult nvhost_gpu::GetClientData(std::span<const u8> input, std::vector<u8>& o
return NvResult::Success;
}
-NvResult nvhost_gpu::ZCullBind(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_gpu::ZCullBind(std::span<const u8> input, std::span<u8> output) {
std::memcpy(&zcull_params, input.data(), input.size());
LOG_DEBUG(Service_NVDRV, "called, gpu_va={:X}, mode={:X}", zcull_params.gpu_va,
zcull_params.mode);
@@ -158,7 +158,7 @@ NvResult nvhost_gpu::ZCullBind(std::span<const u8> input, std::vector<u8>& outpu
return NvResult::Success;
}
-NvResult nvhost_gpu::SetErrorNotifier(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_gpu::SetErrorNotifier(std::span<const u8> input, std::span<u8> output) {
IoctlSetErrorNotifier params{};
std::memcpy(&params, input.data(), input.size());
LOG_WARNING(Service_NVDRV, "(STUBBED) called, offset={:X}, size={:X}, mem={:X}", params.offset,
@@ -168,14 +168,14 @@ NvResult nvhost_gpu::SetErrorNotifier(std::span<const u8> input, std::vector<u8>
return NvResult::Success;
}
-NvResult nvhost_gpu::SetChannelPriority(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_gpu::SetChannelPriority(std::span<const u8> input, std::span<u8> output) {
std::memcpy(&channel_priority, input.data(), input.size());
LOG_DEBUG(Service_NVDRV, "(STUBBED) called, priority={:X}", channel_priority);
return NvResult::Success;
}
-NvResult nvhost_gpu::AllocGPFIFOEx2(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_gpu::AllocGPFIFOEx2(std::span<const u8> input, std::span<u8> output) {
IoctlAllocGpfifoEx2 params{};
std::memcpy(&params, input.data(), input.size());
LOG_WARNING(Service_NVDRV,
@@ -197,7 +197,7 @@ NvResult nvhost_gpu::AllocGPFIFOEx2(std::span<const u8> input, std::vector<u8>&
return NvResult::Success;
}
-NvResult nvhost_gpu::AllocateObjectContext(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_gpu::AllocateObjectContext(std::span<const u8> input, std::span<u8> output) {
IoctlAllocObjCtx params{};
std::memcpy(&params, input.data(), input.size());
LOG_WARNING(Service_NVDRV, "(STUBBED) called, class_num={:X}, flags={:X}", params.class_num,
@@ -208,7 +208,8 @@ NvResult nvhost_gpu::AllocateObjectContext(std::span<const u8> input, std::vecto
return NvResult::Success;
}
-static std::vector<Tegra::CommandHeader> BuildWaitCommandList(NvFence fence) {
+static boost::container::small_vector<Tegra::CommandHeader, 512> BuildWaitCommandList(
+ NvFence fence) {
return {
Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointPayload, 1,
Tegra::SubmissionMode::Increasing),
@@ -219,35 +220,35 @@ static std::vector<Tegra::CommandHeader> BuildWaitCommandList(NvFence fence) {
};
}
-static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(NvFence fence) {
- std::vector<Tegra::CommandHeader> result{
+static boost::container::small_vector<Tegra::CommandHeader, 512> BuildIncrementCommandList(
+ NvFence fence) {
+ boost::container::small_vector<Tegra::CommandHeader, 512> result{
Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointPayload, 1,
Tegra::SubmissionMode::Increasing),
{}};
for (u32 count = 0; count < 2; ++count) {
- result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointOperation, 1,
- Tegra::SubmissionMode::Increasing));
- result.emplace_back(
+ result.push_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointOperation, 1,
+ Tegra::SubmissionMode::Increasing));
+ result.push_back(
BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Increment, fence.id));
}
return result;
}
-static std::vector<Tegra::CommandHeader> BuildIncrementWithWfiCommandList(NvFence fence) {
- std::vector<Tegra::CommandHeader> result{
+static boost::container::small_vector<Tegra::CommandHeader, 512> BuildIncrementWithWfiCommandList(
+ NvFence fence) {
+ boost::container::small_vector<Tegra::CommandHeader, 512> result{
Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForIdle, 1,
Tegra::SubmissionMode::Increasing),
{}};
- const std::vector<Tegra::CommandHeader> increment{BuildIncrementCommandList(fence)};
-
- result.insert(result.end(), increment.begin(), increment.end());
-
+ auto increment_list{BuildIncrementCommandList(fence)};
+ result.insert(result.end(), increment_list.begin(), increment_list.end());
return result;
}
-NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output,
+NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::span<u8> output,
Tegra::CommandList&& entries) {
LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address,
params.num_entries, params.flags.raw);
@@ -293,7 +294,7 @@ NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>
return NvResult::Success;
}
-NvResult nvhost_gpu::SubmitGPFIFOBase(std::span<const u8> input, std::vector<u8>& output,
+NvResult nvhost_gpu::SubmitGPFIFOBase(std::span<const u8> input, std::span<u8> output,
bool kickoff) {
if (input.size() < sizeof(IoctlSubmitGpfifo)) {
UNIMPLEMENTED();
@@ -315,7 +316,7 @@ NvResult nvhost_gpu::SubmitGPFIFOBase(std::span<const u8> input, std::vector<u8>
}
NvResult nvhost_gpu::SubmitGPFIFOBase(std::span<const u8> input, std::span<const u8> input_inline,
- std::vector<u8>& output) {
+ std::span<u8> output) {
if (input.size() < sizeof(IoctlSubmitGpfifo)) {
UNIMPLEMENTED();
return NvResult::InvalidSize;
@@ -327,7 +328,7 @@ NvResult nvhost_gpu::SubmitGPFIFOBase(std::span<const u8> input, std::span<const
return SubmitGPFIFOImpl(params, output, std::move(entries));
}
-NvResult nvhost_gpu::GetWaitbase(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_gpu::GetWaitbase(std::span<const u8> input, std::span<u8> output) {
IoctlGetWaitbase params{};
std::memcpy(&params, input.data(), sizeof(IoctlGetWaitbase));
LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown);
@@ -337,7 +338,7 @@ NvResult nvhost_gpu::GetWaitbase(std::span<const u8> input, std::vector<u8>& out
return NvResult::Success;
}
-NvResult nvhost_gpu::ChannelSetTimeout(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_gpu::ChannelSetTimeout(std::span<const u8> input, std::span<u8> output) {
IoctlChannelSetTimeout params{};
std::memcpy(&params, input.data(), sizeof(IoctlChannelSetTimeout));
LOG_INFO(Service_NVDRV, "called, timeout=0x{:X}", params.timeout);
@@ -345,7 +346,7 @@ NvResult nvhost_gpu::ChannelSetTimeout(std::span<const u8> input, std::vector<u8
return NvResult::Success;
}
-NvResult nvhost_gpu::ChannelSetTimeslice(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_gpu::ChannelSetTimeslice(std::span<const u8> input, std::span<u8> output) {
IoctlSetTimeslice params{};
std::memcpy(&params, input.data(), sizeof(IoctlSetTimeslice));
LOG_INFO(Service_NVDRV, "called, timeslice=0x{:X}", params.timeslice);
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
index 3ca58202d..529c20526 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
@@ -41,11 +41,11 @@ public:
~nvhost_gpu() override;
NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::vector<u8>& output) override;
+ std::span<u8> output) override;
NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::span<const u8> inline_input, std::vector<u8>& output) override;
- NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output,
- std::vector<u8>& inline_output) override;
+ std::span<const u8> inline_input, std::span<u8> output) override;
+ NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
+ std::span<u8> inline_output) override;
void OnOpen(DeviceFD fd) override;
void OnClose(DeviceFD fd) override;
@@ -186,23 +186,23 @@ private:
u32_le channel_priority{};
u32_le channel_timeslice{};
- NvResult SetNVMAPfd(std::span<const u8> input, std::vector<u8>& output);
- NvResult SetClientData(std::span<const u8> input, std::vector<u8>& output);
- NvResult GetClientData(std::span<const u8> input, std::vector<u8>& output);
- NvResult ZCullBind(std::span<const u8> input, std::vector<u8>& output);
- NvResult SetErrorNotifier(std::span<const u8> input, std::vector<u8>& output);
- NvResult SetChannelPriority(std::span<const u8> input, std::vector<u8>& output);
- NvResult AllocGPFIFOEx2(std::span<const u8> input, std::vector<u8>& output);
- NvResult AllocateObjectContext(std::span<const u8> input, std::vector<u8>& output);
- NvResult SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output,
+ NvResult SetNVMAPfd(std::span<const u8> input, std::span<u8> output);
+ NvResult SetClientData(std::span<const u8> input, std::span<u8> output);
+ NvResult GetClientData(std::span<const u8> input, std::span<u8> output);
+ NvResult ZCullBind(std::span<const u8> input, std::span<u8> output);
+ NvResult SetErrorNotifier(std::span<const u8> input, std::span<u8> output);
+ NvResult SetChannelPriority(std::span<const u8> input, std::span<u8> output);
+ NvResult AllocGPFIFOEx2(std::span<const u8> input, std::span<u8> output);
+ NvResult AllocateObjectContext(std::span<const u8> input, std::span<u8> output);
+ NvResult SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::span<u8> output,
Tegra::CommandList&& entries);
- NvResult SubmitGPFIFOBase(std::span<const u8> input, std::vector<u8>& output,
+ NvResult SubmitGPFIFOBase(std::span<const u8> input, std::span<u8> output,
bool kickoff = false);
NvResult SubmitGPFIFOBase(std::span<const u8> input, std::span<const u8> input_inline,
- std::vector<u8>& output);
- NvResult GetWaitbase(std::span<const u8> input, std::vector<u8>& output);
- NvResult ChannelSetTimeout(std::span<const u8> input, std::vector<u8>& output);
- NvResult ChannelSetTimeslice(std::span<const u8> input, std::vector<u8>& output);
+ std::span<u8> output);
+ NvResult GetWaitbase(std::span<const u8> input, std::span<u8> output);
+ NvResult ChannelSetTimeout(std::span<const u8> input, std::span<u8> output);
+ NvResult ChannelSetTimeslice(std::span<const u8> input, std::span<u8> output);
EventInterface& events_interface;
NvCore::Container& core;
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
index dc45169ad..a174442a6 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
@@ -16,7 +16,7 @@ nvhost_nvdec::nvhost_nvdec(Core::System& system_, NvCore::Container& core_)
nvhost_nvdec::~nvhost_nvdec() = default;
NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::vector<u8>& output) {
+ std::span<u8> output) {
switch (command.group) {
case 0x0:
switch (command.cmd) {
@@ -56,13 +56,13 @@ NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> in
}
NvResult nvhost_nvdec::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::span<const u8> inline_input, std::vector<u8>& output) {
+ std::span<const u8> inline_input, std::span<u8> output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
NvResult nvhost_nvdec::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::vector<u8>& output, std::vector<u8>& inline_output) {
+ std::span<u8> output, std::span<u8> inline_output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
index 0d615bbcb..ad2233c49 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
@@ -14,11 +14,11 @@ public:
~nvhost_nvdec() override;
NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::vector<u8>& output) override;
+ std::span<u8> output) override;
NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::span<const u8> inline_input, std::vector<u8>& output) override;
- NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output,
- std::vector<u8>& inline_output) override;
+ std::span<const u8> inline_input, std::span<u8> output) override;
+ NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
+ std::span<u8> inline_output) override;
void OnOpen(DeviceFD fd) override;
void OnClose(DeviceFD fd) override;
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
index 1ab51f10b..61649aa4a 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
@@ -36,7 +36,7 @@ std::size_t SliceVectors(std::span<const u8> input, std::vector<T>& dst, std::si
// Writes the data in src to an offset into the dst vector. The offset is specified in bytes
// Returns the number of bytes written into dst.
template <typename T>
-std::size_t WriteVectors(std::vector<u8>& dst, const std::vector<T>& src, std::size_t offset) {
+std::size_t WriteVectors(std::span<u8> dst, const std::vector<T>& src, std::size_t offset) {
if (src.empty()) {
return 0;
}
@@ -72,8 +72,7 @@ NvResult nvhost_nvdec_common::SetNVMAPfd(std::span<const u8> input) {
return NvResult::Success;
}
-NvResult nvhost_nvdec_common::Submit(DeviceFD fd, std::span<const u8> input,
- std::vector<u8>& output) {
+NvResult nvhost_nvdec_common::Submit(DeviceFD fd, std::span<const u8> input, std::span<u8> output) {
IoctlSubmit params{};
std::memcpy(&params, input.data(), sizeof(IoctlSubmit));
LOG_DEBUG(Service_NVDRV, "called NVDEC Submit, cmd_buffer_count={}", params.cmd_buffer_count);
@@ -121,7 +120,7 @@ NvResult nvhost_nvdec_common::Submit(DeviceFD fd, std::span<const u8> input,
return NvResult::Success;
}
-NvResult nvhost_nvdec_common::GetSyncpoint(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_nvdec_common::GetSyncpoint(std::span<const u8> input, std::span<u8> output) {
IoctlGetSyncpoint params{};
std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint));
LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", params.param);
@@ -133,7 +132,7 @@ NvResult nvhost_nvdec_common::GetSyncpoint(std::span<const u8> input, std::vecto
return NvResult::Success;
}
-NvResult nvhost_nvdec_common::GetWaitbase(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_nvdec_common::GetWaitbase(std::span<const u8> input, std::span<u8> output) {
IoctlGetWaitbase params{};
LOG_CRITICAL(Service_NVDRV, "called WAITBASE");
std::memcpy(&params, input.data(), sizeof(IoctlGetWaitbase));
@@ -142,7 +141,7 @@ NvResult nvhost_nvdec_common::GetWaitbase(std::span<const u8> input, std::vector
return NvResult::Success;
}
-NvResult nvhost_nvdec_common::MapBuffer(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_nvdec_common::MapBuffer(std::span<const u8> input, std::span<u8> output) {
IoctlMapBuffer params{};
std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer));
std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries);
@@ -159,7 +158,7 @@ NvResult nvhost_nvdec_common::MapBuffer(std::span<const u8> input, std::vector<u
return NvResult::Success;
}
-NvResult nvhost_nvdec_common::UnmapBuffer(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_nvdec_common::UnmapBuffer(std::span<const u8> input, std::span<u8> output) {
IoctlMapBuffer params{};
std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer));
std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries);
@@ -173,7 +172,7 @@ NvResult nvhost_nvdec_common::UnmapBuffer(std::span<const u8> input, std::vector
return NvResult::Success;
}
-NvResult nvhost_nvdec_common::SetSubmitTimeout(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_nvdec_common::SetSubmitTimeout(std::span<const u8> input, std::span<u8> output) {
std::memcpy(&submit_timeout, input.data(), input.size());
LOG_WARNING(Service_NVDRV, "(STUBBED) called");
return NvResult::Success;
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
index 5af26a26f..9bb573bfe 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
@@ -108,12 +108,12 @@ protected:
/// Ioctl command implementations
NvResult SetNVMAPfd(std::span<const u8> input);
- NvResult Submit(DeviceFD fd, std::span<const u8> input, std::vector<u8>& output);
- NvResult GetSyncpoint(std::span<const u8> input, std::vector<u8>& output);
- NvResult GetWaitbase(std::span<const u8> input, std::vector<u8>& output);
- NvResult MapBuffer(std::span<const u8> input, std::vector<u8>& output);
- NvResult UnmapBuffer(std::span<const u8> input, std::vector<u8>& output);
- NvResult SetSubmitTimeout(std::span<const u8> input, std::vector<u8>& output);
+ NvResult Submit(DeviceFD fd, std::span<const u8> input, std::span<u8> output);
+ NvResult GetSyncpoint(std::span<const u8> input, std::span<u8> output);
+ NvResult GetWaitbase(std::span<const u8> input, std::span<u8> output);
+ NvResult MapBuffer(std::span<const u8> input, std::span<u8> output);
+ NvResult UnmapBuffer(std::span<const u8> input, std::span<u8> output);
+ NvResult SetSubmitTimeout(std::span<const u8> input, std::span<u8> output);
Kernel::KEvent* QueryEvent(u32 event_id) override;
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp
index 39f30e7c8..a05c8cdae 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp
@@ -13,7 +13,7 @@ nvhost_nvjpg::nvhost_nvjpg(Core::System& system_) : nvdevice{system_} {}
nvhost_nvjpg::~nvhost_nvjpg() = default;
NvResult nvhost_nvjpg::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::vector<u8>& output) {
+ std::span<u8> output) {
switch (command.group) {
case 'H':
switch (command.cmd) {
@@ -32,13 +32,13 @@ NvResult nvhost_nvjpg::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> in
}
NvResult nvhost_nvjpg::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::span<const u8> inline_input, std::vector<u8>& output) {
+ std::span<const u8> inline_input, std::span<u8> output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
NvResult nvhost_nvjpg::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::vector<u8>& output, std::vector<u8>& inline_output) {
+ std::span<u8> output, std::span<u8> inline_output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
@@ -46,7 +46,7 @@ NvResult nvhost_nvjpg::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> in
void nvhost_nvjpg::OnOpen(DeviceFD fd) {}
void nvhost_nvjpg::OnClose(DeviceFD fd) {}
-NvResult nvhost_nvjpg::SetNVMAPfd(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvhost_nvjpg::SetNVMAPfd(std::span<const u8> input, std::span<u8> output) {
IoctlSetNvmapFD params{};
std::memcpy(&params, input.data(), input.size());
LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd);
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h
index 41b57e872..5623e0d47 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h
@@ -16,11 +16,11 @@ public:
~nvhost_nvjpg() override;
NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::vector<u8>& output) override;
+ std::span<u8> output) override;
NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::span<const u8> inline_input, std::vector<u8>& output) override;
- NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output,
- std::vector<u8>& inline_output) override;
+ std::span<const u8> inline_input, std::span<u8> output) override;
+ NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
+ std::span<u8> inline_output) override;
void OnOpen(DeviceFD fd) override;
void OnClose(DeviceFD fd) override;
@@ -33,7 +33,7 @@ private:
s32_le nvmap_fd{};
- NvResult SetNVMAPfd(std::span<const u8> input, std::vector<u8>& output);
+ NvResult SetNVMAPfd(std::span<const u8> input, std::span<u8> output);
};
} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
index b0ea402a7..c0b8684c3 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
@@ -16,7 +16,7 @@ nvhost_vic::nvhost_vic(Core::System& system_, NvCore::Container& core_)
nvhost_vic::~nvhost_vic() = default;
NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::vector<u8>& output) {
+ std::span<u8> output) {
switch (command.group) {
case 0x0:
switch (command.cmd) {
@@ -56,13 +56,13 @@ NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> inpu
}
NvResult nvhost_vic::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::span<const u8> inline_input, std::vector<u8>& output) {
+ std::span<const u8> inline_input, std::span<u8> output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
NvResult nvhost_vic::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::vector<u8>& output, std::vector<u8>& inline_output) {
+ std::span<u8> output, std::span<u8> inline_output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.h b/src/core/hle/service/nvdrv/devices/nvhost_vic.h
index b5e350a83..cadbcb0a5 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.h
@@ -13,11 +13,11 @@ public:
~nvhost_vic();
NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::vector<u8>& output) override;
+ std::span<u8> output) override;
NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::span<const u8> inline_input, std::vector<u8>& output) override;
- NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output,
- std::vector<u8>& inline_output) override;
+ std::span<const u8> inline_input, std::span<u8> output) override;
+ NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
+ std::span<u8> inline_output) override;
void OnOpen(DeviceFD fd) override;
void OnClose(DeviceFD fd) override;
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.cpp b/src/core/hle/service/nvdrv/devices/nvmap.cpp
index 07417f045..e7f7e273b 100644
--- a/src/core/hle/service/nvdrv/devices/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp
@@ -26,7 +26,7 @@ nvmap::nvmap(Core::System& system_, NvCore::Container& container_)
nvmap::~nvmap() = default;
NvResult nvmap::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::vector<u8>& output) {
+ std::span<u8> output) {
switch (command.group) {
case 0x1:
switch (command.cmd) {
@@ -55,13 +55,13 @@ NvResult nvmap::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
}
NvResult nvmap::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::span<const u8> inline_input, std::vector<u8>& output) {
+ std::span<const u8> inline_input, std::span<u8> output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
-NvResult nvmap::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::vector<u8>& output, std::vector<u8>& inline_output) {
+NvResult nvmap::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
+ std::span<u8> inline_output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
@@ -69,7 +69,7 @@ NvResult nvmap::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
void nvmap::OnOpen(DeviceFD fd) {}
void nvmap::OnClose(DeviceFD fd) {}
-NvResult nvmap::IocCreate(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvmap::IocCreate(std::span<const u8> input, std::span<u8> output) {
IocCreateParams params;
std::memcpy(&params, input.data(), sizeof(params));
LOG_DEBUG(Service_NVDRV, "called, size=0x{:08X}", params.size);
@@ -89,7 +89,7 @@ NvResult nvmap::IocCreate(std::span<const u8> input, std::vector<u8>& output) {
return NvResult::Success;
}
-NvResult nvmap::IocAlloc(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvmap::IocAlloc(std::span<const u8> input, std::span<u8> output) {
IocAllocParams params;
std::memcpy(&params, input.data(), sizeof(params));
LOG_DEBUG(Service_NVDRV, "called, addr={:X}", params.address);
@@ -137,7 +137,7 @@ NvResult nvmap::IocAlloc(std::span<const u8> input, std::vector<u8>& output) {
return result;
}
-NvResult nvmap::IocGetId(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvmap::IocGetId(std::span<const u8> input, std::span<u8> output) {
IocGetIdParams params;
std::memcpy(&params, input.data(), sizeof(params));
@@ -161,7 +161,7 @@ NvResult nvmap::IocGetId(std::span<const u8> input, std::vector<u8>& output) {
return NvResult::Success;
}
-NvResult nvmap::IocFromId(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvmap::IocFromId(std::span<const u8> input, std::span<u8> output) {
IocFromIdParams params;
std::memcpy(&params, input.data(), sizeof(params));
@@ -192,7 +192,7 @@ NvResult nvmap::IocFromId(std::span<const u8> input, std::vector<u8>& output) {
return NvResult::Success;
}
-NvResult nvmap::IocParam(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvmap::IocParam(std::span<const u8> input, std::span<u8> output) {
enum class ParamTypes { Size = 1, Alignment = 2, Base = 3, Heap = 4, Kind = 5, Compr = 6 };
IocParamParams params;
@@ -241,7 +241,7 @@ NvResult nvmap::IocParam(std::span<const u8> input, std::vector<u8>& output) {
return NvResult::Success;
}
-NvResult nvmap::IocFree(std::span<const u8> input, std::vector<u8>& output) {
+NvResult nvmap::IocFree(std::span<const u8> input, std::span<u8> output) {
IocFreeParams params;
std::memcpy(&params, input.data(), sizeof(params));
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.h b/src/core/hle/service/nvdrv/devices/nvmap.h
index 82bd3b118..40c65b430 100644
--- a/src/core/hle/service/nvdrv/devices/nvmap.h
+++ b/src/core/hle/service/nvdrv/devices/nvmap.h
@@ -27,11 +27,11 @@ public:
nvmap& operator=(const nvmap&) = delete;
NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::vector<u8>& output) override;
+ std::span<u8> output) override;
NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::span<const u8> inline_input, std::vector<u8>& output) override;
- NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output,
- std::vector<u8>& inline_output) override;
+ std::span<const u8> inline_input, std::span<u8> output) override;
+ NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
+ std::span<u8> inline_output) override;
void OnOpen(DeviceFD fd) override;
void OnClose(DeviceFD fd) override;
@@ -106,12 +106,12 @@ private:
};
static_assert(sizeof(IocGetIdParams) == 8, "IocGetIdParams has wrong size");
- NvResult IocCreate(std::span<const u8> input, std::vector<u8>& output);
- NvResult IocAlloc(std::span<const u8> input, std::vector<u8>& output);
- NvResult IocGetId(std::span<const u8> input, std::vector<u8>& output);
- NvResult IocFromId(std::span<const u8> input, std::vector<u8>& output);
- NvResult IocParam(std::span<const u8> input, std::vector<u8>& output);
- NvResult IocFree(std::span<const u8> input, std::vector<u8>& output);
+ NvResult IocCreate(std::span<const u8> input, std::span<u8> output);
+ NvResult IocAlloc(std::span<const u8> input, std::span<u8> output);
+ NvResult IocGetId(std::span<const u8> input, std::span<u8> output);
+ NvResult IocFromId(std::span<const u8> input, std::span<u8> output);
+ NvResult IocParam(std::span<const u8> input, std::span<u8> output);
+ NvResult IocFree(std::span<const u8> input, std::span<u8> output);
NvCore::Container& container;
NvCore::NvMap& file;
diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp
index 3d774eec4..9e46ee8dd 100644
--- a/src/core/hle/service/nvdrv/nvdrv.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv.cpp
@@ -130,7 +130,7 @@ DeviceFD Module::Open(const std::string& device_name) {
}
NvResult Module::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::vector<u8>& output) {
+ std::span<u8> output) {
if (fd < 0) {
LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd);
return NvResult::InvalidState;
@@ -147,7 +147,7 @@ NvResult Module::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
}
NvResult Module::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::span<const u8> inline_input, std::vector<u8>& output) {
+ std::span<const u8> inline_input, std::span<u8> output) {
if (fd < 0) {
LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd);
return NvResult::InvalidState;
@@ -163,8 +163,8 @@ NvResult Module::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
return itr->second->Ioctl2(fd, command, input, inline_input, output);
}
-NvResult Module::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::vector<u8>& output, std::vector<u8>& inline_output) {
+NvResult Module::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
+ std::span<u8> inline_output) {
if (fd < 0) {
LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd);
return NvResult::InvalidState;
diff --git a/src/core/hle/service/nvdrv/nvdrv.h b/src/core/hle/service/nvdrv/nvdrv.h
index 668be742b..d8622b3ca 100644
--- a/src/core/hle/service/nvdrv/nvdrv.h
+++ b/src/core/hle/service/nvdrv/nvdrv.h
@@ -80,13 +80,13 @@ public:
DeviceFD Open(const std::string& device_name);
/// Sends an ioctl command to the specified file descriptor.
- NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output);
+ NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output);
NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
- std::span<const u8> inline_input, std::vector<u8>& output);
+ std::span<const u8> inline_input, std::span<u8> output);
- NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output,
- std::vector<u8>& inline_output);
+ NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
+ std::span<u8> inline_output);
/// Closes a device file descriptor and returns operation success.
NvResult Close(DeviceFD fd);
diff --git a/src/core/hle/service/nvdrv/nvdrv_interface.cpp b/src/core/hle/service/nvdrv/nvdrv_interface.cpp
index d010a1e03..348207e25 100644
--- a/src/core/hle/service/nvdrv/nvdrv_interface.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv_interface.cpp
@@ -63,12 +63,12 @@ void NVDRV::Ioctl1(HLERequestContext& ctx) {
}
// Check device
- std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0));
+ tmp_output.resize_destructive(ctx.GetWriteBufferSize(0));
const auto input_buffer = ctx.ReadBuffer(0);
- const auto nv_result = nvdrv->Ioctl1(fd, command, input_buffer, output_buffer);
+ const auto nv_result = nvdrv->Ioctl1(fd, command, input_buffer, tmp_output);
if (command.is_out != 0) {
- ctx.WriteBuffer(output_buffer);
+ ctx.WriteBuffer(tmp_output);
}
IPC::ResponseBuilder rb{ctx, 3};
@@ -90,12 +90,12 @@ void NVDRV::Ioctl2(HLERequestContext& ctx) {
const auto input_buffer = ctx.ReadBuffer(0);
const auto input_inlined_buffer = ctx.ReadBuffer(1);
- std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0));
+ tmp_output.resize_destructive(ctx.GetWriteBufferSize(0));
const auto nv_result =
- nvdrv->Ioctl2(fd, command, input_buffer, input_inlined_buffer, output_buffer);
+ nvdrv->Ioctl2(fd, command, input_buffer, input_inlined_buffer, tmp_output);
if (command.is_out != 0) {
- ctx.WriteBuffer(output_buffer);
+ ctx.WriteBuffer(tmp_output);
}
IPC::ResponseBuilder rb{ctx, 3};
@@ -116,14 +116,12 @@ void NVDRV::Ioctl3(HLERequestContext& ctx) {
}
const auto input_buffer = ctx.ReadBuffer(0);
- std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0));
- std::vector<u8> output_buffer_inline(ctx.GetWriteBufferSize(1));
-
- const auto nv_result =
- nvdrv->Ioctl3(fd, command, input_buffer, output_buffer, output_buffer_inline);
+ tmp_output.resize_destructive(ctx.GetWriteBufferSize(0));
+ tmp_output_inline.resize_destructive(ctx.GetWriteBufferSize(1));
+ const auto nv_result = nvdrv->Ioctl3(fd, command, input_buffer, tmp_output, tmp_output_inline);
if (command.is_out != 0) {
- ctx.WriteBuffer(output_buffer, 0);
- ctx.WriteBuffer(output_buffer_inline, 1);
+ ctx.WriteBuffer(tmp_output, 0);
+ ctx.WriteBuffer(tmp_output_inline, 1);
}
IPC::ResponseBuilder rb{ctx, 3};
diff --git a/src/core/hle/service/nvdrv/nvdrv_interface.h b/src/core/hle/service/nvdrv/nvdrv_interface.h
index 881ea1a6b..4b593ff90 100644
--- a/src/core/hle/service/nvdrv/nvdrv_interface.h
+++ b/src/core/hle/service/nvdrv/nvdrv_interface.h
@@ -4,6 +4,7 @@
#pragma once
#include <memory>
+#include "common/scratch_buffer.h"
#include "core/hle/service/nvdrv/nvdrv.h"
#include "core/hle/service/service.h"
@@ -33,6 +34,8 @@ private:
u64 pid{};
bool is_initialized{};
+ Common::ScratchBuffer<u8> tmp_output;
+ Common::ScratchBuffer<u8> tmp_output_inline;
};
} // namespace Service::Nvidia
diff --git a/src/core/hle/service/nvnflinger/nvnflinger.cpp b/src/core/hle/service/nvnflinger/nvnflinger.cpp
index da2d5890f..b41c6240c 100644
--- a/src/core/hle/service/nvnflinger/nvnflinger.cpp
+++ b/src/core/hle/service/nvnflinger/nvnflinger.cpp
@@ -70,7 +70,8 @@ Nvnflinger::Nvnflinger(Core::System& system_, HosBinderDriverServer& hos_binder_
[this](std::uintptr_t, s64 time,
std::chrono::nanoseconds ns_late) -> std::optional<std::chrono::nanoseconds> {
vsync_signal.store(true);
- vsync_signal.notify_all();
+ { const auto lock_guard = Lock(); }
+ vsync_signal.notify_one();
return std::chrono::nanoseconds(GetNextTicks());
});
diff --git a/src/core/hle/service/nvnflinger/parcel.h b/src/core/hle/service/nvnflinger/parcel.h
index fb56d75d7..23ba315a0 100644
--- a/src/core/hle/service/nvnflinger/parcel.h
+++ b/src/core/hle/service/nvnflinger/parcel.h
@@ -6,6 +6,7 @@
#include <memory>
#include <span>
#include <vector>
+#include <boost/container/small_vector.hpp>
#include "common/alignment.h"
#include "common/assert.h"
@@ -167,7 +168,7 @@ public:
private:
template <typename T>
requires(std::is_trivially_copyable_v<T>)
- void WriteImpl(const T& val, std::vector<u8>& buffer) {
+ void WriteImpl(const T& val, boost::container::small_vector<u8, 0x200>& buffer) {
const size_t aligned_size = Common::AlignUp(sizeof(T), 4);
const size_t old_size = buffer.size();
buffer.resize(old_size + aligned_size);
@@ -176,8 +177,8 @@ private:
}
private:
- std::vector<u8> m_data_buffer;
- std::vector<u8> m_object_buffer;
+ boost::container::small_vector<u8, 0x200> m_data_buffer;
+ boost::container::small_vector<u8, 0x200> m_object_buffer;
};
} // namespace Service::android
diff --git a/src/core/hle/service/time/clock_types.h b/src/core/hle/service/time/clock_types.h
index e6293ffb9..9fc01ea90 100644
--- a/src/core/hle/service/time/clock_types.h
+++ b/src/core/hle/service/time/clock_types.h
@@ -3,6 +3,8 @@
#pragma once
+#include <ratio>
+
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/uuid.h"
@@ -74,18 +76,19 @@ static_assert(std::is_trivially_copyable_v<ContinuousAdjustmentTimePoint>,
/// https://switchbrew.org/wiki/Glue_services#TimeSpanType
struct TimeSpanType {
s64 nanoseconds{};
- static constexpr s64 ns_per_second{1000000000ULL};
s64 ToSeconds() const {
- return nanoseconds / ns_per_second;
+ return nanoseconds / std::nano::den;
}
static TimeSpanType FromSeconds(s64 seconds) {
- return {seconds * ns_per_second};
+ return {seconds * std::nano::den};
}
- static TimeSpanType FromTicks(u64 ticks, u64 frequency) {
- return FromSeconds(static_cast<s64>(ticks) / static_cast<s64>(frequency));
+ template <u64 Frequency>
+ static TimeSpanType FromTicks(u64 ticks) {
+ using TicksToNSRatio = std::ratio<std::nano::den, Frequency>;
+ return {static_cast<s64>(ticks * TicksToNSRatio::num / TicksToNSRatio::den)};
}
};
static_assert(sizeof(TimeSpanType) == 8, "TimeSpanType is incorrect size");
diff --git a/src/core/hle/service/time/standard_steady_clock_core.cpp b/src/core/hle/service/time/standard_steady_clock_core.cpp
index 3dbbb9850..5627b7003 100644
--- a/src/core/hle/service/time/standard_steady_clock_core.cpp
+++ b/src/core/hle/service/time/standard_steady_clock_core.cpp
@@ -10,7 +10,7 @@ namespace Service::Time::Clock {
TimeSpanType StandardSteadyClockCore::GetCurrentRawTimePoint(Core::System& system) {
const TimeSpanType ticks_time_span{
- TimeSpanType::FromTicks(system.CoreTiming().GetClockTicks(), Core::Hardware::CNTFREQ)};
+ TimeSpanType::FromTicks<Core::Hardware::CNTFREQ>(system.CoreTiming().GetClockTicks())};
TimeSpanType raw_time_point{setup_value.nanoseconds + ticks_time_span.nanoseconds};
if (raw_time_point.nanoseconds < cached_raw_time_point.nanoseconds) {
diff --git a/src/core/hle/service/time/tick_based_steady_clock_core.cpp b/src/core/hle/service/time/tick_based_steady_clock_core.cpp
index 27600413e..0d9fb3143 100644
--- a/src/core/hle/service/time/tick_based_steady_clock_core.cpp
+++ b/src/core/hle/service/time/tick_based_steady_clock_core.cpp
@@ -10,7 +10,7 @@ namespace Service::Time::Clock {
SteadyClockTimePoint TickBasedSteadyClockCore::GetTimePoint(Core::System& system) {
const TimeSpanType ticks_time_span{
- TimeSpanType::FromTicks(system.CoreTiming().GetClockTicks(), Core::Hardware::CNTFREQ)};
+ TimeSpanType::FromTicks<Core::Hardware::CNTFREQ>(system.CoreTiming().GetClockTicks())};
return {ticks_time_span.ToSeconds(), GetClockSourceId()};
}
diff --git a/src/core/hle/service/time/time.cpp b/src/core/hle/service/time/time.cpp
index 868be60c5..7197ca30f 100644
--- a/src/core/hle/service/time/time.cpp
+++ b/src/core/hle/service/time/time.cpp
@@ -240,8 +240,8 @@ void Module::Interface::CalculateMonotonicSystemClockBaseTimePoint(HLERequestCon
const auto current_time_point{steady_clock_core.GetCurrentTimePoint(system)};
if (current_time_point.clock_source_id == context.steady_time_point.clock_source_id) {
- const auto ticks{Clock::TimeSpanType::FromTicks(system.CoreTiming().GetClockTicks(),
- Core::Hardware::CNTFREQ)};
+ const auto ticks{Clock::TimeSpanType::FromTicks<Core::Hardware::CNTFREQ>(
+ system.CoreTiming().GetClockTicks())};
const s64 base_time_point{context.offset + current_time_point.time_point -
ticks.ToSeconds()};
IPC::ResponseBuilder rb{ctx, (sizeof(s64) / 4) + 2};
diff --git a/src/core/hle/service/time/time_sharedmemory.cpp b/src/core/hle/service/time/time_sharedmemory.cpp
index ce1c85bcc..a00676669 100644
--- a/src/core/hle/service/time/time_sharedmemory.cpp
+++ b/src/core/hle/service/time/time_sharedmemory.cpp
@@ -21,8 +21,9 @@ SharedMemory::~SharedMemory() = default;
void SharedMemory::SetupStandardSteadyClock(const Common::UUID& clock_source_id,
Clock::TimeSpanType current_time_point) {
- const Clock::TimeSpanType ticks_time_span{Clock::TimeSpanType::FromTicks(
- system.CoreTiming().GetClockTicks(), Core::Hardware::CNTFREQ)};
+ const Clock::TimeSpanType ticks_time_span{
+ Clock::TimeSpanType::FromTicks<Core::Hardware::CNTFREQ>(
+ system.CoreTiming().GetClockTicks())};
const Clock::SteadyClockContext context{
static_cast<u64>(current_time_point.nanoseconds - ticks_time_span.nanoseconds),
clock_source_id};
diff --git a/src/core/hle/service/time/time_zone_manager.cpp b/src/core/hle/service/time/time_zone_manager.cpp
index e1728c06d..205371a26 100644
--- a/src/core/hle/service/time/time_zone_manager.cpp
+++ b/src/core/hle/service/time/time_zone_manager.cpp
@@ -849,8 +849,9 @@ static Result CreateCalendarTime(s64 time, int gmt_offset, CalendarTimeInternal&
static Result ToCalendarTimeInternal(const TimeZoneRule& rules, s64 time,
CalendarTimeInternal& calendar_time,
CalendarAdditionalInfo& calendar_additional_info) {
- if ((rules.go_ahead && time < rules.ats[0]) ||
- (rules.go_back && time > rules.ats[rules.time_count - 1])) {
+ ASSERT(rules.go_ahead ? rules.time_count > 0 : true);
+ if ((rules.go_back && time < rules.ats[0]) ||
+ (rules.go_ahead && time > rules.ats[rules.time_count - 1])) {
s64 seconds{};
if (time < rules.ats[0]) {
seconds = rules.ats[0] - time;
@@ -910,9 +911,13 @@ static Result ToCalendarTimeInternal(const TimeZoneRule& rules, s64 time,
calendar_additional_info.is_dst = rules.ttis[tti_index].is_dst;
const char* time_zone{&rules.chars[rules.ttis[tti_index].abbreviation_list_index]};
- for (int index{}; time_zone[index] != '\0'; ++index) {
+ u32 index;
+ for (index = 0; time_zone[index] != '\0' && time_zone[index] != ',' &&
+ index < calendar_additional_info.timezone_name.size() - 1;
+ ++index) {
calendar_additional_info.timezone_name[index] = time_zone[index];
}
+ calendar_additional_info.timezone_name[index] = '\0';
return ResultSuccess;
}
diff --git a/src/core/hle/service/time/time_zone_service.cpp b/src/core/hle/service/time/time_zone_service.cpp
index e8273e152..8171c82a5 100644
--- a/src/core/hle/service/time/time_zone_service.cpp
+++ b/src/core/hle/service/time/time_zone_service.cpp
@@ -112,20 +112,14 @@ void ITimeZoneService::LoadTimeZoneRule(HLERequestContext& ctx) {
LOG_DEBUG(Service_Time, "called, location_name={}", location_name);
TimeZone::TimeZoneRule time_zone_rule{};
- if (const Result result{
- time_zone_content_manager.LoadTimeZoneRule(time_zone_rule, location_name)};
- result != ResultSuccess) {
- IPC::ResponseBuilder rb{ctx, 2};
- rb.Push(result);
- return;
- }
+ const Result result{time_zone_content_manager.LoadTimeZoneRule(time_zone_rule, location_name)};
std::vector<u8> time_zone_rule_outbuffer(sizeof(TimeZone::TimeZoneRule));
std::memcpy(time_zone_rule_outbuffer.data(), &time_zone_rule, sizeof(TimeZone::TimeZoneRule));
ctx.WriteBuffer(time_zone_rule_outbuffer);
IPC::ResponseBuilder rb{ctx, 2};
- rb.Push(ResultSuccess);
+ rb.Push(result);
}
void ITimeZoneService::ToCalendarTime(HLERequestContext& ctx) {
diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
index c3c2281bb..9ff4028c2 100644
--- a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
+++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
@@ -479,7 +479,7 @@ void EmitContext::DefineGenericOutput(size_t index, u32 invocations) {
const u32 remainder{4 - element};
const TransformFeedbackVarying* xfb_varying{};
const size_t xfb_varying_index{base_index + element};
- if (xfb_varying_index < runtime_info.xfb_varyings.size()) {
+ if (xfb_varying_index < runtime_info.xfb_count) {
xfb_varying = &runtime_info.xfb_varyings[xfb_varying_index];
xfb_varying = xfb_varying->components > 0 ? xfb_varying : nullptr;
}
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 0f86a8004..34592a01f 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -387,7 +387,7 @@ void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& progr
}
void SetupTransformFeedbackCapabilities(EmitContext& ctx, Id main_func) {
- if (ctx.runtime_info.xfb_varyings.empty()) {
+ if (ctx.runtime_info.xfb_count == 0) {
return;
}
ctx.AddCapability(spv::Capability::TransformFeedback);
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index fd15f47ea..bec5db173 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -160,7 +160,7 @@ void DefineGenericOutput(EmitContext& ctx, size_t index, std::optional<u32> invo
const u32 remainder{4 - element};
const TransformFeedbackVarying* xfb_varying{};
const size_t xfb_varying_index{base_attr_index + element};
- if (xfb_varying_index < ctx.runtime_info.xfb_varyings.size()) {
+ if (xfb_varying_index < ctx.runtime_info.xfb_count) {
xfb_varying = &ctx.runtime_info.xfb_varyings[xfb_varying_index];
xfb_varying = xfb_varying->components > 0 ? xfb_varying : nullptr;
}
diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h
index 3b63c249f..619c0b138 100644
--- a/src/shader_recompiler/runtime_info.h
+++ b/src/shader_recompiler/runtime_info.h
@@ -84,7 +84,8 @@ struct RuntimeInfo {
bool glasm_use_storage_buffers{};
/// Transform feedback state for each varying
- std::vector<TransformFeedbackVarying> xfb_varyings;
+ std::array<TransformFeedbackVarying, 256> xfb_varyings{};
+ u32 xfb_count{0};
};
} // namespace Shader
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 9bafd8cc0..58a45ab67 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -207,7 +207,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
if (has_new_downloads) {
memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount);
}
- tmp_buffer.resize(amount);
+ tmp_buffer.resize_destructive(amount);
cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount);
cpu_memory.WriteBlockUnsafe(*cpu_dest_address, tmp_buffer.data(), amount);
return true;
@@ -719,9 +719,15 @@ void BufferCache<P>::BindHostVertexBuffers() {
bool any_valid{false};
auto& flags = maxwell3d->dirty.flags;
for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
+ const Binding& binding = channel_state->vertex_buffers[index];
+ Buffer& buffer = slot_buffers[binding.buffer_id];
+ TouchBuffer(buffer, binding.buffer_id);
+ SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
if (!flags[Dirty::VertexBuffer0 + index]) {
continue;
}
+ flags[Dirty::VertexBuffer0 + index] = false;
+
host_bindings.min_index = std::min(host_bindings.min_index, index);
host_bindings.max_index = std::max(host_bindings.max_index, index);
any_valid = true;
@@ -735,9 +741,6 @@ void BufferCache<P>::BindHostVertexBuffers() {
const Binding& binding = channel_state->vertex_buffers[index];
Buffer& buffer = slot_buffers[binding.buffer_id];
- TouchBuffer(buffer, binding.buffer_id);
- SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
-
const u32 stride = maxwell3d->regs.vertex_streams[index].stride;
const u32 offset = buffer.Offset(binding.cpu_addr);
@@ -1276,7 +1279,7 @@ template <class P>
typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu_addr,
u32 wanted_size) {
static constexpr int STREAM_LEAP_THRESHOLD = 16;
- std::vector<BufferId> overlap_ids;
+ boost::container::small_vector<BufferId, 16> overlap_ids;
VAddr begin = cpu_addr;
VAddr end = cpu_addr + wanted_size;
int stream_score = 0;
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index 63a120f7a..fe6068cfe 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -229,7 +229,7 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf
using OverlapCounter = boost::icl::split_interval_map<VAddr, int>;
struct OverlapResult {
- std::vector<BufferId> ids;
+ boost::container::small_vector<BufferId, 16> ids;
VAddr begin;
VAddr end;
bool has_stream_leap = false;
@@ -582,7 +582,7 @@ private:
BufferId inline_buffer_id;
std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table;
- std::vector<u8> tmp_buffer;
+ Common::ScratchBuffer<u8> tmp_buffer;
};
} // namespace VideoCommon
diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h
index 83112dfce..7d660af47 100644
--- a/src/video_core/cdma_pusher.h
+++ b/src/video_core/cdma_pusher.h
@@ -63,7 +63,6 @@ struct ChCommand {
};
using ChCommandHeaderList = std::vector<ChCommandHeader>;
-using ChCommandList = std::vector<ChCommand>;
struct ThiRegisters {
u32_le increment_syncpt{};
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index 1cdb690ed..8a2784cdc 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -6,6 +6,7 @@
#include <array>
#include <span>
#include <vector>
+#include <boost/container/small_vector.hpp>
#include <queue>
#include "common/bit_field.h"
@@ -102,11 +103,12 @@ inline CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count, Sub
struct CommandList final {
CommandList() = default;
explicit CommandList(std::size_t size) : command_lists(size) {}
- explicit CommandList(std::vector<CommandHeader>&& prefetch_command_list_)
+ explicit CommandList(
+ boost::container::small_vector<CommandHeader, 512>&& prefetch_command_list_)
: prefetch_command_list{std::move(prefetch_command_list_)} {}
- std::vector<CommandListHeader> command_lists;
- std::vector<CommandHeader> prefetch_command_list;
+ boost::container::small_vector<CommandListHeader, 512> command_lists;
+ boost::container::small_vector<CommandHeader, 512> prefetch_command_list;
};
/**
diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp
index 0e94c521a..f34090791 100644
--- a/src/video_core/engines/draw_manager.cpp
+++ b/src/video_core/engines/draw_manager.cpp
@@ -1,6 +1,7 @@
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
+#include "common/settings.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/draw_manager.h"
#include "video_core/rasterizer_interface.h"
@@ -195,8 +196,12 @@ void DrawManager::DrawTexture() {
if (lower_left) {
draw_texture_state.dst_y0 -= dst_height;
}
- draw_texture_state.dst_x1 = draw_texture_state.dst_x0 + dst_width;
- draw_texture_state.dst_y1 = draw_texture_state.dst_y0 + dst_height;
+ draw_texture_state.dst_x1 =
+ draw_texture_state.dst_x0 +
+ static_cast<f32>(Settings::values.resolution_info.ScaleUp(static_cast<u32>(dst_width)));
+ draw_texture_state.dst_y1 =
+ draw_texture_state.dst_y0 +
+ static_cast<f32>(Settings::values.resolution_info.ScaleUp(static_cast<u32>(dst_height)));
draw_texture_state.src_x0 = static_cast<float>(regs.draw_texture.src_x0) / 4096.f;
draw_texture_state.src_y0 = static_cast<float>(regs.draw_texture.src_y0) / 4096.f;
draw_texture_state.src_x1 =
@@ -207,7 +212,6 @@ void DrawManager::DrawTexture() {
draw_texture_state.src_y0;
draw_texture_state.src_sampler = regs.draw_texture.src_sampler;
draw_texture_state.src_texture = regs.draw_texture.src_texture;
-
maxwell3d->rasterizer->DrawTexture();
}
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index ebe5536de..bc1eb41e7 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -108,9 +108,11 @@ void MaxwellDMA::Launch() {
if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) {
ASSERT(regs.remap_const.component_size_minus_one == 3);
accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value);
- std::vector<u32> tmp_buffer(regs.line_length_in, regs.remap_consta_value);
+ read_buffer.resize_destructive(regs.line_length_in * sizeof(u32));
+ std::span<u32> span(reinterpret_cast<u32*>(read_buffer.data()), regs.line_length_in);
+ std::ranges::fill(span, regs.remap_consta_value);
memory_manager.WriteBlockUnsafe(regs.offset_out,
- reinterpret_cast<u8*>(tmp_buffer.data()),
+ reinterpret_cast<u8*>(read_buffer.data()),
regs.line_length_in * sizeof(u32));
} else {
memory_manager.FlushCaching();
@@ -126,32 +128,32 @@ void MaxwellDMA::Launch() {
UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0);
UNIMPLEMENTED_IF(regs.offset_in % 16 != 0);
UNIMPLEMENTED_IF(regs.offset_out % 16 != 0);
- std::vector<u8> tmp_buffer(16);
+ read_buffer.resize_destructive(16);
for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
memory_manager.ReadBlockUnsafe(
convert_linear_2_blocklinear_addr(regs.offset_in + offset),
- tmp_buffer.data(), tmp_buffer.size());
- memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(),
- tmp_buffer.size());
+ read_buffer.data(), read_buffer.size());
+ memory_manager.WriteBlockCached(regs.offset_out + offset, read_buffer.data(),
+ read_buffer.size());
}
} else if (is_src_pitch && !is_dst_pitch) {
UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0);
UNIMPLEMENTED_IF(regs.offset_in % 16 != 0);
UNIMPLEMENTED_IF(regs.offset_out % 16 != 0);
- std::vector<u8> tmp_buffer(16);
+ read_buffer.resize_destructive(16);
for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
- memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(),
- tmp_buffer.size());
+ memory_manager.ReadBlockUnsafe(regs.offset_in + offset, read_buffer.data(),
+ read_buffer.size());
memory_manager.WriteBlockCached(
convert_linear_2_blocklinear_addr(regs.offset_out + offset),
- tmp_buffer.data(), tmp_buffer.size());
+ read_buffer.data(), read_buffer.size());
}
} else {
if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) {
- std::vector<u8> tmp_buffer(regs.line_length_in);
- memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
+ read_buffer.resize_destructive(regs.line_length_in);
+ memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(),
regs.line_length_in);
- memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(),
+ memory_manager.WriteBlockCached(regs.offset_out, read_buffer.data(),
regs.line_length_in);
}
}
@@ -171,7 +173,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
src_operand.address = regs.offset_in;
DMA::BufferOperand dst_operand;
- dst_operand.pitch = regs.pitch_out;
+ u32 abs_pitch_out = std::abs(static_cast<s32>(regs.pitch_out));
+ dst_operand.pitch = abs_pitch_out;
dst_operand.width = regs.line_length_in;
dst_operand.height = regs.line_count;
dst_operand.address = regs.offset_out;
@@ -218,7 +221,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
const size_t src_size =
CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
- const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
+ const size_t dst_size = static_cast<size_t>(abs_pitch_out) * regs.line_count;
read_buffer.resize_destructive(src_size);
write_buffer.resize_destructive(dst_size);
@@ -227,7 +230,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset,
src_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
- regs.pitch_out);
+ abs_pitch_out);
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
}
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 456f733cf..db385076d 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -193,18 +193,13 @@ struct GPU::Impl {
}
[[nodiscard]] u64 GetTicks() const {
- // This values were reversed engineered by fincs from NVN
- // The gpu clock is reported in units of 385/625 nanoseconds
- constexpr u64 gpu_ticks_num = 384;
- constexpr u64 gpu_ticks_den = 625;
+ u64 gpu_tick = system.CoreTiming().GetGPUTicks();
- u64 nanoseconds = system.CoreTiming().GetCPUTimeNs().count();
if (Settings::values.use_fast_gpu_time.GetValue()) {
- nanoseconds /= 256;
+ gpu_tick /= 256;
}
- const u64 nanoseconds_num = nanoseconds / gpu_ticks_den;
- const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den;
- return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den;
+
+ return gpu_tick;
}
[[nodiscard]] bool IsAsync() const {
diff --git a/src/video_core/host1x/codecs/h264.cpp b/src/video_core/host1x/codecs/h264.cpp
index 6ce179167..ce827eb6c 100644
--- a/src/video_core/host1x/codecs/h264.cpp
+++ b/src/video_core/host1x/codecs/h264.cpp
@@ -4,6 +4,7 @@
#include <array>
#include <bit>
+#include "common/scratch_buffer.h"
#include "common/settings.h"
#include "video_core/host1x/codecs/h264.h"
#include "video_core/host1x/host1x.h"
@@ -188,7 +189,8 @@ void H264BitWriter::WriteBit(bool state) {
}
void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) {
- std::vector<u8> scan(count);
+ static Common::ScratchBuffer<u8> scan{};
+ scan.resize_destructive(count);
if (count == 16) {
std::memcpy(scan.data(), zig_zag_scan.data(), scan.size());
} else {
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 7b2cde7a7..45141e488 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -111,7 +111,7 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp
[[maybe_unused]] const auto current_entry_type = GetEntry<false>(current_gpu_addr);
SetEntry<false>(current_gpu_addr, entry_type);
if (current_entry_type != entry_type) {
- rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, page_size);
+ rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, page_size);
}
if constexpr (entry_type == EntryType::Mapped) {
const VAddr current_cpu_addr = cpu_addr + offset;
@@ -134,7 +134,7 @@ GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr
[[maybe_unused]] const auto current_entry_type = GetEntry<true>(current_gpu_addr);
SetEntry<true>(current_gpu_addr, entry_type);
if (current_entry_type != entry_type) {
- rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, big_page_size);
+ rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, big_page_size);
}
if constexpr (entry_type == EntryType::Mapped) {
const VAddr current_cpu_addr = cpu_addr + offset;
@@ -587,7 +587,7 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size,
void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size,
VideoCommon::CacheType which) {
- std::vector<u8> tmp_buffer(size);
+ tmp_buffer.resize_destructive(size);
ReadBlock(gpu_src_addr, tmp_buffer.data(), size, which);
// The output block must be flushed in case it has data modified from the GPU.
@@ -670,9 +670,9 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons
return result;
}
-std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
- GPUVAddr gpu_addr, std::size_t size) const {
- std::vector<std::pair<GPUVAddr, std::size_t>> result{};
+boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32>
+MemoryManager::GetSubmappedRange(GPUVAddr gpu_addr, std::size_t size) const {
+ boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32> result{};
GetSubmappedRangeImpl<true>(gpu_addr, size, result);
return result;
}
@@ -680,8 +680,9 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
template <bool is_gpu_address>
void MemoryManager::GetSubmappedRangeImpl(
GPUVAddr gpu_addr, std::size_t size,
- std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>&
- result) const {
+ boost::container::small_vector<
+ std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>, 32>& result)
+ const {
std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>
last_segment{};
std::optional<VAddr> old_page_addr{};
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 794535122..4202c26ff 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -8,10 +8,12 @@
#include <mutex>
#include <optional>
#include <vector>
+#include <boost/container/small_vector.hpp>
#include "common/common_types.h"
#include "common/multi_level_page_table.h"
#include "common/range_map.h"
+#include "common/scratch_buffer.h"
#include "common/virtual_buffer.h"
#include "video_core/cache_types.h"
#include "video_core/pte_kind.h"
@@ -107,8 +109,8 @@ public:
* if the region is continuous, a single pair will be returned. If it's unmapped, an empty
* vector will be returned;
*/
- std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr,
- std::size_t size) const;
+ boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32> GetSubmappedRange(
+ GPUVAddr gpu_addr, std::size_t size) const;
GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size,
PTEKind kind = PTEKind::INVALID, bool is_big_pages = true);
@@ -165,7 +167,8 @@ private:
template <bool is_gpu_address>
void GetSubmappedRangeImpl(
GPUVAddr gpu_addr, std::size_t size,
- std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>&
+ boost::container::small_vector<
+ std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>, 32>&
result) const;
Core::System& system;
@@ -215,8 +218,8 @@ private:
Common::VirtualBuffer<u32> big_page_table_cpu;
std::vector<u64> big_page_continuous;
- std::vector<std::pair<VAddr, std::size_t>> page_stash{};
- std::vector<std::pair<VAddr, std::size_t>> page_stash2{};
+ boost::container::small_vector<std::pair<VAddr, std::size_t>, 32> page_stash{};
+ boost::container::small_vector<std::pair<VAddr, std::size_t>, 32> page_stash2{};
mutable std::mutex guard;
@@ -226,6 +229,8 @@ private:
std::unique_ptr<VideoCommon::InvalidationAccumulator> accumulator;
static std::atomic<size_t> unique_identifier_generator;
+
+ Common::ScratchBuffer<u8> tmp_buffer;
};
} // namespace Tegra
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
index 1a0cea9b7..3151c0db8 100644
--- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
@@ -87,7 +87,8 @@ void ComputePipeline::Configure() {
texture_cache.SynchronizeComputeDescriptors();
boost::container::static_vector<VideoCommon::ImageViewInOut, MAX_TEXTURES + MAX_IMAGES> views;
- std::array<GLuint, MAX_TEXTURES> samplers;
+ boost::container::static_vector<VideoCommon::SamplerId, MAX_TEXTURES> samplers;
+ std::array<GLuint, MAX_TEXTURES> gl_samplers;
std::array<GLuint, MAX_TEXTURES> textures;
std::array<GLuint, MAX_IMAGES> images;
GLsizei sampler_binding{};
@@ -131,7 +132,6 @@ void ComputePipeline::Configure() {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
views.push_back({handle.first});
- samplers[sampler_binding++] = 0;
}
}
for (const auto& desc : info.image_buffer_descriptors) {
@@ -142,8 +142,8 @@ void ComputePipeline::Configure() {
const auto handle{read_handle(desc, index)};
views.push_back({handle.first});
- Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
- samplers[sampler_binding++] = sampler->Handle();
+ VideoCommon::SamplerId sampler = texture_cache.GetComputeSamplerId(handle.second);
+ samplers.push_back(sampler);
}
}
for (const auto& desc : info.image_descriptors) {
@@ -186,10 +186,17 @@ void ComputePipeline::Configure() {
const VideoCommon::ImageViewInOut* views_it{views.data() + num_texture_buffers +
num_image_buffers};
+ const VideoCommon::SamplerId* samplers_it{samplers.data()};
texture_binding += num_texture_buffers;
image_binding += num_image_buffers;
u32 texture_scaling_mask{};
+
+ for (const auto& desc : info.texture_buffer_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ gl_samplers[sampler_binding++] = 0;
+ }
+ }
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
ImageView& image_view{texture_cache.GetImageView((views_it++)->id)};
@@ -198,6 +205,12 @@ void ComputePipeline::Configure() {
texture_scaling_mask |= 1u << texture_binding;
}
++texture_binding;
+
+ const Sampler& sampler{texture_cache.GetSampler(*(samplers_it++))};
+ const bool use_fallback_sampler{sampler.HasAddedAnisotropy() &&
+ !image_view.SupportsAnisotropy()};
+ gl_samplers[sampler_binding++] =
+ use_fallback_sampler ? sampler.HandleWithDefaultAnisotropy() : sampler.Handle();
}
}
u32 image_scaling_mask{};
@@ -228,7 +241,7 @@ void ComputePipeline::Configure() {
if (texture_binding != 0) {
ASSERT(texture_binding == sampler_binding);
glBindTextures(0, texture_binding, textures.data());
- glBindSamplers(0, sampler_binding, samplers.data());
+ glBindSamplers(0, sampler_binding, gl_samplers.data());
}
if (image_binding != 0) {
glBindImageTextures(0, image_binding, images.data());
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
index 89000d6e0..c58f760b8 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
@@ -275,9 +275,9 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c
template <typename Spec>
void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
std::array<VideoCommon::ImageViewInOut, MAX_TEXTURES + MAX_IMAGES> views;
- std::array<GLuint, MAX_TEXTURES> samplers;
+ std::array<VideoCommon::SamplerId, MAX_TEXTURES> samplers;
size_t views_index{};
- GLsizei sampler_binding{};
+ size_t samplers_index{};
texture_cache.SynchronizeGraphicsDescriptors();
@@ -337,7 +337,6 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
views[views_index++] = {handle.first};
- samplers[sampler_binding++] = 0;
}
}
}
@@ -351,8 +350,8 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
const auto handle{read_handle(desc, index)};
views[views_index++] = {handle.first};
- Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)};
- samplers[sampler_binding++] = sampler->Handle();
+ VideoCommon::SamplerId sampler{texture_cache.GetGraphicsSamplerId(handle.second)};
+ samplers[samplers_index++] = sampler;
}
}
if constexpr (Spec::has_images) {
@@ -445,10 +444,13 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
program_manager.BindSourcePrograms(source_programs);
}
const VideoCommon::ImageViewInOut* views_it{views.data()};
+ const VideoCommon::SamplerId* samplers_it{samplers.data()};
GLsizei texture_binding = 0;
GLsizei image_binding = 0;
+ GLsizei sampler_binding{};
std::array<GLuint, MAX_TEXTURES> textures;
std::array<GLuint, MAX_IMAGES> images;
+ std::array<GLuint, MAX_TEXTURES> gl_samplers;
const auto prepare_stage{[&](size_t stage) {
buffer_cache.runtime.SetImagePointers(&textures[texture_binding], &images[image_binding]);
buffer_cache.BindHostStageBuffers(stage);
@@ -465,6 +467,13 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
u32 stage_image_binding{};
const auto& info{stage_infos[stage]};
+ if constexpr (Spec::has_texture_buffers) {
+ for (const auto& desc : info.texture_buffer_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ gl_samplers[sampler_binding++] = 0;
+ }
+ }
+ }
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
ImageView& image_view{texture_cache.GetImageView((views_it++)->id)};
@@ -474,6 +483,12 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
}
++texture_binding;
++stage_texture_binding;
+
+ const Sampler& sampler{texture_cache.GetSampler(*(samplers_it++))};
+ const bool use_fallback_sampler{sampler.HasAddedAnisotropy() &&
+ !image_view.SupportsAnisotropy()};
+ gl_samplers[sampler_binding++] =
+ use_fallback_sampler ? sampler.HandleWithDefaultAnisotropy() : sampler.Handle();
}
}
for (const auto& desc : info.image_descriptors) {
@@ -534,7 +549,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
if (texture_binding != 0) {
ASSERT(texture_binding == sampler_binding);
glBindTextures(0, texture_binding, textures.data());
- glBindSamplers(0, sampler_binding, samplers.data());
+ glBindSamplers(0, sampler_binding, gl_samplers.data());
}
if (image_binding != 0) {
glBindImageTextures(0, image_binding, images.data());
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 3f077311e..0329ed820 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -85,7 +85,9 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key,
case Shader::Stage::VertexB:
case Shader::Stage::Geometry:
if (!use_assembly_shaders && key.xfb_enabled != 0) {
- info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state);
+ auto [varyings, count] = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state);
+ info.xfb_varyings = varyings;
+ info.xfb_count = count;
}
break;
case Shader::Stage::TessellationEval:
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 1c5dbcdd8..3b446be07 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -1268,36 +1268,48 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) {
UNIMPLEMENTED_IF(config.cubemap_anisotropy != 1);
- sampler.Create();
- const GLuint handle = sampler.handle;
- glSamplerParameteri(handle, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(config.wrap_u));
- glSamplerParameteri(handle, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(config.wrap_v));
- glSamplerParameteri(handle, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(config.wrap_p));
- glSamplerParameteri(handle, GL_TEXTURE_COMPARE_MODE, compare_mode);
- glSamplerParameteri(handle, GL_TEXTURE_COMPARE_FUNC, compare_func);
- glSamplerParameteri(handle, GL_TEXTURE_MAG_FILTER, mag);
- glSamplerParameteri(handle, GL_TEXTURE_MIN_FILTER, min);
- glSamplerParameterf(handle, GL_TEXTURE_LOD_BIAS, config.LodBias());
- glSamplerParameterf(handle, GL_TEXTURE_MIN_LOD, config.MinLod());
- glSamplerParameterf(handle, GL_TEXTURE_MAX_LOD, config.MaxLod());
- glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, config.BorderColor().data());
-
- if (GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic) {
- const f32 max_anisotropy = std::clamp(config.MaxAnisotropy(), 1.0f, 16.0f);
- glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropy);
- } else {
- LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_anisotropic is required");
- }
- if (GLAD_GL_ARB_texture_filter_minmax || GLAD_GL_EXT_texture_filter_minmax) {
- glSamplerParameteri(handle, GL_TEXTURE_REDUCTION_MODE_ARB, reduction_filter);
- } else if (reduction_filter != GL_WEIGHTED_AVERAGE_ARB) {
- LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_minmax is required");
- }
- if (GLAD_GL_ARB_seamless_cubemap_per_texture || GLAD_GL_AMD_seamless_cubemap_per_texture) {
- glSamplerParameteri(handle, GL_TEXTURE_CUBE_MAP_SEAMLESS, seamless);
- } else if (seamless == GL_FALSE) {
- // We default to false because it's more common
- LOG_WARNING(Render_OpenGL, "GL_ARB_seamless_cubemap_per_texture is required");
+ const f32 max_anisotropy = std::clamp(config.MaxAnisotropy(), 1.0f, 16.0f);
+
+ const auto create_sampler = [&](const f32 anisotropy) {
+ OGLSampler new_sampler;
+ new_sampler.Create();
+ const GLuint handle = new_sampler.handle;
+ glSamplerParameteri(handle, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(config.wrap_u));
+ glSamplerParameteri(handle, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(config.wrap_v));
+ glSamplerParameteri(handle, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(config.wrap_p));
+ glSamplerParameteri(handle, GL_TEXTURE_COMPARE_MODE, compare_mode);
+ glSamplerParameteri(handle, GL_TEXTURE_COMPARE_FUNC, compare_func);
+ glSamplerParameteri(handle, GL_TEXTURE_MAG_FILTER, mag);
+ glSamplerParameteri(handle, GL_TEXTURE_MIN_FILTER, min);
+ glSamplerParameterf(handle, GL_TEXTURE_LOD_BIAS, config.LodBias());
+ glSamplerParameterf(handle, GL_TEXTURE_MIN_LOD, config.MinLod());
+ glSamplerParameterf(handle, GL_TEXTURE_MAX_LOD, config.MaxLod());
+ glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, config.BorderColor().data());
+
+ if (GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic) {
+ glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, anisotropy);
+ } else {
+ LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_anisotropic is required");
+ }
+ if (GLAD_GL_ARB_texture_filter_minmax || GLAD_GL_EXT_texture_filter_minmax) {
+ glSamplerParameteri(handle, GL_TEXTURE_REDUCTION_MODE_ARB, reduction_filter);
+ } else if (reduction_filter != GL_WEIGHTED_AVERAGE_ARB) {
+ LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_minmax is required");
+ }
+ if (GLAD_GL_ARB_seamless_cubemap_per_texture || GLAD_GL_AMD_seamless_cubemap_per_texture) {
+ glSamplerParameteri(handle, GL_TEXTURE_CUBE_MAP_SEAMLESS, seamless);
+ } else if (seamless == GL_FALSE) {
+ // We default to false because it's more common
+ LOG_WARNING(Render_OpenGL, "GL_ARB_seamless_cubemap_per_texture is required");
+ }
+ return new_sampler;
+ };
+
+ sampler = create_sampler(max_anisotropy);
+
+ const f32 max_anisotropy_default = static_cast<f32>(1U << config.max_anisotropy);
+ if (max_anisotropy > max_anisotropy_default) {
+ sampler_default_anisotropy = create_sampler(max_anisotropy_default);
}
}
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 1148b73d7..3676eaaa9 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -309,12 +309,21 @@ class Sampler {
public:
explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&);
- GLuint Handle() const noexcept {
+ [[nodiscard]] GLuint Handle() const noexcept {
return sampler.handle;
}
+ [[nodiscard]] GLuint HandleWithDefaultAnisotropy() const noexcept {
+ return sampler_default_anisotropy.handle;
+ }
+
+ [[nodiscard]] bool HasAddedAnisotropy() const noexcept {
+ return static_cast<bool>(sampler_default_anisotropy.handle);
+ }
+
private:
OGLSampler sampler;
+ OGLSampler sampler_default_anisotropy;
};
class Framebuffer {
diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h
index 983e1c2e1..71c783709 100644
--- a/src/video_core/renderer_vulkan/pipeline_helper.h
+++ b/src/video_core/renderer_vulkan/pipeline_helper.h
@@ -178,7 +178,7 @@ public:
inline void PushImageDescriptors(TextureCache& texture_cache,
GuestDescriptorQueue& guest_descriptor_queue,
const Shader::Info& info, RescalingPushConstant& rescaling,
- const VkSampler*& samplers,
+ const VideoCommon::SamplerId*& samplers,
const VideoCommon::ImageViewInOut*& views) {
const u32 num_texture_buffers = Shader::NumDescriptors(info.texture_buffer_descriptors);
const u32 num_image_buffers = Shader::NumDescriptors(info.image_buffer_descriptors);
@@ -187,10 +187,15 @@ inline void PushImageDescriptors(TextureCache& texture_cache,
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
const VideoCommon::ImageViewId image_view_id{(views++)->id};
- const VkSampler sampler{*(samplers++)};
+ const VideoCommon::SamplerId sampler_id{*(samplers++)};
ImageView& image_view{texture_cache.GetImageView(image_view_id)};
const VkImageView vk_image_view{image_view.Handle(desc.type)};
- guest_descriptor_queue.AddSampledImage(vk_image_view, sampler);
+ const Sampler& sampler{texture_cache.GetSampler(sampler_id)};
+ const bool use_fallback_sampler{sampler.HasAddedAnisotropy() &&
+ !image_view.SupportsAnisotropy()};
+ const VkSampler vk_sampler{use_fallback_sampler ? sampler.HandleWithDefaultAnisotropy()
+ : sampler.Handle()};
+ guest_descriptor_queue.AddSampledImage(vk_image_view, vk_sampler);
rescaling.PushTexture(texture_cache.IsRescaling(image_view));
}
}
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 8c33722d3..f47301ad5 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -361,7 +361,7 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
};
// Measuring a popular game, this number never exceeds the specified size once data is warmed up
- boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size());
+ boost::container::small_vector<VkBufferCopy, 8> vk_copies(copies.size());
std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) {
@@ -516,15 +516,15 @@ void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bi
buffer_handles.push_back(handle);
}
if (device.IsExtExtendedDynamicStateSupported()) {
- scheduler.Record([bindings = bindings,
- buffer_handles = buffer_handles](vk::CommandBuffer cmdbuf) {
+ scheduler.Record([bindings = std::move(bindings),
+ buffer_handles = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) {
cmdbuf.BindVertexBuffers2EXT(
bindings.min_index, bindings.max_index - bindings.min_index, buffer_handles.data(),
bindings.offsets.data(), bindings.sizes.data(), bindings.strides.data());
});
} else {
- scheduler.Record([bindings = bindings,
- buffer_handles = buffer_handles](vk::CommandBuffer cmdbuf) {
+ scheduler.Record([bindings = std::move(bindings),
+ buffer_handles = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) {
cmdbuf.BindVertexBuffers(bindings.min_index, bindings.max_index - bindings.min_index,
buffer_handles.data(), bindings.offsets.data());
});
@@ -561,12 +561,12 @@ void BufferCacheRuntime::BindTransformFeedbackBuffers(VideoCommon::HostBindings<
for (u32 index = 0; index < bindings.buffers.size(); ++index) {
buffer_handles.push_back(bindings.buffers[index]->Handle());
}
- scheduler.Record(
- [bindings = bindings, buffer_handles = buffer_handles](vk::CommandBuffer cmdbuf) {
- cmdbuf.BindTransformFeedbackBuffersEXT(0, static_cast<u32>(buffer_handles.size()),
- buffer_handles.data(), bindings.offsets.data(),
- bindings.sizes.data());
- });
+ scheduler.Record([bindings = std::move(bindings),
+ buffer_handles = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) {
+ cmdbuf.BindTransformFeedbackBuffersEXT(0, static_cast<u32>(buffer_handles.size()),
+ buffer_handles.data(), bindings.offsets.data(),
+ bindings.sizes.data());
+ });
}
void BufferCacheRuntime::ReserveNullBuffer() {
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 733e70d9d..73e585c2b 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -115,7 +115,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
static constexpr size_t max_elements = 64;
boost::container::static_vector<VideoCommon::ImageViewInOut, max_elements> views;
- boost::container::static_vector<VkSampler, max_elements> samplers;
+ boost::container::static_vector<VideoCommon::SamplerId, max_elements> samplers;
const auto& qmd{kepler_compute.launch_description};
const auto& cbufs{qmd.const_buffer_config};
@@ -160,8 +160,8 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
const auto handle{read_handle(desc, index)};
views.push_back({handle.first});
- Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
- samplers.push_back(sampler->Handle());
+ VideoCommon::SamplerId sampler = texture_cache.GetComputeSamplerId(handle.second);
+ samplers.push_back(sampler);
}
}
for (const auto& desc : info.image_descriptors) {
@@ -192,7 +192,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
buffer_cache.BindHostComputeBuffers();
RescalingPushConstant rescaling;
- const VkSampler* samplers_it{samplers.data()};
+ const VideoCommon::SamplerId* samplers_it{samplers.data()};
const VideoCommon::ImageViewInOut* views_it{views.data()};
PushImageDescriptors(texture_cache, guest_descriptor_queue, info, rescaling, samplers_it,
views_it);
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 506b78f08..c1595642e 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -298,7 +298,7 @@ void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) {
template <typename Spec>
void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
std::array<VideoCommon::ImageViewInOut, MAX_IMAGE_ELEMENTS> views;
- std::array<VkSampler, MAX_IMAGE_ELEMENTS> samplers;
+ std::array<VideoCommon::SamplerId, MAX_IMAGE_ELEMENTS> samplers;
size_t sampler_index{};
size_t view_index{};
@@ -367,8 +367,8 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
const auto handle{read_handle(desc, index)};
views[view_index++] = {handle.first};
- Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)};
- samplers[sampler_index++] = sampler->Handle();
+ VideoCommon::SamplerId sampler{texture_cache.GetGraphicsSamplerId(handle.second)};
+ samplers[sampler_index++] = sampler;
}
}
if constexpr (Spec::has_images) {
@@ -453,7 +453,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
RescalingPushConstant rescaling;
RenderAreaPushConstant render_area;
- const VkSampler* samplers_it{samplers.data()};
+ const VideoCommon::SamplerId* samplers_it{samplers.data()};
const VideoCommon::ImageViewInOut* views_it{views.data()};
const auto prepare_stage{[&](size_t stage) LAMBDA_FORCEINLINE {
buffer_cache.BindHostStageBuffers(stage);
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
index b128c4f6e..5eeda08d2 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
@@ -3,6 +3,7 @@
#include <thread>
+#include "common/polyfill_ranges.h"
#include "common/settings.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/vulkan_common/vulkan_device.h"
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 18e040a1b..9f316113c 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -167,7 +167,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program
info.fixed_state_point_size = point_size;
}
if (key.state.xfb_enabled) {
- info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
+ auto [varyings, count] =
+ VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
+ info.xfb_varyings = varyings;
+ info.xfb_count = count;
}
info.convert_depth_mode = gl_ndc;
}
@@ -214,7 +217,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program
info.fixed_state_point_size = point_size;
}
if (key.state.xfb_enabled != 0) {
- info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
+ auto [varyings, count] =
+ VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
+ info.xfb_varyings = varyings;
+ info.xfb_count = count;
}
info.convert_depth_mode = gl_ndc;
break;
@@ -705,10 +711,7 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env,
PipelineStatistics* statistics, bool build_in_parallel) try {
- // TODO: Remove this when Intel fixes their shader compiler.
- // https://github.com/IGCIT/Intel-GPU-Community-Issue-Tracker-IGCIT/issues/159
- if (device.GetDriverID() == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS &&
- !Settings::values.enable_compute_pipelines.GetValue()) {
+ if (device.HasBrokenCompute()) {
LOG_ERROR(Render_Vulkan, "Skipping 0x{:016x}", key.Hash());
return nullptr;
}
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 8711e2a87..f3cef09dd 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -330,9 +330,9 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
};
}
-[[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies(
- std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) {
- std::vector<VkBufferCopy> result(copies.size());
+[[maybe_unused]] [[nodiscard]] boost::container::small_vector<VkBufferCopy, 16>
+TransformBufferCopies(std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) {
+ boost::container::small_vector<VkBufferCopy, 16> result(copies.size());
std::ranges::transform(
copies, result.begin(), [buffer_offset](const VideoCommon::BufferCopy& copy) {
return VkBufferCopy{
@@ -344,7 +344,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
return result;
}
-[[nodiscard]] std::vector<VkBufferImageCopy> TransformBufferImageCopies(
+[[nodiscard]] boost::container::small_vector<VkBufferImageCopy, 16> TransformBufferImageCopies(
std::span<const BufferImageCopy> copies, size_t buffer_offset, VkImageAspectFlags aspect_mask) {
struct Maker {
VkBufferImageCopy operator()(const BufferImageCopy& copy) const {
@@ -377,14 +377,14 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
VkImageAspectFlags aspect_mask;
};
if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
- std::vector<VkBufferImageCopy> result(copies.size() * 2);
+ boost::container::small_vector<VkBufferImageCopy, 16> result(copies.size() * 2);
std::ranges::transform(copies, result.begin(),
Maker{buffer_offset, VK_IMAGE_ASPECT_DEPTH_BIT});
std::ranges::transform(copies, result.begin() + copies.size(),
Maker{buffer_offset, VK_IMAGE_ASPECT_STENCIL_BIT});
return result;
} else {
- std::vector<VkBufferImageCopy> result(copies.size());
+ boost::container::small_vector<VkBufferImageCopy, 16> result(copies.size());
std::ranges::transform(copies, result.begin(), Maker{buffer_offset, aspect_mask});
return result;
}
@@ -867,8 +867,8 @@ void TextureCacheRuntime::BarrierFeedbackLoop() {
void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src,
std::span<const VideoCommon::ImageCopy> copies) {
- std::vector<VkBufferImageCopy> vk_in_copies(copies.size());
- std::vector<VkBufferImageCopy> vk_out_copies(copies.size());
+ boost::container::small_vector<VkBufferImageCopy, 16> vk_in_copies(copies.size());
+ boost::container::small_vector<VkBufferImageCopy, 16> vk_out_copies(copies.size());
const VkImageAspectFlags src_aspect_mask = src.AspectMask();
const VkImageAspectFlags dst_aspect_mask = dst.AspectMask();
@@ -1157,7 +1157,7 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
std::span<const VideoCommon::ImageCopy> copies) {
- std::vector<VkImageCopy> vk_copies(copies.size());
+ boost::container::small_vector<VkImageCopy, 16> vk_copies(copies.size());
const VkImageAspectFlags aspect_mask = dst.AspectMask();
ASSERT(aspect_mask == src.AspectMask());
@@ -1332,7 +1332,7 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset,
ScaleDown(true);
}
scheduler->RequestOutsideRenderPassOperationContext();
- std::vector vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask);
+ auto vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask);
const VkBuffer src_buffer = buffer;
const VkImage vk_image = *original_image;
const VkImageAspectFlags vk_aspect_mask = aspect_mask;
@@ -1367,8 +1367,9 @@ void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<VkDeviceS
if (is_rescaled) {
ScaleDown();
}
- boost::container::small_vector<VkBuffer, 1> buffers_vector{};
- boost::container::small_vector<std::vector<VkBufferImageCopy>, 1> vk_copies;
+ boost::container::small_vector<VkBuffer, 8> buffers_vector{};
+ boost::container::small_vector<boost::container::small_vector<VkBufferImageCopy, 16>, 8>
+ vk_copies;
for (size_t index = 0; index < buffers_span.size(); index++) {
buffers_vector.emplace_back(buffers_span[index]);
vk_copies.emplace_back(
@@ -1802,27 +1803,36 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t
// Some games have samplers with garbage. Sanitize them here.
const f32 max_anisotropy = std::clamp(tsc.MaxAnisotropy(), 1.0f, 16.0f);
- sampler = device.GetLogical().CreateSampler(VkSamplerCreateInfo{
- .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
- .pNext = pnext,
- .flags = 0,
- .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter),
- .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter),
- .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter),
- .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter),
- .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter),
- .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter),
- .mipLodBias = tsc.LodBias(),
- .anisotropyEnable = static_cast<VkBool32>(max_anisotropy > 1.0f ? VK_TRUE : VK_FALSE),
- .maxAnisotropy = max_anisotropy,
- .compareEnable = tsc.depth_compare_enabled,
- .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func),
- .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.MinLod(),
- .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.MaxLod(),
- .borderColor =
- arbitrary_borders ? VK_BORDER_COLOR_FLOAT_CUSTOM_EXT : ConvertBorderColor(color),
- .unnormalizedCoordinates = VK_FALSE,
- });
+ const auto create_sampler = [&](const f32 anisotropy) {
+ return device.GetLogical().CreateSampler(VkSamplerCreateInfo{
+ .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+ .pNext = pnext,
+ .flags = 0,
+ .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter),
+ .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter),
+ .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter),
+ .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter),
+ .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter),
+ .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter),
+ .mipLodBias = tsc.LodBias(),
+ .anisotropyEnable = static_cast<VkBool32>(anisotropy > 1.0f ? VK_TRUE : VK_FALSE),
+ .maxAnisotropy = anisotropy,
+ .compareEnable = tsc.depth_compare_enabled,
+ .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func),
+ .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.MinLod(),
+ .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.MaxLod(),
+ .borderColor =
+ arbitrary_borders ? VK_BORDER_COLOR_FLOAT_CUSTOM_EXT : ConvertBorderColor(color),
+ .unnormalizedCoordinates = VK_FALSE,
+ });
+ };
+
+ sampler = create_sampler(max_anisotropy);
+
+ const f32 max_anisotropy_default = static_cast<f32>(1U << tsc.max_anisotropy);
+ if (max_anisotropy > max_anisotropy_default) {
+ sampler_default_anisotropy = create_sampler(max_anisotropy_default);
+ }
}
Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
@@ -1849,7 +1859,7 @@ Framebuffer::~Framebuffer() = default;
void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime,
std::span<ImageView*, NUM_RT> color_buffers,
ImageView* depth_buffer, bool is_rescaled) {
- std::vector<VkImageView> attachments;
+ boost::container::small_vector<VkImageView, NUM_RT + 1> attachments;
RenderPassKey renderpass_key{};
s32 num_layers = 1;
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 0f7a5ffd4..f14525dcb 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -279,8 +279,17 @@ public:
return *sampler;
}
+ [[nodiscard]] VkSampler HandleWithDefaultAnisotropy() const noexcept {
+ return *sampler_default_anisotropy;
+ }
+
+ [[nodiscard]] bool HasAddedAnisotropy() const noexcept {
+ return static_cast<bool>(sampler_default_anisotropy);
+ }
+
private:
vk::Sampler sampler;
+ vk::Sampler sampler_default_anisotropy;
};
class Framebuffer {
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp
index c5213875b..4db948b6d 100644
--- a/src/video_core/shader_cache.cpp
+++ b/src/video_core/shader_cache.cpp
@@ -151,11 +151,9 @@ void ShaderCache::RemovePendingShaders() {
marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()),
marked_for_removal.end());
- std::vector<ShaderInfo*> removed_shaders;
- removed_shaders.reserve(marked_for_removal.size());
+ boost::container::small_vector<ShaderInfo*, 16> removed_shaders;
std::scoped_lock lock{lookup_mutex};
-
for (Entry* const entry : marked_for_removal) {
removed_shaders.push_back(entry->data);
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index 1b8a17ee8..55d49d017 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -6,6 +6,7 @@
#include <array>
#include <optional>
#include <vector>
+#include <boost/container/small_vector.hpp>
#include "common/common_funcs.h"
#include "common/common_types.h"
@@ -108,8 +109,8 @@ struct ImageBase {
std::vector<ImageViewInfo> image_view_infos;
std::vector<ImageViewId> image_view_ids;
- std::vector<u32> slice_offsets;
- std::vector<SubresourceBase> slice_subresources;
+ boost::container::small_vector<u32, 16> slice_offsets;
+ boost::container::small_vector<SubresourceBase, 16> slice_subresources;
std::vector<AliasedImage> aliased_images;
std::vector<ImageId> overlapping_images;
diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp
index d134b6738..0c5f4450d 100644
--- a/src/video_core/texture_cache/image_view_base.cpp
+++ b/src/video_core/texture_cache/image_view_base.cpp
@@ -45,4 +45,56 @@ ImageViewBase::ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_in
ImageViewBase::ImageViewBase(const NullImageViewParams&) : image_id{NULL_IMAGE_ID} {}
+bool ImageViewBase::SupportsAnisotropy() const noexcept {
+ const bool has_mips = range.extent.levels > 1;
+ const bool is_2d = type == ImageViewType::e2D || type == ImageViewType::e2DArray;
+ if (!has_mips || !is_2d) {
+ return false;
+ }
+
+ switch (format) {
+ case PixelFormat::R8_UNORM:
+ case PixelFormat::R8_SNORM:
+ case PixelFormat::R8_SINT:
+ case PixelFormat::R8_UINT:
+ case PixelFormat::BC4_UNORM:
+ case PixelFormat::BC4_SNORM:
+ case PixelFormat::BC5_UNORM:
+ case PixelFormat::BC5_SNORM:
+ case PixelFormat::R32G32_FLOAT:
+ case PixelFormat::R32G32_SINT:
+ case PixelFormat::R32_FLOAT:
+ case PixelFormat::R16_FLOAT:
+ case PixelFormat::R16_UNORM:
+ case PixelFormat::R16_SNORM:
+ case PixelFormat::R16_UINT:
+ case PixelFormat::R16_SINT:
+ case PixelFormat::R16G16_UNORM:
+ case PixelFormat::R16G16_FLOAT:
+ case PixelFormat::R16G16_UINT:
+ case PixelFormat::R16G16_SINT:
+ case PixelFormat::R16G16_SNORM:
+ case PixelFormat::R8G8_UNORM:
+ case PixelFormat::R8G8_SNORM:
+ case PixelFormat::R8G8_SINT:
+ case PixelFormat::R8G8_UINT:
+ case PixelFormat::R32G32_UINT:
+ case PixelFormat::R32_UINT:
+ case PixelFormat::R32_SINT:
+ case PixelFormat::G4R4_UNORM:
+ // Depth formats
+ case PixelFormat::D32_FLOAT:
+ case PixelFormat::D16_UNORM:
+ // Stencil formats
+ case PixelFormat::S8_UINT:
+ // DepthStencil formats
+ case PixelFormat::D24_UNORM_S8_UINT:
+ case PixelFormat::S8_UINT_D24_UNORM:
+ case PixelFormat::D32_FLOAT_S8_UINT:
+ return false;
+ default:
+ return true;
+ }
+}
+
} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h
index a25ae1d4a..87549ffff 100644
--- a/src/video_core/texture_cache/image_view_base.h
+++ b/src/video_core/texture_cache/image_view_base.h
@@ -33,6 +33,8 @@ struct ImageViewBase {
return type == ImageViewType::Buffer;
}
+ [[nodiscard]] bool SupportsAnisotropy() const noexcept;
+
ImageId image_id{};
GPUVAddr gpu_addr = 0;
PixelFormat format{};
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index c7f7448e9..d3f03a995 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -186,6 +186,10 @@ void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) {
template <class P>
void TextureCache<P>::CheckFeedbackLoop(std::span<const ImageViewInOut> views) {
+ if (!Settings::values.barrier_feedback_loops.GetValue()) {
+ return;
+ }
+
const bool requires_barrier = [&] {
for (const auto& view : views) {
if (!view.id) {
@@ -222,30 +226,50 @@ void TextureCache<P>::CheckFeedbackLoop(std::span<const ImageViewInOut> views) {
template <class P>
typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
+ return &slot_samplers[GetGraphicsSamplerId(index)];
+}
+
+template <class P>
+typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
+ return &slot_samplers[GetComputeSamplerId(index)];
+}
+
+template <class P>
+SamplerId TextureCache<P>::GetGraphicsSamplerId(u32 index) {
if (index > channel_state->graphics_sampler_table.Limit()) {
LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
- return &slot_samplers[NULL_SAMPLER_ID];
+ return NULL_SAMPLER_ID;
}
const auto [descriptor, is_new] = channel_state->graphics_sampler_table.Read(index);
SamplerId& id = channel_state->graphics_sampler_ids[index];
if (is_new) {
id = FindSampler(descriptor);
}
- return &slot_samplers[id];
+ return id;
}
template <class P>
-typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
+SamplerId TextureCache<P>::GetComputeSamplerId(u32 index) {
if (index > channel_state->compute_sampler_table.Limit()) {
LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
- return &slot_samplers[NULL_SAMPLER_ID];
+ return NULL_SAMPLER_ID;
}
const auto [descriptor, is_new] = channel_state->compute_sampler_table.Read(index);
SamplerId& id = channel_state->compute_sampler_ids[index];
if (is_new) {
id = FindSampler(descriptor);
}
- return &slot_samplers[id];
+ return id;
+}
+
+template <class P>
+const typename P::Sampler& TextureCache<P>::GetSampler(SamplerId id) const noexcept {
+ return slot_samplers[id];
+}
+
+template <class P>
+typename P::Sampler& TextureCache<P>::GetSampler(SamplerId id) noexcept {
+ return slot_samplers[id];
}
template <class P>
@@ -280,7 +304,7 @@ void TextureCache<P>::SynchronizeComputeDescriptors() {
}
template <class P>
-bool TextureCache<P>::RescaleRenderTargets(bool is_clear) {
+bool TextureCache<P>::RescaleRenderTargets() {
auto& flags = maxwell3d->dirty.flags;
u32 scale_rating = 0;
bool rescaled = false;
@@ -318,13 +342,13 @@ bool TextureCache<P>::RescaleRenderTargets(bool is_clear) {
ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
if (flags[Dirty::ColorBuffer0 + index] || force) {
flags[Dirty::ColorBuffer0 + index] = false;
- BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear));
+ BindRenderTarget(&color_buffer_id, FindColorBuffer(index));
}
check_rescale(color_buffer_id, tmp_color_images[index]);
}
if (flags[Dirty::ZetaBuffer] || force) {
flags[Dirty::ZetaBuffer] = false;
- BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear));
+ BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer());
}
check_rescale(render_targets.depth_buffer_id, tmp_depth_image);
@@ -389,7 +413,7 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
return;
}
- const bool rescaled = RescaleRenderTargets(is_clear);
+ const bool rescaled = RescaleRenderTargets();
if (is_rescaling != rescaled) {
flags[Dirty::RescaleViewports] = true;
flags[Dirty::RescaleScissors] = true;
@@ -502,7 +526,7 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) {
template <class P>
void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
- std::vector<ImageId> images;
+ boost::container::small_vector<ImageId, 16> images;
ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) {
if (!image.IsSafeDownload()) {
return;
@@ -555,7 +579,7 @@ std::optional<VideoCore::RasterizerDownloadArea> TextureCache<P>::GetFlushArea(V
template <class P>
void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
- std::vector<ImageId> deleted_images;
+ boost::container::small_vector<ImageId, 16> deleted_images;
ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); });
for (const ImageId id : deleted_images) {
Image& image = slot_images[id];
@@ -569,7 +593,7 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
template <class P>
void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size) {
- std::vector<ImageId> deleted_images;
+ boost::container::small_vector<ImageId, 16> deleted_images;
ForEachImageInRegionGPU(as_id, gpu_addr, size,
[&](ImageId id, Image&) { deleted_images.push_back(id); });
for (const ImageId id : deleted_images) {
@@ -1077,7 +1101,7 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
const bool native_bgr = runtime.HasNativeBgr();
const bool flexible_formats = True(options & RelaxedOptions::Format);
ImageId image_id{};
- boost::container::small_vector<ImageId, 1> image_ids;
+ boost::container::small_vector<ImageId, 8> image_ids;
const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
if (True(existing_image.flags & ImageFlagBits::Remapped)) {
return false;
@@ -1598,7 +1622,7 @@ ImageId TextureCache<P>::FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr)
}
}
ImageId image_id{};
- boost::container::small_vector<ImageId, 1> image_ids;
+ boost::container::small_vector<ImageId, 8> image_ids;
const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
if (True(existing_image.flags & ImageFlagBits::Remapped)) {
return false;
@@ -1658,7 +1682,7 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
}
template <class P>
-ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
+ImageViewId TextureCache<P>::FindColorBuffer(size_t index) {
const auto& regs = maxwell3d->regs;
if (index >= regs.rt_control.count) {
return ImageViewId{};
@@ -1672,11 +1696,11 @@ ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
return ImageViewId{};
}
const ImageInfo info(regs.rt[index], regs.anti_alias_samples_mode);
- return FindRenderTargetView(info, gpu_addr, is_clear);
+ return FindRenderTargetView(info, gpu_addr);
}
template <class P>
-ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) {
+ImageViewId TextureCache<P>::FindDepthBuffer() {
const auto& regs = maxwell3d->regs;
if (!regs.zeta_enable) {
return ImageViewId{};
@@ -1686,18 +1710,16 @@ ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) {
return ImageViewId{};
}
const ImageInfo info(regs.zeta, regs.zeta_size, regs.anti_alias_samples_mode);
- return FindRenderTargetView(info, gpu_addr, is_clear);
+ return FindRenderTargetView(info, gpu_addr);
}
template <class P>
-ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
- bool is_clear) {
- const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{};
+ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr) {
ImageId image_id{};
bool delete_state = has_deleted_images;
do {
has_deleted_images = false;
- image_id = FindOrInsertImage(info, gpu_addr, options);
+ image_id = FindOrInsertImage(info, gpu_addr);
delete_state |= has_deleted_images;
} while (has_deleted_images);
has_deleted_images = delete_state;
@@ -1920,7 +1942,7 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
image.map_view_id = map_id;
return;
}
- std::vector<ImageViewId> sparse_maps{};
+ boost::container::small_vector<ImageViewId, 16> sparse_maps;
ForEachSparseSegment(
image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id);
@@ -2195,7 +2217,7 @@ void TextureCache<P>::MarkModification(ImageBase& image) noexcept {
template <class P>
void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
- boost::container::small_vector<const AliasedImage*, 1> aliased_images;
+ boost::container::small_vector<const AliasedImage*, 8> aliased_images;
Image& image = slot_images[image_id];
bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled);
bool any_modified = True(image.flags & ImageFlagBits::GpuModified);
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 3bfa92154..e9ec91265 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -56,7 +56,7 @@ struct ImageViewInOut {
struct AsyncDecodeContext {
ImageId image_id;
Common::ScratchBuffer<u8> decoded_data;
- std::vector<BufferImageCopy> copies;
+ boost::container::small_vector<BufferImageCopy, 16> copies;
std::mutex mutex;
std::atomic_bool complete;
};
@@ -159,6 +159,18 @@ public:
/// Get the sampler from the compute descriptor table in the specified index
Sampler* GetComputeSampler(u32 index);
+ /// Get the sampler id from the graphics descriptor table in the specified index
+ SamplerId GetGraphicsSamplerId(u32 index);
+
+ /// Get the sampler id from the compute descriptor table in the specified index
+ SamplerId GetComputeSamplerId(u32 index);
+
+ /// Return a constant reference to the given sampler id
+ [[nodiscard]] const Sampler& GetSampler(SamplerId id) const noexcept;
+
+ /// Return a reference to the given sampler id
+ [[nodiscard]] Sampler& GetSampler(SamplerId id) noexcept;
+
/// Refresh the state for graphics image view and sampler descriptors
void SynchronizeGraphicsDescriptors();
@@ -166,9 +178,8 @@ public:
void SynchronizeComputeDescriptors();
/// Updates the Render Targets if they can be rescaled
- /// @param is_clear True when the render targets are being used for clears
/// @retval True if the Render Targets have been rescaled.
- bool RescaleRenderTargets(bool is_clear);
+ bool RescaleRenderTargets();
/// Update bound render targets and upload memory if necessary
/// @param is_clear True when the render targets are being used for clears
@@ -324,14 +335,13 @@ private:
[[nodiscard]] SamplerId FindSampler(const TSCEntry& config);
/// Find or create an image view for the given color buffer index
- [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear);
+ [[nodiscard]] ImageViewId FindColorBuffer(size_t index);
/// Find or create an image view for the depth buffer
- [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear);
+ [[nodiscard]] ImageViewId FindDepthBuffer();
/// Find or create a view for a render target with the given image parameters
- [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
- bool is_clear);
+ [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr);
/// Iterates over all the images in a region calling func
template <typename Func>
@@ -419,7 +429,7 @@ private:
std::unordered_map<u64, std::vector<ImageMapId>, Common::IdentityHash<u64>> page_table;
std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table;
- std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
+ std::unordered_map<ImageId, boost::container::small_vector<ImageViewId, 16>> sparse_views;
VAddr virtual_invalid_space{};
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 95a5b47d8..f781cb7a0 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -329,13 +329,13 @@ template <u32 GOB_EXTENT>
[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress3D(
const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) {
- const std::vector<u32> slice_offsets = CalculateSliceOffsets(new_info);
+ const auto slice_offsets = CalculateSliceOffsets(new_info);
const u32 diff = static_cast<u32>(overlap.gpu_addr - gpu_addr);
const auto it = std::ranges::find(slice_offsets, diff);
if (it == slice_offsets.end()) {
return std::nullopt;
}
- const std::vector subresources = CalculateSliceSubresources(new_info);
+ const auto subresources = CalculateSliceSubresources(new_info);
const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)];
const ImageInfo& info = overlap.info;
if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) {
@@ -655,9 +655,9 @@ LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept {
return sizes;
}
-std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) {
+boost::container::small_vector<u32, 16> CalculateSliceOffsets(const ImageInfo& info) {
ASSERT(info.type == ImageType::e3D);
- std::vector<u32> offsets;
+ boost::container::small_vector<u32, 16> offsets;
offsets.reserve(NumSlices(info));
const LevelInfo level_info = MakeLevelInfo(info);
@@ -679,9 +679,10 @@ std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) {
return offsets;
}
-std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info) {
+boost::container::small_vector<SubresourceBase, 16> CalculateSliceSubresources(
+ const ImageInfo& info) {
ASSERT(info.type == ImageType::e3D);
- std::vector<SubresourceBase> subresources;
+ boost::container::small_vector<SubresourceBase, 16> subresources;
subresources.reserve(NumSlices(info));
for (s32 level = 0; level < info.resources.levels; ++level) {
const s32 depth = AdjustMipSize(info.size.depth, level);
@@ -723,8 +724,10 @@ ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept {
}
}
-std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src,
- SubresourceBase base, u32 up_scale, u32 down_shift) {
+boost::container::small_vector<ImageCopy, 16> MakeShrinkImageCopies(const ImageInfo& dst,
+ const ImageInfo& src,
+ SubresourceBase base,
+ u32 up_scale, u32 down_shift) {
ASSERT(dst.resources.levels >= src.resources.levels);
const bool is_dst_3d = dst.type == ImageType::e3D;
@@ -733,7 +736,7 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn
ASSERT(src.resources.levels == 1);
}
const bool both_2d{src.type == ImageType::e2D && dst.type == ImageType::e2D};
- std::vector<ImageCopy> copies;
+ boost::container::small_vector<ImageCopy, 16> copies;
copies.reserve(src.resources.levels);
for (s32 level = 0; level < src.resources.levels; ++level) {
ImageCopy& copy = copies.emplace_back();
@@ -770,9 +773,10 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn
return copies;
}
-std::vector<ImageCopy> MakeReinterpretImageCopies(const ImageInfo& src, u32 up_scale,
- u32 down_shift) {
- std::vector<ImageCopy> copies;
+boost::container::small_vector<ImageCopy, 16> MakeReinterpretImageCopies(const ImageInfo& src,
+ u32 up_scale,
+ u32 down_shift) {
+ boost::container::small_vector<ImageCopy, 16> copies;
copies.reserve(src.resources.levels);
const bool is_3d = src.type == ImageType::e3D;
for (s32 level = 0; level < src.resources.levels; ++level) {
@@ -824,9 +828,11 @@ bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config
return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value();
}
-std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
- const ImageInfo& info, std::span<const u8> input,
- std::span<u8> output) {
+boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::MemoryManager& gpu_memory,
+ GPUVAddr gpu_addr,
+ const ImageInfo& info,
+ std::span<const u8> input,
+ std::span<u8> output) {
const size_t guest_size_bytes = input.size_bytes();
const u32 bpp_log2 = BytesPerBlockLog2(info.format);
const Extent3D size = info.size;
@@ -861,7 +867,7 @@ std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GP
info.tile_width_spacing);
size_t guest_offset = 0;
u32 host_offset = 0;
- std::vector<BufferImageCopy> copies(num_levels);
+ boost::container::small_vector<BufferImageCopy, 16> copies(num_levels);
for (s32 level = 0; level < num_levels; ++level) {
const Extent3D level_size = AdjustMipSize(size, level);
@@ -978,7 +984,7 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8
}
}
-std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) {
+boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies(const ImageInfo& info) {
const Extent3D size = info.size;
const u32 bytes_per_block = BytesPerBlock(info.format);
if (info.type == ImageType::Linear) {
@@ -1006,7 +1012,7 @@ std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) {
u32 host_offset = 0;
- std::vector<BufferImageCopy> copies(num_levels);
+ boost::container::small_vector<BufferImageCopy, 16> copies(num_levels);
for (s32 level = 0; level < num_levels; ++level) {
const Extent3D level_size = AdjustMipSize(size, level);
const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
@@ -1042,10 +1048,10 @@ Extent3D MipBlockSize(const ImageInfo& info, u32 level) {
return AdjustMipBlockSize(num_tiles, level_info.block, level);
}
-std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) {
+boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles(const ImageInfo& info) {
const Extent2D tile_size = DefaultBlockSize(info.format);
if (info.type == ImageType::Linear) {
- return std::vector{SwizzleParameters{
+ return {SwizzleParameters{
.num_tiles = AdjustTileSize(info.size, tile_size),
.block = {},
.buffer_offset = 0,
@@ -1057,7 +1063,7 @@ std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) {
const s32 num_levels = info.resources.levels;
u32 guest_offset = 0;
- std::vector<SwizzleParameters> params(num_levels);
+ boost::container::small_vector<SwizzleParameters, 16> params(num_levels);
for (s32 level = 0; level < num_levels; ++level) {
const Extent3D level_size = AdjustMipSize(size, level);
const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h
index 84aa6880d..ab45a43c4 100644
--- a/src/video_core/texture_cache/util.h
+++ b/src/video_core/texture_cache/util.h
@@ -5,6 +5,7 @@
#include <optional>
#include <span>
+#include <boost/container/small_vector.hpp>
#include "common/common_types.h"
#include "common/scratch_buffer.h"
@@ -40,9 +41,10 @@ struct OverlapResult {
[[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept;
-[[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info);
+[[nodiscard]] boost::container::small_vector<u32, 16> CalculateSliceOffsets(const ImageInfo& info);
-[[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info);
+[[nodiscard]] boost::container::small_vector<SubresourceBase, 16> CalculateSliceSubresources(
+ const ImageInfo& info);
[[nodiscard]] u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level);
@@ -51,21 +53,18 @@ struct OverlapResult {
[[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept;
-[[nodiscard]] std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst,
- const ImageInfo& src,
- SubresourceBase base, u32 up_scale = 1,
- u32 down_shift = 0);
+[[nodiscard]] boost::container::small_vector<ImageCopy, 16> MakeShrinkImageCopies(
+ const ImageInfo& dst, const ImageInfo& src, SubresourceBase base, u32 up_scale = 1,
+ u32 down_shift = 0);
-[[nodiscard]] std::vector<ImageCopy> MakeReinterpretImageCopies(const ImageInfo& src,
- u32 up_scale = 1,
- u32 down_shift = 0);
+[[nodiscard]] boost::container::small_vector<ImageCopy, 16> MakeReinterpretImageCopies(
+ const ImageInfo& src, u32 up_scale = 1, u32 down_shift = 0);
[[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
-[[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory,
- GPUVAddr gpu_addr, const ImageInfo& info,
- std::span<const u8> input,
- std::span<u8> output);
+[[nodiscard]] boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(
+ Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
+ std::span<const u8> input, std::span<u8> output);
[[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
const ImageBase& image, std::span<u8> output);
@@ -73,13 +72,15 @@ struct OverlapResult {
void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
std::span<BufferImageCopy> copies);
-[[nodiscard]] std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info);
+[[nodiscard]] boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies(
+ const ImageInfo& info);
[[nodiscard]] Extent3D MipSize(Extent3D size, u32 level);
[[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level);
-[[nodiscard]] std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info);
+[[nodiscard]] boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles(
+ const ImageInfo& info);
void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
std::span<const BufferImageCopy> copies, std::span<const u8> memory,
diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp
index 4a80a59f9..d8b88d9bc 100644
--- a/src/video_core/textures/texture.cpp
+++ b/src/video_core/textures/texture.cpp
@@ -62,7 +62,12 @@ std::array<float, 4> TSCEntry::BorderColor() const noexcept {
}
float TSCEntry::MaxAnisotropy() const noexcept {
- if (max_anisotropy == 0 && mipmap_filter != TextureMipmapFilter::Linear) {
+ const bool is_suitable_mipmap_filter = mipmap_filter != TextureMipmapFilter::None;
+ const bool has_regular_lods = min_lod_clamp == 0 && max_lod_clamp >= 256;
+ const bool is_bilinear_filter = min_filter == TextureFilter::Linear &&
+ reduction_filter == SamplerReduction::WeightedAverage;
+ if (max_anisotropy == 0 && (!is_suitable_mipmap_filter || !has_regular_lods ||
+ !is_bilinear_filter || depth_compare_enabled)) {
return 1.0f;
}
const auto anisotropic_settings = Settings::values.max_anisotropy.GetValue();
diff --git a/src/video_core/transform_feedback.cpp b/src/video_core/transform_feedback.cpp
index 155599316..1f353d2df 100644
--- a/src/video_core/transform_feedback.cpp
+++ b/src/video_core/transform_feedback.cpp
@@ -13,7 +13,7 @@
namespace VideoCommon {
-std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings(
+std::pair<std::array<Shader::TransformFeedbackVarying, 256>, u32> MakeTransformFeedbackVaryings(
const TransformFeedbackState& state) {
static constexpr std::array VECTORS{
28U, // gl_Position
@@ -62,7 +62,8 @@ std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings(
216U, // gl_TexCoord[6]
220U, // gl_TexCoord[7]
};
- std::vector<Shader::TransformFeedbackVarying> xfb(256);
+ std::array<Shader::TransformFeedbackVarying, 256> xfb{};
+ u32 count{0};
for (size_t buffer = 0; buffer < state.layouts.size(); ++buffer) {
const auto& locations = state.varyings[buffer];
const auto& layout = state.layouts[buffer];
@@ -103,11 +104,12 @@ std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings(
}
}
xfb[attribute] = varying;
+ count = std::max(count, attribute);
highest = std::max(highest, (base_offset + varying.components) * 4);
}
UNIMPLEMENTED_IF(highest != layout.stride);
}
- return xfb;
+ return {xfb, count + 1};
}
} // namespace VideoCommon
diff --git a/src/video_core/transform_feedback.h b/src/video_core/transform_feedback.h
index d13eb16c3..401b1352a 100644
--- a/src/video_core/transform_feedback.h
+++ b/src/video_core/transform_feedback.h
@@ -24,7 +24,7 @@ struct TransformFeedbackState {
varyings;
};
-std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings(
+std::pair<std::array<Shader::TransformFeedbackVarying, 256>, u32> MakeTransformFeedbackVaryings(
const TransformFeedbackState& state);
} // namespace VideoCommon
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 3d2e9a16a..b11abe311 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -316,6 +316,7 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
std::vector<const char*> ExtensionListForVulkan(
const std::set<std::string, std::less<>>& extensions) {
std::vector<const char*> output;
+ output.reserve(extensions.size());
for (const auto& extension : extensions) {
output.push_back(extension.c_str());
}
@@ -562,6 +563,9 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
LOG_WARNING(Render_Vulkan, "Intel proprietary drivers do not support MSAA image blits");
cant_blit_msaa = true;
}
+ has_broken_compute =
+ CheckBrokenCompute(properties.driver.driverID, properties.properties.driverVersion) &&
+ !Settings::values.enable_compute_pipelines.GetValue();
if (is_intel_anv || (is_qualcomm && !is_s8gen2)) {
LOG_WARNING(Render_Vulkan, "Driver does not support native BGR format");
must_emulate_bgr565 = true;
@@ -783,9 +787,6 @@ bool Device::GetSuitability(bool requires_swapchain) {
FOR_EACH_VK_FEATURE_EXT(FEATURE_EXTENSION);
FOR_EACH_VK_EXTENSION(EXTENSION);
-#ifdef _WIN32
- FOR_EACH_VK_EXTENSION_WIN32(EXTENSION);
-#endif
#undef FEATURE_EXTENSION
#undef EXTENSION
@@ -804,11 +805,6 @@ bool Device::GetSuitability(bool requires_swapchain) {
FOR_EACH_VK_RECOMMENDED_EXTENSION(LOG_EXTENSION);
FOR_EACH_VK_MANDATORY_EXTENSION(CHECK_EXTENSION);
-#ifdef _WIN32
- FOR_EACH_VK_MANDATORY_EXTENSION_WIN32(CHECK_EXTENSION);
-#else
- FOR_EACH_VK_MANDATORY_EXTENSION_GENERIC(CHECK_EXTENSION);
-#endif
if (requires_swapchain) {
CHECK_EXTENSION(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index f314d0ffe..0b634a876 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -10,6 +10,7 @@
#include <vector>
#include "common/common_types.h"
+#include "common/logging/log.h"
#include "common/settings.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -68,7 +69,6 @@
EXTENSION(EXT, VERTEX_ATTRIBUTE_DIVISOR, vertex_attribute_divisor) \
EXTENSION(KHR, DRAW_INDIRECT_COUNT, draw_indirect_count) \
EXTENSION(KHR, DRIVER_PROPERTIES, driver_properties) \
- EXTENSION(KHR, EXTERNAL_MEMORY_FD, external_memory_fd) \
EXTENSION(KHR, PUSH_DESCRIPTOR, push_descriptor) \
EXTENSION(KHR, SAMPLER_MIRROR_CLAMP_TO_EDGE, sampler_mirror_clamp_to_edge) \
EXTENSION(KHR, SHADER_FLOAT_CONTROLS, shader_float_controls) \
@@ -80,9 +80,6 @@
EXTENSION(NV, VIEWPORT_ARRAY2, viewport_array2) \
EXTENSION(NV, VIEWPORT_SWIZZLE, viewport_swizzle)
-#define FOR_EACH_VK_EXTENSION_WIN32(EXTENSION) \
- EXTENSION(KHR, EXTERNAL_MEMORY_WIN32, external_memory_win32)
-
// Define extensions which must be supported.
#define FOR_EACH_VK_MANDATORY_EXTENSION(EXTENSION_NAME) \
EXTENSION_NAME(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME) \
@@ -90,12 +87,6 @@
EXTENSION_NAME(VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME) \
EXTENSION_NAME(VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME)
-#define FOR_EACH_VK_MANDATORY_EXTENSION_GENERIC(EXTENSION_NAME) \
- EXTENSION_NAME(VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME)
-
-#define FOR_EACH_VK_MANDATORY_EXTENSION_WIN32(EXTENSION_NAME) \
- EXTENSION_NAME(VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME)
-
// Define extensions where the absence of the extension may result in a degraded experience.
#define FOR_EACH_VK_RECOMMENDED_EXTENSION(EXTENSION_NAME) \
EXTENSION_NAME(VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME) \
@@ -528,6 +519,11 @@ public:
return has_renderdoc || has_nsight_graphics || Settings::values.renderer_debug.GetValue();
}
+ /// @returns True if compute pipelines can cause crashing.
+ bool HasBrokenCompute() const {
+ return has_broken_compute;
+ }
+
/// Returns true when the device does not properly support cube compatibility.
bool HasBrokenCubeImageCompability() const {
return has_broken_cube_compatibility;
@@ -589,6 +585,22 @@ public:
return supports_conditional_barriers;
}
+ [[nodiscard]] static constexpr bool CheckBrokenCompute(VkDriverId driver_id,
+ u32 driver_version) {
+ if (driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
+ const u32 major = VK_API_VERSION_MAJOR(driver_version);
+ const u32 minor = VK_API_VERSION_MINOR(driver_version);
+ const u32 patch = VK_API_VERSION_PATCH(driver_version);
+ if (major == 0 && minor == 405 && patch < 286) {
+ LOG_WARNING(
+ Render_Vulkan,
+ "Intel proprietary drivers 0.405.0 until 0.405.286 have broken compute");
+ return true;
+ }
+ }
+ return false;
+ }
+
private:
/// Checks if the physical device is suitable and configures the object state
/// with all necessary info about its properties.
@@ -636,7 +648,6 @@ private:
FOR_EACH_VK_FEATURE_1_3(FEATURE);
FOR_EACH_VK_FEATURE_EXT(FEATURE);
FOR_EACH_VK_EXTENSION(EXTENSION);
- FOR_EACH_VK_EXTENSION_WIN32(EXTENSION);
#undef EXTENSION
#undef FEATURE
@@ -683,6 +694,7 @@ private:
bool is_integrated{}; ///< Is GPU an iGPU.
bool is_virtual{}; ///< Is GPU a virtual GPU.
bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device.
+ bool has_broken_compute{}; ///< Compute shaders can cause crashes
bool has_broken_cube_compatibility{}; ///< Has broken cube compatibility bit
bool has_renderdoc{}; ///< Has RenderDoc attached
bool has_nsight_graphics{}; ///< Has Nsight Graphics attached
diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt
index 84d9ca796..733c296e4 100644
--- a/src/yuzu/CMakeLists.txt
+++ b/src/yuzu/CMakeLists.txt
@@ -210,6 +210,8 @@ add_executable(yuzu
util/url_request_interceptor.h
util/util.cpp
util/util.h
+ vk_device_info.cpp
+ vk_device_info.h
compatdb.cpp
compatdb.h
yuzu.qrc
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index bac9dff90..edc206a25 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -761,6 +761,7 @@ void Config::ReadRendererValues() {
ReadGlobalSetting(Settings::values.use_vulkan_driver_pipeline_cache);
ReadGlobalSetting(Settings::values.enable_compute_pipelines);
ReadGlobalSetting(Settings::values.use_video_framerate);
+ ReadGlobalSetting(Settings::values.barrier_feedback_loops);
ReadGlobalSetting(Settings::values.bg_red);
ReadGlobalSetting(Settings::values.bg_green);
ReadGlobalSetting(Settings::values.bg_blue);
@@ -1417,6 +1418,7 @@ void Config::SaveRendererValues() {
WriteGlobalSetting(Settings::values.use_vulkan_driver_pipeline_cache);
WriteGlobalSetting(Settings::values.enable_compute_pipelines);
WriteGlobalSetting(Settings::values.use_video_framerate);
+ WriteGlobalSetting(Settings::values.barrier_feedback_loops);
WriteGlobalSetting(Settings::values.bg_red);
WriteGlobalSetting(Settings::values.bg_green);
WriteGlobalSetting(Settings::values.bg_blue);
diff --git a/src/yuzu/configuration/configure_dialog.cpp b/src/yuzu/configuration/configure_dialog.cpp
index 8e76a819a..bdf83ebfe 100644
--- a/src/yuzu/configuration/configure_dialog.cpp
+++ b/src/yuzu/configuration/configure_dialog.cpp
@@ -6,6 +6,7 @@
#include "common/settings.h"
#include "core/core.h"
#include "ui_configure.h"
+#include "vk_device_info.h"
#include "yuzu/configuration/config.h"
#include "yuzu/configuration/configure_audio.h"
#include "yuzu/configuration/configure_cpu.h"
@@ -28,6 +29,7 @@
ConfigureDialog::ConfigureDialog(QWidget* parent, HotkeyRegistry& registry_,
InputCommon::InputSubsystem* input_subsystem,
+ std::vector<VkDeviceInfo::Record>& vk_device_records,
Core::System& system_, bool enable_web_config)
: QDialog(parent), ui{std::make_unique<Ui::ConfigureDialog>()},
registry(registry_), system{system_}, audio_tab{std::make_unique<ConfigureAudio>(system_,
@@ -38,7 +40,8 @@ ConfigureDialog::ConfigureDialog(QWidget* parent, HotkeyRegistry& registry_,
general_tab{std::make_unique<ConfigureGeneral>(system_, this)},
graphics_advanced_tab{std::make_unique<ConfigureGraphicsAdvanced>(system_, this)},
graphics_tab{std::make_unique<ConfigureGraphics>(
- system_, [&]() { graphics_advanced_tab->ExposeComputeOption(); }, this)},
+ system_, vk_device_records, [&]() { graphics_advanced_tab->ExposeComputeOption(); },
+ this)},
hotkeys_tab{std::make_unique<ConfigureHotkeys>(system_.HIDCore(), this)},
input_tab{std::make_unique<ConfigureInput>(system_, this)},
network_tab{std::make_unique<ConfigureNetwork>(system_, this)},
diff --git a/src/yuzu/configuration/configure_dialog.h b/src/yuzu/configuration/configure_dialog.h
index a086a07c4..2a08b7fee 100644
--- a/src/yuzu/configuration/configure_dialog.h
+++ b/src/yuzu/configuration/configure_dialog.h
@@ -4,7 +4,9 @@
#pragma once
#include <memory>
+#include <vector>
#include <QDialog>
+#include "yuzu/vk_device_info.h"
namespace Core {
class System;
@@ -40,8 +42,9 @@ class ConfigureDialog : public QDialog {
public:
explicit ConfigureDialog(QWidget* parent, HotkeyRegistry& registry_,
- InputCommon::InputSubsystem* input_subsystem, Core::System& system_,
- bool enable_web_config = true);
+ InputCommon::InputSubsystem* input_subsystem,
+ std::vector<VkDeviceInfo::Record>& vk_device_records,
+ Core::System& system_, bool enable_web_config = true);
~ConfigureDialog() override;
void ApplyConfiguration();
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index 431585216..a4965524a 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -1,10 +1,6 @@
// SPDX-FileCopyrightText: 2016 Citra Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
-// Include this early to include Vulkan headers how we want to
-#include "video_core/vulkan_common/vulkan_device.h"
-#include "video_core/vulkan_common/vulkan_wrapper.h"
-
#include <algorithm>
#include <functional>
#include <iosfwd>
@@ -34,13 +30,11 @@
#include "common/settings.h"
#include "core/core.h"
#include "ui_configure_graphics.h"
-#include "video_core/vulkan_common/vulkan_instance.h"
-#include "video_core/vulkan_common/vulkan_library.h"
-#include "video_core/vulkan_common/vulkan_surface.h"
#include "yuzu/configuration/configuration_shared.h"
#include "yuzu/configuration/configure_graphics.h"
#include "yuzu/qt_common.h"
#include "yuzu/uisettings.h"
+#include "yuzu/vk_device_info.h"
static const std::vector<VkPresentModeKHR> default_present_modes{VK_PRESENT_MODE_IMMEDIATE_KHR,
VK_PRESENT_MODE_FIFO_KHR};
@@ -77,9 +71,10 @@ static constexpr Settings::VSyncMode PresentModeToSetting(VkPresentModeKHR mode)
}
ConfigureGraphics::ConfigureGraphics(const Core::System& system_,
+ std::vector<VkDeviceInfo::Record>& records_,
const std::function<void()>& expose_compute_option_,
QWidget* parent)
- : QWidget(parent), ui{std::make_unique<Ui::ConfigureGraphics>()},
+ : QWidget(parent), ui{std::make_unique<Ui::ConfigureGraphics>()}, records{records_},
expose_compute_option{expose_compute_option_}, system{system_} {
vulkan_device = Settings::values.vulkan_device.GetValue();
RetrieveVulkanDevices();
@@ -504,47 +499,19 @@ void ConfigureGraphics::UpdateAPILayout() {
}
}
-void ConfigureGraphics::RetrieveVulkanDevices() try {
- if (UISettings::values.has_broken_vulkan) {
- return;
- }
-
- using namespace Vulkan;
-
- auto* window = this->window()->windowHandle();
- auto wsi = QtCommon::GetWindowSystemInfo(window);
-
- vk::InstanceDispatch dld;
- const auto library = OpenLibrary();
- const vk::Instance instance = CreateInstance(*library, dld, VK_API_VERSION_1_1, wsi.type);
- const std::vector<VkPhysicalDevice> physical_devices = instance.EnumeratePhysicalDevices();
- vk::SurfaceKHR surface = CreateSurface(instance, wsi);
-
+void ConfigureGraphics::RetrieveVulkanDevices() {
vulkan_devices.clear();
- vulkan_devices.reserve(physical_devices.size());
+ vulkan_devices.reserve(records.size());
device_present_modes.clear();
- device_present_modes.reserve(physical_devices.size());
- for (const VkPhysicalDevice device : physical_devices) {
- const auto physical_device = vk::PhysicalDevice(device, dld);
- const std::string name = physical_device.GetProperties().deviceName;
- const std::vector<VkPresentModeKHR> present_modes =
- physical_device.GetSurfacePresentModesKHR(*surface);
- vulkan_devices.push_back(QString::fromStdString(name));
- device_present_modes.push_back(present_modes);
-
- VkPhysicalDeviceDriverProperties driver_properties{};
- driver_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES;
- driver_properties.pNext = nullptr;
- VkPhysicalDeviceProperties2 properties{};
- properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
- properties.pNext = &driver_properties;
- dld.vkGetPhysicalDeviceProperties2(physical_device, &properties);
- if (driver_properties.driverID == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
+ device_present_modes.reserve(records.size());
+ for (const auto& record : records) {
+ vulkan_devices.push_back(QString::fromStdString(record.name));
+ device_present_modes.push_back(record.vsync_support);
+
+ if (record.has_broken_compute) {
expose_compute_option();
}
}
-} catch (const Vulkan::vk::Exception& exception) {
- LOG_ERROR(Frontend, "Failed to enumerate devices with error: {}", exception.what());
}
Settings::RendererBackend ConfigureGraphics::GetCurrentGraphicsBackend() const {
diff --git a/src/yuzu/configuration/configure_graphics.h b/src/yuzu/configuration/configure_graphics.h
index 364b1cac2..be9310b74 100644
--- a/src/yuzu/configuration/configure_graphics.h
+++ b/src/yuzu/configuration/configure_graphics.h
@@ -12,6 +12,7 @@
#include <qobjectdefs.h>
#include <vulkan/vulkan_core.h>
#include "common/common_types.h"
+#include "vk_device_info.h"
class QEvent;
class QObject;
@@ -39,6 +40,7 @@ class ConfigureGraphics : public QWidget {
public:
explicit ConfigureGraphics(const Core::System& system_,
+ std::vector<VkDeviceInfo::Record>& records,
const std::function<void()>& expose_compute_option_,
QWidget* parent = nullptr);
~ConfigureGraphics() override;
@@ -77,6 +79,7 @@ private:
ConfigurationShared::CheckState use_disk_shader_cache;
ConfigurationShared::CheckState use_asynchronous_gpu_emulation;
+ std::vector<VkDeviceInfo::Record>& records;
std::vector<QString> vulkan_devices;
std::vector<std::vector<VkPresentModeKHR>> device_present_modes;
std::vector<VkPresentModeKHR>
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp
index 0463ac8b9..c0a044767 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.cpp
+++ b/src/yuzu/configuration/configure_graphics_advanced.cpp
@@ -43,6 +43,8 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
ui->enable_compute_pipelines_checkbox->setChecked(
Settings::values.enable_compute_pipelines.GetValue());
ui->use_video_framerate_checkbox->setChecked(Settings::values.use_video_framerate.GetValue());
+ ui->barrier_feedback_loops_checkbox->setChecked(
+ Settings::values.barrier_feedback_loops.GetValue());
if (Settings::IsConfiguringGlobal()) {
ui->gpu_accuracy->setCurrentIndex(
@@ -94,6 +96,9 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() {
enable_compute_pipelines);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_video_framerate,
ui->use_video_framerate_checkbox, use_video_framerate);
+ ConfigurationShared::ApplyPerGameSetting(&Settings::values.barrier_feedback_loops,
+ ui->barrier_feedback_loops_checkbox,
+ barrier_feedback_loops);
}
void ConfigureGraphicsAdvanced::changeEvent(QEvent* event) {
@@ -130,6 +135,8 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
Settings::values.enable_compute_pipelines.UsingGlobal());
ui->use_video_framerate_checkbox->setEnabled(
Settings::values.use_video_framerate.UsingGlobal());
+ ui->barrier_feedback_loops_checkbox->setEnabled(
+ Settings::values.barrier_feedback_loops.UsingGlobal());
return;
}
@@ -157,6 +164,9 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
ConfigurationShared::SetColoredTristate(ui->use_video_framerate_checkbox,
Settings::values.use_video_framerate,
use_video_framerate);
+ ConfigurationShared::SetColoredTristate(ui->barrier_feedback_loops_checkbox,
+ Settings::values.barrier_feedback_loops,
+ barrier_feedback_loops);
ConfigurationShared::SetColoredComboBox(
ui->gpu_accuracy, ui->label_gpu_accuracy,
static_cast<int>(Settings::values.gpu_accuracy.GetValue(true)));
diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h
index a4dc8ceb0..369a7c83e 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.h
+++ b/src/yuzu/configuration/configure_graphics_advanced.h
@@ -48,6 +48,7 @@ private:
ConfigurationShared::CheckState use_vulkan_driver_pipeline_cache;
ConfigurationShared::CheckState enable_compute_pipelines;
ConfigurationShared::CheckState use_video_framerate;
+ ConfigurationShared::CheckState barrier_feedback_loops;
const Core::System& system;
};
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui
index e7f0ef6be..d527a6f38 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.ui
+++ b/src/yuzu/configuration/configure_graphics_advanced.ui
@@ -202,6 +202,16 @@ Compute pipelines are always enabled on all other drivers.</string>
</widget>
</item>
<item>
+ <widget class="QCheckBox" name="barrier_feedback_loops_checkbox">
+ <property name="toolTip">
+ <string>Improves rendering of transparency effects in specific games.</string>
+ </property>
+ <property name="text">
+ <string>Barrier feedback loops</string>
+ </property>
+ </widget>
+ </item>
+ <item>
<widget class="QWidget" name="af_layout" native="true">
<layout class="QHBoxLayout" name="horizontalLayout_1">
<property name="leftMargin">
diff --git a/src/yuzu/configuration/configure_per_game.cpp b/src/yuzu/configuration/configure_per_game.cpp
index 7ac162586..eb96e6068 100644
--- a/src/yuzu/configuration/configure_per_game.cpp
+++ b/src/yuzu/configuration/configure_per_game.cpp
@@ -6,6 +6,7 @@
#include <memory>
#include <string>
#include <utility>
+#include <vector>
#include <fmt/format.h>
@@ -34,8 +35,10 @@
#include "yuzu/configuration/configure_system.h"
#include "yuzu/uisettings.h"
#include "yuzu/util/util.h"
+#include "yuzu/vk_device_info.h"
ConfigurePerGame::ConfigurePerGame(QWidget* parent, u64 title_id_, const std::string& file_name,
+ std::vector<VkDeviceInfo::Record>& vk_device_records,
Core::System& system_)
: QDialog(parent),
ui(std::make_unique<Ui::ConfigurePerGame>()), title_id{title_id_}, system{system_} {
@@ -50,7 +53,7 @@ ConfigurePerGame::ConfigurePerGame(QWidget* parent, u64 title_id_, const std::st
general_tab = std::make_unique<ConfigureGeneral>(system_, this);
graphics_advanced_tab = std::make_unique<ConfigureGraphicsAdvanced>(system_, this);
graphics_tab = std::make_unique<ConfigureGraphics>(
- system_, [&]() { graphics_advanced_tab->ExposeComputeOption(); }, this);
+ system_, vk_device_records, [&]() { graphics_advanced_tab->ExposeComputeOption(); }, this);
input_tab = std::make_unique<ConfigureInputPerGame>(system_, game_config.get(), this);
system_tab = std::make_unique<ConfigureSystem>(system_, this);
diff --git a/src/yuzu/configuration/configure_per_game.h b/src/yuzu/configuration/configure_per_game.h
index 85752f1fa..7ec1ded06 100644
--- a/src/yuzu/configuration/configure_per_game.h
+++ b/src/yuzu/configuration/configure_per_game.h
@@ -5,11 +5,13 @@
#include <memory>
#include <string>
+#include <vector>
#include <QDialog>
#include <QList>
#include "core/file_sys/vfs_types.h"
+#include "vk_device_info.h"
#include "yuzu/configuration/config.h"
namespace Core {
@@ -45,6 +47,7 @@ class ConfigurePerGame : public QDialog {
public:
// Cannot use std::filesystem::path due to https://bugreports.qt.io/browse/QTBUG-73263
explicit ConfigurePerGame(QWidget* parent, u64 title_id_, const std::string& file_name,
+ std::vector<VkDeviceInfo::Record>& vk_device_records,
Core::System& system_);
~ConfigurePerGame() override;
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index cba7c3cce..45a39451d 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -147,6 +147,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
#include "yuzu/startup_checks.h"
#include "yuzu/uisettings.h"
#include "yuzu/util/clickable_label.h"
+#include "yuzu/vk_device_info.h"
#ifdef YUZU_DBGHELP
#include "yuzu/mini_dump.h"
@@ -440,6 +441,8 @@ GMainWindow::GMainWindow(std::unique_ptr<Config> config_, bool has_broken_vulkan
renderer_status_button->setDisabled(true);
renderer_status_button->setChecked(false);
+ } else {
+ VkDeviceInfo::PopulateRecords(vk_device_records, this->window()->windowHandle());
}
#if defined(HAVE_SDL2) && !defined(_WIN32)
@@ -3494,7 +3497,8 @@ void GMainWindow::OnConfigure() {
const auto old_language_index = Settings::values.language_index.GetValue();
Settings::SetConfiguringGlobal(true);
- ConfigureDialog configure_dialog(this, hotkey_registry, input_subsystem.get(), *system,
+ ConfigureDialog configure_dialog(this, hotkey_registry, input_subsystem.get(),
+ vk_device_records, *system,
!multiplayer_state->IsHostingPublicRoom());
connect(&configure_dialog, &ConfigureDialog::LanguageChanged, this,
&GMainWindow::OnLanguageChanged);
@@ -3765,7 +3769,7 @@ void GMainWindow::OpenPerGameConfiguration(u64 title_id, const std::string& file
const auto v_file = Core::GetGameFileFromPath(vfs, file_name);
Settings::SetConfiguringGlobal(false);
- ConfigurePerGame dialog(this, title_id, file_name, *system);
+ ConfigurePerGame dialog(this, title_id, file_name, vk_device_records, *system);
dialog.LoadFromFile(v_file);
const auto result = dialog.exec();
diff --git a/src/yuzu/main.h b/src/yuzu/main.h
index 6bb70972f..e0e775d87 100644
--- a/src/yuzu/main.h
+++ b/src/yuzu/main.h
@@ -118,6 +118,10 @@ enum class ReinitializeKeyBehavior {
Warning,
};
+namespace VkDeviceInfo {
+class Record;
+}
+
class GMainWindow : public QMainWindow {
Q_OBJECT
@@ -418,6 +422,8 @@ private:
GameListPlaceholder* game_list_placeholder;
+ std::vector<VkDeviceInfo::Record> vk_device_records;
+
// Status bar elements
QLabel* message_label = nullptr;
QLabel* shader_building_label = nullptr;
diff --git a/src/yuzu/vk_device_info.cpp b/src/yuzu/vk_device_info.cpp
new file mode 100644
index 000000000..7c26a3dc7
--- /dev/null
+++ b/src/yuzu/vk_device_info.cpp
@@ -0,0 +1,61 @@
+// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <utility>
+#include <vector>
+#include "common/dynamic_library.h"
+#include "common/logging/log.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_instance.h"
+#include "video_core/vulkan_common/vulkan_library.h"
+#include "video_core/vulkan_common/vulkan_surface.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+#include "vulkan/vulkan_core.h"
+#include "yuzu/qt_common.h"
+#include "yuzu/vk_device_info.h"
+
+class QWindow;
+
+namespace VkDeviceInfo {
+Record::Record(std::string_view name_, const std::vector<VkPresentModeKHR>& vsync_modes_,
+ bool has_broken_compute_)
+ : name{name_}, vsync_support{vsync_modes_}, has_broken_compute{has_broken_compute_} {}
+
+Record::~Record() = default;
+
+void PopulateRecords(std::vector<Record>& records, QWindow* window) try {
+ using namespace Vulkan;
+
+ auto wsi = QtCommon::GetWindowSystemInfo(window);
+
+ vk::InstanceDispatch dld;
+ const auto library = OpenLibrary();
+ const vk::Instance instance = CreateInstance(*library, dld, VK_API_VERSION_1_1, wsi.type);
+ const std::vector<VkPhysicalDevice> physical_devices = instance.EnumeratePhysicalDevices();
+ vk::SurfaceKHR surface = CreateSurface(instance, wsi);
+
+ records.clear();
+ records.reserve(physical_devices.size());
+ for (const VkPhysicalDevice device : physical_devices) {
+ const auto physical_device = vk::PhysicalDevice(device, dld);
+ const std::string name = physical_device.GetProperties().deviceName;
+ const std::vector<VkPresentModeKHR> present_modes =
+ physical_device.GetSurfacePresentModesKHR(*surface);
+
+ VkPhysicalDeviceDriverProperties driver_properties{};
+ driver_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES;
+ driver_properties.pNext = nullptr;
+ VkPhysicalDeviceProperties2 properties{};
+ properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
+ properties.pNext = &driver_properties;
+ dld.vkGetPhysicalDeviceProperties2(physical_device, &properties);
+
+ bool has_broken_compute{Vulkan::Device::CheckBrokenCompute(
+ driver_properties.driverID, properties.properties.driverVersion)};
+
+ records.push_back(VkDeviceInfo::Record(name, present_modes, has_broken_compute));
+ }
+} catch (const Vulkan::vk::Exception& exception) {
+ LOG_ERROR(Frontend, "Failed to enumerate devices with error: {}", exception.what());
+}
+} // namespace VkDeviceInfo
diff --git a/src/yuzu/vk_device_info.h b/src/yuzu/vk_device_info.h
new file mode 100644
index 000000000..bda8262f4
--- /dev/null
+++ b/src/yuzu/vk_device_info.h
@@ -0,0 +1,36 @@
+// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <algorithm>
+#include <iterator>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <vector>
+#include "common/common_types.h"
+#include "vulkan/vulkan_core.h"
+
+class QWindow;
+
+namespace Settings {
+enum class VSyncMode : u32;
+}
+// #include "common/settings.h"
+
+namespace VkDeviceInfo {
+// Short class to record Vulkan driver information for configuration purposes
+class Record {
+public:
+ explicit Record(std::string_view name, const std::vector<VkPresentModeKHR>& vsync_modes,
+ bool has_broken_compute);
+ ~Record();
+
+ const std::string name;
+ const std::vector<VkPresentModeKHR> vsync_support;
+ const bool has_broken_compute;
+};
+
+void PopulateRecords(std::vector<Record>& records, QWindow* window);
+} // namespace VkDeviceInfo