summaryrefslogtreecommitdiffstats
path: root/src/video_core/renderer_opengl/gl_rasterizer_cache.h
blob: 838554c35715ffddfdfa844b1e93fa413511c6df (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <array>
#include <map>
#include <memory>
#include <string>
#include <unordered_set>
#include <vector>

#include "common/alignment.h"
#include "common/common_types.h"
#include "common/hash.h"
#include "common/math_util.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/surface.h"
#include "video_core/textures/decoders.h"
#include "video_core/textures/texture.h"

namespace OpenGL {

// Forward declaration; the class itself is defined further down in this header.
class CachedSurface;
/// Shared-ownership handle to a CachedSurface
using Surface = std::shared_ptr<CachedSurface>;
/// Two surface handles bundled together with a rectangle
using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, Common::Rectangle<u32>>;

// Shorthand aliases for the VideoCore::Surface types used throughout this header
using SurfaceTarget = VideoCore::Surface::SurfaceTarget;
using SurfaceType = VideoCore::Surface::SurfaceType;
using PixelFormat = VideoCore::Surface::PixelFormat;
using ComponentType = VideoCore::Surface::ComponentType;

/// Describes a surface: pixel format, dimensions, tiling layout, target and the parameters used
/// for caching. Also provides size/offset helpers for the surface and its mipmap levels.
struct SurfaceParams {
    /// Tags where a surface came from; ClassName() maps each value to a short label.
    enum class SurfaceClass {
        Uploaded,
        RenderTarget,
        DepthBuffer,
        Copy,
    };

    /// Returns the full enumerator name of the given surface target (e.g. "Texture2D").
    /// An unhandled value is logged as critical and yields "TextureUnknown(<value>)".
    static std::string SurfaceTargetName(SurfaceTarget target) {
        switch (target) {
        case SurfaceTarget::Texture1D:
            return "Texture1D";
        case SurfaceTarget::Texture2D:
            return "Texture2D";
        case SurfaceTarget::Texture3D:
            return "Texture3D";
        case SurfaceTarget::Texture1DArray:
            return "Texture1DArray";
        case SurfaceTarget::Texture2DArray:
            return "Texture2DArray";
        case SurfaceTarget::TextureCubemap:
            return "TextureCubemap";
        case SurfaceTarget::TextureCubeArray:
            return "TextureCubeArray";
        default:
            LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
            UNREACHABLE();
            return fmt::format("TextureUnknown({})", static_cast<u32>(target));
        }
    }

    /// Returns VideoCore::Surface::GetFormatBpp() for this surface's pixel format
    u32 GetFormatBpp() const {
        return VideoCore::Surface::GetFormatBpp(pixel_format);
    }

    /// Returns the rectangle corresponding to this surface
    Common::Rectangle<u32> GetRect(u32 mip_level = 0) const;

    /// Returns the total size of this surface in bytes, adjusted for compression
    /// @param ignore_tiled When true, the size is computed as if the surface were not tiled
    ///                     (false is passed to CalculateSize instead of is_tiled).
    std::size_t SizeInBytesRaw(bool ignore_tiled = false) const {
        const u32 compression_factor{GetCompressionFactor(pixel_format)};
        const u32 bytes_per_pixel{GetBytesPerPixel(pixel_format)};
        const std::size_t uncompressed_size{
            Tegra::Texture::CalculateSize((ignore_tiled ? false : is_tiled), bytes_per_pixel, width,
                                          height, depth, block_height, block_depth)};

        // Divide by compression_factor^2, as height and width are factored by this
        return uncompressed_size / (compression_factor * compression_factor);
    }

    /// Returns the size of this surface as an OpenGL texture in bytes
    /// (SizeInBytesRaw with the tiling flag ignored)
    std::size_t SizeInBytesGL() const {
        return SizeInBytesRaw(true);
    }

    /// Returns the size of this surface as a cube face in bytes (size_in_bytes / 6)
    std::size_t SizeInBytesCubeFace() const {
        return size_in_bytes / 6;
    }

    /// Returns the size of this surface as an OpenGL cube face in bytes (size_in_bytes_gl / 6)
    std::size_t SizeInBytesCubeFaceGL() const {
        return size_in_bytes_gl / 6;
    }

    /// Returns the exact size of memory occupied by the texture in VRAM, including mipmaps.
    /// For layered surfaces the per-layer size is multiplied by depth.
    std::size_t MemorySize() const {
        const std::size_t size = InnerMemorySize(false, is_layered);
        if (is_layered) {
            return size * depth;
        }
        return size;
    }

    /// Returns the exact size of the memory occupied by a layer in a texture in VRAM, including
    /// mipmaps.
    std::size_t LayerMemorySize() const {
        return InnerMemorySize(false, true);
    }

    /// Returns the size of a layer of this surface in OpenGL.
    /// @param mip_level Mipmap level the size is queried for.
    std::size_t LayerSizeGL(u32 mip_level) const {
        return InnerMipmapMemorySize(mip_level, true, is_layered, false);
    }

    /// Returns the OpenGL size of the given mipmap level; for layered surfaces the per-layer size
    /// is multiplied by depth.
    /// @param mip_level         Mipmap level the size is queried for.
    /// @param ignore_compressed Forwarded as the `uncompressed` argument of
    ///                          InnerMipmapMemorySize.
    std::size_t GetMipmapSizeGL(u32 mip_level, bool ignore_compressed = true) const {
        const std::size_t size =
            InnerMipmapMemorySize(mip_level, true, is_layered, ignore_compressed);
        if (is_layered) {
            return size * depth;
        }
        return size;
    }

    /// Returns the sum of the (non-GL) sizes of all mipmap levels below mip_level, i.e. the
    /// offset at which mip_level starts. Level 0 yields 0.
    std::size_t GetMipmapLevelOffset(u32 mip_level) const {
        std::size_t offset = 0;
        for (u32 i = 0; i < mip_level; i++) {
            offset += InnerMipmapMemorySize(i, false, is_layered);
        }
        return offset;
    }

    /// Same as GetMipmapLevelOffset, but accumulates the sizes computed with force_gl set.
    std::size_t GetMipmapLevelOffsetGL(u32 mip_level) const {
        std::size_t offset = 0;
        for (u32 i = 0; i < mip_level; i++) {
            offset += InnerMipmapMemorySize(i, true, is_layered);
        }
        return offset;
    }

    /// Returns the width of the given mipmap level: width halved per level, never below 1
    u32 MipWidth(u32 mip_level) const {
        return std::max(1U, width >> mip_level);
    }

    /// Returns the height of the given mipmap level: height halved per level, never below 1
    u32 MipHeight(u32 mip_level) const {
        return std::max(1U, height >> mip_level);
    }

    /// Returns the depth of the given mipmap level. Layered surfaces keep their full depth on
    /// every level; otherwise depth is halved per level, never below 1.
    u32 MipDepth(u32 mip_level) const {
        return is_layered ? depth : std::max(1U, depth >> mip_level);
    }

    // Auto block resizing algorithm from:
    // https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
    /// Returns the block height used for the given mipmap level. Level 0 uses block_height as
    /// configured; higher levels start at 16 and are halved while the level fits.
    u32 MipBlockHeight(u32 mip_level) const {
        if (mip_level == 0) {
            return block_height;
        }
        const u32 alt_height = MipHeight(mip_level);
        const u32 h = GetDefaultBlockHeight(pixel_format);
        // Height of this level measured in default-height blocks, rounded up
        const u32 blocks_in_y = (alt_height + h - 1) / h;
        u32 bh = 16;
        while (bh > 1 && blocks_in_y <= bh * 4) {
            bh >>= 1;
        }
        return bh;
    }

    /// Returns the block depth used for the given mipmap level. Level 0 uses block_depth as
    /// configured, layered surfaces always use 1 on higher levels.
    u32 MipBlockDepth(u32 mip_level) const {
        if (mip_level == 0) {
            return block_depth;
        }

        if (is_layered) {
            return 1;
        }

        const u32 mip_depth = MipDepth(mip_level);
        u32 bd = 32;
        while (bd > 1 && mip_depth * 2 <= bd) {
            bd >>= 1;
        }

        // A block depth of 32 is reduced to 16 when the block height of this level is 4 or more
        if (bd == 32) {
            const u32 bh = MipBlockHeight(mip_level);
            if (bh >= 4) {
                return 16;
            }
        }

        return bd;
    }

    /// Creates SurfaceParams from a texture configuration
    static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config,
                                          const GLShader::SamplerEntry& entry);

    /// Creates SurfaceParams from a framebuffer configuration
    static SurfaceParams CreateForFramebuffer(std::size_t index);

    /// Creates SurfaceParams for a depth buffer configuration
    static SurfaceParams CreateForDepthBuffer(
        u32 zeta_width, u32 zeta_height, Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format,
        u32 block_width, u32 block_height, u32 block_depth,
        Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type);

    /// Creates SurfaceParams for a Fermi2D surface copy
    static SurfaceParams CreateForFermiCopySurface(
        const Tegra::Engines::Fermi2D::Regs::Surface& config);

    /// Checks if surfaces are compatible for caching: pixel format, type, dimensions, target and
    /// tiling flag must match, and for tiled surfaces the block height/depth and tile width
    /// spacing must match as well.
    bool IsCompatibleSurface(const SurfaceParams& other) const {
        if (std::tie(pixel_format, type, width, height, target, depth, is_tiled) ==
            std::tie(other.pixel_format, other.type, other.width, other.height, other.target,
                     other.depth, other.is_tiled)) {
            if (!is_tiled) {
                return true;
            }
            return std::tie(block_height, block_depth, tile_width_spacing) ==
                   std::tie(other.block_height, other.block_depth, other.tile_width_spacing);
        }
        return false;
    }

    /// Initializes parameters for caching, should be called after everything has been initialized
    void InitCacheParameters(Tegra::GPUVAddr gpu_addr);

    /// Returns a short label for this surface's target (e.g. "2D", "CubeArray").
    /// An unhandled value is logged as critical and yields "TUK(<value>)".
    std::string TargetName() const {
        switch (target) {
        case SurfaceTarget::Texture1D:
            return "1D";
        case SurfaceTarget::Texture2D:
            return "2D";
        case SurfaceTarget::Texture3D:
            return "3D";
        case SurfaceTarget::Texture1DArray:
            return "1DArray";
        case SurfaceTarget::Texture2DArray:
            return "2DArray";
        case SurfaceTarget::TextureCubemap:
            return "Cube";
        // Keep this list in step with SurfaceTargetName(); cube arrays are a valid target and
        // must not end up in the UNREACHABLE() fallback.
        case SurfaceTarget::TextureCubeArray:
            return "CubeArray";
        default:
            LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
            UNREACHABLE();
            return fmt::format("TUK({})", static_cast<u32>(target));
        }
    }

    /// Returns a two-letter label for this surface's SurfaceClass ("UP", "RT", "DB" or "CP").
    /// An unhandled value is logged as critical and yields "CUK(<value>)".
    std::string ClassName() const {
        switch (identity) {
        case SurfaceClass::Uploaded:
            return "UP";
        case SurfaceClass::RenderTarget:
            return "RT";
        case SurfaceClass::DepthBuffer:
            return "DB";
        case SurfaceClass::Copy:
            return "CP";
        default:
            LOG_CRITICAL(HW_GPU, "Unimplemented surface_class={}", static_cast<u32>(identity));
            UNREACHABLE();
            return fmt::format("CUK({})", static_cast<u32>(identity));
        }
    }

    /// Returns "<ClassName>_<TargetName>_<T|L>", where the last letter is 'T' for tiled surfaces
    /// and 'L' otherwise.
    std::string IdentityString() const {
        return ClassName() + '_' + TargetName() + '_' + (is_tiled ? 'T' : 'L');
    }

    bool is_tiled;
    u32 block_width;
    u32 block_height;
    u32 block_depth;
    u32 tile_width_spacing;
    PixelFormat pixel_format;
    ComponentType component_type;
    SurfaceType type;
    u32 width;
    u32 height;
    u32 depth;
    u32 unaligned_height;
    u32 pitch;
    SurfaceTarget target;
    SurfaceClass identity;
    u32 max_mip_level;
    bool is_layered;
    bool is_array;
    bool srgb_conversion;
    // Parameters used for caching
    VAddr addr;
    Tegra::GPUVAddr gpu_addr;
    std::size_t size_in_bytes;
    std::size_t size_in_bytes_gl;

    // Render target specific parameters, not used in caching
    struct {
        u32 index;
        u32 array_mode;
        u32 volume;
        u32 layer_stride;
        u32 base_layer;
    } rt;

private:
    // Size helpers the public size/offset accessors above forward to; they are defined outside
    // this header.
    std::size_t InnerMipmapMemorySize(u32 mip_level, bool force_gl = false, bool layer_only = false,
                                      bool uncompressed = false) const;
    std::size_t InnerMemorySize(bool force_gl = false, bool layer_only = false,
                                bool uncompressed = false) const;
};

}; // namespace OpenGL

/// Key type for the surface reserve: wraps a SurfaceParams in Common::HashableStruct so that it
/// can be hashed and used as a key in the surface cache
struct SurfaceReserveKey : Common::HashableStruct<OpenGL::SurfaceParams> {
    /// Builds a key from `params`, blanking the fields that must not influence the lookup
    static SurfaceReserveKey Create(const OpenGL::SurfaceParams& params) {
        SurfaceReserveKey key;
        key.state = params;

        auto& state = key.state;
        state.rt = {};       // The rt config takes no part in caching
        state.gpu_addr = {}; // Neither does the GPU vaddr
        state.identity = {}; // Nor the origin of the texture
        return key;
    }
};
namespace std {
/// Hash support for SurfaceReserveKey; simply forwards to the key's own Hash() member
template <>
struct hash<SurfaceReserveKey> {
    std::size_t operator()(const SurfaceReserveKey& key) const {
        const std::size_t digest = key.Hash();
        return digest;
    }
};
} // namespace std

namespace OpenGL {

// Forward declaration only; this header uses the type by reference (see the
// RasterizerCacheOpenGL constructor).
class RasterizerOpenGL;

/// A cache object built from one SurfaceParams description. Holds the surface's OGLTexture, an
/// additional texture view and the gl_buffer staging data.
class CachedSurface final : public RasterizerCacheObject {
public:
    /// Explicit so that a SurfaceParams is never silently converted into a CachedSurface
    explicit CachedSurface(const SurfaceParams& params);

    /// Returns the address stored in the surface parameters (params.addr)
    VAddr GetAddr() const override {
        return params.addr;
    }

    /// Returns the cached size of this surface in bytes
    std::size_t GetSizeInBytes() const override {
        return cached_size_in_bytes;
    }

    /// Flushes this surface by calling FlushGLBuffer()
    void Flush() override {
        FlushGLBuffer();
    }

    /// Returns the main texture of this surface
    const OGLTexture& Texture() const {
        return texture;
    }

    /// Returns the texture to use when the surface is accessed through its layered target.
    /// Array surfaces return the main texture; every other surface returns texture_view, after
    /// EnsureTextureView() has been called.
    const OGLTexture& TextureLayer() {
        if (params.is_array) {
            return Texture();
        }
        EnsureTextureView();
        return texture_view;
    }

    /// Returns the OpenGL target stored for this surface
    GLenum Target() const {
        return gl_target;
    }

    /// Returns the array counterpart of this surface's target: 1D, 2D and cubemap surfaces map
    /// to their *_ARRAY targets, every other surface target falls back to Target().
    GLenum TargetLayer() const {
        using VideoCore::Surface::SurfaceTarget;
        switch (params.target) {
        case SurfaceTarget::Texture1D:
            return GL_TEXTURE_1D_ARRAY;
        case SurfaceTarget::Texture2D:
            return GL_TEXTURE_2D_ARRAY;
        case SurfaceTarget::TextureCubemap:
            return GL_TEXTURE_CUBE_MAP_ARRAY;
        default:
            // The remaining targets are deliberately not remapped; spelling this out keeps the
            // switch free of unhandled-enumerator warnings.
            break;
        }
        return Target();
    }

    /// Returns the parameters this surface was created with
    const SurfaceParams& GetSurfaceParams() const {
        return params;
    }

    // Read/Write data in Switch memory to/from gl_buffer
    void LoadGLBuffer();
    void FlushGLBuffer();

    // Upload data in gl_buffer to this surface's texture
    void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle);

    // NOTE(review): presumably updates the texture swizzle (see the `swizzle` member) from the
    // four per-component sources - confirm against the definition in the .cpp
    void UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
                       Tegra::Texture::SwizzleSource swizzle_y,
                       Tegra::Texture::SwizzleSource swizzle_z,
                       Tegra::Texture::SwizzleSource swizzle_w);

private:
    // NOTE(review): presumably the per-mipmap-level part of UploadGLTexture - confirm in the .cpp
    void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);

    // Called by TextureLayer() before texture_view is handed out
    void EnsureTextureView();

    OGLTexture texture;
    OGLTexture texture_view;
    std::vector<std::vector<u8>> gl_buffer;
    SurfaceParams params{};
    GLenum gl_target{};
    GLenum gl_internal_format{};
    std::size_t cached_size_in_bytes{};
    std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
};

/// Surface cache of the OpenGL renderer: a RasterizerCache specialised for Surface handles, with
/// lookups for texture, color buffer and depth buffer surfaces plus surface-to-surface copies.
class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
public:
    explicit RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer);

    /// Get a surface based on the texture configuration
    Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config,
                              const GLShader::SamplerEntry& entry);

    /// Get the depth surface based on the framebuffer configuration
    Surface GetDepthBufferSurface(bool preserve_contents);

    /// Get the color surface based on the framebuffer configuration and the specified render target
    Surface GetColorBufferSurface(std::size_t index, bool preserve_contents);

    /// Tries to find a framebuffer surface using the provided CPU address
    Surface TryFindFramebufferSurface(VAddr addr) const;

    /// Copies the contents of one surface to another
    void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
                          const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
                          const Common::Rectangle<u32>& src_rect,
                          const Common::Rectangle<u32>& dst_rect);

private:
    // NOTE(review): presumably fills the surface with its current memory contents - confirm
    // against the definition in the .cpp
    void LoadSurface(const Surface& surface);
    // NOTE(review): presumably the common lookup behind the public Get*Surface functions; the
    // meaning of preserve_contents is not visible in this header - confirm in the .cpp
    Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true);

    /// Gets an uncached surface, creating it if need be
    Surface GetUncachedSurface(const SurfaceParams& params);

    /// Recreates a surface with new parameters
    Surface RecreateSurface(const Surface& old_surface, const SurfaceParams& new_params);

    /// Reserves a unique surface that can be reused later
    void ReserveSurface(const Surface& surface);

    /// Tries to get a reserved surface for the specified parameters
    Surface TryGetReservedSurface(const SurfaceParams& params);

    /// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data
    void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface);
    // NOTE(review): the three copies below are presumably the faster alternatives to
    // AccurateCopySurface; their exact strategies are not visible in this header - confirm in
    // the .cpp
    void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface);
    void FastCopySurface(const Surface& src_surface, const Surface& dst_surface);
    void CopySurface(const Surface& src_surface, const Surface& dst_surface,
                     const GLuint copy_pbo_handle, const GLenum src_attachment = 0,
                     const GLenum dst_attachment = 0, const std::size_t cubemap_face = 0);

    /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
    /// previously been used. This is to prevent surfaces from being constantly created and
    /// destroyed when used with different surface parameters.
    std::unordered_map<SurfaceReserveKey, Surface> surface_reserve;

    // Framebuffer objects; NOTE(review): presumably the read/draw framebuffers used by the
    // copy and upload paths - confirm in the .cpp
    OGLFramebuffer read_framebuffer;
    OGLFramebuffer draw_framebuffer;

    /// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one
    /// using the new format.
    OGLBuffer copy_pbo;

    // One slot per render target (NumRenderTargets entries) and one for the depth buffer;
    // NOTE(review): presumably the surfaces most recently returned for each - confirm in the .cpp
    std::array<Surface, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> last_color_buffers;
    Surface last_depth_buffer;
};

} // namespace OpenGL