// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin // SPDX-License-Identifier: CC-BY-NC-ND-4.0 #pragma once #include "gpu_types.h" #include "timers.h" #include "types.h" #include "util/gpu_device.h" #include "util/gpu_texture.h" #include "common/bitfield.h" #include "common/fifo_queue.h" #include "common/types.h" #include #include #include #include #include #include #include #include class Error; class Image; class SmallStringBase; class StateWrapper; class GPUDevice; class GPUTexture; class GPUPipeline; class MediaCapture; namespace GPUDump { enum class PacketType : u8; class Recorder; class Player; } // namespace GPUDump class GPUBackend; struct Settings; namespace System { struct MemorySaveState; } struct GPUBackendCommand; struct GPUBackendDrawCommand; class GPU final { public: enum class BlitterState : u8 { Idle, ReadingVRAM, WritingVRAM, DrawingPolyLine }; enum : u32 { MAX_FIFO_SIZE = 4096, DOT_TIMER_INDEX = 0, HBLANK_TIMER_INDEX = 1, MAX_RESOLUTION_SCALE = 32, DRAWING_AREA_COORD_MASK = 1023, }; enum : u16 { NTSC_TICKS_PER_LINE = 3413, NTSC_TOTAL_LINES = 263, PAL_TICKS_PER_LINE = 3406, PAL_TOTAL_LINES = 314, }; enum : u16 { NTSC_HORIZONTAL_ACTIVE_START = 488, NTSC_HORIZONTAL_ACTIVE_END = 3288, NTSC_VERTICAL_ACTIVE_START = 16, NTSC_VERTICAL_ACTIVE_END = 256, NTSC_OVERSCAN_HORIZONTAL_ACTIVE_START = 608, NTSC_OVERSCAN_HORIZONTAL_ACTIVE_END = 3168, NTSC_OVERSCAN_VERTICAL_ACTIVE_START = 24, NTSC_OVERSCAN_VERTICAL_ACTIVE_END = 248, PAL_HORIZONTAL_ACTIVE_START = 488, PAL_HORIZONTAL_ACTIVE_END = 3300, PAL_VERTICAL_ACTIVE_START = 20, PAL_VERTICAL_ACTIVE_END = 308, PAL_OVERSCAN_HORIZONTAL_ACTIVE_START = 628, PAL_OVERSCAN_HORIZONTAL_ACTIVE_END = 3188, PAL_OVERSCAN_VERTICAL_ACTIVE_START = 30, PAL_OVERSCAN_VERTICAL_ACTIVE_END = 298, }; // Base class constructor. 
// Construction/destruction, initialization, reset and state (de)serialization entry points.
  GPU();
  ~GPU();

  void Initialize();
  void Shutdown();
  void Reset(bool clear_vram);
  bool DoState(StateWrapper& sw);
  void DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss);

  // Render statistics debug window.
  void DrawDebugStateWindow(float scale);

  void CPUClockChanged();

  // MMIO access
  u32 ReadRegister(u32 offset);
  void WriteRegister(u32 offset, u32 value);

  // DMA access
  void DMARead(u32* words, u32 word_count);

  /// Returns true if GPUSTAT's DMA direction is CPUtoGP0 or FIFO.
  ALWAYS_INLINE bool BeginDMAWrite() const
  {
    return (m_GPUSTAT.dma_direction == GPUDMADirection::CPUtoGP0 || m_GPUSTAT.dma_direction == GPUDMADirection::FIFO);
  }

  /// Pushes one entry onto the FIFO, with `address` packed into the upper 32 bits and `value` into the lower 32.
  ALWAYS_INLINE void DMAWrite(u32 address, u32 value)
  {
    m_fifo.Push((ZeroExtend64(address) << 32) | ZeroExtend64(value));
  }
  void EndDMAWrite();

  /// Writing to GPU dump.
  /// GetGPUDump() returns the raw pointer held by m_gpu_dump (presumably null when nothing is being recorded --
  /// TODO confirm).
  GPUDump::Recorder* GetGPUDump() const { return m_gpu_dump.get(); }
  bool StartRecordingGPUDump(const char* path, u32 num_frames = 1);
  void StopRecordingGPUDump();
  void WriteCurrentVideoModeToDump(GPUDump::Recorder* dump) const;
  // NOTE(review): the element type of the std::span parameter below is missing from this copy of the file.
  void ProcessGPUDumpPacket(GPUDump::PacketType type, const std::span data);

  /// Returns true if no data is being sent from VRAM to the DAC or that no portion of VRAM would be visible on screen.
  /// Concretely: GPUSTAT.display_disable is set, or the visible VRAM rectangle has zero width or height.
  ALWAYS_INLINE bool IsDisplayDisabled() const
  {
    return m_GPUSTAT.display_disable || m_crtc_state.display_vram_width == 0 || m_crtc_state.display_vram_height == 0;
  }

  /// Returns true if scanout should be interlaced, i.e. GPUSTAT.vertical_interlace is set and progressive scan is
  /// not being forced.
  ALWAYS_INLINE bool IsInterlacedDisplayEnabled() const
  {
    return (!m_force_progressive_scan && m_GPUSTAT.vertical_interlace);
  }

  /// Returns true if interlaced rendering is enabled and force progressive scan is disabled.
  ALWAYS_INLINE bool IsInterlacedRenderingEnabled() const
  {
    return (!m_force_progressive_scan && m_GPUSTAT.SkipDrawingToActiveField());
  }

  /// Returns true if we're in PAL mode, otherwise false if NTSC.
  ALWAYS_INLINE bool IsInPALMode() const { return m_GPUSTAT.pal_mode; }

  /// Returns the number of pending GPU ticks.
// Pending/remaining tick queries for the CRTC and for command execution.
  TickCount GetPendingCRTCTicks() const;
  TickCount GetPendingCommandTicks() const;
  TickCount GetRemainingCommandTicks() const;

  /// Returns true if enough ticks have passed for the raster to be on the next line.
  bool IsCRTCScanlinePending() const;

  /// Returns true if a raster scanline or command execution is pending.
  bool IsCommandCompletionPending() const;

  /// Synchronizes the CRTC, updating the hblank timer.
  void SynchronizeCRTC();

  /// Recompile shaders/recreate framebuffers when needed.
  void UpdateSettings(const Settings& old_settings);

  /// Returns the full display resolution of the GPU, including padding.
  // NOTE(review): the std::tuple element types are missing from this copy of the file.
  std::tuple GetFullDisplayResolution() const;

  /// Computes clamped drawing area.
  static GSVector4i GetClampedDrawingArea(const GPUDrawingArea& drawing_area);

  float ComputeHorizontalFrequency() const;
  float ComputeVerticalFrequency() const;
  float ComputeDisplayAspectRatio() const;
  float ComputeSourceAspectRatio() const;
  float ComputePixelAspectRatio() const;

  /// Computes aspect ratio correction, i.e. the scale to apply to the source aspect ratio to preserve
  /// the original pixel aspect ratio regardless of how much cropping has been applied.
  float ComputeAspectRatioCorrection() const;

  /// Applies the pixel aspect ratio to a given size, preserving the larger dimension.
  /// `width` and `height` are in/out parameters.
  static void ApplyPixelAspectRatioToSize(float par, float* width, float* height);

  // Converts window coordinates into horizontal ticks and scanlines. Returns false if out of range. Used for lightguns.
  // This is a two-step conversion: window -> display coordinates (no return value), then display coordinates ->
  // beam tick/line (the bool-returning step).
  void ConvertScreenCoordinatesToDisplayCoordinates(float window_x, float window_y, float* display_x,
                                                    float* display_y) const;
  bool ConvertDisplayCoordinatesToBeamTicksAndLines(float display_x, float display_y, float x_scale, u32* out_tick,
                                                    u32* out_line) const;

  // Returns the current beam position.
  void GetBeamPosition(u32* out_ticks, u32* out_line);

  // Returns the number of system clock ticks until the specified tick/line.
// `ticks`/`line` identify the target beam position.
  TickCount GetSystemTicksUntilTicksAndLine(u32 ticks, u32 line);

  // Return the first and last line of the vertical display range held in the CRTC state
  // (vertical_display_start / vertical_display_end).
  ALWAYS_INLINE u16 GetCRTCActiveStartLine() const { return m_crtc_state.vertical_display_start; }
  ALWAYS_INLINE u16 GetCRTCActiveEndLine() const { return m_crtc_state.vertical_display_end; }

  // Returns the video clock frequency.
  TickCount GetCRTCFrequency() const;

  // Plain accessors for CRTC state. The display width/height are stored as u16 and widened to s32 on return.
  ALWAYS_INLINE u16 GetCRTCDotClockDivider() const { return m_crtc_state.dot_clock_divider; }
  ALWAYS_INLINE s32 GetCRTCDisplayWidth() const { return m_crtc_state.display_width; }
  ALWAYS_INLINE s32 GetCRTCDisplayHeight() const { return m_crtc_state.display_height; }

  // Ticks for hblank/vblank.
  void CRTCTickEvent(TickCount ticks);
  void CommandTickEvent(TickCount ticks);
  void FrameDoneEvent(TickCount ticks);

  // Dumps raw VRAM to a file.
  bool DumpVRAMToFile(const char* filename);

  // Kicks the current frame to the backend for display.
  void UpdateDisplay(bool submit_frame);

  // Queues the current frame for presentation. Should only be used with runahead.
  void QueuePresentCurrentFrame();

  /// Computes the effective resolution scale when it is set to automatic.
  u8 CalculateAutomaticResolutionScale() const;

  /// Helper function for computing the draw rectangle in a larger window.
  /// Results are written through `display_rect` and `draw_rect`.
  static void CalculateDrawRect(u32 window_width, u32 window_height, u32 crtc_display_width, u32 crtc_display_height,
                                s32 display_origin_left, s32 display_origin_top, u32 display_vram_width,
                                u32 display_vram_height, DisplayRotation rotation, DisplayAlignment alignment,
                                float pixel_aspect_ratio, bool integer_scale, GSVector4i* display_rect,
                                GSVector4i* draw_rect);

private:
  // Conversions between CRTC ticks and system clock ticks. SystemTicksToCRTCTicks() takes `fractional_ticks` by
  // pointer (presumably to carry the remainder between calls -- TODO confirm against the implementation).
  TickCount CRTCTicksToSystemTicks(TickCount crtc_ticks, TickCount fractional_ticks) const;
  TickCount SystemTicksToCRTCTicks(TickCount sysclk_ticks, TickCount* fractional_ticks) const;

  // The GPU internally appears to run at 2x the system clock.
// GPU tick <-> system tick conversion helpers (factor of two in each direction).
  /// Halves `gpu_ticks`, adding one before the shift so odd counts round up, and never returns less than 1.
  ALWAYS_INLINE static constexpr TickCount GPUTicksToSystemTicks(TickCount gpu_ticks)
  {
    return std::max((gpu_ticks + 1) >> 1, 1);
  }

  /// Doubles `sysclk_ticks`.
  ALWAYS_INLINE static constexpr TickCount SystemTicksToGPUTicks(TickCount sysclk_ticks) { return sysclk_ticks << 1; }

  /// Static overload that dumps a caller-supplied buffer with explicit dimensions and stride.
  static bool DumpVRAMToFile(const char* filename, u32 width, u32 height, u32 stride, const void* buffer,
                             bool remove_alpha);

  void SoftReset();
  void ClearDisplay();

  // Sets dots per scanline
  void UpdateCRTCConfig();
  void UpdateCRTCDisplayParameters();

  // Update ticks for this execution slice
  void UpdateCRTCTickEvent();
  void UpdateCommandTickEvent();

  // Updates dynamic bits in GPUSTAT (ready to send VRAM/ready to receive DMA)
  void UpdateDMARequest();
  void UpdateGPUIdle();

  /// Returns 0 if the currently-displayed field is on odd lines (1,3,5,...) or 1 if even (2,4,6,...).
  // NOTE(review): this returns m_crtc_state.interlaced_field; CRTCState also has a separate member named
  // interlaced_display_field -- confirm which one this getter is meant to expose.
  ALWAYS_INLINE u8 GetInterlacedDisplayField() const { return m_crtc_state.interlaced_field; }

  /// Returns 0 if the currently-displayed field is on an even line in VRAM, otherwise 1.
  ALWAYS_INLINE u8 GetActiveLineLSB() const { return m_crtc_state.active_line_lsb; }

  /// Updates the drawing area that's suitable for clamping.
  void SetClampedDrawingArea();

  /// Sets/decodes GP0(E1h) (set draw mode).
  void SetDrawMode(u16 bits);

  /// Sets/decodes polygon/rectangle texture palette value.
  void SetTexturePalette(u16 bits);

  /// Sets/decodes texture window bits.
  void SetTextureWindow(u32 value);

  u32 ReadGPUREAD();
  void FinishVRAMWrite();

  /// Returns the number of vertices in the buffered poly-line.
// NOTE(review): the target type of the static_cast in GetPolyLineVertexCount() is missing from this copy of the file.
  /// Computes (buffer size + s) >> s, where s comes from m_render_command.shading_enable. Assuming
  /// BoolToUInt32/BoolToUInt8 map false/true to 0/1, that is the buffer size itself when shading is disabled and
  /// half of it (rounded up) when shading is enabled.
  ALWAYS_INLINE u32 GetPolyLineVertexCount() const
  {
    return (static_cast(m_polyline_buffer.size()) + BoolToUInt32(m_render_command.shading_enable)) >>
           BoolToUInt8(m_render_command.shading_enable);
  }

  void AddCommandTicks(TickCount ticks);

  void WriteGP1(u32 value);
  void EndCommand();
  void ExecuteCommands();
  void TryExecuteCommands();
  void HandleGetGPUInfoCommand(u32 value);
  void UpdateCLUTIfNeeded(GPUTextureMode texmode, GPUTexturePaletteReg clut);
  void InvalidateCLUT();
  bool IsCLUTValid() const;

  void ReadVRAM(u16 x, u16 y, u16 width, u16 height);
  void UpdateVRAM(u16 x, u16 y, u16 width, u16 height, const void* data, bool set_mask, bool check_mask);

  void PrepareForDraw();
  void FinishPolyline();
  void FillDrawCommand(GPUBackendDrawCommand* RESTRICT cmd, GPURenderCommand rc) const;

  /// Estimates the cost of drawing a triangle and adds it via AddCommandTicks().
  /// The vertices are first saturated to m_clamped_drawing_area, then:
  ///   - base cost   = |twice the signed area / 2| of the clamped triangle (integer arithmetic)
  ///   - textured    : cost is doubled
  ///   - semitransparent or GPUSTAT.check_mask_before_draw : cost grows by half of itself, rounded up
  ///   - GPUSTAT.SkipDrawingToActiveField() : cost is halved
  /// The `shaded` parameter is not used by the current implementation.
  ALWAYS_INLINE_RELEASE void AddDrawTriangleTicks(GSVector2i v1, GSVector2i v2, GSVector2i v3, bool shaded,
                                                  bool textured, bool semitransparent)
  {
    // This will not produce the correct results for triangles which are partially outside the clip area.
    // However, usually it'll undershoot not overshoot. If we wanted to make this more accurate, we'd need to intersect
    // the edges with the clip rectangle.
    // TODO: Coordinates are exclusive, so off by one here...
    const GSVector2i clamp_min = GSVector2i::load(&m_clamped_drawing_area.x);
    const GSVector2i clamp_max = GSVector2i::load(&m_clamped_drawing_area.z);
    v1 = v1.sat_s32(clamp_min, clamp_max);
    v2 = v2.sat_s32(clamp_min, clamp_max);
    v3 = v3.sat_s32(clamp_min, clamp_max);

    // Determinant form of twice the signed triangle area, halved and made non-negative.
    TickCount pixels =
      std::abs((v1.x * v2.y + v2.x * v3.y + v3.x * v1.y - v1.x * v3.y - v2.x * v1.y - v3.x * v2.y) / 2);
    if (textured)
      pixels += pixels;
    if (semitransparent || m_GPUSTAT.check_mask_before_draw)
      pixels += (pixels + 1) / 2;
    if (m_GPUSTAT.SkipDrawingToActiveField())
      pixels /= 2;

    AddCommandTicks(pixels);
  }

  /// Estimates the cost of drawing a rectangle and adds it via AddCommandTicks().
  /// The rectangle is intersected with m_clamped_drawing_area; the cost is ticks_per_row * drawn_height, where
  /// ticks_per_row starts at the drawn width and grows with the texture mode and with semitransparency/mask
  /// checking, and drawn_height is halved (minimum 1) when GPUSTAT.SkipDrawingToActiveField() is true.
  ALWAYS_INLINE_RELEASE void AddDrawRectangleTicks(const GSVector4i rect, bool textured, bool semitransparent)
  {
    const GSVector4i clamped_rect = m_clamped_drawing_area.rintersect(rect);

    u32 drawn_width = clamped_rect.width();
    u32 drawn_height = clamped_rect.height();

    u32 ticks_per_row = drawn_width;
    if (textured)
    {
      switch (m_draw_mode.mode_reg.texture_mode)
      {
        case GPUTextureMode::Palette4Bit:
          ticks_per_row += drawn_width;
          break;

        case GPUTextureMode::Palette8Bit:
        {
          // Texture cache reload every 2 pixels, reads in 8 bytes (assuming 4x2). Cache only reloads if the
          // draw width is greater than 32, otherwise the cache hits between rows.
          // NOTE(review): the test below is `>= 32`, i.e. a width of exactly 32 takes the reload path, which does
          // not match "greater than 32" above -- confirm which is intended.
          if (drawn_width >= 32)
            ticks_per_row += (drawn_width / 4) * 8;
          else
            ticks_per_row += drawn_width;
        }
        break;

        case GPUTextureMode::Direct16Bit:
        case GPUTextureMode::Reserved_Direct16Bit:
        {
          // Same as above, except with 2x2 blocks instead of 4x2.
          if (drawn_width >= 32)
            ticks_per_row += (drawn_width / 2) * 8;
          else
            ticks_per_row += drawn_width;
        }
        break;

          DefaultCaseIsUnreachable()
      }
    }

    if (semitransparent || m_GPUSTAT.check_mask_before_draw)
      ticks_per_row += (drawn_width + 1u) / 2u;
    if (m_GPUSTAT.SkipDrawingToActiveField())
      drawn_height = std::max(drawn_height / 2, 1u);

    AddCommandTicks(ticks_per_row * drawn_height);
  }

  /// Estimates the cost of drawing a line and adds it via AddCommandTicks().
  /// `rect` is intersected with m_clamped_drawing_area; nothing is added when the intersection is empty.
  /// Otherwise the cost is the larger of the clamped width and the clamped height, the height being halved
  /// (minimum 1) when GPUSTAT.SkipDrawingToActiveField() is true. The `shaded` parameter is not used by the
  /// current implementation.
  ALWAYS_INLINE_RELEASE void AddDrawLineTicks(const GSVector4i rect, bool shaded)
  {
    const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area);

    // Needed because we're not multiplying either dimension.
    if (clamped_rect.rempty())
      return;

    const u32 drawn_width = clamped_rect.width();
    u32 drawn_height = clamped_rect.height();
    if (m_GPUSTAT.SkipDrawingToActiveField())
      drawn_height = std::max(drawn_height / 2, 1u);

    AddCommandTicks(std::max(drawn_width, drawn_height));
  }

  GPUSTAT m_GPUSTAT = {};

  /// Raw and decoded draw-mode state.
  struct DrawMode
  {
    static constexpr u16 PALETTE_MASK = UINT16_C(0b0111111111111111);            // low 15 bits
    static constexpr u32 TEXTURE_WINDOW_MASK = UINT32_C(0b11111111111111111111); // low 20 bits

    // original values
    GPUDrawModeReg mode_reg;
    GPUTexturePaletteReg palette_reg; // from vertex
    u32 texture_window_value;

    // decoded values
    // TODO: Make this a command
    GPUTextureWindow texture_window;
    bool texture_x_flip;
    bool texture_y_flip;
  } m_draw_mode = {};

  GPUDrawingArea m_drawing_area = {};
  GPUDrawingOffset m_drawing_offset = {};
  GSVector4i m_clamped_drawing_area = {};

  bool m_console_is_pal = false;
  bool m_set_texture_disable_mask = false;
  bool m_drawing_area_changed = false;
  bool m_force_progressive_scan = false;
  ForceVideoTimingMode m_force_frame_timings = ForceVideoTimingMode::Disabled;

  /// CRTC registers plus the display geometry and beam-position state that goes with them.
  struct CRTCState
  {
    struct Regs
    {
      // Writable-bit masks for the three registers below: 19 bits with bit 0 cleared, 24 bits, and 20 bits.
      static constexpr u32 DISPLAY_ADDRESS_START_MASK = 0b111'11111111'11111110;
      static constexpr u32 HORIZONTAL_DISPLAY_RANGE_MASK = 0b11111111'11111111'11111111;
      static constexpr u32 VERTICAL_DISPLAY_RANGE_MASK = 0b1111'11111111'11111111;

      // NOTE(review): the template arguments of every BitField below (storage type, field type, bit offset, bit
      // count) are missing from this copy of the file; restore them from version control.
      union
      {
        u32 display_address_start;
        BitField X;
        BitField Y;
      };
      union
      {
        u32 horizontal_display_range;
        BitField X1;
        BitField X2;
      };

      union
      {
        u32 vertical_display_range;
        BitField Y1;
        BitField Y2;
      };
    } regs;

    u16 dot_clock_divider;

    // Size of the simulated screen in pixels. Depending on crop mode, this may include overscan area.
    u16 display_width;
    u16 display_height;

    // Top-left corner in screen coordinates where the outputted portion of VRAM is first visible.
    u16 display_origin_left;
    u16 display_origin_top;

    // Rectangle in VRAM coordinates describing the area of VRAM that is visible on screen.
    u16 display_vram_left;
    u16 display_vram_top;
    u16 display_vram_width;
    u16 display_vram_height;

    // Visible range of the screen, in GPU ticks/lines. Clamped to lie within the active video region.
    u16 horizontal_visible_start;
    u16 horizontal_visible_end;
    u16 vertical_visible_start;
    u16 vertical_visible_end;

    u16 horizontal_display_start;
    u16 horizontal_display_end;
    u16 vertical_display_start;
    u16 vertical_display_end;

    u16 horizontal_active_start;
    u16 horizontal_active_end;

    u16 horizontal_total;
    u16 vertical_total;

    TickCount fractional_ticks;
    TickCount current_tick_in_scanline;
    u32 current_scanline;

    TickCount fractional_dot_ticks; // only used when timer0 is enabled

    bool in_hblank;
    bool in_vblank;

    u8 interlaced_field; // 0 = odd, 1 = even
    u8 interlaced_display_field;
    u8 active_line_lsb;

    /// Sets in_hblank whenever the current tick lies outside [horizontal_active_start, horizontal_active_end).
    ALWAYS_INLINE void UpdateHBlankFlag()
    {
      in_hblank =
        (current_tick_in_scanline < horizontal_active_start || current_tick_in_scanline >= horizontal_active_end);
    }
  } m_crtc_state = {};

  BlitterState m_blitter_state = BlitterState::Idle;
  u32 m_command_total_words = 0;
  TickCount m_pending_command_ticks = 0;

  /// GPUREAD value for non-VRAM-reads.
  u32 m_GPUREAD_latch = 0;

  // These are the bits from the palette register, but zero extended to 32-bit, so we can have an "invalid" value.
  // If an extra byte is ever not needed here for padding, the 8-bit flag could be packed into the MSB of this value.
u32 m_current_clut_reg_bits = {}; bool m_current_clut_is_8bit = false; /// True if currently executing/syncing. bool m_executing_commands = false; struct VRAMTransfer { u16 x; u16 y; u16 width; u16 height; u16 col; u16 row; } m_vram_transfer = {}; std::unique_ptr m_gpu_dump; HeapFIFOQueue m_fifo; TickCount m_max_run_ahead = 128; u32 m_fifo_size = 128; u32 m_blit_remaining_words; GPURenderCommand m_render_command{}; std::vector m_blit_buffer; std::vector m_polyline_buffer; ALWAYS_INLINE u32 FifoPop() { return Truncate32(m_fifo.Pop()); } ALWAYS_INLINE u32 FifoPeek() { return Truncate32(m_fifo.Peek()); } ALWAYS_INLINE u32 FifoPeek(u32 i) { return Truncate32(m_fifo.Peek(i)); } private: using GP0CommandHandler = bool (GPU::*)(); using GP0CommandHandlerTable = std::array; static GP0CommandHandlerTable GenerateGP0CommandHandlerTable(); // Rendering commands, returns false if not enough data is provided bool HandleUnknownGP0Command(); bool HandleNOPCommand(); bool HandleClearCacheCommand(); bool HandleInterruptRequestCommand(); bool HandleSetDrawModeCommand(); bool HandleSetTextureWindowCommand(); bool HandleSetDrawingAreaTopLeftCommand(); bool HandleSetDrawingAreaBottomRightCommand(); bool HandleSetDrawingOffsetCommand(); bool HandleSetMaskBitCommand(); bool HandleRenderPolygonCommand(); bool HandleRenderRectangleCommand(); bool HandleRenderLineCommand(); bool HandleRenderPolyLineCommand(); bool HandleFillRectangleCommand(); bool HandleCopyRectangleCPUToVRAMCommand(); bool HandleCopyRectangleVRAMToCPUCommand(); bool HandleCopyRectangleVRAMToVRAMCommand(); static const GP0CommandHandlerTable s_GP0_command_handler_table; }; extern GPU g_gpu; extern u16 g_vram[VRAM_SIZE / sizeof(u16)]; extern u16 g_gpu_clut[GPU_CLUT_SIZE];