From 0cfe0ac16212b35f583984588c8130778ce67054 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Tue, 23 Aug 2022 16:22:58 +0200 Subject: [PATCH 01/10] AtlasEngine: Improve RDP performance --- src/renderer/atlas/AtlasEngine.cpp | 285 +++++++------ src/renderer/atlas/AtlasEngine.h | 62 ++- src/renderer/atlas/AtlasEngine.r.cpp | 607 +++++++++++++++++++++------ 3 files changed, 695 insertions(+), 259 deletions(-) diff --git a/src/renderer/atlas/AtlasEngine.cpp b/src/renderer/atlas/AtlasEngine.cpp index fea31c0fb2d..f7276428193 100644 --- a/src/renderer/atlas/AtlasEngine.cpp +++ b/src/renderer/atlas/AtlasEngine.cpp @@ -160,100 +160,105 @@ try } #endif - if (_api.invalidatedRows == invalidatedRowsAll) + if constexpr (debugGlyphGenerationPerformance) + { + _r.glyphs = {}; + _r.tileAllocator = TileAllocator{ _api.fontMetrics.cellSize, _api.sizeInPixel }; + } + if constexpr (debugTextParsingPerformance) { - // Skip all the partial updates, since we redraw everything anyways. - _api.invalidatedCursorArea = invalidatedAreaNone; - _api.invalidatedRows = { 0, _api.cellCount.y }; + _api.invalidatedRows = invalidatedRowsAll; _api.scrollOffset = 0; } - else + + // Clamp invalidation rects into valid value ranges. { - // Clamp invalidation rects into valid value ranges. 
- { - _api.invalidatedCursorArea.left = std::min(_api.invalidatedCursorArea.left, _api.cellCount.x); - _api.invalidatedCursorArea.top = std::min(_api.invalidatedCursorArea.top, _api.cellCount.y); - _api.invalidatedCursorArea.right = clamp(_api.invalidatedCursorArea.right, _api.invalidatedCursorArea.left, _api.cellCount.x); - _api.invalidatedCursorArea.bottom = clamp(_api.invalidatedCursorArea.bottom, _api.invalidatedCursorArea.top, _api.cellCount.y); - } - { - _api.invalidatedRows.x = std::min(_api.invalidatedRows.x, _api.cellCount.y); - _api.invalidatedRows.y = clamp(_api.invalidatedRows.y, _api.invalidatedRows.x, _api.cellCount.y); - } + _api.invalidatedCursorArea.left = std::min(_api.invalidatedCursorArea.left, _api.cellCount.x); + _api.invalidatedCursorArea.top = std::min(_api.invalidatedCursorArea.top, _api.cellCount.y); + _api.invalidatedCursorArea.right = clamp(_api.invalidatedCursorArea.right, _api.invalidatedCursorArea.left, _api.cellCount.x); + _api.invalidatedCursorArea.bottom = clamp(_api.invalidatedCursorArea.bottom, _api.invalidatedCursorArea.top, _api.cellCount.y); + } + { + _api.invalidatedRows.x = std::min(_api.invalidatedRows.x, _api.cellCount.y); + _api.invalidatedRows.y = clamp(_api.invalidatedRows.y, _api.invalidatedRows.x, _api.cellCount.y); + } + { + const auto limit = gsl::narrow_cast(_api.cellCount.y & 0x7fff); + _api.scrollOffset = gsl::narrow_cast(clamp(_api.scrollOffset, -limit, limit)); + } + + // Scroll the buffer by the given offset and mark the newly uncovered rows as "invalid". + if (_api.scrollOffset != 0) + { + const auto nothingInvalid = _api.invalidatedRows.x == _api.invalidatedRows.y; + const auto offset = static_cast(_api.scrollOffset) * _api.cellCount.x; + + if (_api.scrollOffset < 0) { - const auto limit = gsl::narrow_cast(_api.cellCount.y & 0x7fff); - _api.scrollOffset = gsl::narrow_cast(clamp(_api.scrollOffset, -limit, limit)); + // Scroll up (for instance when new text is being written at the end of the buffer). 
+ const u16 endRow = _api.cellCount.y + _api.scrollOffset; + _api.invalidatedRows.x = nothingInvalid ? endRow : std::min(_api.invalidatedRows.x, endRow); + _api.invalidatedRows.y = _api.cellCount.y; + + // scrollOffset/offset = -1 + // +----------+ +----------+ + // | | | xxxxxxxxx| + dst < beg + // | xxxxxxxxx| -> |xxxxxxx | + src | < beg - offset + // |xxxxxxx | | | | v + // +----------+ +----------+ v < end + { + const auto beg = _r.cells.begin(); + const auto end = _r.cells.end(); + std::move(beg - offset, end, beg); + } + { + const auto beg = _r.cellGlyphMapping.begin(); + const auto end = _r.cellGlyphMapping.end(); + std::move(beg - offset, end, beg); + } } - - // Scroll the buffer by the given offset and mark the newly uncovered rows as "invalid". - if (_api.scrollOffset != 0) + else { - const auto nothingInvalid = _api.invalidatedRows.x == _api.invalidatedRows.y; - const auto offset = static_cast(_api.scrollOffset) * _api.cellCount.x; - auto count = _r.cells.size(); - - if (_api.scrollOffset < 0) + // Scroll down. + _api.invalidatedRows.x = 0; + _api.invalidatedRows.y = nothingInvalid ? _api.scrollOffset : std::max(_api.invalidatedRows.y, _api.scrollOffset); + + // scrollOffset/offset = 1 + // +----------+ +----------+ + // | xxxxxxxxx| | | + src < beg + // |xxxxxxx | -> | xxxxxxxxx| | ^ + // | | |xxxxxxx | v | < end - offset + // +----------+ +----------+ + dst < end { - // Scroll up (for instance when new text is being written at the end of the buffer). - const u16 endRow = _api.cellCount.y + _api.scrollOffset; - _api.invalidatedRows.x = nothingInvalid ? 
endRow : std::min(_api.invalidatedRows.x, endRow); - _api.invalidatedRows.y = _api.cellCount.y; - - // scrollOffset/offset = -1 - // +----------+ +----------+ - // | | | xxxxxxxxx| + dst < beg - // | xxxxxxxxx| -> |xxxxxxx | + src | < beg - offset - // |xxxxxxx | | | | v - // +----------+ +----------+ v < end - { - const auto beg = _r.cells.begin(); - const auto end = beg + count; - std::move(beg - offset, end, beg); - } - { - const auto beg = _r.cellGlyphMapping.begin(); - const auto end = beg + count; - std::move(beg - offset, end, beg); - } + const auto beg = _r.cells.begin(); + const auto end = _r.cells.end(); + std::move_backward(beg, end - offset, end); } - else { - // Scroll down. - _api.invalidatedRows.x = 0; - _api.invalidatedRows.y = nothingInvalid ? _api.scrollOffset : std::max(_api.invalidatedRows.y, _api.scrollOffset); - - // scrollOffset/offset = 1 - // +----------+ +----------+ - // | xxxxxxxxx| | | + src < beg - // |xxxxxxx | -> | xxxxxxxxx| | ^ - // | | |xxxxxxx | v | < end - offset - // +----------+ +----------+ + dst < end - { - const auto beg = _r.cells.begin(); - const auto end = beg + count; - std::move_backward(beg, end - offset, end); - } - { - const auto beg = _r.cellGlyphMapping.begin(); - const auto end = beg + count; - std::move_backward(beg, end - offset, end); - } + const auto beg = _r.cellGlyphMapping.begin(); + const auto end = _r.cellGlyphMapping.end(); + std::move_backward(beg, end - offset, end); } } } - if constexpr (debugGlyphGenerationPerformance) - { - _r.glyphs = {}; - _r.tileAllocator = TileAllocator{ _api.fontMetrics.cellSize, _api.sizeInPixel }; - } - if constexpr (debugTextParsingPerformance) + _api.dirtyRect = til::rect{ 0, _api.invalidatedRows.x, _api.cellCount.x, _api.invalidatedRows.y }; + + // Skip partial updates in the renderer if we redraw everything. 
+ if (_api.invalidatedRows == u16x2{ 0, _r.cellCount.y }) { - _api.dirtyRect = til::rect{ 0, 0, _api.cellCount.x, _api.cellCount.y }; + _r.dirtyRect = {}; + _r.scrollOffset = 0; } else { - _api.dirtyRect = til::rect{ 0, _api.invalidatedRows.x, _api.cellCount.x, _api.invalidatedRows.y }; + _r.dirtyRect = _api.dirtyRect | til::rect{ + _api.invalidatedCursorArea.left, + _api.invalidatedCursorArea.top, + _api.invalidatedCursorArea.right, + _api.invalidatedCursorArea.bottom, + }; + _r.scrollOffset = _api.scrollOffset; } // This is an important block of code for our TileHashMap. @@ -319,7 +324,7 @@ void AtlasEngine::WaitUntilCanRender() noexcept { if constexpr (!debugGeneralPerformance) { - WaitForSingleObjectEx(_r.frameLatencyWaitableObject.get(), 100, true); + WaitForSingleObjectEx(_r.frameLatencyWaitableObject.get(), INFINITE, true); #ifndef NDEBUG _r.frameLatencyWaitableObjectUsed = true; #endif @@ -636,6 +641,13 @@ void AtlasEngine::_createResources() _r.deviceContext = deviceContext.query(); } + // > You should not use GetSystemMetrics(SM_REMOTESESSION) to determine if your application is running + // > in a remote session in Windows 8 and later or Windows Server 2012 and later if the remote session + // > may also be using the RemoteFX vGPU improvements to the Microsoft Remote Display Protocol (RDP). + // > In this case, GetSystemMetrics(SM_REMOTESESSION) will identify the remote session as a local session. + // This actually sounds great for us. The non-d2dMode of AtlasEngine has more features, but requires a GPU. 
+ _r.d2dMode = debugForceD2DMode || GetSystemMetrics(SM_REMOTESESSION); + #ifndef NDEBUG // D3D debug messages if (deviceFlags & D3D11_CREATE_DEVICE_DEBUG) @@ -648,17 +660,20 @@ void AtlasEngine::_createResources() } #endif // NDEBUG - // Our constant buffer will never get resized + if (!_r.d2dMode) { - D3D11_BUFFER_DESC desc{}; - desc.ByteWidth = sizeof(ConstBuffer); - desc.Usage = D3D11_USAGE_DEFAULT; - desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; - THROW_IF_FAILED(_r.device->CreateBuffer(&desc, nullptr, _r.constantBuffer.put())); - } + // Our constant buffer will never get resized + { + D3D11_BUFFER_DESC desc{}; + desc.ByteWidth = sizeof(ConstBuffer); + desc.Usage = D3D11_USAGE_DEFAULT; + desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + THROW_IF_FAILED(_r.device->CreateBuffer(&desc, nullptr, _r.constantBuffer.put())); + } - THROW_IF_FAILED(_r.device->CreateVertexShader(&shader_vs[0], sizeof(shader_vs), nullptr, _r.vertexShader.put())); - THROW_IF_FAILED(_r.device->CreatePixelShader(&shader_ps[0], sizeof(shader_ps), nullptr, _r.pixelShader.put())); + THROW_IF_FAILED(_r.device->CreateVertexShader(&shader_vs[0], sizeof(shader_vs), nullptr, _r.vertexShader.put())); + THROW_IF_FAILED(_r.device->CreatePixelShader(&shader_ps[0], sizeof(shader_ps), nullptr, _r.pixelShader.put())); + } WI_ClearFlag(_api.invalidations, ApiInvalidations::Device); WI_SetAllFlags(_api.invalidations, ApiInvalidations::SwapChain); @@ -673,6 +688,10 @@ void AtlasEngine::_releaseSwapChain() // no views are bound to pipeline state), and then call Flush on the immediate context. if (_r.swapChain && _r.deviceContext) { + if (_r.d2dMode) + { + _r.d2dRenderTarget.reset(); + } _r.frameLatencyWaitableObject.reset(); _r.swapChain.reset(); _r.renderTargetView.reset(); @@ -694,9 +713,9 @@ void AtlasEngine::_createSwapChain() desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; desc.SampleDesc.Count = 1; desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; - desc.BufferCount = 2; // TODO: 3? 
+ desc.BufferCount = 2; desc.Scaling = DXGI_SCALING_NONE; - desc.SwapEffect = _sr.isWindows10OrGreater ? DXGI_SWAP_EFFECT_FLIP_DISCARD : DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL; + desc.SwapEffect = _sr.isWindows10OrGreater && !_r.d2dMode ? DXGI_SWAP_EFFECT_FLIP_DISCARD : DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL; // * HWND swap chains can't do alpha. // * If our background is opaque we can enable "independent" flips by setting DXGI_SWAP_EFFECT_FLIP_DISCARD and DXGI_ALPHA_MODE_IGNORE. // As our swap chain won't have to compose with DWM anymore it reduces the display latency dramatically. @@ -754,43 +773,29 @@ void AtlasEngine::_recreateSizeDependentResources() // ResizeBuffer() docs: // Before you call ResizeBuffers, ensure that the application releases all references [...]. // You can use ID3D11DeviceContext::ClearState to ensure that all [internal] references are released. - if (_r.renderTargetView) + // The _r.cells check exists simply to prevent us from calling ResizeBuffers() on startup (i.e. when `_r` is empty). + if (_r.cells) { + if (_r.d2dMode) + { + _r.d2dRenderTarget.reset(); + } _r.renderTargetView.reset(); _r.deviceContext->ClearState(); _r.deviceContext->Flush(); - THROW_IF_FAILED(_r.swapChain->ResizeBuffers(0, _api.sizeInPixel.x, _api.sizeInPixel.y, DXGI_FORMAT_UNKNOWN, DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT)); - } - - // The RenderTargetView is later used with OMSetRenderTargets - // to tell D3D where stuff is supposed to be rendered at. - { - wil::com_ptr buffer; - THROW_IF_FAILED(_r.swapChain->GetBuffer(0, __uuidof(ID3D11Texture2D), buffer.put_void())); - THROW_IF_FAILED(_r.device->CreateRenderTargetView(buffer.get(), nullptr, _r.renderTargetView.put())); + THROW_IF_FAILED(_r.swapChain->ResizeBuffers(0, _api.sizeInPixel.x, _api.sizeInPixel.y, DXGI_FORMAT_UNKNOWN, debugGeneralPerformance ? 0 : DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT)); } - // Tell D3D which parts of the render target will be visible. 
- // Everything outside of the viewport will be black. - // - // In the future this should cover the entire _api.sizeInPixel.x/_api.sizeInPixel.y. - // The pixel shader should draw the remaining content in the configured background color. - { - D3D11_VIEWPORT viewport{}; - viewport.Width = static_cast(_api.sizeInPixel.x); - viewport.Height = static_cast(_api.sizeInPixel.y); - _r.deviceContext->RSSetViewports(1, &viewport); - } + const auto totalCellCount = static_cast(_api.cellCount.x) * static_cast(_api.cellCount.y); + const auto resize = _api.cellCount != _r.cellCount; - if (_api.cellCount != _r.cellCount) + if (resize) { - const auto totalCellCount = static_cast(_api.cellCount.x) * static_cast(_api.cellCount.y); // Let's guess that every cell consists of a surrogate pair. const auto projectedTextSize = static_cast(_api.cellCount.x) * 2; // IDWriteTextAnalyzer::GetGlyphs says: // The recommended estimate for the per-glyph output buffers is (3 * textLength / 2 + 16). - // We already set the textLength to twice the cell count. - const auto projectedGlyphSize = 3 * projectedTextSize + 16; + const auto projectedGlyphSize = 3 * projectedTextSize / 2 + 16; // This buffer is a bit larger than the others (multiple MB). // Prevent a memory usage spike, by first deallocating and then allocating. @@ -818,21 +823,47 @@ void AtlasEngine::_recreateSizeDependentResources() _api.glyphProps = Buffer{ projectedGlyphSize }; _api.glyphAdvances = Buffer{ projectedGlyphSize }; _api.glyphOffsets = Buffer{ projectedGlyphSize }; - - D3D11_BUFFER_DESC desc; - desc.ByteWidth = gsl::narrow(totalCellCount * sizeof(Cell)); // totalCellCount can theoretically be UINT32_MAX! 
- desc.Usage = D3D11_USAGE_DYNAMIC; - desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; - desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; - desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED; - desc.StructureByteStride = sizeof(Cell); - THROW_IF_FAILED(_r.device->CreateBuffer(&desc, nullptr, _r.cellBuffer.put())); - THROW_IF_FAILED(_r.device->CreateShaderResourceView(_r.cellBuffer.get(), nullptr, _r.cellView.put())); } - // We have called _r.deviceContext->ClearState() in the beginning and lost all D3D state. - // This forces us to set up everything up from scratch again. - _setShaderResources(); + if (!_r.d2dMode) + { + // The RenderTargetView is later used with OMSetRenderTargets + // to tell D3D where stuff is supposed to be rendered at. + { + wil::com_ptr buffer; + THROW_IF_FAILED(_r.swapChain->GetBuffer(0, __uuidof(ID3D11Texture2D), buffer.put_void())); + THROW_IF_FAILED(_r.device->CreateRenderTargetView(buffer.get(), nullptr, _r.renderTargetView.put())); + } + + // Tell D3D which parts of the render target will be visible. + // Everything outside of the viewport will be black. + // + // In the future this should cover the entire _api.sizeInPixel.x/_api.sizeInPixel.y. + // The pixel shader should draw the remaining content in the configured background color. + { + D3D11_VIEWPORT viewport{}; + viewport.Width = static_cast(_api.sizeInPixel.x); + viewport.Height = static_cast(_api.sizeInPixel.y); + _r.deviceContext->RSSetViewports(1, &viewport); + } + + if (resize) + { + D3D11_BUFFER_DESC desc; + desc.ByteWidth = gsl::narrow(totalCellCount * sizeof(Cell)); // totalCellCount can theoretically be UINT32_MAX! 
+ desc.Usage = D3D11_USAGE_DYNAMIC; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED; + desc.StructureByteStride = sizeof(Cell); + THROW_IF_FAILED(_r.device->CreateBuffer(&desc, nullptr, _r.cellBuffer.put())); + THROW_IF_FAILED(_r.device->CreateShaderResourceView(_r.cellBuffer.get(), nullptr, _r.cellView.put())); + } + + // We have called _r.deviceContext->ClearState() in the beginning and lost all D3D state. + // This forces us to set up everything up from scratch again. + _setShaderResources(); + } WI_ClearFlag(_api.invalidations, ApiInvalidations::Size); WI_SetAllFlags(_r.invalidations, RenderInvalidations::ConstBuffer); @@ -1387,7 +1418,7 @@ bool AtlasEngine::_emplaceGlyph(IDWriteFontFace* fontFace, size_t bufferPos1, si } it = _r.glyphs.insert(std::move(key), std::move(value)); - _r.glyphQueue.emplace_back(&it->first, &it->second); + _r.glyphQueue.emplace_back(it); } const auto valueData = it->second.data(); diff --git a/src/renderer/atlas/AtlasEngine.h b/src/renderer/atlas/AtlasEngine.h index 78c3a02df4a..0b24c5a503f 100644 --- a/src/renderer/atlas/AtlasEngine.h +++ b/src/renderer/atlas/AtlasEngine.h @@ -99,8 +99,10 @@ namespace Microsoft::Console::Render friend constexpr type operator~(type v) noexcept { return static_cast(~static_cast(v)); } \ friend constexpr type operator|(type lhs, type rhs) noexcept { return static_cast(static_cast(lhs) | static_cast(rhs)); } \ friend constexpr type operator&(type lhs, type rhs) noexcept { return static_cast(static_cast(lhs) & static_cast(rhs)); } \ + friend constexpr type operator^(type lhs, type rhs) noexcept { return static_cast(static_cast(lhs) ^ static_cast(rhs)); } \ friend constexpr void operator|=(type& lhs, type rhs) noexcept { lhs = lhs | rhs; } \ - friend constexpr void operator&=(type& lhs, type rhs) noexcept { lhs = lhs & rhs; } + friend constexpr void operator&=(type& lhs, type rhs) noexcept { lhs = lhs & 
rhs; } \ + friend constexpr void operator^=(type& lhs, type rhs) noexcept { lhs = lhs ^ rhs; } template struct vec2 @@ -132,7 +134,7 @@ namespace Microsoft::Console::Render ATLAS_POD_OPS(rect) - constexpr bool non_empty() noexcept + constexpr bool non_empty() const noexcept { return (left < right) & (top < bottom); } @@ -347,6 +349,7 @@ namespace Microsoft::Console::Render SmallObjectOptimizer& operator=(SmallObjectOptimizer&& other) noexcept { + this->~SmallObjectOptimizer(); return *new (this) SmallObjectOptimizer(other); } @@ -507,6 +510,21 @@ namespace Microsoft::Console::Render } }; + struct CachedGlyphLayout + { + wil::com_ptr textLayout; + f32x2 halfSize; + f32x2 offset; + f32x2 scale; + D2D1_DRAW_TEXT_OPTIONS options = D2D1_DRAW_TEXT_OPTIONS_NONE; + bool scalingRequired = false; + + explicit operator bool() const noexcept; + void reset() noexcept; + void applyScaling(ID2D1RenderTarget* d2dRenderTarget, D2D1_POINT_2F origin) const noexcept; + void undoScaling(ID2D1RenderTarget* d2dRenderTarget) const noexcept; + }; + struct AtlasValueData { CellFlags flags = CellFlags::None; @@ -530,6 +548,8 @@ namespace Microsoft::Console::Render return _data.data(); } + CachedGlyphLayout cachedLayout; + private: SmallObjectOptimizer _data; @@ -539,12 +559,6 @@ namespace Microsoft::Console::Render } }; - struct AtlasQueueItem - { - const AtlasKey* key; - const AtlasValue* value; - }; - struct AtlasKeyHasher { using is_transparent = int; @@ -899,9 +913,24 @@ namespace Microsoft::Console::Render void _updateConstantBuffer() const noexcept; void _adjustAtlasSize(); void _processGlyphQueue(); - void _drawGlyph(const AtlasQueueItem& item) const; - void _drawCursor(); - + void _drawGlyph(const TileHashMap::iterator& it) const; + CachedGlyphLayout _getCachedGlyphLayout(const wchar_t* chars, u16 charsLength, u16 cellCount, IDWriteTextFormat* textFormat, bool coloredGlyph) const; + void _drawCursor(u16r rect, u32 color, bool clear); + ID2D1Brush* _brushWithColor(u32 color); + void 
_d2dPresent(); + void _d2dCreateRenderTarget(); + void _d2dDrawDirtyArea(); + u16 _d2dDrawGlyph(const TileHashMap::iterator& it, u16x2 coord, u32 color); + void _d2dDrawLine(u16r rect, u16 pos, u16 width, u32 color, ID2D1StrokeStyle* strokeStyle = nullptr); + void _d2dFillRectangle(u16r rect, u32 color); + void _d2dCellFlagRendererCursor(u16r rect, u32 color); + void _d2dCellFlagRendererSelected(u16r rect, u32 color); + void _d2dCellFlagRendererUnderline(u16r rect, u32 color); + void _d2dCellFlagRendererUnderlineDotted(u16r rect, u32 color); + void _d2dCellFlagRendererUnderlineDouble(u16r rect, u32 color); + void _d2dCellFlagRendererStrikethrough(u16r rect, u32 color); + + static constexpr bool debugForceD2DMode = false; static constexpr bool debugGlyphGenerationPerformance = false; static constexpr bool debugTextParsingPerformance = false || debugGlyphGenerationPerformance; static constexpr bool debugGeneralPerformance = false || debugTextParsingPerformance; @@ -947,10 +976,11 @@ namespace Microsoft::Console::Render wil::com_ptr atlasBuffer; wil::com_ptr atlasView; wil::com_ptr d2dRenderTarget; - wil::com_ptr brush; + wil::com_ptr brush; wil::com_ptr textFormats[2][2]; Buffer textFormatAxes[2][2]; wil::com_ptr typography; + wil::com_ptr dottedStrokeStyle; Buffer cells; // invalidated by ApiInvalidations::Size Buffer cellGlyphMapping; // invalidated by ApiInvalidations::Size @@ -963,17 +993,23 @@ namespace Microsoft::Console::Render u16x2 atlasSizeInPixel; // invalidated by ApiInvalidations::Font TileHashMap glyphs; TileAllocator tileAllocator; - std::vector glyphQueue; + std::vector glyphQueue; f32 gamma = 0; f32 cleartypeEnhancedContrast = 0; f32 grayscaleEnhancedContrast = 0; u32 backgroundColor = 0xff000000; u32 selectionColor = 0x7fffffff; + u32 brushColor = 0xffffffff; CachedCursorOptions cursorOptions; RenderInvalidations invalidations = RenderInvalidations::None; + til::rect previousDirtyRectInPx; + til::rect dirtyRect; + i16 scrollOffset = 0; + bool 
d2dMode = false; + #ifndef NDEBUG // See documentation for IDXGISwapChain2::GetFrameLatencyWaitableObject method: // > For every frame it renders, the app should wait on this handle before starting any rendering operations. diff --git a/src/renderer/atlas/AtlasEngine.r.cpp b/src/renderer/atlas/AtlasEngine.r.cpp index cbff3d22185..3e8271299a0 100644 --- a/src/renderer/atlas/AtlasEngine.r.cpp +++ b/src/renderer/atlas/AtlasEngine.r.cpp @@ -44,6 +44,15 @@ constexpr bool isInInversionList(const std::array& ranges, wchar_t n return (idx & 1) != 0; } +constexpr D2D1_COLOR_F colorFromU32(uint32_t rgba) +{ + const auto r = static_cast((rgba >> 0) & 0xff) / 255.0f; + const auto g = static_cast((rgba >> 8) & 0xff) / 255.0f; + const auto b = static_cast((rgba >> 16) & 0xff) / 255.0f; + const auto a = static_cast((rgba >> 24) & 0xff) / 255.0f; + return { r, g, b, a }; +} + using namespace Microsoft::Console::Render; #pragma region IRenderEngine @@ -53,15 +62,15 @@ using namespace Microsoft::Console::Render; [[nodiscard]] HRESULT AtlasEngine::Present() noexcept try { - _adjustAtlasSize(); - _processGlyphQueue(); - - if (WI_IsFlagSet(_r.invalidations, RenderInvalidations::Cursor)) + if (_r.d2dMode) { - _drawCursor(); - WI_ClearFlag(_r.invalidations, RenderInvalidations::Cursor); + _d2dPresent(); + return S_OK; } + _adjustAtlasSize(); + _processGlyphQueue(); + // The values the constant buffer depends on are potentially updated after BeginPaint(). if (WI_IsFlagSet(_r.invalidations, RenderInvalidations::ConstBuffer)) { @@ -91,19 +100,8 @@ try // > IDXGISwapChain::Present: Partial Presentation (using a dirty rects or scroll) is not supported // > for SwapChains created with DXGI_SWAP_EFFECT_DISCARD or DXGI_SWAP_EFFECT_FLIP_DISCARD. // ---> No need to call IDXGISwapChain1::Present1. - // TODO: Would IDXGISwapChain1::Present1 and its dirty rects have benefits for remote desktop? 
THROW_IF_FAILED(_r.swapChain->Present(1, 0)); - // On some GPUs with tile based deferred rendering (TBDR) architectures, binding - // RenderTargets that already have contents in them (from previous rendering) incurs a - // cost for having to copy the RenderTarget contents back into tile memory for rendering. - // - // On Windows 10 with DXGI_SWAP_EFFECT_FLIP_DISCARD we get this for free. - if (!_sr.isWindows10OrGreater) - { - _r.deviceContext->DiscardView(_r.renderTargetView.get()); - } - return S_OK; } catch (const wil::ResultException& exception) @@ -205,13 +203,8 @@ void AtlasEngine::_adjustAtlasSize() _r.deviceContext->CopySubresourceRegion1(atlasBuffer.get(), 0, 0, 0, 0, _r.atlasBuffer.get(), 0, &box, D3D11_COPY_NO_OVERWRITE); } - _r.atlasSizeInPixel = requiredSize; - _r.atlasBuffer = std::move(atlasBuffer); - _r.atlasView = std::move(atlasView); - _setShaderResources(); - { - const auto surface = _r.atlasBuffer.query(); + const auto surface = atlasBuffer.query(); wil::com_ptr renderingParams; DWrite_GetRenderParams(_sr.dwriteFactory.get(), &_r.gamma, &_r.cleartypeEnhancedContrast, &_r.grayscaleEnhancedContrast, renderingParams.addressof()); @@ -234,55 +227,53 @@ void AtlasEngine::_adjustAtlasSize() } { static constexpr D2D1_COLOR_F color{ 1, 1, 1, 1 }; - wil::com_ptr brush; - THROW_IF_FAILED(_r.d2dRenderTarget->CreateSolidColorBrush(&color, nullptr, brush.addressof())); - _r.brush = brush.query(); + THROW_IF_FAILED(_r.d2dRenderTarget->CreateSolidColorBrush(&color, nullptr, _r.brush.put())); + _r.brushColor = 0xffffffff; } + _r.atlasSizeInPixel = requiredSize; + _r.atlasBuffer = std::move(atlasBuffer); + _r.atlasView = std::move(atlasView); + _setShaderResources(); + WI_SetAllFlags(_r.invalidations, RenderInvalidations::ConstBuffer); WI_SetFlagIf(_r.invalidations, RenderInvalidations::Cursor, !copyFromExisting); } void AtlasEngine::_processGlyphQueue() { - if (_r.glyphQueue.empty()) + if (_r.glyphQueue.empty() && WI_IsFlagClear(_r.invalidations, 
RenderInvalidations::Cursor)) { return; } _r.d2dRenderTarget->BeginDraw(); - for (const auto& pair : _r.glyphQueue) + + if (WI_IsFlagSet(_r.invalidations, RenderInvalidations::Cursor)) { - _drawGlyph(pair); + _drawCursor({ 0, 0, 1, 1 }, 0xffffffff, true); + WI_ClearFlag(_r.invalidations, RenderInvalidations::Cursor); } - THROW_IF_FAILED(_r.d2dRenderTarget->EndDraw()); + for (const auto& it : _r.glyphQueue) + { + _drawGlyph(it); + } _r.glyphQueue.clear(); + + THROW_IF_FAILED(_r.d2dRenderTarget->EndDraw()); } -void AtlasEngine::_drawGlyph(const AtlasQueueItem& item) const +void AtlasEngine::_drawGlyph(const TileHashMap::iterator& it) const { - const auto key = item.key->data(); - const auto value = item.value->data(); + const auto key = it->first.data(); + const auto value = it->second.data(); const auto coords = &value->coords[0]; const auto charsLength = key->charCount; - const auto cellCount = static_cast(key->attributes.cellCount); + const auto cellCount = key->attributes.cellCount; const auto textFormat = _getTextFormat(key->attributes.bold, key->attributes.italic); const auto coloredGlyph = WI_IsFlagSet(value->flags, CellFlags::ColoredGlyph); - const f32x2 layoutBox{ cellCount * _r.cellSizeDIP.x, _r.cellSizeDIP.y }; - - // See D2DFactory::DrawText - wil::com_ptr textLayout; - THROW_IF_FAILED(_sr.dwriteFactory->CreateTextLayout(&key->chars[0], charsLength, textFormat, layoutBox.x, layoutBox.y, textLayout.addressof())); - if (_r.typography) - { - textLayout->SetTypography(_r.typography.get(), { 0, charsLength }); - } - - auto options = D2D1_DRAW_TEXT_OPTIONS_NONE; - // D2D1_DRAW_TEXT_OPTIONS_ENABLE_COLOR_FONT enables a bunch of internal machinery - // which doesn't have to run if we know we can't use it anyways in the shader. 
- WI_SetFlagIf(options, D2D1_DRAW_TEXT_OPTIONS_ENABLE_COLOR_FONT, coloredGlyph); + const auto cachedLayout = _getCachedGlyphLayout(&key->chars[0], charsLength, cellCount, textFormat, coloredGlyph); // Colored glyphs cannot be drawn in linear gamma. // That's why we're simply alpha-blending them in the shader. @@ -293,11 +284,53 @@ void AtlasEngine::_drawGlyph(const AtlasQueueItem& item) const _r.d2dRenderTarget->SetTextAntialiasMode(coloredGlyph ? D2D1_TEXT_ANTIALIAS_MODE_GRAYSCALE : D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE); } + for (u16 i = 0; i < cellCount; ++i) + { + const auto coord = coords[i]; + + D2D1_RECT_F rect; + rect.left = static_cast(coord.x) * _r.dipPerPixel; + rect.top = static_cast(coord.y) * _r.dipPerPixel; + rect.right = rect.left + _r.cellSizeDIP.x; + rect.bottom = rect.top + _r.cellSizeDIP.y; + + D2D1_POINT_2F origin; + origin.x = rect.left - i * _r.cellSizeDIP.x; + origin.y = rect.top; + + _r.d2dRenderTarget->PushAxisAlignedClip(&rect, D2D1_ANTIALIAS_MODE_ALIASED); + _r.d2dRenderTarget->Clear(); + + cachedLayout.applyScaling(_r.d2dRenderTarget.get(), origin); + + // Now that we're done using origin to calculate the center point for our transformation + // we can use it for its intended purpose to slightly shift the glyph around. 
+ origin.x += cachedLayout.offset.x; + origin.y += cachedLayout.offset.y; + _r.d2dRenderTarget->DrawTextLayout(origin, cachedLayout.textLayout.get(), _r.brush.get(), cachedLayout.options); + + cachedLayout.undoScaling(_r.d2dRenderTarget.get()); + + _r.d2dRenderTarget->PopAxisAlignedClip(); + } +} + +AtlasEngine::CachedGlyphLayout AtlasEngine::_getCachedGlyphLayout(const wchar_t* chars, u16 charsLength, u16 cellCount, IDWriteTextFormat* textFormat, bool coloredGlyph) const +{ + const f32x2 layoutBox{ cellCount * _r.cellSizeDIP.x, _r.cellSizeDIP.y }; const f32x2 halfSize{ layoutBox.x * 0.5f, layoutBox.y * 0.5f }; bool scalingRequired = false; f32x2 offset{ 0, 0 }; f32x2 scale{ 1, 1 }; + // See D2DFactory::DrawText + wil::com_ptr textLayout; + THROW_IF_FAILED(_sr.dwriteFactory->CreateTextLayout(chars, charsLength, textFormat, layoutBox.x, layoutBox.y, textLayout.addressof())); + if (_r.typography) + { + textLayout->SetTypography(_r.typography.get(), { 0, charsLength }); + } + // Block Element and Box Drawing characters need to be handled separately, // because unlike regular ones they're supposed to fill the entire layout box. 
// @@ -319,14 +352,14 @@ void AtlasEngine::_drawGlyph(const AtlasQueueItem& item) const // clang-format on }; - if (charsLength == 1 && isInInversionList(blockCharacters, key->chars[0])) + if (charsLength == 1 && isInInversionList(blockCharacters, chars[0])) { wil::com_ptr fontCollection; THROW_IF_FAILED(textFormat->GetFontCollection(fontCollection.addressof())); const auto baseWeight = textFormat->GetFontWeight(); const auto baseStyle = textFormat->GetFontStyle(); - TextAnalysisSource analysisSource{ &key->chars[0], 1 }; + TextAnalysisSource analysisSource{ chars, 1 }; UINT32 mappedLength = 0; wil::com_ptr mappedFont; FLOAT mappedScale = 0; @@ -351,7 +384,7 @@ void AtlasEngine::_drawGlyph(const AtlasQueueItem& item) const DWRITE_FONT_METRICS metrics; fontFace->GetMetrics(&metrics); - const u32 codePoint = key->chars[0]; + const u32 codePoint = chars[0]; u16 glyphIndex; THROW_IF_FAILED(fontFace->GetGlyphIndicesW(&codePoint, 1, &glyphIndex)); @@ -470,6 +503,10 @@ void AtlasEngine::_drawGlyph(const AtlasQueueItem& item) const offset.y += (baselineFixed - baseline) / scale.y; } + auto options = D2D1_DRAW_TEXT_OPTIONS_NONE; + // D2D1_DRAW_TEXT_OPTIONS_ENABLE_COLOR_FONT enables a bunch of internal machinery + // which doesn't have to run if we know we can't use it anyways in the shader. + WI_SetFlagIf(options, D2D1_DRAW_TEXT_OPTIONS_ENABLE_COLOR_FONT, coloredGlyph); // !!! IMPORTANT !!! // DirectWrite/2D snaps the baseline to whole pixels, which is something we technically // want (it makes text look crisp), but fails in weird ways if `scalingRequired` is true. @@ -481,57 +518,17 @@ void AtlasEngine::_drawGlyph(const AtlasQueueItem& item) const // where every single "=" might be blatantly misaligned vertically (same for any box drawings). 
WI_SetFlagIf(options, D2D1_DRAW_TEXT_OPTIONS_NO_SNAP, scalingRequired); - const f32x2 inverseScale{ 1.0f - scale.x, 1.0f - scale.y }; - - for (u32 i = 0; i < cellCount; ++i) - { - const auto coord = coords[i]; - - D2D1_RECT_F rect; - rect.left = static_cast(coord.x) * _r.dipPerPixel; - rect.top = static_cast(coord.y) * _r.dipPerPixel; - rect.right = rect.left + _r.cellSizeDIP.x; - rect.bottom = rect.top + _r.cellSizeDIP.y; - - D2D1_POINT_2F origin; - origin.x = rect.left - i * _r.cellSizeDIP.x; - origin.y = rect.top; - - { - _r.d2dRenderTarget->PushAxisAlignedClip(&rect, D2D1_ANTIALIAS_MODE_ALIASED); - _r.d2dRenderTarget->Clear(); - } - if (scalingRequired) - { - const D2D1_MATRIX_3X2_F transform{ - scale.x, - 0, - 0, - scale.y, - (origin.x + halfSize.x) * inverseScale.x, - (origin.y + halfSize.y) * inverseScale.y, - }; - _r.d2dRenderTarget->SetTransform(&transform); - } - { - // Now that we're done using origin to calculate the center point for our transformation - // we can use it for its intended purpose to slightly shift the glyph around. - origin.x += offset.x; - origin.y += offset.y; - _r.d2dRenderTarget->DrawTextLayout(origin, textLayout.get(), _r.brush.get(), options); - } - if (scalingRequired) - { - static constexpr D2D1_MATRIX_3X2_F identity{ 1, 0, 0, 1, 0, 0 }; - _r.d2dRenderTarget->SetTransform(&identity); - } - { - _r.d2dRenderTarget->PopAxisAlignedClip(); - } - } + return CachedGlyphLayout{ + .textLayout = textLayout, + .halfSize = halfSize, + .offset = offset, + .scale = scale, + .options = options, + .scalingRequired = scalingRequired, + }; } -void AtlasEngine::_drawCursor() +void AtlasEngine::_drawCursor(u16r rect, u32 color, bool clear) { // lineWidth is in D2D's DIPs. For instance if we have a 150-200% zoom scale we want to draw a 2px wide line. // At 150% scale lineWidth thus needs to be 1.33333... because at a zoom scale of 1.5 this results in a 2px wide line. 
@@ -540,21 +537,21 @@ void AtlasEngine::_drawCursor() // `clip` is the rectangle within our texture atlas that's reserved for our cursor texture, ... D2D1_RECT_F clip; - clip.left = 0.0f; - clip.top = 0.0f; - clip.right = _r.cellSizeDIP.x; - clip.bottom = _r.cellSizeDIP.y; + clip.left = static_cast(rect.left) * _r.cellSizeDIP.x; + clip.top = static_cast(rect.top) * _r.cellSizeDIP.y; + clip.right = static_cast(rect.right) * _r.cellSizeDIP.x; + clip.bottom = static_cast(rect.bottom) * _r.cellSizeDIP.y; // ... whereas `rect` is just the visible (= usually white) portion of our cursor. - auto rect = clip; + auto box = clip; switch (cursorType) { case CursorType::Legacy: - rect.top = _r.cellSizeDIP.y * static_cast(100 - _r.cursorOptions.heightPercentage) / 100.0f; + box.top = box.bottom - _r.cellSizeDIP.y * static_cast(_r.cursorOptions.heightPercentage) / 100.0f; break; case CursorType::VerticalBar: - rect.right = lineWidth; + box.right = box.left + lineWidth; break; case CursorType::EmptyBox: { @@ -562,42 +559,414 @@ void AtlasEngine::_drawCursor() // coordinates in such a way that the line border extends half the width to each side. // --> Our coordinates have to be 0.5 DIP off in order to draw a 2px line on a 200% scaling. const auto halfWidth = lineWidth / 2.0f; - rect.left = halfWidth; - rect.top = halfWidth; - rect.right -= halfWidth; - rect.bottom -= halfWidth; + box.left += halfWidth; + box.top += halfWidth; + box.right -= halfWidth; + box.bottom -= halfWidth; break; } case CursorType::Underscore: case CursorType::DoubleUnderscore: - rect.top = _r.cellSizeDIP.y - lineWidth; + box.top = box.bottom - lineWidth; break; default: break; } - _r.d2dRenderTarget->BeginDraw(); + const auto brush = _brushWithColor(color); + // We need to clip the area we draw in to ensure we don't // accidentally draw into any neighboring texture atlas tiles. 
_r.d2dRenderTarget->PushAxisAlignedClip(&clip, D2D1_ANTIALIAS_MODE_ALIASED); - _r.d2dRenderTarget->Clear(); + + if (clear) + { + _r.d2dRenderTarget->Clear(); + } if (cursorType == CursorType::EmptyBox) { - _r.d2dRenderTarget->DrawRectangle(&rect, _r.brush.get(), lineWidth); + _r.d2dRenderTarget->DrawRectangle(&box, brush, lineWidth); } else { - _r.d2dRenderTarget->FillRectangle(&rect, _r.brush.get()); + _r.d2dRenderTarget->FillRectangle(&box, brush); } if (cursorType == CursorType::DoubleUnderscore) { - rect.top -= 2.0f; - rect.bottom -= 2.0f; - _r.d2dRenderTarget->FillRectangle(&rect, _r.brush.get()); + const auto offset = lineWidth * 2.0f; + box.top -= offset; + box.bottom -= offset; + _r.d2dRenderTarget->FillRectangle(&box, brush); } _r.d2dRenderTarget->PopAxisAlignedClip(); +} + +ID2D1Brush* AtlasEngine::_brushWithColor(u32 color) +{ + if (_r.brushColor != color) + { + const auto d2dColor = colorFromU32(color); + THROW_IF_FAILED(_r.d2dRenderTarget->CreateSolidColorBrush(&d2dColor, nullptr, _r.brush.put())); + _r.brushColor = color; + } + return _r.brush.get(); +} + +AtlasEngine::CachedGlyphLayout::operator bool() const noexcept +{ + return static_cast(textLayout); +} + +void AtlasEngine::CachedGlyphLayout::reset() noexcept +{ + textLayout.reset(); +} + +void AtlasEngine::CachedGlyphLayout::applyScaling(ID2D1RenderTarget* d2dRenderTarget, D2D1_POINT_2F origin) const noexcept +{ + __assume(d2dRenderTarget != nullptr); + + if (scalingRequired) + { + const D2D1_MATRIX_3X2_F transform{ + scale.x, + 0, + 0, + scale.y, + (origin.x + halfSize.x) * (1.0f - scale.x), + (origin.y + halfSize.y) * (1.0f - scale.y), + }; + d2dRenderTarget->SetTransform(&transform); + } +} + +void AtlasEngine::CachedGlyphLayout::undoScaling(ID2D1RenderTarget* d2dRenderTarget) const noexcept +{ + __assume(d2dRenderTarget != nullptr); + + if (scalingRequired) + { + static constexpr D2D1_MATRIX_3X2_F identity{ 1, 0, 0, 1, 0, 0 }; + d2dRenderTarget->SetTransform(&identity); + } +} + +void 
AtlasEngine::_d2dPresent() +{ + auto dirtyRectInPx = _r.dirtyRect; + dirtyRectInPx.left *= _r.fontMetrics.cellSize.x; + dirtyRectInPx.top *= _r.fontMetrics.cellSize.y; + dirtyRectInPx.right *= _r.fontMetrics.cellSize.x; + dirtyRectInPx.bottom *= _r.fontMetrics.cellSize.y; + + if (const auto intersection = _r.previousDirtyRectInPx & dirtyRectInPx) + { + wil::com_ptr backBuffer; + wil::com_ptr frontBuffer; + THROW_IF_FAILED(_r.swapChain->GetBuffer(0, __uuidof(backBuffer), backBuffer.put_void())); + THROW_IF_FAILED(_r.swapChain->GetBuffer(1, __uuidof(frontBuffer), frontBuffer.put_void())); + + D3D11_BOX intersectBox; + intersectBox.left = intersection.left; + intersectBox.top = intersection.top; + intersectBox.front = 0; + intersectBox.right = intersection.right; + intersectBox.bottom = intersection.bottom; + intersectBox.back = 1; + _r.deviceContext->CopySubresourceRegion1(backBuffer.get(), 0, intersection.left, intersection.top, 0, frontBuffer.get(), 0, &intersectBox, 0); + } + + _d2dCreateRenderTarget(); + _d2dDrawDirtyArea(); + + // See documentation for IDXGISwapChain2::GetFrameLatencyWaitableObject method: + // > For every frame it renders, the app should wait on this handle before starting any rendering operations. + // > Note that this requirement includes the first frame the app renders with the swap chain. 
+ assert(debugGeneralPerformance || _r.frameLatencyWaitableObjectUsed); + + if (dirtyRectInPx) + { + { + RECT scrollRect{}; + POINT scrollOffset{}; + DXGI_PRESENT_PARAMETERS params{ + .DirtyRectsCount = 1, + .pDirtyRects = dirtyRectInPx.as_win32_rect(), + }; + + if (_r.scrollOffset) + { + scrollRect = { + 0, + std::max(0, _r.scrollOffset), + _r.cellCount.x, + _r.cellCount.y + std::min(0, _r.scrollOffset), + }; + scrollOffset = { + 0, + _r.scrollOffset, + }; + + scrollRect.top *= _r.fontMetrics.cellSize.y; + scrollRect.right *= _r.fontMetrics.cellSize.x; + scrollRect.bottom *= _r.fontMetrics.cellSize.y; + + scrollOffset.y *= _r.fontMetrics.cellSize.y; + + params.pScrollRect = &scrollRect; + params.pScrollOffset = &scrollOffset; + } + + THROW_IF_FAILED(_r.swapChain->Present1(1, 0, &params)); + } + } + else + { + THROW_IF_FAILED(_r.swapChain->Present(1, 0)); + } + + _r.previousDirtyRectInPx = dirtyRectInPx; +} + +void AtlasEngine::_d2dCreateRenderTarget() +{ + if (_r.d2dRenderTarget) + { + return; + } + + { + wil::com_ptr buffer; + THROW_IF_FAILED(_r.swapChain->GetBuffer(0, __uuidof(ID3D11Texture2D), buffer.put_void())); + + const auto surface = buffer.query(); + + D2D1_RENDER_TARGET_PROPERTIES props{}; + props.type = D2D1_RENDER_TARGET_TYPE_DEFAULT; + props.pixelFormat = { DXGI_FORMAT_B8G8R8A8_UNORM, D2D1_ALPHA_MODE_PREMULTIPLIED }; + props.dpiX = static_cast(_r.dpi); + props.dpiY = static_cast(_r.dpi); + THROW_IF_FAILED(_sr.d2dFactory->CreateDxgiSurfaceRenderTarget(surface.get(), &props, _r.d2dRenderTarget.put())); + + // In case _api.realizedAntialiasingMode is D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE we'll + // continuously adjust it in AtlasEngine::_drawGlyph. See _drawGlyph.
+ _r.d2dRenderTarget->SetTextAntialiasMode(static_cast(_api.realizedAntialiasingMode)); + } + { + static constexpr D2D1_COLOR_F color{ 1, 1, 1, 1 }; + THROW_IF_FAILED(_r.d2dRenderTarget->CreateSolidColorBrush(&color, nullptr, _r.brush.put())); + _r.brushColor = 0xffffffff; + } +} + +void AtlasEngine::_d2dDrawDirtyArea() +{ + struct CellFlagHandler + { + CellFlags filter; + decltype(&AtlasEngine::_d2dCellFlagRendererCursor) func; + }; + + static constexpr std::array cellFlagHandlers{ + // Ordered by lowest to highest "layer". + // The selection for instance is drawn on top of underlines, not under them. + CellFlagHandler{ CellFlags::Underline, &AtlasEngine::_d2dCellFlagRendererUnderline }, + CellFlagHandler{ CellFlags::UnderlineDotted, &AtlasEngine::_d2dCellFlagRendererUnderlineDotted }, + CellFlagHandler{ CellFlags::UnderlineDouble, &AtlasEngine::_d2dCellFlagRendererUnderlineDouble }, + CellFlagHandler{ CellFlags::Strikethrough, &AtlasEngine::_d2dCellFlagRendererStrikethrough }, + CellFlagHandler{ CellFlags::Cursor, &AtlasEngine::_d2dCellFlagRendererCursor }, + CellFlagHandler{ CellFlags::Selected, &AtlasEngine::_d2dCellFlagRendererSelected }, + }; + + u16 left = 0; + u16 top = 0; + u16 right = _r.cellCount.x; + u16 bottom = _r.cellCount.y; + if (_r.dirtyRect) + { + left = gsl::narrow(_r.dirtyRect.left); + top = gsl::narrow(_r.dirtyRect.top); + right = gsl::narrow(_r.dirtyRect.right); + bottom = gsl::narrow(_r.dirtyRect.bottom); + } + + _r.d2dRenderTarget->BeginDraw(); + + for (u16 y = top; y < bottom; ++y) + { + const Cell* cells = _getCell(0, y); + const TileHashMap::iterator* cellGlyphMappings = _getCellGlyphMapping(0, y); + + // Draw background. 
+ { + auto x1 = left; + auto x2 = gsl::narrow_cast(x1 + 1); + auto currentColor = cells[x1].color.y; + + for (; x2 < right; ++x2) + { + const auto color = cells[x2].color.y; + + if (currentColor != color) + { + const u16r rect{ x1, y, x2, gsl::narrow_cast(y + 1) }; + _d2dFillRectangle(rect, currentColor); + x1 = x2; + currentColor = color; + } + } + + { + const u16r rect{ x1, y, x2, gsl::narrow_cast(y + 1) }; + _d2dFillRectangle(rect, currentColor); + } + } + + // Draw text. + for (u16 x = left, cellCount = 0; x < right; x += cellCount) + { + const auto& it = cellGlyphMappings[x]; + const u16x2 coord{ x, y }; + const auto color = cells[x].color.x; + cellCount = _d2dDrawGlyph(it, coord, color); + } + + // Draw underlines, cursors, selections, etc. + for (const auto& handler : cellFlagHandlers) + { + auto x1 = left; + auto currentFlags = CellFlags::None; + + for (u16 x2 = left; x2 < right; ++x2) + { + const auto flags = cells[x2].flags & handler.filter; + + if (currentFlags != flags) + { + if (currentFlags != CellFlags::None) + { + const u16r rect{ x1, y, x2, gsl::narrow_cast(y + 1) }; + const auto color = cells[x1].color.x; + (this->*handler.func)(rect, color); + } + + x1 = x2; + currentFlags = flags; + } + } + + if (currentFlags != CellFlags::None) + { + const u16r rect{ x1, y, right, gsl::narrow_cast(y + 1) }; + const auto color = cells[x1].color.x; + (this->*handler.func)(rect, color); + } + } + } + THROW_IF_FAILED(_r.d2dRenderTarget->EndDraw()); } + +// See _drawGlyph() for reference. 
+AtlasEngine::u16 AtlasEngine::_d2dDrawGlyph(const TileHashMap::iterator& it, const u16x2 coord, const u32 color) +{ + const auto key = it->first.data(); + const auto value = it->second.data(); + const auto charsLength = key->charCount; + const auto cellCount = key->attributes.cellCount; + const auto textFormat = _getTextFormat(key->attributes.bold, key->attributes.italic); + const auto coloredGlyph = WI_IsFlagSet(value->flags, CellFlags::ColoredGlyph); + + auto& cachedLayout = it->second.cachedLayout; + if (!cachedLayout) + { + cachedLayout = _getCachedGlyphLayout(&key->chars[0], charsLength, cellCount, textFormat, coloredGlyph); + } + + D2D1_RECT_F rect; + rect.left = static_cast(coord.x) * _r.cellSizeDIP.x; + rect.top = static_cast(coord.y) * _r.cellSizeDIP.y; + rect.right = static_cast(coord.x + cellCount) * _r.cellSizeDIP.x; + rect.bottom = rect.top + _r.cellSizeDIP.y; + + D2D1_POINT_2F origin; + origin.x = rect.left; + origin.y = rect.top; + + _r.d2dRenderTarget->PushAxisAlignedClip(&rect, D2D1_ANTIALIAS_MODE_ALIASED); + + cachedLayout.applyScaling(_r.d2dRenderTarget.get(), origin); + + origin.x += cachedLayout.offset.x; + origin.y += cachedLayout.offset.y; + _r.d2dRenderTarget->DrawTextLayout(origin, cachedLayout.textLayout.get(), _brushWithColor(color), cachedLayout.options); + + cachedLayout.undoScaling(_r.d2dRenderTarget.get()); + + _r.d2dRenderTarget->PopAxisAlignedClip(); + + return cellCount; +} + +void AtlasEngine::_d2dDrawLine(u16r rect, u16 pos, u16 width, u32 color, ID2D1StrokeStyle* strokeStyle) +{ + const auto w = static_cast(width) * _r.dipPerPixel; + const auto y1 = static_cast(rect.top) * _r.cellSizeDIP.y + static_cast(pos) * _r.dipPerPixel + w * 0.5f; + const auto x1 = static_cast(rect.left) * _r.cellSizeDIP.x; + const auto x2 = static_cast(rect.right) * _r.cellSizeDIP.x; + const auto brush = _brushWithColor(color); + _r.d2dRenderTarget->DrawLine({ x1, y1 }, { x2, y1 }, brush, w, strokeStyle); +} + +void AtlasEngine::_d2dFillRectangle(u16r 
rect, u32 color) +{ + const D2D1_RECT_F r{ + .left = static_cast(rect.left) * _r.cellSizeDIP.x, + .top = static_cast(rect.top) * _r.cellSizeDIP.y, + .right = static_cast(rect.right) * _r.cellSizeDIP.x, + .bottom = static_cast(rect.bottom) * _r.cellSizeDIP.y, + }; + const auto brush = _brushWithColor(color); + _r.d2dRenderTarget->FillRectangle(r, brush); +} + +void AtlasEngine::_d2dCellFlagRendererCursor(u16r rect, u32 color) +{ + _drawCursor(rect, _r.cursorOptions.cursorColor, false); +} + +void AtlasEngine::_d2dCellFlagRendererSelected(u16r rect, u32 color) +{ + _d2dFillRectangle(rect, _r.selectionColor); +} + +void AtlasEngine::_d2dCellFlagRendererUnderline(u16r rect, u32 color) +{ + _d2dDrawLine(rect, _r.fontMetrics.underlinePos, _r.fontMetrics.underlineWidth, color); +} + +void AtlasEngine::_d2dCellFlagRendererUnderlineDotted(u16r rect, u32 color) +{ + if (!_r.dottedStrokeStyle) + { + static constexpr D2D1_STROKE_STYLE_PROPERTIES props{ .dashStyle = D2D1_DASH_STYLE_CUSTOM }; + static constexpr FLOAT dashes[2]{ 1, 2 }; + THROW_IF_FAILED(_sr.d2dFactory->CreateStrokeStyle(&props, &dashes[0], 2, _r.dottedStrokeStyle.addressof())); + } + + _d2dDrawLine(rect, _r.fontMetrics.underlinePos, _r.fontMetrics.underlineWidth, color, _r.dottedStrokeStyle.get()); +} + +void AtlasEngine::_d2dCellFlagRendererUnderlineDouble(u16r rect, u32 color) +{ + _d2dDrawLine(rect, _r.fontMetrics.doubleUnderlinePos.x, _r.fontMetrics.thinLineWidth, color); + _d2dDrawLine(rect, _r.fontMetrics.doubleUnderlinePos.y, _r.fontMetrics.thinLineWidth, color); +} + +void AtlasEngine::_d2dCellFlagRendererStrikethrough(u16r rect, u32 color) +{ + _d2dDrawLine(rect, _r.fontMetrics.strikethroughPos, _r.fontMetrics.strikethroughWidth, color); +} From 3532e665f334f07760aedb886d604f9560312e6a Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Wed, 24 Aug 2022 00:22:14 +0200 Subject: [PATCH 02/10] Address feedback --- src/renderer/atlas/AtlasEngine.cpp | 5 +-- src/renderer/atlas/AtlasEngine.h | 4 +- 
src/renderer/atlas/AtlasEngine.r.cpp | 56 ++++++++++++++-------------- 3 files changed, 30 insertions(+), 35 deletions(-) diff --git a/src/renderer/atlas/AtlasEngine.cpp b/src/renderer/atlas/AtlasEngine.cpp index f7276428193..6955e3f9036 100644 --- a/src/renderer/atlas/AtlasEngine.cpp +++ b/src/renderer/atlas/AtlasEngine.cpp @@ -324,7 +324,7 @@ void AtlasEngine::WaitUntilCanRender() noexcept { if constexpr (!debugGeneralPerformance) { - WaitForSingleObjectEx(_r.frameLatencyWaitableObject.get(), INFINITE, true); + WaitForSingleObjectEx(_r.frameLatencyWaitableObject.get(), 100, true); #ifndef NDEBUG _r.frameLatencyWaitableObjectUsed = true; #endif @@ -837,9 +837,6 @@ void AtlasEngine::_recreateSizeDependentResources() // Tell D3D which parts of the render target will be visible. // Everything outside of the viewport will be black. - // - // In the future this should cover the entire _api.sizeInPixel.x/_api.sizeInPixel.y. - // The pixel shader should draw the remaining content in the configured background color. 
{ D3D11_VIEWPORT viewport{}; viewport.Width = static_cast(_api.sizeInPixel.x); diff --git a/src/renderer/atlas/AtlasEngine.h b/src/renderer/atlas/AtlasEngine.h index 0b24c5a503f..800c1137070 100644 --- a/src/renderer/atlas/AtlasEngine.h +++ b/src/renderer/atlas/AtlasEngine.h @@ -349,8 +349,8 @@ namespace Microsoft::Console::Render SmallObjectOptimizer& operator=(SmallObjectOptimizer&& other) noexcept { - this->~SmallObjectOptimizer(); - return *new (this) SmallObjectOptimizer(other); + std::destroy_at(this); + return *std::construct_at(this, std::move(other)); } ~SmallObjectOptimizer() diff --git a/src/renderer/atlas/AtlasEngine.r.cpp b/src/renderer/atlas/AtlasEngine.r.cpp index 3e8271299a0..354f509bc79 100644 --- a/src/renderer/atlas/AtlasEngine.r.cpp +++ b/src/renderer/atlas/AtlasEngine.r.cpp @@ -62,7 +62,7 @@ using namespace Microsoft::Console::Render; [[nodiscard]] HRESULT AtlasEngine::Present() noexcept try { - if (_r.d2dMode) + if (_r.d2dMode) [[unlikely]] { _d2dPresent(); return S_OK; @@ -689,39 +689,37 @@ void AtlasEngine::_d2dPresent() if (dirtyRectInPx) { + RECT scrollRect{}; + POINT scrollOffset{}; + DXGI_PRESENT_PARAMETERS params{ + .DirtyRectsCount = 1, + .pDirtyRects = dirtyRectInPx.as_win32_rect(), + }; + + if (_r.scrollOffset) { - RECT scrollRect{}; - POINT scrollOffset{}; - DXGI_PRESENT_PARAMETERS params{ - .DirtyRectsCount = 1, - .pDirtyRects = dirtyRectInPx.as_win32_rect(), + scrollRect = { + 0, + std::max(0, _r.scrollOffset), + _r.cellCount.x, + _r.cellCount.y + std::min(0, _r.scrollOffset), + }; + scrollOffset = { + 0, + _r.scrollOffset, }; - if (_r.scrollOffset) - { - scrollRect = { - 0, - std::max(0, _r.scrollOffset), - _r.cellCount.x, - _r.cellCount.y + std::min(0, _r.scrollOffset), - }; - scrollOffset = { - 0, - _r.scrollOffset, - }; - - scrollRect.top *= _r.fontMetrics.cellSize.y; - scrollRect.right *= _r.fontMetrics.cellSize.x; - scrollRect.bottom *= _r.fontMetrics.cellSize.y; - - scrollOffset.y *= _r.fontMetrics.cellSize.y; - - 
params.pScrollRect = &scrollRect; - params.pScrollOffset = &scrollOffset; - } + scrollRect.top *= _r.fontMetrics.cellSize.y; + scrollRect.right *= _r.fontMetrics.cellSize.x; + scrollRect.bottom *= _r.fontMetrics.cellSize.y; + + scrollOffset.y *= _r.fontMetrics.cellSize.y; - THROW_IF_FAILED(_r.swapChain->Present1(1, 0, &params)); + params.pScrollRect = &scrollRect; + params.pScrollOffset = &scrollOffset; } + + THROW_IF_FAILED(_r.swapChain->Present1(1, 0, &params)); } else { From da7cdabaacd4b85039dcd09c5ab94c3e5fea6960 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Fri, 26 Aug 2022 23:24:16 +0200 Subject: [PATCH 03/10] Fix multiple bugs/issues * Fill gutters with color * Detect RDP via DXGI flags * Detect device changes via DXGI * Properly treat selection and cursor changes as invalidations * The D2D mode renderer used to redraw the entire viewport whenever the dirty rect is empty which happens when dragging a selection * With the above fix in place we sometimes don't Present() so we now also need to sometimes not wait on the frame event * Listen to const-buffer data being changed and redraw everything * Stop setting the const-buffer on every frame --- src/renderer/atlas/AtlasEngine.cpp | 48 ++++----- src/renderer/atlas/AtlasEngine.h | 4 + src/renderer/atlas/AtlasEngine.r.cpp | 149 +++++++++++++++++++-------- 3 files changed, 130 insertions(+), 71 deletions(-) diff --git a/src/renderer/atlas/AtlasEngine.cpp b/src/renderer/atlas/AtlasEngine.cpp index 6955e3f9036..92555aec0d6 100644 --- a/src/renderer/atlas/AtlasEngine.cpp +++ b/src/renderer/atlas/AtlasEngine.cpp @@ -110,6 +110,7 @@ try { _r.selectionColor = _api.selectionColor; WI_SetFlag(_r.invalidations, RenderInvalidations::ConstBuffer); + WI_ClearFlag(_api.invalidations, ApiInvalidations::Settings); } // Equivalent to InvalidateAll().
@@ -243,23 +244,8 @@ try } _api.dirtyRect = til::rect{ 0, _api.invalidatedRows.x, _api.cellCount.x, _api.invalidatedRows.y }; - - // Skip partial updates in the renderer if we redraw everything. - if (_api.invalidatedRows == u16x2{ 0, _r.cellCount.y }) - { - _r.dirtyRect = {}; - _r.scrollOffset = 0; - } - else - { - _r.dirtyRect = _api.dirtyRect | til::rect{ - _api.invalidatedCursorArea.left, - _api.invalidatedCursorArea.top, - _api.invalidatedCursorArea.right, - _api.invalidatedCursorArea.bottom, - }; - _r.scrollOffset = _api.scrollOffset; - } + _r.dirtyRect = _api.dirtyRect; + _r.scrollOffset = _api.scrollOffset; // This is an important block of code for our TileHashMap. // We only process glyphs within the dirtyRect, but glyphs outside of the @@ -322,7 +308,10 @@ CATCH_RETURN() void AtlasEngine::WaitUntilCanRender() noexcept { - if constexpr (!debugGeneralPerformance) + // IDXGISwapChain2::GetFrameLatencyWaitableObject returns an auto-reset event. + // Once we've waited on the event, waiting on it again will block until the timeout elapses. + // _r.waitForPresentation guards against this. 
+ if (!debugGeneralPerformance && std::exchange(_r.waitForPresentation, false)) { WaitForSingleObjectEx(_r.frameLatencyWaitableObject.get(), 100, true); #ifndef NDEBUG @@ -439,6 +428,7 @@ try rect.narrow_bottom(), }; _setCellFlags(u16rect, CellFlags::Selected, CellFlags::Selected); + _r.dirtyRect |= rect; return S_OK; } CATCH_RETURN() @@ -465,9 +455,10 @@ try } // Clear the previous cursor - if (_api.invalidatedCursorArea.non_empty()) + if (const auto r = _api.invalidatedCursorArea; r.non_empty()) { - _setCellFlags(_api.invalidatedCursorArea, CellFlags::Cursor, CellFlags::None); + _setCellFlags(r, CellFlags::Cursor, CellFlags::None); + _r.dirtyRect |= til::rect{ r.left, r.top, r.right, r.bottom }; } if (options.isOn) @@ -481,6 +472,7 @@ try const auto right = gsl::narrow_cast(clamp(x + cursorWidth, 0, _r.cellCount.x - 0)); const auto bottom = gsl::narrow_cast(y + 1); _setCellFlags({ x, y, right, bottom }, CellFlags::Cursor, CellFlags::Cursor); + _r.dirtyRect |= til::rect{ x, y, right, bottom }; } return S_OK; @@ -641,12 +633,15 @@ void AtlasEngine::_createResources() _r.deviceContext = deviceContext.query(); } - // > You should not use GetSystemMetrics(SM_REMOTESESSION) to determine if your application is running - // > in a remote session in Windows 8 and later or Windows Server 2012 and later if the remote session - // > may also be using the RemoteFX vGPU improvements to the Microsoft Remote Display Protocol (RDP). - // > In this case, GetSystemMetrics(SM_REMOTESESSION) will identify the remote session as a local session. - // This actually sounds great for us. The non-d2dMode of AtlasEngine has more features, but requires a GPU. 
- _r.d2dMode = debugForceD2DMode || GetSystemMetrics(SM_REMOTESESSION); + { + wil::com_ptr dxgiAdapter; + THROW_IF_FAILED(_r.device.query()->GetParent(__uuidof(dxgiAdapter), dxgiAdapter.put_void())); + THROW_IF_FAILED(dxgiAdapter->GetParent(__uuidof(_r.dxgiFactory), _r.dxgiFactory.put_void())); + + DXGI_ADAPTER_DESC1 desc; + THROW_IF_FAILED(dxgiAdapter->GetDesc1(&desc)); + _r.d2dMode = debugForceD2DMode || WI_IsAnyFlagSet(desc.Flags, DXGI_ADAPTER_FLAG_REMOTE | DXGI_ADAPTER_FLAG_SOFTWARE); + } #ifndef NDEBUG // D3D debug messages @@ -753,6 +748,7 @@ void AtlasEngine::_createSwapChain() // See documentation for IDXGISwapChain2::GetFrameLatencyWaitableObject method: // > For every frame it renders, the app should wait on this handle before starting any rendering operations. // > Note that this requirement includes the first frame the app renders with the swap chain. + _r.waitForPresentation = true; WaitUntilCanRender(); if (_api.swapChainChangedCallback) diff --git a/src/renderer/atlas/AtlasEngine.h b/src/renderer/atlas/AtlasEngine.h index 800c1137070..347dab8083d 100644 --- a/src/renderer/atlas/AtlasEngine.h +++ b/src/renderer/atlas/AtlasEngine.h @@ -960,6 +960,9 @@ namespace Microsoft::Console::Render struct Resources { + // DXGI resources + wil::com_ptr dxgiFactory; + // D3D resources wil::com_ptr device; wil::com_ptr deviceContext; @@ -1009,6 +1012,7 @@ namespace Microsoft::Console::Render til::rect dirtyRect; i16 scrollOffset = 0; bool d2dMode = false; + bool waitForPresentation = false; #ifndef NDEBUG // See documentation for IDXGISwapChain2::GetFrameLatencyWaitableObject method: diff --git a/src/renderer/atlas/AtlasEngine.r.cpp b/src/renderer/atlas/AtlasEngine.r.cpp index d2274ae42c3..8f6adf9d4bf 100644 --- a/src/renderer/atlas/AtlasEngine.r.cpp +++ b/src/renderer/atlas/AtlasEngine.r.cpp @@ -62,45 +62,53 @@ using namespace Microsoft::Console::Render; [[nodiscard]] HRESULT AtlasEngine::Present() noexcept try { + // See documentation for 
IDXGISwapChain2::GetFrameLatencyWaitableObject method: + // > For every frame it renders, the app should wait on this handle before starting any rendering operations. + // > Note that this requirement includes the first frame the app renders with the swap chain. + assert(debugGeneralPerformance || _r.frameLatencyWaitableObjectUsed); + if (_r.d2dMode) [[unlikely]] { + // TODO gutters _d2dPresent(); - return S_OK; } - - _adjustAtlasSize(); - _processGlyphQueue(); - - // The values the constant buffer depends on are potentially updated after BeginPaint(). - if (WI_IsFlagSet(_r.invalidations, RenderInvalidations::ConstBuffer)) + else { - _updateConstantBuffer(); - WI_ClearFlag(_r.invalidations, RenderInvalidations::ConstBuffer); - } + _adjustAtlasSize(); + _processGlyphQueue(); - { + // The values the constant buffer depends on are potentially updated after BeginPaint(). + if (WI_IsFlagSet(_r.invalidations, RenderInvalidations::ConstBuffer)) + { + _updateConstantBuffer(); + WI_ClearFlag(_r.invalidations, RenderInvalidations::ConstBuffer); + } + + { #pragma warning(suppress : 26494) // Variable 'mapped' is uninitialized. Always initialize an object (type.5). - D3D11_MAPPED_SUBRESOURCE mapped; - THROW_IF_FAILED(_r.deviceContext->Map(_r.cellBuffer.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped)); - assert(mapped.RowPitch >= _r.cells.size() * sizeof(Cell)); - memcpy(mapped.pData, _r.cells.data(), _r.cells.size() * sizeof(Cell)); - _r.deviceContext->Unmap(_r.cellBuffer.get(), 0); - } + D3D11_MAPPED_SUBRESOURCE mapped; + THROW_IF_FAILED(_r.deviceContext->Map(_r.cellBuffer.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped)); + assert(mapped.RowPitch >= _r.cells.size() * sizeof(Cell)); + memcpy(mapped.pData, _r.cells.data(), _r.cells.size() * sizeof(Cell)); + _r.deviceContext->Unmap(_r.cellBuffer.get(), 0); + } - // After Present calls, the back buffer needs to explicitly be - // re-bound to the D3D11 immediate context before it can be used again. 
- _r.deviceContext->OMSetRenderTargets(1, _r.renderTargetView.addressof(), nullptr); - _r.deviceContext->Draw(3, 0); + // After Present calls, the back buffer needs to explicitly be + // re-bound to the D3D11 immediate context before it can be used again. + _r.deviceContext->OMSetRenderTargets(1, _r.renderTargetView.addressof(), nullptr); + _r.deviceContext->Draw(3, 0); - // See documentation for IDXGISwapChain2::GetFrameLatencyWaitableObject method: - // > For every frame it renders, the app should wait on this handle before starting any rendering operations. - // > Note that this requirement includes the first frame the app renders with the swap chain. - assert(debugGeneralPerformance || _r.frameLatencyWaitableObjectUsed); + // > IDXGISwapChain::Present: Partial Presentation (using a dirty rects or scroll) is not supported + // > for SwapChains created with DXGI_SWAP_EFFECT_DISCARD or DXGI_SWAP_EFFECT_FLIP_DISCARD. + // ---> No need to call IDXGISwapChain1::Present1. + THROW_IF_FAILED(_r.swapChain->Present(1, 0)); + _r.waitForPresentation = true; + } - // > IDXGISwapChain::Present: Partial Presentation (using a dirty rects or scroll) is not supported - // > for SwapChains created with DXGI_SWAP_EFFECT_DISCARD or DXGI_SWAP_EFFECT_FLIP_DISCARD. - // ---> No need to call IDXGISwapChain1::Present1. - THROW_IF_FAILED(_r.swapChain->Present(1, 0)); + if (!_r.dxgiFactory->IsCurrent()) + { + WI_SetFlag(_api.invalidations, ApiInvalidations::Device); + } return S_OK; } @@ -656,6 +664,19 @@ void AtlasEngine::CachedGlyphLayout::undoScaling(ID2D1RenderTarget* d2dRenderTar void AtlasEngine::_d2dPresent() { + const til::rect fullRect{ 0, 0, _r.cellCount.x, _r.cellCount.y }; + + // A change in the selection or background color (etc.) forces a full redraw. 
+ if (WI_IsFlagSet(_r.invalidations, RenderInvalidations::ConstBuffer)) + { + _r.dirtyRect = fullRect; + } + + if (!_r.dirtyRect) + { + return; + } + auto dirtyRectInPx = _r.dirtyRect; dirtyRectInPx.left *= _r.fontMetrics.cellSize.x; dirtyRectInPx.top *= _r.fontMetrics.cellSize.y; @@ -687,7 +708,7 @@ void AtlasEngine::_d2dPresent() // > Note that this requirement includes the first frame the app renders with the swap chain. assert(debugGeneralPerformance || _r.frameLatencyWaitableObjectUsed); - if (dirtyRectInPx) + if (_r.dirtyRect != fullRect) { RECT scrollRect{}; POINT scrollOffset{}; @@ -720,13 +741,16 @@ void AtlasEngine::_d2dPresent() } THROW_IF_FAILED(_r.swapChain->Present1(1, 0, &params)); + _r.waitForPresentation = true; } else { THROW_IF_FAILED(_r.swapChain->Present(1, 0)); + _r.waitForPresentation = true; } _r.previousDirtyRectInPx = dirtyRectInPx; + WI_ClearAllFlags(_r.invalidations, RenderInvalidations::Cursor | RenderInvalidations::ConstBuffer); } void AtlasEngine::_d2dCreateRenderTarget() @@ -779,32 +803,67 @@ void AtlasEngine::_d2dDrawDirtyArea() CellFlagHandler{ CellFlags::Selected, &AtlasEngine::_d2dCellFlagRendererSelected }, }; - u16 left = 0; - u16 top = 0; - u16 right = _r.cellCount.x; - u16 bottom = _r.cellCount.y; - if (_r.dirtyRect) + auto left = gsl::narrow(_r.dirtyRect.left); + auto top = gsl::narrow(_r.dirtyRect.top); + auto right = gsl::narrow(_r.dirtyRect.right); + auto bottom = gsl::narrow(_r.dirtyRect.bottom); + if constexpr (debugGlyphGenerationPerformance) { - left = gsl::narrow(_r.dirtyRect.left); - top = gsl::narrow(_r.dirtyRect.top); - right = gsl::narrow(_r.dirtyRect.right); - bottom = gsl::narrow(_r.dirtyRect.bottom); + left = 0; + top = 0; + right = _r.cellCount.x; + bottom = _r.cellCount.y; } _r.d2dRenderTarget->BeginDraw(); + if (WI_IsFlagSet(_r.invalidations, RenderInvalidations::ConstBuffer)) + { + _r.d2dRenderTarget->Clear(colorFromU32(_r.backgroundColor)); + } + for (u16 y = top; y < bottom; ++y) { const Cell* cells =
_getCell(0, y); const TileHashMap::iterator* cellGlyphMappings = _getCellGlyphMapping(0, y); + // left/right might intersect a wide glyph. We have to extend left/right + // to include the entire glyph so that we can properly render it. + // Since a series of identical narrow glyphs (2 spaces for instance) are stored in cellGlyphMappings + // just like a single wide glyph (2 references to the same glyph in a row), the only way for us to + // know where wide glyphs begin and end is to iterate the entire row and use the stored `cellCount`. + u16 beg = 0; + for (;;) + { + const auto cellCount = cellGlyphMappings[beg]->first.data()->attributes.cellCount; + const auto begNext = gsl::narrow_cast(beg + cellCount); + + if (begNext > left) + { + break; + } + + beg = begNext; + } + auto end = beg; + for (;;) + { + const auto cellCount = cellGlyphMappings[end]->first.data()->attributes.cellCount; + end += cellCount; + + if (end >= right) + { + break; + } + } + // Draw background. { - auto x1 = left; + auto x1 = beg; auto x2 = gsl::narrow_cast(x1 + 1); auto currentColor = cells[x1].color.y; - for (; x2 < right; ++x2) + for (; x2 < end; ++x2) { const auto color = cells[x2].color.y; @@ -824,21 +883,21 @@ void AtlasEngine::_d2dDrawDirtyArea() } // Draw text. - for (u16 x = left, cellCount = 0; x < right; x += cellCount) + for (auto x = beg; x < end;) { const auto& it = cellGlyphMappings[x]; const u16x2 coord{ x, y }; const auto color = cells[x].color.x; - cellCount = _d2dDrawGlyph(it, coord, color); + x += _d2dDrawGlyph(it, coord, color); } // Draw underlines, cursors, selections, etc. 
for (const auto& handler : cellFlagHandlers) { - auto x1 = left; + auto x1 = beg; auto currentFlags = CellFlags::None; - for (u16 x2 = left; x2 < right; ++x2) + for (auto x2 = beg; x2 < end; ++x2) { const auto flags = cells[x2].flags & handler.filter; From 3a0438b552549a0480f87367c05b3c2bfea089be Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Fri, 26 Aug 2022 23:40:28 +0200 Subject: [PATCH 04/10] Address feedback --- src/renderer/atlas/AtlasEngine.r.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/renderer/atlas/AtlasEngine.r.cpp b/src/renderer/atlas/AtlasEngine.r.cpp index 8f6adf9d4bf..5f8964b6abe 100644 --- a/src/renderer/atlas/AtlasEngine.r.cpp +++ b/src/renderer/atlas/AtlasEngine.r.cpp @@ -741,15 +741,14 @@ void AtlasEngine::_d2dPresent() } THROW_IF_FAILED(_r.swapChain->Present1(1, 0, &params)); - _r.waitForPresentation = true; } else { THROW_IF_FAILED(_r.swapChain->Present(1, 0)); - _r.waitForPresentation = true; } _r.previousDirtyRectInPx = dirtyRectInPx; + _r.waitForPresentation = true; WI_ClearAllFlags(_r.invalidations, RenderInvalidations::Cursor | RenderInvalidations::ConstBuffer); } From 90cc4b9a3c9fdbf855bf5a4245501f8b2e89f828 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Sat, 27 Aug 2022 00:03:01 +0200 Subject: [PATCH 05/10] Fix a memory leak --- src/renderer/atlas/AtlasEngine.r.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/renderer/atlas/AtlasEngine.r.cpp b/src/renderer/atlas/AtlasEngine.r.cpp index 5f8964b6abe..65709df83f6 100644 --- a/src/renderer/atlas/AtlasEngine.r.cpp +++ b/src/renderer/atlas/AtlasEngine.r.cpp @@ -747,6 +747,7 @@ void AtlasEngine::_d2dPresent() THROW_IF_FAILED(_r.swapChain->Present(1, 0)); } + _r.glyphQueue.clear(); _r.previousDirtyRectInPx = dirtyRectInPx; _r.waitForPresentation = true; WI_ClearAllFlags(_r.invalidations, RenderInvalidations::Cursor | RenderInvalidations::ConstBuffer); From bc12e2fb77793f115d96519d8d70bb6cfb257689 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date:
Sat, 27 Aug 2022 00:06:52 +0200 Subject: [PATCH 06/10] Treat the given point size exactly --- src/renderer/atlas/AtlasEngine.api.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/renderer/atlas/AtlasEngine.api.cpp b/src/renderer/atlas/AtlasEngine.api.cpp index b738d5ece4b..4a9bbe390ae 100644 --- a/src/renderer/atlas/AtlasEngine.api.cpp +++ b/src/renderer/atlas/AtlasEngine.api.cpp @@ -606,7 +606,8 @@ void AtlasEngine::_resolveFontMetrics(const wchar_t* requestedFaceName, const Fo // Point sizes are commonly treated at a 72 DPI scale // (including by OpenType), whereas DirectWrite uses 96 DPI. // Since we want the height in px we multiply by the display's DPI. - const auto fontSizeInPx = std::roundf(requestedSize.Y / 72.0f * _api.dpi); + const auto fontSizeInDIP = requestedSize.Y / 72.0f * 96.0f; + const auto fontSizeInPx = requestedSize.Y / 72.0f * _api.dpi; const auto designUnitsPerPx = fontSizeInPx / static_cast(metrics.designUnitsPerEm); const auto ascent = static_cast(metrics.ascent) * designUnitsPerPx; @@ -687,7 +688,7 @@ void AtlasEngine::_resolveFontMetrics(const wchar_t* requestedFaceName, const Fo fontMetrics->fontCollection = std::move(fontCollection); fontMetrics->fontName = std::move(fontName); - fontMetrics->fontSizeInDIP = fontSizeInPx / static_cast(_api.dpi) * 96.0f; + fontMetrics->fontSizeInDIP = fontSizeInDIP; fontMetrics->baselineInDIP = baseline / static_cast(_api.dpi) * 96.0f; fontMetrics->advanceScale = cellWidth / advanceWidth; fontMetrics->cellSize = { cellWidth, cellHeight }; From 3c359c49afe70bd6946dc638393bebb69e5a9a53 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Tue, 30 Aug 2022 22:31:05 +0200 Subject: [PATCH 07/10] AtlasEngine: Implement support for custom shaders --- samples/PixelShaders/Retro.hlsl | 55 +++++----- src/renderer/atlas/AtlasEngine.api.cpp | 31 ++++-- src/renderer/atlas/AtlasEngine.cpp | 122 ++++++++++++++++++----- src/renderer/atlas/AtlasEngine.h | 41 +++++++- 
src/renderer/atlas/AtlasEngine.r.cpp | 106 ++++++++++++++++++-- src/renderer/atlas/atlas.vcxproj | 22 ++++ src/renderer/atlas/custom_shader_ps.hlsl | 82 +++++++++++++++ src/renderer/atlas/custom_shader_vs.hlsl | 17 ++++ src/renderer/dx/DxRenderer.cpp | 6 +- 9 files changed, 406 insertions(+), 76 deletions(-) create mode 100644 src/renderer/atlas/custom_shader_ps.hlsl create mode 100644 src/renderer/atlas/custom_shader_vs.hlsl diff --git a/samples/PixelShaders/Retro.hlsl b/samples/PixelShaders/Retro.hlsl index cfefe04e7da..0073f2ca87c 100644 --- a/samples/PixelShaders/Retro.hlsl +++ b/samples/PixelShaders/Retro.hlsl @@ -2,47 +2,46 @@ Texture2D shaderTexture; SamplerState samplerState; -cbuffer PixelShaderSettings { - float Time; - float Scale; - float2 Resolution; - float4 Background; +cbuffer PixelShaderSettings +{ + float time; + float scale; + float2 resolution; + float4 background; }; -#define SCANLINE_FACTOR 0.5 -#define SCALED_SCANLINE_PERIOD Scale -#define SCALED_GAUSSIAN_SIGMA (2.0*Scale) +#define SCANLINE_FACTOR 0.5f +#define SCALED_SCANLINE_PERIOD scale +#define SCALED_GAUSSIAN_SIGMA (2.0f * scale) static const float M_PI = 3.14159265f; float Gaussian2D(float x, float y, float sigma) { - return 1/(sigma*sqrt(2*M_PI)) * exp(-0.5*(x*x + y*y)/sigma/sigma); + return 1 / (sigma * sqrt(2 * M_PI)) * exp(-0.5 * (x * x + y * y) / sigma / sigma); } float4 Blur(Texture2D input, float2 tex_coord, float sigma) { - uint width, height; + float width, height; shaderTexture.GetDimensions(width, height); - float texelWidth = 1.0f/width; - float texelHeight = 1.0f/height; + float texelWidth = 1.0f / width; + float texelHeight = 1.0f / height; float4 color = { 0, 0, 0, 0 }; - int sampleCount = 13; + float sampleCount = 13; - for (int x = 0; x < sampleCount; x++) + for (float x = 0; x < sampleCount; x++) { float2 samplePos = { 0, 0 }; + samplePos.x = tex_coord.x + (x - sampleCount / 2.0f) * texelWidth; - samplePos.x = tex_coord.x + (x - sampleCount/2) * texelWidth; - for (int 
y = 0; y < sampleCount; y++) + for (float y = 0; y < sampleCount; y++) { - samplePos.y = tex_coord.y + (y - sampleCount/2) * texelHeight; - if (samplePos.x <= 0 || samplePos.y <= 0 || samplePos.x >= width || samplePos.y >= height) continue; - - color += input.Sample(samplerState, samplePos) * Gaussian2D((x - sampleCount/2), (y - sampleCount/2), sigma); + samplePos.y = tex_coord.y + (y - sampleCount / 2.0f) * texelHeight; + color += input.Sample(samplerState, samplePos) * Gaussian2D(x - sampleCount / 2.0f, y - sampleCount / 2.0f, sigma); } } @@ -51,7 +50,7 @@ float4 Blur(Texture2D input, float2 tex_coord, float sigma) float SquareWave(float y) { - return 1 - (floor(y / SCALED_SCANLINE_PERIOD) % 2) * SCANLINE_FACTOR; + return 1.0f - (floor(y / SCALED_SCANLINE_PERIOD) % 2.0f) * SCANLINE_FACTOR; } float4 Scanline(float4 color, float4 pos) @@ -60,9 +59,9 @@ float4 Scanline(float4 color, float4 pos) // TODO:GH#3929 make this configurable. // Remove the && false to draw scanlines everywhere. - if (length(color.rgb) < 0.2 && false) + if (length(color.rgb) < 0.2f && false) { - return color + wave*0.1; + return color + wave * 0.1f; } else { @@ -70,14 +69,14 @@ float4 Scanline(float4 color, float4 pos) } } +// clang-format off float4 main(float4 pos : SV_POSITION, float2 tex : TEXCOORD) : SV_TARGET +// clang-format on { - Texture2D input = shaderTexture; - // TODO:GH#3930 Make these configurable in some way. 
- float4 color = input.Sample(samplerState, tex); - color += Blur(input, tex, SCALED_GAUSSIAN_SIGMA)*0.3; + float4 color = shaderTexture.Sample(samplerState, tex); + color += Blur(shaderTexture, tex, SCALED_GAUSSIAN_SIGMA) * 0.3f; color = Scanline(color, pos); return color; -} \ No newline at end of file +} diff --git a/src/renderer/atlas/AtlasEngine.api.cpp b/src/renderer/atlas/AtlasEngine.api.cpp index 4a9bbe390ae..647d57bb7ea 100644 --- a/src/renderer/atlas/AtlasEngine.api.cpp +++ b/src/renderer/atlas/AtlasEngine.api.cpp @@ -293,7 +293,7 @@ HRESULT AtlasEngine::Enable() noexcept [[nodiscard]] bool AtlasEngine::GetRetroTerminalEffect() const noexcept { - return false; + return _api.useRetroTerminalEffect; } [[nodiscard]] float AtlasEngine::GetScaling() const noexcept @@ -332,7 +332,7 @@ void AtlasEngine::SetAntialiasingMode(const D2D1_TEXT_ANTIALIAS_MODE antialiasin if (_api.antialiasingMode != mode) { _api.antialiasingMode = mode; - _resolveAntialiasingMode(); + _resolveTransparencySettings(); WI_SetFlag(_api.invalidations, ApiInvalidations::Font); } } @@ -344,11 +344,10 @@ void AtlasEngine::SetCallback(std::function pfn) noexcept void AtlasEngine::EnableTransparentBackground(const bool isTransparent) noexcept { - const auto mixin = !isTransparent ? 
0xff000000 : 0x00000000; - if (_api.backgroundOpaqueMixin != mixin) + if (_api.enableTransparentBackground != isTransparent) { - _api.backgroundOpaqueMixin = mixin; - _resolveAntialiasingMode(); + _api.enableTransparentBackground = isTransparent; + _resolveTransparencySettings(); WI_SetFlag(_api.invalidations, ApiInvalidations::SwapChain); } } @@ -369,10 +368,22 @@ void AtlasEngine::SetForceFullRepaintRendering(bool enable) noexcept void AtlasEngine::SetPixelShaderPath(std::wstring_view value) noexcept { + if (_api.customPixelShaderPath != value) + { + _api.customPixelShaderPath = value; + _resolveTransparencySettings(); + WI_SetFlag(_api.invalidations, ApiInvalidations::Device); + } } void AtlasEngine::SetRetroTerminalEffect(bool enable) noexcept { + if (_api.useRetroTerminalEffect != enable) + { + _api.useRetroTerminalEffect = enable; + _resolveTransparencySettings(); + WI_SetFlag(_api.invalidations, ApiInvalidations::Device); + } } void AtlasEngine::SetSelectionBackground(const COLORREF color, const float alpha) noexcept @@ -451,13 +462,15 @@ void AtlasEngine::UpdateHyperlinkHoveredId(const uint16_t hoveredId) noexcept #pragma endregion -void AtlasEngine::_resolveAntialiasingMode() noexcept +void AtlasEngine::_resolveTransparencySettings() noexcept { // If the user asks for ClearType, but also for a transparent background // (which our ClearType shader doesn't simultaneously support) // then we need to sneakily force the renderer to grayscale AA. - const auto forceGrayscaleAA = _api.antialiasingMode == D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE && !_api.backgroundOpaqueMixin; - _api.realizedAntialiasingMode = forceGrayscaleAA ? D2D1_TEXT_ANTIALIAS_MODE_GRAYSCALE : _api.antialiasingMode; + _api.realizedAntialiasingMode = _api.enableTransparentBackground && _api.antialiasingMode == D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE ? D2D1_TEXT_ANTIALIAS_MODE_GRAYSCALE : _api.antialiasingMode; + // An opaque background allows us to use true "independent" flips. 
See AtlasEngine::_createSwapChain(). + // We can't enable them if custom shaders are specified, because it's unknown, whether they support opaque inputs. + _api.backgroundOpaqueMixin = _api.enableTransparentBackground || !_api.customPixelShaderPath.empty() || _api.useRetroTerminalEffect ? 0x00000000 : 0xff000000; } void AtlasEngine::_updateFont(const wchar_t* faceName, const FontInfoDesired& fontInfoDesired, FontInfo& fontInfo, const std::unordered_map& features, const std::unordered_map& axes) diff --git a/src/renderer/atlas/AtlasEngine.cpp b/src/renderer/atlas/AtlasEngine.cpp index 92555aec0d6..d8bc042b01b 100644 --- a/src/renderer/atlas/AtlasEngine.cpp +++ b/src/renderer/atlas/AtlasEngine.cpp @@ -7,6 +7,9 @@ #include #include +#include +#include + #include "../../interactivity/win32/CustomWindowMessages.h" // #### NOTE #### @@ -301,25 +304,6 @@ try } CATCH_RETURN() -[[nodiscard]] bool AtlasEngine::RequiresContinuousRedraw() noexcept -{ - return debugGeneralPerformance; -} - -void AtlasEngine::WaitUntilCanRender() noexcept -{ - // IDXGISwapChain2::GetFrameLatencyWaitableObject returns an auto-reset event. - // Once we've waited on the event, waiting on it again will block until the timeout elapses. - // _r.waitForPresentation guards against this. 
- if (!debugGeneralPerformance && std::exchange(_r.waitForPresentation, false)) - { - WaitForSingleObjectEx(_r.frameLatencyWaitableObject.get(), 100, true); -#ifndef NDEBUG - _r.frameLatencyWaitableObjectUsed = true; -#endif - } -} - [[nodiscard]] HRESULT AtlasEngine::PrepareForTeardown(_Out_ bool* const pForcePaint) noexcept { RETURN_HR_IF_NULL(E_INVALIDARG, pForcePaint); @@ -668,6 +652,85 @@ void AtlasEngine::_createResources() THROW_IF_FAILED(_r.device->CreateVertexShader(&shader_vs[0], sizeof(shader_vs), nullptr, _r.vertexShader.put())); THROW_IF_FAILED(_r.device->CreatePixelShader(&shader_ps[0], sizeof(shader_ps), nullptr, _r.pixelShader.put())); + + if (!_api.customPixelShaderPath.empty()) + { + const auto target = _r.device->GetFeatureLevel() == D3D_FEATURE_LEVEL_10_1 ? "ps_4_1" : "ps_5_0"; + static constexpr auto flags = D3DCOMPILE_PACK_MATRIX_COLUMN_MAJOR | D3DCOMPILE_ENABLE_STRICTNESS | D3DCOMPILE_WARNINGS_ARE_ERRORS +#ifdef NDEBUG + | D3DCOMPILE_OPTIMIZATION_LEVEL3; +#else + | D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION; +#endif + + wil::com_ptr error; + wil::com_ptr blob; + const auto hr = D3DCompileFromFile( + /* pFileName */ _api.customPixelShaderPath.c_str(), + /* pDefines */ nullptr, + /* pInclude */ D3D_COMPILE_STANDARD_FILE_INCLUDE, + /* pEntrypoint */ "main", + /* pTarget */ target, + /* Flags1 */ flags, + /* Flags2 */ 0, + /* ppCode */ blob.addressof(), + /* ppErrorMsgs */ error.addressof()); + + if (SUCCEEDED(hr)) + { + THROW_IF_FAILED(_r.device->CreatePixelShader(blob->GetBufferPointer(), blob->GetBufferSize(), nullptr, _r.customPixelShader.put())); + } + else + { + if (error) + { + LOG_HR_MSG(hr, "%*hs", error->GetBufferSize(), error->GetBufferPointer()); + } + else + { + LOG_HR(hr); + } + + if (_api.warningCallback) + { + _api.warningCallback(D2DERR_SHADER_COMPILE_FAILED); + } + } + + _r.requiresContinuousRedraw = true; + } + else if (_api.useRetroTerminalEffect) + { + THROW_IF_FAILED(_r.device->CreatePixelShader(&custom_shader_ps[0], 
sizeof(custom_shader_ps), nullptr, _r.customPixelShader.put())); + } + + if (_r.customPixelShader) + { + THROW_IF_FAILED(_r.device->CreateVertexShader(&custom_shader_vs[0], sizeof(custom_shader_vs), nullptr, _r.customVertexShader.put())); + + { + D3D11_BUFFER_DESC desc{}; + desc.ByteWidth = sizeof(CustomConstBuffer); + desc.Usage = D3D11_USAGE_DYNAMIC; + desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + THROW_IF_FAILED(_r.device->CreateBuffer(&desc, nullptr, _r.customShaderConstantBuffer.put())); + } + + { + D3D11_SAMPLER_DESC desc{}; + desc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; + desc.AddressU = D3D11_TEXTURE_ADDRESS_BORDER; + desc.AddressV = D3D11_TEXTURE_ADDRESS_BORDER; + desc.AddressW = D3D11_TEXTURE_ADDRESS_BORDER; + desc.MaxAnisotropy = 1; + desc.ComparisonFunc = D3D11_COMPARISON_ALWAYS; + desc.MaxLOD = D3D11_FLOAT32_MAX; + THROW_IF_FAILED(_r.device->CreateSamplerState(&desc, _r.customShaderSamplerState.put())); + } + + _r.customShaderStartTime = std::chrono::steady_clock::now(); + } } WI_ClearFlag(_api.invalidations, ApiInvalidations::Device); @@ -711,10 +774,9 @@ void AtlasEngine::_createSwapChain() desc.BufferCount = 2; desc.Scaling = DXGI_SCALING_NONE; desc.SwapEffect = _sr.isWindows10OrGreater && !_r.d2dMode ? DXGI_SWAP_EFFECT_FLIP_DISCARD : DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL; - // * HWND swap chains can't do alpha. - // * If our background is opaque we can enable "independent" flips by setting DXGI_SWAP_EFFECT_FLIP_DISCARD and DXGI_ALPHA_MODE_IGNORE. - // As our swap chain won't have to compose with DWM anymore it reduces the display latency dramatically. - desc.AlphaMode = _api.hwnd || _api.backgroundOpaqueMixin ? DXGI_ALPHA_MODE_IGNORE : DXGI_ALPHA_MODE_PREMULTIPLIED; + // If our background is opaque we can enable "independent" flips by setting DXGI_SWAP_EFFECT_FLIP_DISCARD and DXGI_ALPHA_MODE_IGNORE. 
+ // As our swap chain won't have to compose with DWM anymore it reduces the display latency dramatically. + desc.AlphaMode = _api.backgroundOpaqueMixin ? DXGI_ALPHA_MODE_IGNORE : DXGI_ALPHA_MODE_PREMULTIPLIED; desc.Flags = debugGeneralPerformance ? 0 : DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT; wil::com_ptr dxgiFactory; @@ -830,6 +892,20 @@ void AtlasEngine::_recreateSizeDependentResources() THROW_IF_FAILED(_r.swapChain->GetBuffer(0, __uuidof(ID3D11Texture2D), buffer.put_void())); THROW_IF_FAILED(_r.device->CreateRenderTargetView(buffer.get(), nullptr, _r.renderTargetView.put())); } + if (_r.customPixelShader) + { + D3D11_TEXTURE2D_DESC desc{}; + desc.Width = _api.sizeInPixel.x; + desc.Height = _api.sizeInPixel.y; + desc.MipLevels = 1; + desc.ArraySize = 1; + desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; + desc.SampleDesc = { 1, 0 }; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET; + THROW_IF_FAILED(_r.device->CreateTexture2D(&desc, nullptr, _r.customOffscreenTexture.addressof())); + THROW_IF_FAILED(_r.device->CreateShaderResourceView(_r.customOffscreenTexture.get(), nullptr, _r.customOffscreenTextureView.addressof())); + THROW_IF_FAILED(_r.device->CreateRenderTargetView(_r.customOffscreenTexture.get(), nullptr, _r.customOffscreenTextureTargetView.addressof())); + } // Tell D3D which parts of the render target will be visible. // Everything outside of the viewport will be black. 
diff --git a/src/renderer/atlas/AtlasEngine.h b/src/renderer/atlas/AtlasEngine.h index 347dab8083d..1f5d2103d41 100644 --- a/src/renderer/atlas/AtlasEngine.h +++ b/src/renderer/atlas/AtlasEngine.h @@ -113,6 +113,16 @@ namespace Microsoft::Console::Render ATLAS_POD_OPS(vec2) }; + template + struct vec3 + { + T x{}; + T y{}; + T z{}; + + ATLAS_POD_OPS(vec3) + }; + template struct vec4 { @@ -155,6 +165,7 @@ namespace Microsoft::Console::Render using f32 = float; using f32x2 = vec2; + using f32x3 = vec3; using f32x4 = vec4; struct TextAnalyzerResult @@ -857,6 +868,16 @@ namespace Microsoft::Console::Render #pragma warning(suppress : 4324) // 'ConstBuffer': structure was padded due to alignment specifier }; + struct alignas(16) CustomConstBuffer + { + // WARNING: Same rules as for ConstBuffer above apply. + alignas(sizeof(f32)) f32 time = 0; + alignas(sizeof(f32)) f32 scale = 0; + alignas(sizeof(f32x2)) f32x2 resolution; + alignas(sizeof(f32x4)) f32x4 background; +#pragma warning(suppress : 4324) // 'CustomConstBuffer': structure was padded due to alignment specifier + }; + // Handled in BeginPaint() enum class ApiInvalidations : u8 { @@ -904,11 +925,12 @@ namespace Microsoft::Console::Render bool _emplaceGlyph(IDWriteFontFace* fontFace, size_t bufferPos1, size_t bufferPos2); // AtlasEngine.api.cpp - void _resolveAntialiasingMode() noexcept; + void _resolveTransparencySettings() noexcept; void _updateFont(const wchar_t* faceName, const FontInfoDesired& fontInfoDesired, FontInfo& fontInfo, const std::unordered_map& features, const std::unordered_map& axes); void _resolveFontMetrics(const wchar_t* faceName, const FontInfoDesired& fontInfoDesired, FontInfo& fontInfo, FontMetrics* fontMetrics = nullptr) const; // AtlasEngine.r.cpp + void _renderWithCustomShader() const; void _setShaderResources() const; void _updateConstantBuffer() const noexcept; void _adjustAtlasSize(); @@ -974,6 +996,14 @@ namespace Microsoft::Console::Render wil::com_ptr constantBuffer; wil::com_ptr 
cellBuffer; wil::com_ptr cellView; + wil::com_ptr customOffscreenTexture; + wil::com_ptr customOffscreenTextureView; + wil::com_ptr customOffscreenTextureTargetView; + wil::com_ptr customVertexShader; + wil::com_ptr customPixelShader; + wil::com_ptr customShaderConstantBuffer; + wil::com_ptr customShaderSamplerState; + std::chrono::steady_clock::time_point customShaderStartTime; // D2D resources wil::com_ptr atlasBuffer; @@ -1013,6 +1043,7 @@ namespace Microsoft::Console::Render i16 scrollOffset = 0; bool d2dMode = false; bool waitForPresentation = false; + bool requiresContinuousRedraw = false; #ifndef NDEBUG // See documentation for IDXGISwapChain2::GetFrameLatencyWaitableObject method: @@ -1045,7 +1076,7 @@ namespace Microsoft::Console::Render u16x2 sizeInPixel; // changes are flagged as ApiInvalidations::Size // UpdateDrawingBrushes() - u32 backgroundOpaqueMixin = 0xff000000; // changes are flagged as ApiInvalidations::Device + u32 backgroundOpaqueMixin = 0xff000000; // changes are flagged as ApiInvalidations::SwapChain u32x2 currentColor; AtlasKeyAttributes attributes{}; u16x2 lastPaintBufferLineCoord; @@ -1069,7 +1100,11 @@ namespace Microsoft::Console::Render HWND hwnd = nullptr; u16 dpi = USER_DEFAULT_SCREEN_DPI; // changes are flagged as ApiInvalidations::Font|Size u8 antialiasingMode = D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE; // changes are flagged as ApiInvalidations::Font - u8 realizedAntialiasingMode = D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE; // caches antialiasingMode, depends on antialiasingMode and backgroundOpaqueMixin, see _resolveAntialiasingMode + u8 realizedAntialiasingMode = D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE; // caches antialiasingMode, depends on antialiasingMode and backgroundOpaqueMixin, see _resolveTransparencySettings + bool enableTransparentBackground = false; + + std::wstring customPixelShaderPath; // changes are flagged as ApiInvalidations::Device + bool useRetroTerminalEffect = true; // changes are flagged as ApiInvalidations::Device 
ApiInvalidations invalidations = ApiInvalidations::Device; } _api; diff --git a/src/renderer/atlas/AtlasEngine.r.cpp b/src/renderer/atlas/AtlasEngine.r.cpp index 65709df83f6..c3d8b179e41 100644 --- a/src/renderer/atlas/AtlasEngine.r.cpp +++ b/src/renderer/atlas/AtlasEngine.r.cpp @@ -44,7 +44,8 @@ constexpr bool isInInversionList(const std::array& ranges, wchar_t n return (idx & 1) != 0; } -constexpr D2D1_COLOR_F colorFromU32(uint32_t rgba) +template +constexpr T colorFromU32(uint32_t rgba) { const auto r = static_cast((rgba >> 0) & 0xff) / 255.0f; const auto g = static_cast((rgba >> 8) & 0xff) / 255.0f; @@ -93,10 +94,15 @@ try _r.deviceContext->Unmap(_r.cellBuffer.get(), 0); } - // After Present calls, the back buffer needs to explicitly be - // re-bound to the D3D11 immediate context before it can be used again. - _r.deviceContext->OMSetRenderTargets(1, _r.renderTargetView.addressof(), nullptr); - _r.deviceContext->Draw(3, 0); + if (_r.customPixelShader) [[unlikely]] + { + _renderWithCustomShader(); + } + else + { + _r.deviceContext->OMSetRenderTargets(1, _r.renderTargetView.addressof(), nullptr); + _r.deviceContext->Draw(3, 0); + } // > IDXGISwapChain::Present: Partial Presentation (using a dirty rects or scroll) is not supported // > for SwapChains created with DXGI_SWAP_EFFECT_DISCARD or DXGI_SWAP_EFFECT_FLIP_DISCARD. @@ -119,23 +125,103 @@ catch (const wil::ResultException& exception) } CATCH_RETURN() +[[nodiscard]] bool AtlasEngine::RequiresContinuousRedraw() noexcept +{ + return debugGeneralPerformance || _r.requiresContinuousRedraw; +} + +void AtlasEngine::WaitUntilCanRender() noexcept +{ + // IDXGISwapChain2::GetFrameLatencyWaitableObject returns an auto-reset event. + // Once we've waited on the event, waiting on it again will block until the timeout elapses. + // _r.waitForPresentation guards against this. 
+ if (!debugGeneralPerformance && std::exchange(_r.waitForPresentation, false)) + { + WaitForSingleObjectEx(_r.frameLatencyWaitableObject.get(), 100, true); +#ifndef NDEBUG + _r.frameLatencyWaitableObjectUsed = true; +#endif + } +} + #pragma endregion -void AtlasEngine::_setShaderResources() const +void AtlasEngine::_renderWithCustomShader() const { - _r.deviceContext->VSSetShader(_r.vertexShader.get(), nullptr, 0); - _r.deviceContext->PSSetShader(_r.pixelShader.get(), nullptr, 0); + // Render with our main shader just like Present(). + { + // OM: Output Merger + _r.deviceContext->OMSetRenderTargets(1, _r.customOffscreenTextureTargetView.addressof(), nullptr); + _r.deviceContext->Draw(3, 0); + } + + // Update the custom shader's constant buffer. + { + CustomConstBuffer data; + data.time = std::chrono::duration(std::chrono::steady_clock::now() - _r.customShaderStartTime).count(); + data.scale = _r.pixelPerDIP; + data.resolution.x = static_cast(_r.cellCount.x * _r.fontMetrics.cellSize.x); + data.resolution.y = static_cast(_r.cellCount.y * _r.fontMetrics.cellSize.y); + data.background = colorFromU32(_r.backgroundColor); + +#pragma warning(suppress : 26494) // Variable 'mapped' is uninitialized. Always initialize an object (type.5). + D3D11_MAPPED_SUBRESOURCE mapped; + THROW_IF_FAILED(_r.deviceContext->Map(_r.customShaderConstantBuffer.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped)); + assert(mapped.RowPitch >= sizeof(data)); + memcpy(mapped.pData, &data, sizeof(data)); + _r.deviceContext->Unmap(_r.customShaderConstantBuffer.get(), 0); + } + + // Render with the custom shader. + { + // OM: Output Merger + // customOffscreenTextureView was just rendered to via customOffscreenTextureTargetView and is + // set as the output target. Before we can use it as an input we have to remove it as an output. 
+ _r.deviceContext->OMSetRenderTargets(1, _r.renderTargetView.addressof(), nullptr); + + // VS: Vertex Shader + _r.deviceContext->VSSetShader(_r.customVertexShader.get(), nullptr, 0); + + // PS: Pixel Shader + _r.deviceContext->PSSetShader(_r.customPixelShader.get(), nullptr, 0); + _r.deviceContext->PSSetConstantBuffers(0, 1, _r.customShaderConstantBuffer.addressof()); + _r.deviceContext->PSSetShaderResources(0, 1, _r.customOffscreenTextureView.addressof()); + _r.deviceContext->PSSetSamplers(0, 1, _r.customShaderSamplerState.addressof()); + + _r.deviceContext->Draw(4, 0); + } + // For the next frame we need to restore our context state. + { + // VS: Vertex Shader + _r.deviceContext->VSSetShader(_r.vertexShader.get(), nullptr, 0); + + // PS: Pixel Shader + _r.deviceContext->PSSetShader(_r.pixelShader.get(), nullptr, 0); + _r.deviceContext->PSSetConstantBuffers(0, 1, _r.constantBuffer.addressof()); + const std::array resources{ _r.cellView.get(), _r.atlasView.get() }; + _r.deviceContext->PSSetShaderResources(0, gsl::narrow_cast(resources.size()), resources.data()); + _r.deviceContext->PSSetSamplers(0, 0, nullptr); + } +} + +void AtlasEngine::_setShaderResources() const +{ + // IA: Input Assembler // Our vertex shader uses a trick from Bill Bilodeau published in // "Vertex Shader Tricks" at GDC14 to draw a fullscreen triangle // without vertex/index buffers. This prepares our context for this. 
_r.deviceContext->IASetVertexBuffers(0, 0, nullptr, nullptr, nullptr); _r.deviceContext->IASetIndexBuffer(nullptr, DXGI_FORMAT_UNKNOWN, 0); _r.deviceContext->IASetInputLayout(nullptr); - _r.deviceContext->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + _r.deviceContext->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); - _r.deviceContext->PSSetConstantBuffers(0, 1, _r.constantBuffer.addressof()); + // VS: Vertex Shader + _r.deviceContext->VSSetShader(_r.vertexShader.get(), nullptr, 0); + // PS: Pixel Shader + _r.deviceContext->PSSetShader(_r.pixelShader.get(), nullptr, 0); + _r.deviceContext->PSSetConstantBuffers(0, 1, _r.constantBuffer.addressof()); const std::array resources{ _r.cellView.get(), _r.atlasView.get() }; _r.deviceContext->PSSetShaderResources(0, gsl::narrow_cast(resources.size()), resources.data()); } diff --git a/src/renderer/atlas/atlas.vcxproj b/src/renderer/atlas/atlas.vcxproj index 355a9175240..74a494a7f82 100644 --- a/src/renderer/atlas/atlas.vcxproj +++ b/src/renderer/atlas/atlas.vcxproj @@ -27,6 +27,28 @@ + + Pixel + 4.1 + true + custom_shader_ps + + $(OutDir)$(ProjectName)\%(Filename).h + true + /Zpc %(AdditionalOptions) + /O3 /Qstrip_debug /Qstrip_reflect %(AdditionalOptions) + + + Vertex + 4.1 + true + custom_shader_vs + + $(OutDir)$(ProjectName)\%(Filename).h + true + /Zpc %(AdditionalOptions) + /O3 /Qstrip_debug /Qstrip_reflect %(AdditionalOptions) + true diff --git a/src/renderer/atlas/custom_shader_ps.hlsl b/src/renderer/atlas/custom_shader_ps.hlsl new file mode 100644 index 00000000000..0073f2ca87c --- /dev/null +++ b/src/renderer/atlas/custom_shader_ps.hlsl @@ -0,0 +1,82 @@ +// The original retro pixel shader +Texture2D shaderTexture; +SamplerState samplerState; + +cbuffer PixelShaderSettings +{ + float time; + float scale; + float2 resolution; + float4 background; +}; + +#define SCANLINE_FACTOR 0.5f +#define SCALED_SCANLINE_PERIOD scale +#define SCALED_GAUSSIAN_SIGMA (2.0f * scale) + +static const float 
M_PI = 3.14159265f; + +float Gaussian2D(float x, float y, float sigma) +{ + return 1 / (sigma * sqrt(2 * M_PI)) * exp(-0.5 * (x * x + y * y) / sigma / sigma); +} + +float4 Blur(Texture2D input, float2 tex_coord, float sigma) +{ + float width, height; + shaderTexture.GetDimensions(width, height); + + float texelWidth = 1.0f / width; + float texelHeight = 1.0f / height; + + float4 color = { 0, 0, 0, 0 }; + + float sampleCount = 13; + + for (float x = 0; x < sampleCount; x++) + { + float2 samplePos = { 0, 0 }; + samplePos.x = tex_coord.x + (x - sampleCount / 2.0f) * texelWidth; + + for (float y = 0; y < sampleCount; y++) + { + samplePos.y = tex_coord.y + (y - sampleCount / 2.0f) * texelHeight; + color += input.Sample(samplerState, samplePos) * Gaussian2D(x - sampleCount / 2.0f, y - sampleCount / 2.0f, sigma); + } + } + + return color; +} + +float SquareWave(float y) +{ + return 1.0f - (floor(y / SCALED_SCANLINE_PERIOD) % 2.0f) * SCANLINE_FACTOR; +} + +float4 Scanline(float4 color, float4 pos) +{ + float wave = SquareWave(pos.y); + + // TODO:GH#3929 make this configurable. + // Remove the && false to draw scanlines everywhere. + if (length(color.rgb) < 0.2f && false) + { + return color + wave * 0.1f; + } + else + { + return color * wave; + } +} + +// clang-format off +float4 main(float4 pos : SV_POSITION, float2 tex : TEXCOORD) : SV_TARGET +// clang-format on +{ + // TODO:GH#3930 Make these configurable in some way. 
+ float4 color = shaderTexture.Sample(samplerState, tex); + color += Blur(shaderTexture, tex, SCALED_GAUSSIAN_SIGMA) * 0.3f; + color = Scanline(color, pos); + + return color; +} diff --git a/src/renderer/atlas/custom_shader_vs.hlsl b/src/renderer/atlas/custom_shader_vs.hlsl new file mode 100644 index 00000000000..5bb9fbff70b --- /dev/null +++ b/src/renderer/atlas/custom_shader_vs.hlsl @@ -0,0 +1,17 @@ +struct VS_OUTPUT +{ + float4 pos : SV_POSITION; + float2 tex : TEXCOORD; +}; + +// clang-format off +VS_OUTPUT main(uint id : SV_VERTEXID) +// clang-format on +{ + VS_OUTPUT output; + // The following two lines are taken from https://gamedev.stackexchange.com/a/77670 + // written by János Turánszki, licensed under CC BY-SA 3.0. + output.tex = float2(id % 2, id % 4 / 2); + output.pos = float4((output.tex.x - 0.5f) * 2.0f, -(output.tex.y - 0.5f) * 2.0f, 0, 1); + return output; +} diff --git a/src/renderer/dx/DxRenderer.cpp b/src/renderer/dx/DxRenderer.cpp index b0a349ff26e..9621dbb8bb7 100644 --- a/src/renderer/dx/DxRenderer.cpp +++ b/src/renderer/dx/DxRenderer.cpp @@ -446,9 +446,9 @@ HRESULT DxEngine::_SetupTerminalEffects() // Sampler state is needed to use texture as input to shader. 
D3D11_SAMPLER_DESC samplerDesc{}; samplerDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; - samplerDesc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP; - samplerDesc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP; - samplerDesc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP; + samplerDesc.AddressU = D3D11_TEXTURE_ADDRESS_BORDER; + samplerDesc.AddressV = D3D11_TEXTURE_ADDRESS_BORDER; + samplerDesc.AddressW = D3D11_TEXTURE_ADDRESS_BORDER; samplerDesc.MipLODBias = 0.0f; samplerDesc.MaxAnisotropy = 1; samplerDesc.ComparisonFunc = D3D11_COMPARISON_ALWAYS; From 3ad0da8265626bc79fa3a2612ec3305a19f6ab16 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Tue, 30 Aug 2022 22:44:06 +0200 Subject: [PATCH 08/10] whoops --- src/renderer/atlas/AtlasEngine.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/renderer/atlas/AtlasEngine.h b/src/renderer/atlas/AtlasEngine.h index 1f5d2103d41..a9632ac89cf 100644 --- a/src/renderer/atlas/AtlasEngine.h +++ b/src/renderer/atlas/AtlasEngine.h @@ -1104,7 +1104,7 @@ namespace Microsoft::Console::Render bool enableTransparentBackground = false; std::wstring customPixelShaderPath; // changes are flagged as ApiInvalidations::Device - bool useRetroTerminalEffect = true; // changes are flagged as ApiInvalidations::Device + bool useRetroTerminalEffect = false; // changes are flagged as ApiInvalidations::Device ApiInvalidations invalidations = ApiInvalidations::Device; } _api; From dd4ce881ad6814e0b8accb8efb0995e505f260a7 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Wed, 31 Aug 2022 21:01:38 +0200 Subject: [PATCH 09/10] Add D3D 10.0 support --- src/renderer/atlas/AtlasEngine.cpp | 21 +++++++++++++++++---- src/renderer/atlas/atlas.vcxproj | 8 ++++---- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/src/renderer/atlas/AtlasEngine.cpp b/src/renderer/atlas/AtlasEngine.cpp index d8bc042b01b..d3d4f6c10b2 100644 --- a/src/renderer/atlas/AtlasEngine.cpp +++ b/src/renderer/atlas/AtlasEngine.cpp @@ -4,11 +4,10 @@ #include "pch.h" #include 
"AtlasEngine.h" -#include -#include - #include #include +#include +#include #include "../../interactivity/win32/CustomWindowMessages.h" @@ -591,6 +590,7 @@ void AtlasEngine::_createResources() D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0, D3D_FEATURE_LEVEL_10_1, + D3D_FEATURE_LEVEL_10_0, }; auto hr = S_OK; @@ -655,7 +655,20 @@ void AtlasEngine::_createResources() if (!_api.customPixelShaderPath.empty()) { - const auto target = _r.device->GetFeatureLevel() == D3D_FEATURE_LEVEL_10_1 ? "ps_4_1" : "ps_5_0"; + const char* target; + switch (_r.device->GetFeatureLevel()) + { + case D3D_FEATURE_LEVEL_10_0: + target = "ps_4_0"; + break; + case D3D_FEATURE_LEVEL_10_1: + target = "ps_4_1"; + break; + default: + target = "ps_5_0"; + break; + } + static constexpr auto flags = D3DCOMPILE_PACK_MATRIX_COLUMN_MAJOR | D3DCOMPILE_ENABLE_STRICTNESS | D3DCOMPILE_WARNINGS_ARE_ERRORS #ifdef NDEBUG | D3DCOMPILE_OPTIMIZATION_LEVEL3; diff --git a/src/renderer/atlas/atlas.vcxproj b/src/renderer/atlas/atlas.vcxproj index 74a494a7f82..e9907d15490 100644 --- a/src/renderer/atlas/atlas.vcxproj +++ b/src/renderer/atlas/atlas.vcxproj @@ -29,7 +29,7 @@ Pixel - 4.1 + 4.0 true custom_shader_ps @@ -40,7 +40,7 @@ Vertex - 4.1 + 4.0 true custom_shader_vs @@ -54,7 +54,7 @@ Pixel - 4.1 + 4.0 true shader_ps @@ -65,7 +65,7 @@ Vertex - 4.1 + 4.0 true shader_vs From 75ea74fad14d4e7068f855510cb75481af3af49a Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Wed, 31 Aug 2022 21:15:54 +0200 Subject: [PATCH 10/10] Fix AuditMode errors --- src/renderer/atlas/AtlasEngine.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/renderer/atlas/AtlasEngine.cpp b/src/renderer/atlas/AtlasEngine.cpp index d3d4f6c10b2..e89e9d7fbe2 100644 --- a/src/renderer/atlas/AtlasEngine.cpp +++ b/src/renderer/atlas/AtlasEngine.cpp @@ -655,7 +655,7 @@ void AtlasEngine::_createResources() if (!_api.customPixelShaderPath.empty()) { - const char* target; + const char* target = nullptr; switch 
(_r.device->GetFeatureLevel()) { case D3D_FEATURE_LEVEL_10_0: