Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor framebuffer attachment. Fixes Test Drive Unlimited performance #13355

Merged
merged 5 commits into from
Aug 31, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions GPU/Common/DepalettizeShaderCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,20 @@ void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat, ShaderLang
int shift = gstate.getClutIndexShift();
int offset = gstate.getClutIndexStartPos();
GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
// Unfortunately sampling turned our texture into floating point. To avoid this, might be able

// Sampling turns our texture into floating point. To avoid this, might be able
// to declare them as isampler2D objects, but these require integer textures, which needs more work.
// Anyhow, we simply work around this by converting back to integer. Hopefully there will be no loss of precision.
// Anyhow, we simply work around this by converting back to integer, which is fine.
// Use the mask to skip reading some components.

// TODO: Since we actually have higher precision color data here, we might want to apply a dithering pattern here
// in the 5551, 565 and 4444 modes. This would benefit Test Drive which renders at 16-bit on the real hardware
// and dithers immediately, while we render at higher color depth and thus don't dither resulting in banding
// when we sample it at low color depth like this.

// An alternative would be to have a special mode where we keep some extra precision here and sample the CLUT linearly - works for ramps such
// as those that Test Drive uses for its color remapping. But would need game specific flagging.

int shiftedMask = mask << shift;
switch (pixelFormat) {
case GE_FORMAT_8888:
Expand Down
153 changes: 133 additions & 20 deletions GPU/Common/TextureCacheCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,15 @@
// GL_UNSIGNED_BYTE/RGBA: AAAAAAAABBBBBBBBGGGGGGGGRRRRRRRR (match)
// These are Data::Format:: B4G4R4A4_PACK16, B5G6R6_PACK16, B5G5R5A1_PACK16, R8G8B8A8

// Allow the extra bits from the remasters for the purposes of this.
inline int dimWidth(u16 dim) {
return 1 << (dim & 0xFF);
}

inline int dimHeight(u16 dim) {
return 1 << ((dim >> 8) & 0xFF);
}

// Vulkan color formats:
// TODO
TextureCacheCommon::TextureCacheCommon(Draw::DrawContext *draw)
Expand Down Expand Up @@ -533,12 +542,8 @@ void TextureCacheCommon::SetTexture(bool force) {
// Before we go reading the texture from memory, let's check for render-to-texture.
// We must do this early so we have the right w/h.
entry->framebuffer = nullptr;
for (size_t i = 0, n = fbCache_.size(); i < n; ++i) {
auto framebuffer = fbCache_[i];
auto notificationChannel = (entry->status & TexCacheEntry::STATUS_DEPTH) ? NOTIFY_FB_DEPTH : NOTIFY_FB_COLOR;
FramebufferMatchInfo match = MatchFramebuffer(entry, framebuffer->fb_address, framebuffer, 0, notificationChannel);
ApplyFramebufferMatch(match, entry, framebuffer->fb_address, framebuffer, notificationChannel);
}

AttachFramebufferToEntry(entry, 0);

// If we ended up with a framebuffer, attach it - no texture decoding needed.
if (entry->framebuffer) {
Expand All @@ -551,6 +556,88 @@ void TextureCacheCommon::SetTexture(bool force) {
nextNeedsRebuild_ = true;
}

bool TextureCacheCommon::AttachFramebufferToEntry(TexCacheEntry *entry, u32 texAddrOffset) {
bool success = false;

std::vector<AttachCandidate> candidates;

bool anyIgnores = false;

FramebufferNotificationChannel channel = (entry->status & TexCacheEntry::STATUS_DEPTH) ? NOTIFY_FB_DEPTH : NOTIFY_FB_COLOR;
for (size_t i = 0, n = fbCache_.size(); i < n; ++i) {
auto framebuffer = fbCache_[i];
FramebufferMatchInfo match = MatchFramebuffer(entry, framebuffer->fb_address, framebuffer, texAddrOffset, channel);
if (match.match != FramebufferMatch::IGNORE && match.match != FramebufferMatch::NO_MATCH) {
candidates.push_back(AttachCandidate{ match, entry, framebuffer, channel });
} else if (match.match == FramebufferMatch::IGNORE) {
anyIgnores = true;
}
}

if (!candidates.size()) {
// No candidates at all.
if (anyIgnores) {
// We want to defer the decision, apparently.
return false;
}

// Actively detach the current framebuffer.
if (entry->framebuffer) {
DetachFramebuffer(entry, entry->addr, entry->framebuffer, channel);
}
return false;
}

if (candidates.size() > 1) {
bool depth = channel == FramebufferNotificationChannel::NOTIFY_FB_DEPTH;
WARN_LOG_REPORT_ONCE(multifbcandidate, G3D, "AttachFramebufferToEntry(%s): Multiple (%d) candidate framebuffers. texaddr: %08x offset: %d (%dx%d stride %d, %s)",
depth ? "DEPTH" : "COLOR", entry->addr, texAddrOffset, dimWidth(entry->dim), dimHeight(entry->dim), entry->bufw, GeTextureFormatToString((GETextureFormat)entry->format));
}

return AttachBestCandidate(candidates);
}

// reason is just used for reporting/logging.
bool TextureCacheCommon::AttachBestCandidate(const std::vector<AttachCandidate> &candidates) {
_dbg_assert_(!candidates.empty());

if (candidates.size() == 1) {
VirtualFramebuffer *framebuffer = candidates[0].fb;
return ApplyFramebufferMatch(candidates[0].match, candidates[0].entry, framebuffer->fb_address, framebuffer, candidates[0].channel);
}

// OK, multiple possible candidates. Will need to figure out which one is the most relevant.
int bestRelevancy = -1;
hrydgard marked this conversation as resolved.
Show resolved Hide resolved
int bestIndex = -1;

for (int i = 0; i < (int)candidates.size(); i++) {
const AttachCandidate &candidate = candidates[i];
int relevancy = 0;
switch (candidate.match.match) {
case FramebufferMatch::VALID:
case FramebufferMatch::VALID_DEPAL:
relevancy += 1000;
break;
case FramebufferMatch::INVALID:
relevancy += 100;
break;
}

// Bonus point for matching stride.
if (candidate.channel == NOTIFY_FB_COLOR && candidate.fb->fb_stride == candidate.entry->bufw) {
relevancy += 10;
}

if (relevancy >= bestRelevancy) {
bestRelevancy = relevancy;
bestIndex = i;
}
}

VirtualFramebuffer *framebuffer = candidates[bestIndex].fb;
return ApplyFramebufferMatch(candidates[bestIndex].match, candidates[bestIndex].entry, framebuffer->fb_address, framebuffer, candidates[bestIndex].channel);
}

// Removes old textures.
void TextureCacheCommon::Decimate(bool forcePressure) {
if (--decimationCounter_ <= 0) {
Expand Down Expand Up @@ -665,17 +752,29 @@ void TextureCacheCommon::NotifyFramebuffer(u32 address, VirtualFramebuffer *fram
switch (msg) {
case NOTIFY_FB_CREATED:
case NOTIFY_FB_UPDATED:
{
// Try to match the new framebuffer to existing textures.
// Backwards from the "usual" texturing case so can't share a utility function.

// Ensure it's in the framebuffer cache.
if (std::find(fbCache_.begin(), fbCache_.end(), framebuffer) == fbCache_.end()) {
// TODO: This is kind of silly. We should probably simply share this list of framebuffers
// with the framebuffer manager.
hrydgard marked this conversation as resolved.
Show resolved Hide resolved
WARN_LOG(G3D, "TextureCache got info about new framebuffer, at %08x", address);
fbCache_.push_back(framebuffer);
}

std::vector<AttachCandidate> candidates;

// TODO: Rework this to not try to "apply" all matches, only the best one.
for (auto it = cache_.lower_bound(cacheKey), end = cache_.upper_bound(cacheKeyEnd); it != end; ++it) {
TexCacheEntry *entry = it->second.get();
FramebufferMatchInfo match = MatchFramebuffer(entry, addr, framebuffer, 0, channel);
ApplyFramebufferMatch(match, entry, addr, framebuffer, channel);
if (match.match != FramebufferMatch::IGNORE && match.match != FramebufferMatch::NO_MATCH) {
candidates.push_back(AttachCandidate{ match, entry, framebuffer, channel });
}
}

// Let's assume anything in mirrors is fair game to check.
// TODO: Only do this for depth?
for (auto it = cache_.lower_bound(mirrorCacheKey), end = cache_.upper_bound(mirrorCacheKeyEnd); it != end; ++it) {
Expand All @@ -684,11 +783,23 @@ void TextureCacheCommon::NotifyFramebuffer(u32 address, VirtualFramebuffer *fram
if (mirrorlessKey >= cacheKey && mirrorlessKey <= cacheKeyEnd) {
TexCacheEntry *entry = it->second.get();
FramebufferMatchInfo match = MatchFramebuffer(entry, addr, framebuffer, 0, channel);
ApplyFramebufferMatch(match, entry, addr, framebuffer, channel);
if (match.match != FramebufferMatch::IGNORE && match.match != FramebufferMatch::NO_MATCH) {
candidates.push_back(AttachCandidate{ match, entry, framebuffer, channel });
}
}
}
break;

if (!candidates.empty()) {
if (candidates.size() > 1) {
bool depth = channel == FramebufferNotificationChannel::NOTIFY_FB_DEPTH;
WARN_LOG_REPORT_ONCE(multitexcandidate, G3D, "NotifyFramebuffer(%s): Multiple (%d) candidate textures. fb addr: %08x (%dx%d stride %d, %s)",
depth ? "DEPTH" : "COLOR", (int)candidates.size(), addr, framebuffer->width, framebuffer->height, depth ? framebuffer->z_stride : framebuffer->fb_stride, GeBufferFormatToString(framebuffer->format));
}

AttachBestCandidate(candidates);
}
break;
}
case NOTIFY_FB_DESTROYED:
fbCache_.erase(std::remove(fbCache_.begin(), fbCache_.end(), framebuffer), fbCache_.end());

Expand Down Expand Up @@ -753,8 +864,9 @@ void TextureCacheCommon::AttachFramebufferInvalid(TexCacheEntry *entry, VirtualF
entry->invalidHint = -1;
entry->status &= ~TexCacheEntry::STATUS_DEPALETTIZE;
entry->maxLevel = 0;
if (channel == NOTIFY_FB_DEPTH)
if (channel == NOTIFY_FB_DEPTH) {
entry->status |= TexCacheEntry::STATUS_DEPTH;
}
fbTexInfo_[cachekey] = fbInfo;
GPUDebug::NotifyTextureAttachment(entry->addr);
}
Expand Down Expand Up @@ -805,7 +917,16 @@ FramebufferMatchInfo TextureCacheCommon::MatchFramebuffer(TexCacheEntry *entry,

u32 addr = address & 0x3FFFFFFF;
u32 texaddr = entry->addr + texaddrOffset;
if (Memory::IsVRAMAddress(entry->addr)) {

bool texInVRAM = Memory::IsVRAMAddress(texaddr);
bool fbInVRAM = Memory::IsVRAMAddress(framebuffer->fb_address);

if (texInVRAM != fbInVRAM) {
// Shortcut. Cannot possibly be a match.
return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
}

if (texInVRAM) {
// This bit controls swizzle. The swizzles at 0x00200000 and 0x00600000 are designed
// to perfectly match reading depth as color (which one to use I think might be related
// to the bpp of the color format used when rendering to it).
Expand Down Expand Up @@ -1011,15 +1132,7 @@ bool TextureCacheCommon::SetOffsetTexture(u32 yOffset) {
}
TexCacheEntry *entry = iter->second.get();

bool success = false;
for (size_t i = 0, n = fbCache_.size(); i < n; ++i) {
auto framebuffer = fbCache_[i];
FramebufferNotificationChannel channel = (entry->status & TexCacheEntry::STATUS_DEPTH) ? NOTIFY_FB_DEPTH : NOTIFY_FB_COLOR;
FramebufferMatchInfo match = MatchFramebuffer(entry, framebuffer->fb_address, framebuffer, texaddrOffset, channel);
if (ApplyFramebufferMatch(match, entry, framebuffer->fb_address, framebuffer, channel)) {
success = true;
}
}
bool success = AttachFramebufferToEntry(entry, texaddrOffset);

if (success && entry->framebuffer) {
// This will not apply the texture immediately.
Expand Down
10 changes: 10 additions & 0 deletions GPU/Common/TextureCacheCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,13 @@ struct FramebufferMatchInfo {
u32 yOffset;
};

struct AttachCandidate {
FramebufferMatchInfo match;
TexCacheEntry *entry;
VirtualFramebuffer *fb;
FramebufferNotificationChannel channel;
};

class TextureCacheCommon {
public:
TextureCacheCommon(Draw::DrawContext *draw);
Expand Down Expand Up @@ -274,8 +281,11 @@ class TextureCacheCommon {

FramebufferMatchInfo MatchFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, u32 texaddrOffset, FramebufferNotificationChannel channel) const;

bool AttachFramebufferToEntry(TexCacheEntry *entry, u32 texAddrOffset);

// Temporary utility during conversion
bool ApplyFramebufferMatch(FramebufferMatchInfo match, TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, FramebufferNotificationChannel channel);
bool AttachBestCandidate(const std::vector<AttachCandidate> &candidates);

void AttachFramebufferValid(TexCacheEntry *entry, VirtualFramebuffer *framebuffer, const FramebufferMatchInfo &fbInfo, FramebufferNotificationChannel channel);
void AttachFramebufferInvalid(TexCacheEntry *entry, VirtualFramebuffer *framebuffer, const FramebufferMatchInfo &fbInfo, FramebufferNotificationChannel channel);
Expand Down
6 changes: 6 additions & 0 deletions assets/compat.ini
Original file line number Diff line number Diff line change
Expand Up @@ -538,6 +538,12 @@ ULJM05049 = true
ULKS46027 = true
ULAS42019 = true

# Test Drive Unlimited
ULET00386 = true
ULUS10249 = true
ULES00637 = true
ULKS46126 = true

# Note! This whole flag is disabled temporarily by appending "Disabled" to its name). See 7914
[YugiohSaveFixDisabled]
# The cause of Yu-gi-oh series 's bad save (cannot save) are load "save status" and use cwcheat,
Expand Down