From 2f63f9999d83fbde98570ac430a736098da0317b Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 27 Feb 2021 23:51:45 -0800 Subject: [PATCH 1/3] GPU: Normalize 0 to 1 always in software lighting. See #14167. This seems to be consistent. --- GPU/Common/SoftwareTransformCommon.cpp | 13 +++++------ GPU/Common/TransformCommon.cpp | 8 +++---- GPU/Math3D.cpp | 30 ++++++++++++++++++++++++++ GPU/Math3D.h | 2 ++ GPU/Software/Lighting.cpp | 14 ++++++------ GPU/Software/TransformUnit.cpp | 3 ++- 6 files changed, 50 insertions(+), 20 deletions(-) diff --git a/GPU/Common/SoftwareTransformCommon.cpp b/GPU/Common/SoftwareTransformCommon.cpp index 89a4606450c7..ccadf1f18817 100644 --- a/GPU/Common/SoftwareTransformCommon.cpp +++ b/GPU/Common/SoftwareTransformCommon.cpp @@ -17,6 +17,7 @@ #include #include +#include "Common/CPUDetect.h" #include "Common/Math/math_util.h" #include "Common/GPU/OpenGL/GLFeatures.h" @@ -266,7 +267,7 @@ void SoftwareTransform::Decode(int prim, u32 vertType, const DecVtxFormat &decVt normal = -normal; } Norm3ByMatrix43(worldnormal.AsArray(), normal.AsArray(), gstate.worldMatrix); - worldnormal = worldnormal.Normalized(); + worldnormal = worldnormal.NormalizedOr001(cpu_info.bSSE4_1); } } else { float weights[8]; @@ -298,7 +299,7 @@ void SoftwareTransform::Decode(int prim, u32 vertType, const DecVtxFormat &decVt normal = -normal; } Norm3ByMatrix43(worldnormal.AsArray(), normal.AsArray(), gstate.worldMatrix); - worldnormal = worldnormal.Normalized(); + worldnormal = worldnormal.NormalizedOr001(cpu_info.bSSE4_1); } } @@ -358,7 +359,7 @@ void SoftwareTransform::Decode(int prim, u32 vertType, const DecVtxFormat &decVt break; case GE_PROJMAP_NORMALIZED_NORMAL: // Use normalized normal as source - source = normal.Normalized(); + source = normal.NormalizedOr001(cpu_info.bSSE4_1); if (!reader.hasNormal()) { ERROR_LOG_REPORT(G3D, "Normal projection mapping without normal?"); } @@ -391,11 +392,7 @@ void SoftwareTransform::Decode(int prim, u32 vertType, 
const DecVtxFormat &decVt }; auto calcShadingLPos = [&](int l) { Vec3f pos = getLPos(l); - if (pos.Length2() == 0.0f) { - return Vec3f(0.0f, 0.0f, 1.0f); - } else { - return pos.Normalized(); - } + return pos.NormalizedOr001(cpu_info.bSSE4_1); }; // Might not have lighting enabled, so don't use lighter. Vec3f lightpos0 = calcShadingLPos(gstate.getUVLS0()); diff --git a/GPU/Common/TransformCommon.cpp b/GPU/Common/TransformCommon.cpp index 7e74f321a7e5..85031c8e0eee 100644 --- a/GPU/Common/TransformCommon.cpp +++ b/GPU/Common/TransformCommon.cpp @@ -17,6 +17,7 @@ #include +#include "Common/CPUDetect.h" #include "GPU/GPUState.h" #include "GPU/Common/TransformCommon.h" @@ -140,7 +141,7 @@ void Lighter::Light(float colorOut0[4], float colorOut1[4], const float colorIn[ case GE_LIGHTTYPE_SPOT: case GE_LIGHTTYPE_UNKNOWN: lightDir = Vec3Packedf(&ldir[l * 3]); - angle = Dot(toLight.Normalized(), lightDir.Normalized()); + angle = Dot(toLight.NormalizedOr001(cpu_info.bSSE4_1), lightDir.NormalizedOr001(cpu_info.bSSE4_1)); if (angle >= lcutoff[l]) lightScale = clamp(1.0f / (latt[l * 3] + latt[l * 3 + 1] * distanceToLight + latt[l * 3 + 2] * distanceToLight*distanceToLight), 0.0f, 1.0f) * powf(angle, lconv[l]); break; @@ -155,11 +156,10 @@ void Lighter::Light(float colorOut0[4], float colorOut1[4], const float colorIn[ // Real PSP specular Vec3f toViewer(0, 0, 1); // Better specular - // Vec3f toViewer = (viewer - pos).Normalized(); + // Vec3f toViewer = (viewer - pos).NormalizedOr001(cpu_info.bSSE4_1); if (doSpecular) { - Vec3f halfVec = (toLight + toViewer); - halfVec.Normalize(); + Vec3f halfVec = (toLight + toViewer).NormalizedOr001(cpu_info.bSSE4_1); dot = Dot(halfVec, norm); if (dot > 0.0f) { diff --git a/GPU/Math3D.cpp b/GPU/Math3D.cpp index c46a4e520d41..811fad4923da 100644 --- a/GPU/Math3D.cpp +++ b/GPU/Math3D.cpp @@ -138,12 +138,31 @@ Vec3 Vec3::Normalized(bool useSSE4) const const __m128 normalize = SSENormalizeMultiplier(useSSE4, vec); return _mm_mul_ps(normalize, 
vec); } + +template<> +Vec3 Vec3::NormalizedOr001(bool useSSE4) const { + const __m128 normalize = SSENormalizeMultiplier(useSSE4, vec); + const __m128 result = _mm_mul_ps(normalize, vec); + const __m128 mask = _mm_cmpunord_ps(result, vec); + const __m128 replace = _mm_and_ps(_mm_set_ps(0.0f, 1.0f, 0.0f, 0.0f), mask); + // Replace with the constant if the mask matched. + return _mm_or_ps(_mm_andnot_ps(mask, result), replace); +} #else template<> Vec3 Vec3::Normalized(bool useSSE4) const { return (*this) / Length(); } + +template<> +Vec3 Vec3::NormalizedOr001(bool useSSE4) const { + float len = Length(); + if (len == 0.0f) { + return Vec3(0.0f, 0.0f, 1.0f); + } + return *this / len; +} #endif template<> @@ -154,6 +173,17 @@ float Vec3::Normalize() return len; } +template<> +float Vec3::NormalizeOr001() { + float len = Length(); + if (len == 0.0f) { + z = 1.0f; + } else { + *this /= len; + } + return len; +} + template<> Vec3Packed Vec3Packed::FromRGB(unsigned int rgb) { diff --git a/GPU/Math3D.h b/GPU/Math3D.h index 873119613d4d..62b017d15d22 100644 --- a/GPU/Math3D.h +++ b/GPU/Math3D.h @@ -297,7 +297,9 @@ class Vec3 Vec3 WithLength(const float l) const; float Distance2To(Vec3 &other); Vec3 Normalized(bool useSSE4 = false) const; + Vec3 NormalizedOr001(bool useSSE4 = false) const; float Normalize(); // returns the previous length, which is often useful + float NormalizeOr001(); T& operator [] (int i) //allow vector[2] = 3 (vector.z=3) { diff --git a/GPU/Software/Lighting.cpp b/GPU/Software/Lighting.cpp index 4bc4fe157695..5c60bd66dc4d 100644 --- a/GPU/Software/Lighting.cpp +++ b/GPU/Software/Lighting.cpp @@ -15,9 +15,9 @@ // Official git repository and contact information can be found at // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. 
-#include "../GPUState.h" - -#include "Lighting.h" +#include "Common/CPUDetect.h" +#include "GPU/GPUState.h" +#include "GPU/Software/Lighting.h" namespace Lighting { @@ -53,7 +53,7 @@ void Process(VertexData& vertex, bool hasColor) { if (gstate.getUVGenMode() == GE_TEXMAP_ENVIRONMENT_MAP) { Vec3 L = GetLightVec(gstate.lpos, light); // In other words, L.Length2() == 0.0f means Dot({0, 0, 1}, worldnormal). - float diffuse_factor = L.Length2() == 0.0f ? vertex.worldnormal.z : Dot(L.Normalized(), vertex.worldnormal); + float diffuse_factor = Dot(L.NormalizedOr001(cpu_info.bSSE4_1), vertex.worldnormal); if (gstate.getUVLS0() == (int)light) vertex.texturecoords.s() = (diffuse_factor + 1.f) / 2.f; @@ -77,7 +77,7 @@ void Process(VertexData& vertex, bool hasColor) { L -= vertex.worldpos; } // TODO: Should this normalize (0, 0, 0) to (0, 0, 1)? - float d = L.Normalize(); + float d = L.NormalizeOr001(); float att = 1.f; if (!gstate.isDirectionalLight(light)) { @@ -89,7 +89,7 @@ void Process(VertexData& vertex, bool hasColor) { float spot = 1.f; if (gstate.isSpotLight(light)) { Vec3 dir = GetLightVec(gstate.ldir, light); - float rawSpot = dir.Length2() == 0.0f ? 0.0f : Dot(dir.Normalized(), L); + float rawSpot = Dot(dir.NormalizedOr001(cpu_info.bSSE4_1), L); float cutoff = getFloat24(gstate.lcutoff[light]); if (rawSpot >= cutoff) { float conv = getFloat24(gstate.lconv[light]); @@ -123,7 +123,7 @@ void Process(VertexData& vertex, bool hasColor) { Vec3 lsc = Vec3::FromRGB(gstate.getSpecularColor(light)); Vec3 msc = (materialupdate & 4) ? 
vcol0 : Vec3::FromRGB(gstate.getMaterialSpecular()); - float specular_factor = Dot(H.Normalized(), vertex.worldnormal); + float specular_factor = Dot(H.NormalizedOr001(cpu_info.bSSE4_1), vertex.worldnormal); float k = gstate.getMaterialSpecularCoef(); specular_factor = pspLightPow(specular_factor, k); diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index 31c298ec3e4f..65baa6f363ab 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -18,6 +18,7 @@ #include #include +#include "Common/CPUDetect.h" #include "Common/Math/math_util.h" #include "Common/MemoryUtil.h" #include "Core/Config.h" @@ -256,7 +257,7 @@ VertexData TransformUnit::ReadVertex(VertexReader& vreader) break; case GE_PROJMAP_NORMALIZED_NORMAL: - source = vertex.normal.Normalized(); + source = vertex.normal.NormalizedOr001(cpu_info.bSSE4_1); break; case GE_PROJMAP_NORMAL: From 3d8e44436816069f3319b3840f3d9f0b51828593 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 27 Feb 2021 23:52:45 -0800 Subject: [PATCH 2/3] GE Debugger: Correct powered diffuse display. --- Windows/GEDebugger/TabState.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Windows/GEDebugger/TabState.cpp b/Windows/GEDebugger/TabState.cpp index c8866ad48003..275adfd56545 100644 --- a/Windows/GEDebugger/TabState.cpp +++ b/Windows/GEDebugger/TabState.cpp @@ -744,7 +744,7 @@ void FormatStateRow(wchar_t *dest, const TabStateRow &info, u32 value, bool enab const char *lightComputations[] = { "diffuse", "diffuse + spec", - "pow(diffuse) + spec", + "pow(diffuse)", "unknown (diffuse?)", }; const char *lightTypes[] = { From 64484a59e642a691e376e75bcaf005e6348cebe7 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 28 Feb 2021 00:05:10 -0800 Subject: [PATCH 3/3] GPU: Normalize zero consistently in hw transform. 
--- GPU/Common/VertexShaderGenerator.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/GPU/Common/VertexShaderGenerator.cpp b/GPU/Common/VertexShaderGenerator.cpp index 33fccab57b78..fffd0898b550 100644 --- a/GPU/Common/VertexShaderGenerator.cpp +++ b/GPU/Common/VertexShaderGenerator.cpp @@ -722,6 +722,12 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag WRITE(p, "}\n"); } + if (useHWTransform) { + WRITE(p, "vec3 normalizeOr001(vec3 v) {\n"); + WRITE(p, " return length(v) == 0.0 ? vec3(0.0, 0.0, 1.0) : normalize(v);\n"); + WRITE(p, "}\n"); + } + if (ShaderLanguageIsOpenGL(compat.shaderLanguage) || compat.shaderLanguage == GLSL_VULKAN) { WRITE(p, "void main() {\n"); } else if (compat.shaderLanguage == HLSL_D3D9 || compat.shaderLanguage == HLSL_D3D11) { @@ -798,7 +804,7 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag WRITE(p, " vec3 worldpos = mul(vec4(tess.pos.xyz, 1.0), u_world).xyz;\n"); if (hasNormalTess) { - WRITE(p, " mediump vec3 worldnormal = normalize(mul(vec4(%stess.nrm, 0.0), u_world).xyz);\n", flipNormalTess ? "-" : ""); + WRITE(p, " mediump vec3 worldnormal = normalizeOr001(mul(vec4(%stess.nrm, 0.0), u_world).xyz);\n", flipNormalTess ? "-" : ""); } else { WRITE(p, " mediump vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n"); } @@ -806,7 +812,7 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag // No skinning, just standard T&L. WRITE(p, " vec3 worldpos = mul(vec4(position, 1.0), u_world).xyz;\n"); if (hasNormal) - WRITE(p, " mediump vec3 worldnormal = normalize(mul(vec4(%snormal, 0.0), u_world).xyz);\n", flipNormal ? "-" : ""); + WRITE(p, " mediump vec3 worldnormal = normalizeOr001(mul(vec4(%snormal, 0.0), u_world).xyz);\n", flipNormal ? 
"-" : ""); else WRITE(p, " mediump vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n"); } @@ -847,7 +853,7 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag } else { WRITE(p, " mediump vec3 skinnednormal = mul(vec4(0.0, 0.0, %s1.0, 0.0), skinMatrix).xyz%s;\n", flipNormal ? "-" : "", factor); } - WRITE(p, " mediump vec3 worldnormal = normalize(mul(vec4(skinnednormal, 0.0), u_world).xyz);\n"); + WRITE(p, " mediump vec3 worldnormal = normalizeOr001(mul(vec4(skinnednormal, 0.0), u_world).xyz);\n"); } WRITE(p, " vec4 viewPos = vec4(mul(vec4(worldpos, 1.0), u_view).xyz, 1.0);\n"); @@ -1056,7 +1062,7 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag break; case GE_PROJMAP_NORMALIZED_NORMAL: // Use normalized transformed normal as source if (hasNormal) - temp_tc = flipNormal ? "vec4(normalize(-normal), 1.0)" : "vec4(normalize(normal), 1.0)"; + temp_tc = StringFromFormat("length(%snormal) == 0.0 ? vec4(0.0, 0.0, 1.0, 1.0) : vec4(normalize(%snormal), 1.0)", flipNormal ? "-" : "", flipNormal ? "-" : ""); else temp_tc = "vec4(0.0, 0.0, 1.0, 1.0)"; break;