Skip to content

Commit

Permalink
Merge pull request #13353 from unknownbrackets/texcache
Browse the repository at this point in the history
TexCache: Improve QuickTexHash a little, cleanup ARM64 defines
  • Loading branch information
hrydgard authored Aug 29, 2020
2 parents 5945642 + c5738ca commit e33c31e
Show file tree
Hide file tree
Showing 25 changed files with 42 additions and 54 deletions.
1 change: 0 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ if(CMAKE_SYSTEM_PROCESSOR)
endif()
elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "^aarch64")
set(ARM64 ON)
add_definitions(-DARM64)
elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "^amd64" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "^x86_64" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "^AMD64")
set(X86_DEVICE ON)
set(X86_64_DEVICE ON)
Expand Down
5 changes: 3 additions & 2 deletions Core/Config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <set>
#include <sstream>

#include "ppsspp_config.h"
#include "base/display.h"
#include "base/NativeApp.h"
#include "file/ini_file.h"
Expand Down Expand Up @@ -393,15 +394,15 @@ static int DefaultNumWorkers() {
}

static int DefaultCpuCore() {
#if defined(ARM) || defined(ARM64) || defined(_M_IX86) || defined(_M_X64)
#if PPSSPP_ARCH(ARM) || PPSSPP_ARCH(ARM64) || PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
return (int)CPUCore::JIT;
#else
return (int)CPUCore::INTERPRETER;
#endif
}

static bool DefaultCodeGen() {
#if defined(ARM) || defined(ARM64) || defined(_M_IX86) || defined(_M_X64)
#if PPSSPP_ARCH(ARM) || PPSSPP_ARCH(ARM64) || PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
return true;
#else
return false;
Expand Down
2 changes: 1 addition & 1 deletion Core/MIPS/ARM64/Arm64Jit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,7 @@ void Arm64Jit::UnlinkBlock(u8 *checkedEntry, u32 originalAddress) {
}

bool Arm64Jit::ReplaceJalTo(u32 dest) {
#ifdef ARM64
#if PPSSPP_ARCH(ARM64)
const ReplacementTableEntry *entry = nullptr;
u32 funcSize = 0;
if (!CanReplaceJalTo(dest, &entry, &funcSize)) {
Expand Down
12 changes: 6 additions & 6 deletions Core/MIPS/JitCommon/JitBlockCache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -601,12 +601,12 @@ void JitBlockCache::InvalidateChangedBlocks() {
}

int JitBlockCache::GetBlockExitSize() {
#if defined(ARM)
#if PPSSPP_ARCH(ARM)
// Will depend on the sequence found to encode the destination address.
return 0;
#elif defined(_M_IX86) || defined(_M_X64)
#elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
return 15;
#elif defined(ARM64)
#elif PPSSPP_ARCH(ARM64)
// Will depend on the sequence found to encode the destination address.
return 0;
#else
Expand Down Expand Up @@ -654,11 +654,11 @@ JitBlockDebugInfo JitBlockCache::GetBlockDebugInfo(int blockNum) const {
debugInfo.origDisasm.push_back(mipsDis);
}

#if defined(ARM)
#if PPSSPP_ARCH(ARM)
debugInfo.targetDisasm = DisassembleArm2(block->normalEntry, block->codeSize);
#elif defined(ARM64)
#elif PPSSPP_ARCH(ARM64)
debugInfo.targetDisasm = DisassembleArm64(block->normalEntry, block->codeSize);
#elif defined(_M_IX86) || defined(_M_X64)
#elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
debugInfo.targetDisasm = DisassembleX86(block->normalEntry, block->codeSize);
#endif

Expand Down
3 changes: 2 additions & 1 deletion Core/MIPS/JitCommon/JitBlockCache.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,13 @@
#include <vector>
#include <string>

#include "ppsspp_config.h"
#include "Common/CommonTypes.h"
#include "Common/CodeBlock.h"
#include "Core/MIPS/MIPSAnalyst.h"
#include "Core/MIPS/MIPS.h"

#if defined(ARM) || defined(ARM64)
#if PPSSPP_ARCH(ARM) || PPSSPP_ARCH(ARM64)
const int MAX_JIT_BLOCK_EXITS = 2;
#else
const int MAX_JIT_BLOCK_EXITS = 8;
Expand Down
3 changes: 2 additions & 1 deletion GPU/Common/TextureDecoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ enum CheckAlphaResult {
CHECKALPHA_ANY = 4,
};

#include "ppsspp_config.h"
#include "Common/Common.h"
#include "Common/Swap.h"
#include "Core/MemMap.h"
Expand All @@ -46,7 +47,7 @@ void DoUnswizzleTex16Basic(const u8 *texptr, u32 *ydestp, int bxc, int byc, u32
#define DoUnswizzleTex16 DoUnswizzleTex16Basic

// For ARM64, NEON is mandatory, so we also statically link.
#elif PPSSPP_ARCH(ARM64) || defined(ARM64)
#elif PPSSPP_ARCH(ARM64)
#define DoQuickTexHash QuickTexHashNEON
#define StableQuickTexHash QuickTexHashNEON
#define DoUnswizzleTex16 DoUnswizzleTex16NEON
Expand Down
9 changes: 6 additions & 3 deletions GPU/Common/TextureDecoderNEON.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,14 @@ u32 QuickTexHashNEON(const void *checkp, u32 size) {
__builtin_prefetch(checkp, 0, 0);

if (((intptr_t)checkp & 0xf) == 0 && (size & 0x3f) == 0) {
#if defined(IOS) || PPSSPP_ARCH(ARM64) || defined(_MSC_VER)
#if defined(IOS) || PPSSPP_ARCH(ARM64) || defined(_MSC_VER) || !PPSSPP_ARCH(ARMV7)
uint32x4_t cursor = vdupq_n_u32(0);
uint16x8_t cursor2 = vld1q_u16(QuickTexHashInitial);
uint16x8_t update = vdupq_n_u16(0x2455U);

const u32 *p = (const u32 *)checkp;
for (u32 i = 0; i < size / 16; i += 4) {
const u32 *pend = p + size / 4;
while (p < pend) {
cursor = vreinterpretq_u32_u16(vmlaq_u16(vreinterpretq_u16_u32(cursor), vreinterpretq_u16_u32(vld1q_u32(&p[4 * 0])), cursor2));
cursor = veorq_u32(cursor, vld1q_u32(&p[4 * 1]));
cursor = vaddq_u32(cursor, vld1q_u32(&p[4 * 2]));
Expand All @@ -58,10 +59,12 @@ u32 QuickTexHashNEON(const void *checkp, u32 size) {
}

cursor = vaddq_u32(cursor, vreinterpretq_u32_u16(cursor2));
check = vgetq_lane_u32(cursor, 0) + vgetq_lane_u32(cursor, 1) + vgetq_lane_u32(cursor, 2) + vgetq_lane_u32(cursor, 3);
uint32x2_t mixed = vadd_u32(vget_high_u32(cursor), vget_low_u32(cursor));
check = vget_lane_u32(mixed, 0) + vget_lane_u32(mixed, 1);
#else
// TODO: Why does this crash on iOS, but only certain devices?
// It's faster than the above, but I guess it sucks to be using an iPhone.
// As of 2020 clang, it's still faster by ~1.4%.

// d0/d1 (q0) - cursor
// d2/d3 (q1) - cursor2
Expand Down
5 changes: 0 additions & 5 deletions GPU/Common/VertexDecoderArm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,6 @@
#include "ppsspp_config.h"
#if PPSSPP_ARCH(ARM64)

// This allows highlighting to work. Yay.
#ifdef __INTELLISENSE__
#define ARM64
#endif

#include "Common/CPUDetect.h"
#include "Common/Log.h"
#include "Core/Config.h"
Expand Down
7 changes: 4 additions & 3 deletions GPU/Common/VertexDecoderCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <algorithm>
#include <cstdio>

#include "ppsspp_config.h"
#include "base/basictypes.h"

#include "Common/Log.h"
Expand Down Expand Up @@ -1345,11 +1346,11 @@ std::string VertexDecoder::GetString(DebugShaderStringType stringType) {
if (!jitted_)
return "Not compiled";
std::vector<std::string> lines;
#if defined(ARM64)
#if PPSSPP_ARCH(ARM64)
lines = DisassembleArm64((const u8 *)jitted_, jittedSize_);
#elif defined(ARM)
#elif PPSSPP_ARCH(ARM)
lines = DisassembleArm2((const u8 *)jitted_, jittedSize_);
#elif defined(MIPS)
#elif PPSSPP_ARCH(MIPS) || PPSSPP_ARCH(MIPS64)
// No MIPS disassembler defined
#else
lines = DisassembleX86((const u8 *)jitted_, jittedSize_);
Expand Down
3 changes: 2 additions & 1 deletion UI/DevScreens.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#include <algorithm>

#include "ppsspp_config.h"
#include "base/display.h"
#include "base/stringutil.h"
#include "gfx_es2/gpu_features.h"
Expand Down Expand Up @@ -458,7 +459,7 @@ void SystemInfoScreen::CreateViews() {

deviceSpecs->Add(new ItemHeader(si->T("CPU Information")));
deviceSpecs->Add(new InfoItem(si->T("CPU Name", "Name"), cpu_info.brand_string));
#if defined(ARM) || defined(ARM64) || defined(MIPS)
#if PPSSPP_ARCH(ARM) || PPSSPP_ARCH(ARM64) || PPSSPP_ARCH(MIPS) || PPSSPP_ARCH(MIPS64)
deviceSpecs->Add(new InfoItem(si->T("Cores"), StringFromInt(cpu_info.num_cores)));
#else
int totalThreads = cpu_info.num_cores * cpu_info.logical_cpu_count;
Expand Down
8 changes: 4 additions & 4 deletions UI/NativeApp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,9 +125,9 @@ static UI::Theme ui_theme;

Atlas g_ui_atlas;

#if defined(ARM) && defined(__ANDROID__)
#if PPSSPP_ARCH(ARM) && defined(__ANDROID__)
#include "../../android/jni/ArmEmitterTest.h"
#elif defined(ARM64) && defined(__ANDROID__)
#elif PPSSPP_ARCH(ARM64) && defined(__ANDROID__)
#include "../../android/jni/Arm64EmitterTest.h"
#endif

Expand Down Expand Up @@ -291,9 +291,9 @@ void NativeGetAppInfo(std::string *app_dir_name, std::string *app_nice_name, boo
*landscape = true;
*version = PPSSPP_GIT_VERSION;

#if defined(ARM) && defined(__ANDROID__)
#if PPSSPP_ARCH(ARM) && defined(__ANDROID__)
ArmEmitterTest();
#elif defined(ARM64) && defined(__ANDROID__)
#elif PPSSPP_ARCH(ARM64) && defined(__ANDROID__)
Arm64EmitterTest();
#endif
}
Expand Down
2 changes: 1 addition & 1 deletion Windows/GPU/D3D11Context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ HRESULT D3D11Context::CreateTheDevice(IDXGIAdapter *adapter) {
// D3D11 has no need for display rotation.
g_display_rotation = DisplayRotation::ROTATE_0;
g_display_rot_matrix.setIdentity();
#if defined(_DEBUG) && !defined(_M_ARM) && !defined(_M_ARM64)
#if defined(_DEBUG) && !PPSSPP_ARCH(ARM) && !PPSSPP_ARCH(ARM64)
UINT createDeviceFlags = D3D11_CREATE_DEVICE_DEBUG;
#else
UINT createDeviceFlags = 0;
Expand Down
4 changes: 1 addition & 3 deletions android/jni/Locals.mk
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ ifeq ($(TARGET_ARCH_ABI),x86_64)
LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/x86_64/lib/libavutil.a
LOCAL_C_INCLUDES += $(LOCAL_PATH)/../../ffmpeg/android/x86_64/include

LOCAL_CFLAGS := $(LOCAL_CFLAGS) -D_ARCH_64 -D_M_X64 -fomit-frame-pointer -mtune=atom -mfpmath=sse -mssse3 -mstackrealign
LOCAL_CFLAGS := $(LOCAL_CFLAGS) -D_M_X64 -fomit-frame-pointer -mtune=atom -mfpmath=sse -mssse3 -mstackrealign
endif

ifeq ($(TARGET_ARCH_ABI),arm64-v8a)
Expand All @@ -80,8 +80,6 @@ ifeq ($(TARGET_ARCH_ABI),arm64-v8a)
LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libswscale.a
LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libavutil.a
LOCAL_C_INCLUDES += $(LOCAL_PATH)/../../ffmpeg/android/arm64/include

LOCAL_CFLAGS := $(LOCAL_CFLAGS) -D_ARCH_64 -DARM64
endif

# Compile with profiling.
Expand Down
1 change: 0 additions & 1 deletion cmake/Toolchains/beagleboard.cmake
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
set(ARMV7 ON)
set(USING_FBDEV ON)

set(ARCH_FLAGS "-march=armv7-a -mfpu=neon -mcpu=cortex-a8")
Expand Down
2 changes: 0 additions & 2 deletions cmake/Toolchains/generic.armv7.cmake
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
set(ARMV7 ON)

set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv7-a -mfpu=neon -mcpu=cortex-a9")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv7-a -mfpu=neon -mcpu=cortex-a9")
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -march=armv7-a -mfpu=neon -mcpu=cortex-a9")
1 change: 0 additions & 1 deletion cmake/Toolchains/raspberry.armv7.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -38,5 +38,4 @@ if(NOT PPSSPP_PI_MODEL4)
set(USING_FBDEV ON)
endif()
set(USING_GLES2 ON)
set(ARMV7 ON)
set(USE_WAYLAND_WSI OFF)
1 change: 0 additions & 1 deletion cmake/Toolchains/raspberry.armv8.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -38,5 +38,4 @@ if(NOT PPSSPP_PI_MODEL4)
set(USING_FBDEV ON)
endif()
set(USING_GLES2 ON)
set(ARMV7 ON)
set(USE_WAYLAND_WSI OFF)
1 change: 0 additions & 1 deletion cmake/Toolchains/vero4k.armv8.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,5 @@ set(EGL_LIBRARIES /opt/vero3/lib/libEGL.so)
set(USING_GLES2 ON)
set(USING_EGL ON)
set(USING_FBDEV ON)
set(ARMV7 ON)
set(FORCED_CPU armv7)
set(USING_X11_VULKAN OFF CACHE BOOL "" FORCE)
2 changes: 0 additions & 2 deletions ext/glslang-build/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,6 @@ ifeq ($(findstring armeabi-v7a,$(TARGET_ARCH_ABI)),armeabi-v7a)
LOCAL_CFLAGS := $(LOCAL_CFLAGS) -DARM -DARMEABI_V7A
else ifeq ($(TARGET_ARCH_ABI),armeabi)
LOCAL_CFLAGS := $(LOCAL_CFLAGS) -DARM -DARMEABI -march=armv6
else ifeq ($(TARGET_ARCH_ABI),arm64-v8a)
LOCAL_CFLAGS := $(LOCAL_CFLAGS) -D_ARCH_64 -DARM64
else ifeq ($(TARGET_ARCH_ABI),x86)
LOCAL_CFLAGS := $(LOCAL_CFLAGS) -D_M_IX86
else ifeq ($(TARGET_ARCH_ABI),x86_64)
Expand Down
2 changes: 0 additions & 2 deletions ext/miniupnp-build/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@ ifeq ($(findstring armeabi-v7a,$(TARGET_ARCH_ABI)),armeabi-v7a)
LOCAL_CFLAGS := $(LOCAL_CFLAGS) -DARM -DARMEABI_V7A
else ifeq ($(TARGET_ARCH_ABI),armeabi)
LOCAL_CFLAGS := $(LOCAL_CFLAGS) -DARM -DARMEABI -march=armv6
else ifeq ($(TARGET_ARCH_ABI),arm64-v8a)
LOCAL_CFLAGS := $(LOCAL_CFLAGS) -D_ARCH_64 -DARM64
else ifeq ($(TARGET_ARCH_ABI),x86)
LOCAL_CFLAGS := $(LOCAL_CFLAGS) -D_M_IX86
else ifeq ($(TARGET_ARCH_ABI),x86_64)
Expand Down
2 changes: 0 additions & 2 deletions ext/native/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,6 @@ LOCAL_SRC_FILES := $(LOCAL_SRC_FILES) \

else ifeq ($(TARGET_ARCH_ABI),armeabi)
LOCAL_CFLAGS := $(LOCAL_CFLAGS) -DARM -DARMEABI -march=armv6
else ifeq ($(TARGET_ARCH_ABI),arm64-v8a)
LOCAL_CFLAGS := $(LOCAL_CFLAGS) -D_ARCH_64 -DARM64
else ifeq ($(TARGET_ARCH_ABI),x86)
LOCAL_CFLAGS := $(LOCAL_CFLAGS) -D_M_IX86
LOCAL_SRC_FILES := $(LOCAL_SRC_FILES) \
Expand Down
4 changes: 3 additions & 1 deletion ext/native/gfx/gl_common.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#pragma once

#include "ppsspp_config.h"

#ifdef IOS
#include <OpenGLES/ES3/gl.h>
#include <OpenGLES/ES3/glext.h>
Expand Down Expand Up @@ -51,7 +53,7 @@ extern PFNGLMAPBUFFERPROC glMapBuffer;

typedef void (EGLAPIENTRYP PFNGLDRAWTEXTURENVPROC) (GLuint texture, GLuint sampler, GLfloat x0, GLfloat y0, GLfloat x1, GLfloat y1, GLfloat z, GLfloat s0, GLfloat t0, GLfloat s1, GLfloat t1);
extern PFNGLDRAWTEXTURENVPROC glDrawTextureNV;
#ifndef ARM64
#if !PPSSPP_ARCH(ARM64)
typedef void (EGLAPIENTRYP PFNGLBLITFRAMEBUFFERNVPROC) (
GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
Expand Down
4 changes: 0 additions & 4 deletions ppsspp_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,6 @@
#define PPSSPP_ARCH_ARM64 1
#define PPSSPP_ARCH_64BIT 1
#define PPSSPP_ARCH_ARM_NEON 1
//TODO: Remove this compat define
#ifndef ARM64
#define ARM64 1
#endif
#endif

#if defined(__mips64__)
Expand Down
5 changes: 3 additions & 2 deletions unittest/JitHarness.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#include <algorithm>

#include "ppsspp_config.h"
#include "base/NativeApp.h"
#include "Common/TimeUtil.h"
#include "Core/ConfigValues.h"
Expand Down Expand Up @@ -172,9 +173,9 @@ bool TestJit() {
// Disassemble
JitBlockCache *cache = MIPSComp::jit->GetBlockCache();
JitBlock *block = cache->GetBlock(0); // Should only be one block.
#if defined(ARM)
#if PPSSPP_ARCH(ARM)
std::vector<std::string> lines = DisassembleArm2(block->normalEntry, block->codeSize);
#elif defined(ARM64)
#elif PPSSPP_ARCH(ARM64)
std::vector<std::string> lines = DisassembleArm64(block->normalEntry, block->codeSize);
#else
std::vector<std::string> lines = DisassembleX86(block->normalEntry, block->codeSize);
Expand Down
7 changes: 4 additions & 3 deletions unittest/UnitTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include <jni.h>
#endif

#include "ppsspp_config.h"
#include "base/NativeApp.h"
#include "input/input_state.h"
#include "ext/disarm.h"
Expand Down Expand Up @@ -559,13 +560,13 @@ bool TestArm64Emitter();
bool TestX64Emitter();

TestItem availableTests[] = {
#if defined(ARM64) || defined(_M_X64) || defined(_M_IX86)
#if PPSSPP_ARCH(ARM64) || PPSSPP_ARCH(AMD64) || PPSSPP_ARCH(X86)
TEST_ITEM(Arm64Emitter),
#endif
#if defined(ARM) || defined(_M_X64) || defined(_M_IX86)
#if PPSSPP_ARCH(ARM) || PPSSPP_ARCH(AMD64) || PPSSPP_ARCH(X86)
TEST_ITEM(ArmEmitter),
#endif
#if defined(_M_X64) || defined(_M_IX86)
#if PPSSPP_ARCH(AMD64) || PPSSPP_ARCH(X86)
TEST_ITEM(X64Emitter),
#endif
TEST_ITEM(VertexJit),
Expand Down

0 comments on commit e33c31e

Please sign in to comment.