Skip to content

Commit

Permalink
Enable SSE4 support on Windows #2 (#1274)
Browse files Browse the repository at this point in the history
* Initial check in

Signed-off-by: Darby Johnston <[email protected]>

* Changes to use CMake options for SIMD support

Signed-off-by: Darby Johnston <[email protected]>

* Revert change

Signed-off-by: Darby Johnston <[email protected]>

* Add CpuId test

Signed-off-by: Darby Johnston <[email protected]>

* Add CI SIMD runs

Signed-off-by: Darby Johnston <[email protected]>

* Update CI run names

Signed-off-by: Darby Johnston <[email protected]>

* Fix CI labels

Signed-off-by: Darby Johnston <[email protected]>

* Revert changes in favor of runtime CPU detection

Signed-off-by: Darby Johnston <[email protected]>

Signed-off-by: Darby Johnston <[email protected]>
  • Loading branch information
darbyjohnston authored Oct 11, 2022
1 parent df4f7c9 commit 48166fc
Show file tree
Hide file tree
Showing 9 changed files with 114 additions and 22 deletions.
9 changes: 8 additions & 1 deletion src/lib/OpenEXR/ImfHeader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1500,9 +1500,16 @@ staticInitialize ()
V3dAttribute::registerAttributeType ();
V3fAttribute::registerAttributeType ();
V3iAttribute::registerAttributeType ();
DwaCompressor::initializeFuncs ();
IDManifestAttribute::registerAttributeType ();

//
// Register functions, for example specialized functions
// for different CPU architectures.
//

DwaCompressor::initializeFuncs ();
Zip::initializeFuncs ();

initialized = true;
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/lib/OpenEXR/ImfSimd.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@
//

// GCC and Visual Studio SSE2 compiler flags
#if defined __SSE2__ || (_MSC_VER >= 1300 && (_M_IX86 || _M_X64))
#if defined __SSE2__ || (_MSC_VER && (_M_IX86 || _M_X64))
# define IMF_HAVE_SSE2 1
#endif

#if defined __SSE4_1__
#if defined __SSE4_1__ || (_MSC_VER && (_M_IX86 || _M_X64))
# define IMF_HAVE_SSE4_1 1
#endif

Expand Down
17 changes: 17 additions & 0 deletions src/lib/OpenEXR/ImfSystemSpecific.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
#include "ImfSimd.h"
#include "OpenEXRConfig.h"
#include "OpenEXRConfigInternal.h"
#if defined(_MSC_VER)
#include <intrin.h>
#endif

OPENEXR_IMF_INTERNAL_NAMESPACE_SOURCE_ENTER

Expand All @@ -26,6 +29,20 @@ cpuid (int n, int& eax, int& ebx, int& ecx, int& edx)
: /* Clobber */);
}

#elif defined(_MSC_VER)

// Helper functions for MSVC
//
// Query CPUID leaf `n` and return the four result registers through
// eax, ebx, ecx and edx.
//
// The __cpuid intrinsic is only provided by MSVC for x86/x64 targets.
// For any other MSVC target (e.g. ARM64) all registers are reported as
// zero, so no x86 feature bit appears to be set. ARM64EC also defines
// _M_X64 and is excluded conservatively.
//
void
cpuid (int n, int& eax, int& ebx, int& ecx, int& edx)
{
#if (defined(_M_IX86) || defined(_M_X64)) && !defined(_M_ARM64EC)
    // __cpuid fills the array in the order EAX, EBX, ECX, EDX.
    int cpuInfo[4] = {0, 0, 0, 0};
    __cpuid (cpuInfo, n);
    eax = cpuInfo[0];
    ebx = cpuInfo[1];
    ecx = cpuInfo[2];
    edx = cpuInfo[3];
#else
    (void) n;
    eax = ebx = ecx = edx = 0;
#endif
}

#else // !(IMF_HAVE_SSE2 && __GNUC__ && !__e2k__) && !_MSC_VER

// Helper functions for generic compiler - all disabled
Expand Down
55 changes: 36 additions & 19 deletions src/lib/OpenEXR/ImfZip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "ImfCheckedArithmetic.h"
#include "ImfNamespace.h"
#include "ImfSimd.h"
#include "ImfSystemSpecific.h"

#include <math.h>
#include <zlib.h>
Expand Down Expand Up @@ -109,9 +110,12 @@ Zip::compress (const char* raw, int rawSize, char* compressed)
return outSize;
}

namespace
{

#ifdef IMF_HAVE_SSE4_1

static void
void
reconstruct_sse41 (char* buf, size_t outSize)
{
static const size_t bytesPerChunk = sizeof (__m128i);
Expand Down Expand Up @@ -154,9 +158,9 @@ reconstruct_sse41 (char* buf, size_t outSize)
}
}

#else
#endif

static void
void
reconstruct_scalar (char* buf, size_t outSize)
{
unsigned char* t = (unsigned char*) buf + 1;
Expand All @@ -170,11 +174,9 @@ reconstruct_scalar (char* buf, size_t outSize)
}
}

#endif

#ifdef IMF_HAVE_SSE2

static void
void
interleave_sse2 (const char* source, size_t outSize, char* out)
{
static const size_t bytesPerChunk = 2 * sizeof (__m128i);
Expand Down Expand Up @@ -208,9 +210,9 @@ interleave_sse2 (const char* source, size_t outSize, char* out)
}
}

#else
#endif

static void
void
interleave_scalar (const char* source, size_t outSize, char* out)
{
const char* t1 = source;
Expand All @@ -232,7 +234,10 @@ interleave_scalar (const char* source, size_t outSize, char* out)
}
}

#endif
//
// Function pointers called by Zip::uncompress(). They start out
// pointing at the scalar implementations; Zip::initializeFuncs()
// replaces them with the SIMD versions when those were compiled in
// and the CPU reports support for them.
//
auto reconstruct = reconstruct_scalar;
auto interleave = interleave_scalar;

} // namespace

int
Zip::uncompress (const char* compressed, int compressedSize, char* raw)
Expand All @@ -258,22 +263,34 @@ Zip::uncompress (const char* compressed, int compressedSize, char* raw)
//
// Predictor.
//
#ifdef IMF_HAVE_SSE4_1
reconstruct_sse41 (_tmpBuffer, outSize);
#else
reconstruct_scalar (_tmpBuffer, outSize);
#endif
reconstruct (_tmpBuffer, outSize);

//
// Reorder the pixel data.
//
#ifdef IMF_HAVE_SSE2
interleave_sse2 (_tmpBuffer, outSize, raw);
#else
interleave_scalar (_tmpBuffer, outSize, raw);
#endif
interleave (_tmpBuffer, outSize, raw);

return outSize;
}

//
// Pick the interleave/reconstruct implementations according to the
// features reported by CpuId. A SIMD variant is installed only when
// it was compiled in (IMF_HAVE_*) and the matching CPU flag is set;
// otherwise the current function pointer is left untouched.
//
void
Zip::initializeFuncs ()
{
    CpuId cpu;

#ifdef IMF_HAVE_SSE2
    if (cpu.sse2) interleave = interleave_sse2;
#endif

#ifdef IMF_HAVE_SSE4_1
    if (cpu.sse4_1) reconstruct = reconstruct_sse41;
#endif
}

OPENEXR_IMF_INTERNAL_NAMESPACE_SOURCE_EXIT
2 changes: 2 additions & 0 deletions src/lib/OpenEXR/ImfZip.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ class Zip
//
int uncompress (const char* compressed, int compressedSize, char* raw);

//
// Select the implementations used by uncompress(), switching to the
// SIMD versions when they were compiled in and the CPU supports them.
//
static void initializeFuncs ();

private:
size_t _maxRawSize;
char* _tmpBuffer;
Expand Down
2 changes: 2 additions & 0 deletions src/test/OpenEXRTest/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ add_executable(OpenEXRTest
testCopyDeepTiled.cpp
testCopyMultiPartFile.cpp
testCopyPixels.cpp
testCpuId.cpp
testCustomAttributes.cpp
testDeepScanLineBasic.cpp
testDeepScanLineHuge.cpp
Expand Down Expand Up @@ -94,6 +95,7 @@ define_openexr_tests(
testCopyDeepTiled
testCopyMultiPartFile
testCopyPixels
testCpuId
testCustomAttributes
testDeepScanLineBasic
testDeepScanLineMultipleRead
Expand Down
2 changes: 2 additions & 0 deletions src/test/OpenEXRTest/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "testCopyDeepTiled.h"
#include "testCopyMultiPartFile.h"
#include "testCopyPixels.h"
#include "testCpuId.h"
#include "testCustomAttributes.h"
#include "testDeepScanLineBasic.h"
#include "testDeepScanLineHuge.h"
Expand Down Expand Up @@ -230,6 +231,7 @@ main (int argc, char* argv[])
TEST (testB44ExpLogTable, "core");
TEST (testDwaLookups, "core");
TEST (testIDManifest, "core");
TEST (testCpuId, "core");

// NB: If you add a test here, make sure to enumerate it in the
// CMakeLists.txt so it runs as part of the test suite
Expand Down
39 changes: 39 additions & 0 deletions src/test/OpenEXRTest/testCpuId.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
//
// SPDX-License-Identifier: BSD-3-Clause
// Copyright (c) Contributors to the OpenEXR Project.
//

#include <ImfSimd.h>
#include <ImfSystemSpecific.h>
#include <iostream>

using namespace std;

void
testCpuId (const string&)
{
#if defined(IMF_HAVE_SSE2)
std::cout << "IMF_HAVE_SSE2: " << true << "\n";
#else
std::cout << "IMF_HAVE_SSE2: " << false << "\n";
#endif
#if defined(IMF_HAVE_SSE4_1)
std::cout << "IMF_HAVE_SSE4_1: " << true << "\n";
#else
std::cout << "IMF_HAVE_SSE4_1: " << false << "\n";
#endif
#if defined(IMF_HAVE_AVX)
std::cout << "IMF_HAVE_AVX: " << true << "\n";
#else
std::cout << "IMF_HAVE_AVX: " << false << "\n";
#endif

Imf::CpuId cpuId;
std::cout << "cpuId.sse2: " << cpuId.sse2 << "\n";
std::cout << "cpuId.sse3: " << cpuId.sse3 << "\n";
std::cout << "cpuId.ssse3: " << cpuId.ssse3 << "\n";
std::cout << "cpuId.sse4_1: " << cpuId.sse4_1 << "\n";
std::cout << "cpuId.sse4_2: " << cpuId.sse4_2 << "\n";
std::cout << "cpuId.avx: " << cpuId.avx << "\n";
std::cout << "cpuId.f16c: " << cpuId.f16c << std::endl;
}
6 changes: 6 additions & 0 deletions src/test/OpenEXRTest/testCpuId.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
//
// SPDX-License-Identifier: BSD-3-Clause
// Copyright (c) Contributors to the OpenEXR Project.
//

#include <string>

//
// Print the compile-time SIMD macros and the CPU features detected
// at runtime. The string argument is ignored.
//
void testCpuId (const std::string&);

0 comments on commit 48166fc

Please sign in to comment.