Merge branch 'extra_decoders' of github.com:nicolashug/vision into extra_decoders
NicolasHug committed Nov 21, 2024
2 parents 097c68f + ef737c4 commit 937f2ab
Showing 9 changed files with 31 additions and 90 deletions.
4 changes: 4 additions & 0 deletions .github/scripts/cmake.sh
@@ -30,6 +30,10 @@ else
JOBS=$(nproc)
fi

+ if [[ $OS_TYPE == linux ]]; then
+ export LD_LIBRARY_PATH="${CONDA_PREFIX}/lib:${LD_LIBRARY_PATH}"
+ fi

TORCH_PATH=$(python -c "import pathlib, torch; print(pathlib.Path(torch.__path__[0]))")
if [[ $OS_TYPE == windows ]]; then
PACKAGING_DIR="${PWD}/packaging"
3 changes: 1 addition & 2 deletions .github/workflows/build-cmake.yml
@@ -20,7 +20,7 @@ jobs:
gpu-arch-type: cuda
gpu-arch-version: "11.8"
fail-fast: false
- uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+ uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
repository: pytorch/vision
runner: ${{ matrix.runner }}
@@ -33,7 +33,6 @@ jobs:
export PYTHON_VERSION=3.9
export GPU_ARCH_TYPE=${{ matrix.gpu-arch-type }}
export GPU_ARCH_VERSION=${{ matrix.gpu-arch-version }}
./.github/scripts/cmake.sh
macos:
6 changes: 3 additions & 3 deletions .github/workflows/docs.yml
@@ -14,7 +14,7 @@ on:

jobs:
build:
- uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+ uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
repository: pytorch/vision
upload-artifact: docs
@@ -77,11 +77,11 @@ jobs:
upload:
needs: build
if: github.repository == 'pytorch/vision' && github.event_name == 'push' &&
((github.ref_type == 'branch' && github.ref_name == 'main') || github.ref_type == 'tag')
permissions:
contents: write
- uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+ uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
repository: pytorch/vision
download-artifact: docs
6 changes: 3 additions & 3 deletions .github/workflows/lint.yml
@@ -11,7 +11,7 @@ on:

jobs:
python-source-and-configs:
- uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+ uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
repository: pytorch/vision
test-infra-ref: main
@@ -38,7 +38,7 @@ jobs:
fi
c-source:
- uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+ uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
repository: pytorch/vision
test-infra-ref: main
@@ -65,7 +65,7 @@
python-types:
- uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+ uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
repository: pytorch/vision
test-infra-ref: main
4 changes: 2 additions & 2 deletions .github/workflows/prototype-tests-linux-gpu.yml
@@ -23,7 +23,7 @@ jobs:
gpu-arch-type: cuda
gpu-arch-version: "11.8"
fail-fast: false
- uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+ uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
repository: pytorch/vision
runner: ${{ matrix.runner }}
@@ -37,7 +37,7 @@
export GPU_ARCH_TYPE=${{ matrix.gpu-arch-type }}
export GPU_ARCH_VERSION=${{ matrix.gpu-arch-version }}
./.github/scripts/setup-env.sh
# Prepare conda
CONDA_PATH=$(which conda)
eval "$(${CONDA_PATH} shell.bash hook)"
6 changes: 3 additions & 3 deletions .github/workflows/tests.yml
@@ -26,7 +26,7 @@ jobs:
gpu-arch-type: cuda
gpu-arch-version: "11.8"
fail-fast: false
- uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+ uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
repository: pytorch/vision
runner: ${{ matrix.runner }}
@@ -104,7 +104,7 @@ jobs:
# ./.github/scripts/unittest.sh

# onnx:
- # uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+ # uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
# with:
# repository: pytorch/vision
# test-infra-ref: main
@@ -135,7 +135,7 @@ jobs:
# echo '::endgroup::'

# unittests-extended:
- # uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+ # uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
# if: contains(github.event.pull_request.labels.*.name, 'run-extended')
# with:
# repository: pytorch/vision
4 changes: 1 addition & 3 deletions torchvision/csrc/io/image/cpu/decode_webp.cpp
@@ -44,12 +44,10 @@ torch::Tensor decode_webp(

auto decoded_data =
decoding_func(encoded_data_p, encoded_data_size, &width, &height);

TORCH_CHECK(decoded_data != nullptr, "WebPDecodeRGB[A] failed.");

- auto deleter = [decoded_data](void*) { WebPFree(decoded_data); };
auto out = torch::from_blob(
- decoded_data, {height, width, num_channels}, deleter, torch::kUInt8);
+ decoded_data, {height, width, num_channels}, torch::kUInt8);

return out.permute({2, 0, 1});
}
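Side note on the decode_webp hunk above: torch::from_blob wraps an existing buffer without copying, and only the overload that takes a deleter ties the buffer's lifetime to the tensor's storage. A minimal sketch of the difference, assuming libwebp's WebPDecodeRGB; decode_and_wrap is a hypothetical helper, not torchvision's decode_webp:

// Illustrative sketch only, not torchvision code.
#include <torch/torch.h>
#include <webp/decode.h>

torch::Tensor decode_and_wrap(const uint8_t* encoded, size_t encoded_size) {
  int width = 0, height = 0;
  uint8_t* decoded = WebPDecodeRGB(encoded, encoded_size, &width, &height);
  TORCH_CHECK(decoded != nullptr, "WebPDecodeRGB failed.");

  // With a deleter, the tensor's storage calls WebPFree once the last view of it
  // is released, so the decoded buffer cannot leak.
  auto deleter = [decoded](void*) { WebPFree(decoded); };
  auto out = torch::from_blob(decoded, {height, width, 3}, deleter, torch::kUInt8);

  // The overload without a deleter (the form the added line uses) still does not
  // copy, so the caller stays responsible for freeing `decoded`, e.g. by cloning
  // the tensor first and then calling WebPFree.
  return out.permute({2, 0, 1}); // permute returns a view; storage is shared
}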
87 changes: 14 additions & 73 deletions torchvision/csrc/ops/mps/mps_kernels.h
@@ -5,7 +5,7 @@ namespace ops {

namespace mps {

- static const char* METAL_VISION = R"VISION_METAL(
+ static at::native::mps::MetalShaderLibrary lib(R"VISION_METAL(
#include <metal_atomic>
#include <metal_stdlib>
@@ -26,46 +26,15 @@ inline T ceil_div(T n, T m) {
return (n + m - 1) / m;
}
- template <typename T>
- inline void atomic_add_float( device T* data_ptr, const T val)
+ inline void atomic_add_float(device float* data_ptr, const float val)
{
- #if __METAL_VERSION__ >= 300
- // atomic_float is supported in Metal 3 (macOS Ventura) onward.
- device atomic_fetch_add_explicit((device atomic_float*) data_ptr, val, memory_order_relaxed);
- #else
- // Custom atomic addition implementation
- // https://github.com/ShoYamanishi/AppleNumericalComputing/blob/053f06c1f5a831095c4bcc29aaf11366fce5231e/03_dot/metal/dot.metal#L447-L472
- // https://forums.developer.nvidia.com/t/atomicadd-float-float-atomicmul-float-float/14639
- // https://on-demand.gputechconf.com/gtc/2013/presentations/S3101-Atomic-Memory-Operations.pdf (See the last slide)
- // Create an atomic uint pointer for atomic transaction.
- device atomic_uint* atom_var = (device atomic_uint*)data_ptr;
- // Create necessary storage.
- uint fetched_uint, assigning_uint;
- T fetched_float, assigning_float;
- // Replace the value in atom_var with 0 and return the previous value in atom_var.
- fetched_uint = atomic_exchange_explicit( atom_var, 0 /*desired*/, memory_order_relaxed);
- // Read out the previous value as float.
- fetched_float = *( (thread T*) &fetched_uint );
- // Do addition and represent the addition result in uint for atomic transaction.
- assigning_float = fetched_float + val;
- assigning_uint = *((thread uint*) &assigning_float);
- // atom_var should be 0 now, try to assign the addition result back to the atom_var (data_ptr).
- while ((fetched_uint = atomic_exchange_explicit( atom_var, assigning_uint /*desired*/, memory_order_relaxed)) != 0) {
- // If atom_var was not 0, i.e. fetched_uint != 0, it means that the data has been modified by other threads.
- // Try to assign 0 and get the previously assigned addition result.
- uint fetched_uint_again = atomic_exchange_explicit(atom_var, 0 /*desired*/, memory_order_relaxed);
- T fetched_float_again = *( (thread T*) &fetched_uint_again );
- // Re-add again
- fetched_float = *((thread T*) &(fetched_uint));
- // Previously assigned addition result + addition result from other threads.
- assigning_float = fetched_float_again + fetched_float;
- assigning_uint = *( (thread uint*) &assigning_float);
- }
- #endif
+ atomic_fetch_add_explicit((device atomic_float*) data_ptr, val, memory_order_relaxed);
}
+ inline void atomic_add_float(device half* data_ptr, const half val)
+ {
+ atomic_fetch_add_explicit((device atomic_float*) data_ptr, static_cast<float>(val), memory_order_relaxed);
+ }
template <typename T, typename integer_t>
@@ -1061,40 +1030,12 @@ REGISTER_PS_ROI_POOL_OP(half, int64_t);
REGISTER_PS_ROI_POOL_BACKWARD_OP(float, int64_t);
REGISTER_PS_ROI_POOL_BACKWARD_OP(half, int64_t);
)VISION_METAL";

static id<MTLLibrary> compileVisionOpsLibrary(id<MTLDevice> device) {
static id<MTLLibrary> visionLibrary = nil;
if (visionLibrary) {
return visionLibrary;
}

NSError* error = nil;
MTLCompileOptions* options = [[MTLCompileOptions new] autorelease];
[options setLanguageVersion:MTLLanguageVersion2_3];
visionLibrary = [device newLibraryWithSource:[NSString stringWithCString:METAL_VISION encoding:NSASCIIStringEncoding]
options:options
error:&error];
TORCH_CHECK(visionLibrary, "Failed to create metal vision library, error: ", [[error description] UTF8String]);
return visionLibrary;
}

static id<MTLComputePipelineState> visionPipelineState(id<MTLDevice> device, const std::string& kernel) {
static std::unordered_map<std::string, id<MTLComputePipelineState>> psoCache;
id<MTLComputePipelineState> pso = psoCache[kernel];
if (pso) {
return pso;
}

NSError* error = nil;
id<MTLLibrary> visionLib = compileVisionOpsLibrary(device);
id<MTLFunction> visionFunc = [visionLib newFunctionWithName:[NSString stringWithUTF8String:kernel.c_str()]];
TORCH_CHECK(visionFunc, "Failed to create function state object for: ", kernel);
pso = [device newComputePipelineStateWithFunction:visionFunc error:&error];
TORCH_CHECK(pso, "Failed to created pipeline state object, error: ", [[error description] UTF8String]);
)VISION_METAL");

psoCache[kernel] = pso;
return pso;
static id<MTLComputePipelineState> visionPipelineState(
id<MTLDevice> device,
const std::string& kernel) {
return lib.getPipelineStateForFunc(kernel);
}

} // namespace mps
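The mps_kernels.h changes drop two pieces of hand-rolled plumbing: the pre-Metal-3 emulation of float atomics (replaced by native atomic_float and atomic_fetch_add_explicit) and the manual MTLLibrary compilation plus pipeline-state cache (replaced by at::native::mps::MetalShaderLibrary). A rough Objective-C++ sketch of the new library pattern; only the constructor and getPipelineStateForFunc calls mirror the diff, while the include path and the toy kernel are assumptions:

// Rough sketch for an Objective-C++ (.mm) translation unit; the include path and
// the fill_ones kernel are assumptions, not torchvision code.
#include <ATen/native/mps/OperationUtils.h>
#include <string>

static at::native::mps::MetalShaderLibrary demo_lib(R"DEMO_METAL(
#include <metal_stdlib>
using namespace metal;

kernel void fill_ones(device float* out [[buffer(0)]],
                      uint index [[thread_position_in_grid]]) {
  out[index] = 1.0f;
}
)DEMO_METAL");

static id<MTLComputePipelineState> demo_pipeline_state(const std::string& kernel) {
  // Source compilation and per-kernel pipeline-state caching happen inside
  // MetalShaderLibrary, which is what lets this commit delete the explicit
  // newLibraryWithSource / newComputePipelineStateWithFunction code and psoCache.
  return demo_lib.getPipelineStateForFunc(kernel);
}

visionPipelineState in the diff is exactly this kind of thin wrapper, with the real torchvision kernels in place of fill_ones.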
1 change: 0 additions & 1 deletion torchvision/csrc/ops/mps/ps_roi_pool_kernel.mm
@@ -123,7 +123,6 @@

float spatial_scale_f = static_cast<float>(spatial_scale);

- auto num_rois = rois.size(0);
auto grad_input = at::zeros({batch_size, channels, height, width}, grad.options());

if (grad.numel() == 0) {
