diff --git a/dali/benchmark/displacement_cpu_bench.cc b/dali/benchmark/displacement_cpu_bench.cc index 4be5e3b6393..9185f63a50d 100644 --- a/dali/benchmark/displacement_cpu_bench.cc +++ b/dali/benchmark/displacement_cpu_bench.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -88,7 +88,7 @@ void DisplacementBench(benchmark::State& st) {//NOLINT // tensor out is resized by operator itself in DisplacementFilter::DataDependentSetup() // TODO(klecki) Accomodate to use different inputs from test data - auto *ptr = (*tensor_in)[0].template mutable_data(); + auto *ptr = (*tensor_in).template mutable_tensor(0); for (int i = 0; i < N; i++) { ptr[i] = i; } diff --git a/dali/benchmark/operator_bench.h b/dali/benchmark/operator_bench.h index 47e819ad94c..c69303bc5da 100644 --- a/dali/benchmark/operator_bench.h +++ b/dali/benchmark/operator_bench.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -54,16 +54,13 @@ class OperatorBench : public DALIBenchmark { auto op_ptr = InstantiateOperator(op_spec); auto data_in = std::make_shared>(batch_size); - for (auto &in_ptr : *data_in) { - in_ptr = std::make_shared>(); - in_ptr->set_type(); - in_ptr->Resize({H, W, C}); - in_ptr->SetLayout("HWC"); - } + data_in->set_type(); + data_in->Resize(uniform_list_shape(batch_size, TensorShape<>{H, W, C})); + data_in->SetLayout("HWC"); if (fill_in_data) { - for (auto &in_ptr : *data_in) { - auto *ptr = in_ptr->template mutable_data(); + for (int sample_id = 0; sample_id < batch_size; sample_id++) { + auto *ptr = data_in->template mutable_tensor(sample_id); for (int i = 0; i < N; i++) { ptr[i] = static_cast(i); } diff --git a/dali/c_api/c_api.cc b/dali/c_api/c_api.cc index 12acc45626f..6e571e5cdf6 100644 --- a/dali/c_api/c_api.cc +++ b/dali/c_api/c_api.cc @@ -143,11 +143,14 @@ void SetExternalInputTensors(daliPipelineHandle *pipe_handle, const char *name, // We cast away the const from data_ptr, as there is no other way of passing it to the // Tensor as we must also set the shape and type metadata. // The vector that we pass to pipeline is const. 
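// The hunks above replace per-sample Tensor setup and the (*batch)[i].mutable_data<T>()
// accessor with batch-level TensorVector calls. A minimal sketch of the new pattern,
// using the API names introduced in this patch (uniform_list_shape, mutable_tensor);
// batch_size, H, W, C and the float element type are illustrative only:
auto data_in = std::make_shared<TensorVector<CPUBackend>>(batch_size);
data_in->set_type<float>();
data_in->Resize(uniform_list_shape(batch_size, TensorShape<>{H, W, C}));
data_in->SetLayout("HWC");
for (int sample_id = 0; sample_id < batch_size; sample_id++) {
  float *ptr = data_in->mutable_tensor<float>(sample_id);  // typed pointer to one sample
  for (int i = 0; i < H * W * C; i++)
    ptr[i] = static_cast<float>(i);
}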
- data[i].set_pinned(flags & DALI_ext_pinned); - data[i].set_order(order); - data[i].ShareData(const_cast(data_ptr[i]), tl_shape[i].num_elements() * elem_sizeof); - data[i].Resize(tl_shape[i], type_id); - data[i].SetLayout(layout); + dali::Tensor tmp; + tmp.set_order(order); + std::shared_ptr ptr(const_cast(data_ptr[i]), [](void *){}); // no deleter + tmp.set_backing_allocation(ptr, + tl_shape[i].num_elements() * elem_sizeof, flags & DALI_ext_pinned, + type_id, tl_shape[i].num_elements()); + tmp.Resize(tl_shape[i], type_id); + data.UnsafeSetSample(i, tmp); } pipeline->SetExternalInput(name, data, order, flags & DALI_ext_force_sync, @@ -429,7 +432,7 @@ size_t daliNumElements(daliPipelineHandle* pipe_handle, int n) { template static size_t daliTensorSizeHelper(dali::DeviceWorkspace* ws, int n) { - return ws->Output(n).nbytes(); + return ws->Output(n).total_nbytes(); } size_t daliTensorSize(daliPipelineHandle* pipe_handle, int n) { diff --git a/dali/operators/audio/nonsilence_op.h b/dali/operators/audio/nonsilence_op.h index 45b9c689c6d..cfe0a01ac9a 100644 --- a/dali/operators/audio/nonsilence_op.h +++ b/dali/operators/audio/nonsilence_op.h @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -228,8 +228,8 @@ class NonsilenceOperatorCpu : public NonsilenceOperator { args.reset_interval = reset_interval_; auto res = DetectNonsilenceRegion(intermediate_buffers_[thread_id], args); - auto beg_ptr = output_begin[sample_id].mutable_data(); - auto len_ptr = output_length[sample_id].mutable_data(); + auto beg_ptr = output_begin.mutable_tensor(sample_id); + auto len_ptr = output_length.mutable_tensor(sample_id); *beg_ptr = res.first; *len_ptr = res.second; }, in_shape.tensor_size(sample_id)); diff --git a/dali/operators/audio/preemphasis_filter_op.cc b/dali/operators/audio/preemphasis_filter_op.cc index fc319a4b1bc..106780a4cda 100644 --- a/dali/operators/audio/preemphasis_filter_op.cc +++ b/dali/operators/audio/preemphasis_filter_op.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
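// A condensed view of the external-input path rewritten above: the caller's buffer is
// wrapped in a non-owning shared_ptr (no-op deleter), attached to a temporary Tensor via
// set_backing_allocation, and placed into the batch with UnsafeSetSample. Sketch only;
// the argument order of set_backing_allocation follows the call above:
dali::Tensor<CPUBackend> tmp;
tmp.set_order(order);
std::shared_ptr<void> ptr(const_cast<void *>(data_ptr[i]), [](void *) {});  // never frees
tmp.set_backing_allocation(ptr,
                           tl_shape[i].num_elements() * elem_sizeof,   // size in bytes
                           flags & DALI_ext_pinned,                    // pinned flag
                           type_id,
                           tl_shape[i].num_elements());                // size in elements
tmp.Resize(tl_shape[i], type_id);
data.UnsafeSetSample(i, tmp);  // the batch now references tmp's (borrowed) allocation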
@@ -65,11 +65,11 @@ void PreemphasisFilterCPU::RunImplTyped(workspace_t &ws) { for (int sample_id = 0; sample_id < nsamples; sample_id++) { tp.AddWork( [this, &output, &input, sample_id](int thread_id) { - const auto in_ptr = input[sample_id].data(); - auto out_ptr = output[sample_id].mutable_data(); - DALI_ENFORCE(input[sample_id].shape() == output[sample_id].shape(), + const auto in_ptr = input.tensor(sample_id); + auto out_ptr = output.mutable_tensor(sample_id); + DALI_ENFORCE(input.tensor_shape(sample_id) == output.tensor_shape(sample_id), "Input and output shapes don't match"); - auto n = volume(output[sample_id].shape()); + auto n = volume(output.tensor_shape(sample_id)); auto coeff = preemph_coeff_[sample_id]; if (coeff == 0.0f) { for (int64_t j = 0; j < n; j++) { diff --git a/dali/operators/decoder/audio/audio_decoder_op.cc b/dali/operators/decoder/audio/audio_decoder_op.cc index 1e07d6fedff..6cc1759d9df 100644 --- a/dali/operators/decoder/audio/audio_decoder_op.cc +++ b/dali/operators/decoder/audio/audio_decoder_op.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -88,13 +88,13 @@ AudioDecoderCpu::SetupImpl(std::vector &output_desc, const workspace for (int i = 0; i < batch_size; i++) { auto &meta = sample_meta_[i] = - decoders_[i]->Open({static_cast(input[i].raw_data()), - input[i].shape().num_elements()}); + decoders_[i]->Open({static_cast(input.raw_tensor(i)), + input.tensor_shape(i).num_elements()}); TensorShape<> data_sample_shape = DecodedAudioShape( meta, use_resampling_ ? target_sample_rates_[i] : -1.0f, downmix_); shape_data.set_tensor_shape(i, data_sample_shape); shape_rate.set_tensor_shape(i, {}); - files_names_[i] = input[i].GetSourceInfo(); + files_names_[i] = input.GetMeta(i).GetSourceInfo(); } output_desc[0] = { shape_data, output_type_ }; diff --git a/dali/operators/decoder/decoder_test.h b/dali/operators/decoder/decoder_test.h index d9ecd0efad9..18531e8835e 100644 --- a/dali/operators/decoder/decoder_test.h +++ b/dali/operators/decoder/decoder_test.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
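// The accessor migration applied throughout this patch, shown side by side. float and
// sample_id are placeholders; the typed calls are the batch-level API used above:
const float *in_ptr = input.tensor<float>(sample_id);           // was: input[sample_id].data<float>()
float *out_ptr      = output.mutable_tensor<float>(sample_id);  // was: output[sample_id].mutable_data<float>()
auto sample_shape   = input.tensor_shape(sample_id);            // was: input[sample_id].shape()
int64_t n           = volume(sample_shape);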
@@ -18,6 +18,7 @@ #include #include #include +#include "dali/pipeline/data/types.h" #include "dali/test/dali_test_decoder.h" namespace dali { @@ -64,6 +65,7 @@ class DecodeTestBase : public GenericDecoderTest { // single input - encoded images // single output - decoded images TensorVector out(inputs[0]->num_samples()); + std::vector> tmp_out(inputs[0]->num_samples()); const TensorList &encoded_data = *inputs[0]; const int c = this->GetNumColorComp(); @@ -72,7 +74,16 @@ class DecodeTestBase : public GenericDecoderTest { auto data_size = volume(encoded_data.tensor_shape(i)); this->DecodeImage( data, data_size, c, this->ImageType(), - &out[i], GetCropWindowGenerator(i)); + &tmp_out[i], GetCropWindowGenerator(i)); + } + + TensorListShape<> out_shape(inputs[0]->num_samples(), 3); + for (size_t i = 0; i < encoded_data.num_samples(); ++i) { + out_shape.set_tensor_shape(i, tmp_out[i].shape()); + } + out.Resize(out_shape, DALI_UINT8); + for (size_t i = 0; i < encoded_data.num_samples(); ++i) { + out.UnsafeSetSample(i, tmp_out[i]); } vector>> outputs; diff --git a/dali/operators/decoder/nvjpeg/nvjpeg_decoder_decoupled_api.h b/dali/operators/decoder/nvjpeg/nvjpeg_decoder_decoupled_api.h index 713f2d8f49e..bf50a3d0ba9 100644 --- a/dali/operators/decoder/nvjpeg/nvjpeg_decoder_decoupled_api.h +++ b/dali/operators/decoder/nvjpeg/nvjpeg_decoder_decoupled_api.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -554,15 +554,16 @@ class nvJPEGDecoder : public Operator, CachedDecoderImpl { samples_jpeg2k_.clear(); #endif // NVJPEG2K_ENABLED + const auto &input = ws.Input(0); for (int i = 0; i < curr_batch_size; i++) { - const auto &in = ws.Input(0)[i]; - const auto in_size = in.size(); - thread_pool_.AddWork([this, i, &in, in_size](int tid) { - auto *input_data = in.data(); + auto *input_data = input.tensor(i); + const auto in_size = input.tensor_shape(i).num_elements(); + const auto &source_info = input.GetMeta(i).GetSourceInfo(); + thread_pool_.AddWork([this, i, input_data, in_size, source_info](int tid) { SampleData &data = sample_data_[i]; data.clear(); data.sample_idx = i; - data.file_name = in.GetSourceInfo(); + data.file_name = source_info; data.encoded_length = in_size; auto cached_shape = CacheImageShape(data.file_name); @@ -704,15 +705,17 @@ class nvJPEGDecoder : public Operator, CachedDecoderImpl { void ProcessImagesCuda(MixedWorkspace &ws) { auto& output = ws.Output(0); + const auto &input = ws.Input(0); for (auto *sample : samples_single_) { assert(sample); auto i = sample->sample_idx; auto *output_data = output.mutable_tensor(i); - const auto &in = ws.Input(0)[i]; + const auto *in_data = input.tensor(i); + const auto in_size = input.tensor_shape(i).num_elements(); thread_pool_.AddWork( - [this, sample, &in, output_data](int tid) { - SampleWorker(sample->sample_idx, sample->file_name, in.size(), tid, - in.data(), output_data, streams_[tid]); + [this, sample, in_data, in_size, output_data](int tid) { + SampleWorker(sample->sample_idx, sample->file_name, in_size, tid, + in_data, output_data, streams_[tid]); }, task_priority_seq_--); // FIFO order, since the samples were already ordered } } @@ -808,15 +811,17 @@ class nvJPEGDecoder : public Operator, CachedDecoderImpl { } void ProcessImagesHost(MixedWorkspace &ws) { + const auto 
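// Pattern from the decoder_test.h hunk above: samples are first decoded into standalone
// Tensors, then the output TensorVector is resized to the collected shapes and each
// sample is attached with UnsafeSetSample, which shares the allocation instead of copying.
// Sketch under the same API; nsamples and the uint8 type mirror the test code above:
std::vector<Tensor<CPUBackend>> tmp_out(nsamples);
// ... decode into tmp_out[i] ...
TensorListShape<> out_shape(nsamples, 3);
for (int i = 0; i < nsamples; ++i)
  out_shape.set_tensor_shape(i, tmp_out[i].shape());
out.Resize(out_shape, DALI_UINT8);
for (int i = 0; i < nsamples; ++i)
  out.UnsafeSetSample(i, tmp_out[i]);  // out sample i now aliases tmp_out[i]'s buffer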
&input = ws.Input(0); auto& output = ws.Output(0); for (auto *sample : samples_host_) { auto i = sample->sample_idx; + const auto *input_data = input.tensor(i); + auto in_size = input.tensor_shape(i).num_elements(); auto *output_data = output.mutable_tensor(i); - const auto &in = ws.Input(0)[i]; ImageCache::ImageShape shape = output_shape_[i].to_static<3>(); thread_pool_.AddWork( - [this, sample, &in, output_data, shape](int tid) { - HostFallback(in.data(), in.size(), output_image_type_, output_data, + [this, sample, input_data, in_size, output_data, shape](int tid) { + HostFallback(input_data, in_size, output_image_type_, output_data, streams_[tid], sample->file_name, sample->roi, use_fast_idct_); CacheStore(sample->file_name, output_data, shape, streams_[tid]); }, task_priority_seq_--); // FIFO order, since the samples were already ordered @@ -846,13 +851,13 @@ class nvJPEGDecoder : public Operator, CachedDecoderImpl { int j = 0; TensorVector tv(samples_hw_batched_.size()); + const auto &input = ws.Input(0); for (auto *sample : samples_hw_batched_) { int i = sample->sample_idx; - const auto &in = ws.Input(0)[i]; const auto &out_shape = output_shape_.tensor_shape(i); - tv[j].ShareData(const_cast &>(in)); - in_lengths_[j] = in.size(); + tv.UnsafeSetSample(j, input, i); + in_lengths_[j] = input.tensor_shape(i).num_elements(); nvjpeg_destinations_[j].channel[0] = output.mutable_tensor(i); nvjpeg_destinations_[j].pitch[0] = out_shape[1] * out_shape[2]; nvjpeg_params_[j] = sample->params; diff --git a/dali/operators/generic/cast.cc b/dali/operators/generic/cast.cc index 8850c0bce3f..22090bbb9c9 100644 --- a/dali/operators/generic/cast.cc +++ b/dali/operators/generic/cast.cc @@ -51,8 +51,8 @@ void CastCPU::RunImpl(HostWorkspace &ws) { TYPE_SWITCH(itype, type2id, IType, CAST_ALLOWED_TYPES, ( for (int sample_id = 0; sample_id < num_samples; sample_id++) { - auto *out = output[sample_id].mutable_data(); - const auto *in = input[sample_id].data(); + auto *out = output.mutable_tensor(sample_id); + const auto *in = input.tensor(sample_id); auto size = input_shape.tensor_size(sample_id); tp.AddWork([out, in, size](int thread_id) { CpuHelper(out, in, size); }, size); diff --git a/dali/operators/generic/constant.cc b/dali/operators/generic/constant.cc index d01a4f48537..199ca70618a 100644 --- a/dali/operators/generic/constant.cc +++ b/dali/operators/generic/constant.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
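// The nvJPEG hunks above stop capturing per-sample Tensor references in thread-pool
// lambdas; raw pointers, sizes and metadata are resolved up front and captured by value.
// Sketch of that convention (DecodeOne is a hypothetical stand-in for SampleWorker /
// HostFallback):
const auto &input = ws.Input<CPUBackend>(0);
const uint8_t *in_data = input.tensor<uint8_t>(i);
auto in_size = input.tensor_shape(i).num_elements();
auto source_info = input.GetMeta(i).GetSourceInfo();
thread_pool_.AddWork([=](int tid) {
  DecodeOne(in_data, in_size, source_info, tid);  // no Tensor object crosses the thread boundary
});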
@@ -80,7 +80,7 @@ void FillTensorVector( assert(is_uniform(shape)); int64_t n = shape[0].num_elements(); assert(src.size() == static_cast(n) || src.size() == 1); - Dst *out = dst[0].mutable_data(); + Dst *out = dst.mutable_tensor(0); if (src.size() == 1) { Dst val = ConvertSat(src[0]); for (int64_t i = 0; i < n; i++) { @@ -92,7 +92,7 @@ void FillTensorVector( } } for (int i = 1; i < shape.num_samples(); i++) { - dst[i].ShareData(dst[0]); + dst.UnsafeSetSample(i, dst, 0); } } } // namespace @@ -116,7 +116,7 @@ void Constant::RunImpl(HostWorkspace &ws) { out.Resize(output_shape_); int N = output_shape_.num_samples(); for (int i = 0; i < N; i++) { - assert(out[i].raw_data() == output_[i].raw_data()); + assert(out.raw_tensor(i) == output_.raw_tensor(i)); } out.SetLayout(layout_); } diff --git a/dali/operators/generic/erase/erase_utils.h b/dali/operators/generic/erase/erase_utils.h index bdbc7ffa9b0..9ac20e82642 100644 --- a/dali/operators/generic/erase/erase_utils.h +++ b/dali/operators/generic/erase/erase_utils.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -95,17 +95,17 @@ std::vector> GetEraseArgs(const OpSpec &spec, for (int i = 0; i < nsamples; i++) { if (has_tensor_roi_anchor) { - const auto& anchor = ws.ArgumentInput("anchor")[i]; - assert(anchor.size() > 0); - roi_anchor.resize(anchor.size()); - std::memcpy(roi_anchor.data(), anchor.data(), sizeof(float) * roi_anchor.size()); + const auto& anchor = view(ws.ArgumentInput("anchor")[i]); + assert(anchor.shape.num_elements() > 0); + roi_anchor.resize(anchor.shape.num_elements()); + std::memcpy(roi_anchor.data(), anchor.data, sizeof(float) * roi_anchor.size()); } if (has_tensor_roi_shape) { - const auto& shape = ws.ArgumentInput("shape")[i]; - assert(shape.size() > 0); - roi_shape.resize(shape.size()); - std::memcpy(roi_shape.data(), shape.data(), sizeof(float) * roi_shape.size()); + const auto& shape = view(ws.ArgumentInput("shape")[i]); + assert(shape.shape.num_elements() > 0); + roi_shape.resize(shape.num_elements()); + std::memcpy(roi_shape.data(), shape.data, sizeof(float) * roi_shape.size()); } DALI_ENFORCE(roi_anchor.size() == roi_shape.size()); diff --git a/dali/operators/generic/lookup_table.cc b/dali/operators/generic/lookup_table.cc index ce66d5d8365..ca62c4440a0 100644 --- a/dali/operators/generic/lookup_table.cc +++ b/dali/operators/generic/lookup_table.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
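// The Constant hunk above fills a single physical sample and aliases it into the rest of
// the batch, replacing per-sample ShareData. Sketch; Dst stands for the concrete output
// type selected by the surrounding TYPE_SWITCH:
Dst *out = dst.mutable_tensor<Dst>(0);     // write sample 0 once
// ... fill out[0 .. n) ...
for (int i = 1; i < dst.num_samples(); i++)
  dst.UnsafeSetSample(i, dst, 0);          // sample i points at sample 0's data, no copy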
@@ -25,8 +25,8 @@ void LookupValuesImpl(ThreadPool &tp, TensorVector &output, const Output *lookup_table, const Output default_value) { for (int sample_idx = 0; sample_idx < shape.num_samples(); sample_idx++) { auto data_size = shape.tensor_size(sample_idx); - auto *out_data = output[sample_idx].mutable_data(); - const auto *in_data = input[sample_idx].data(); + auto *out_data = output.mutable_tensor(sample_idx); + const auto *in_data = input.tensor(sample_idx); tp.AddWork( [=](int thread_id) { for (int64_t i = 0; i < data_size; i++) { diff --git a/dali/operators/generic/permute_batch.cc b/dali/operators/generic/permute_batch.cc index dacfefae0eb..ddb7961a409 100644 --- a/dali/operators/generic/permute_batch.cc +++ b/dali/operators/generic/permute_batch.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -45,7 +45,8 @@ void PermuteBatch::RunImpl(HostWorkspace &ws) { int src = indices_[i]; tp.AddWork([&, i, src](int tid) { output.SetMeta(i, input.GetMeta(i)); - output[i].Copy(input[src]); + // TODO(klecki): SetSample + output.UnsafeCopySample(i, input, src); }, size); } tp.RunAll(); diff --git a/dali/operators/generic/reshape.cc b/dali/operators/generic/reshape.cc index ad5446d76c8..bb3361f6c49 100644 --- a/dali/operators/generic/reshape.cc +++ b/dali/operators/generic/reshape.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -393,8 +393,8 @@ void Reshape::RunImpl(HostWorkspace &ws) { out.Resize(output_shape_, output_type_->id()); int N = output_shape_.num_samples(); for (int i = 0; i < N; i++) { - assert(out[i].raw_data() == in[i].raw_data()); - assert(out[i].shape() == output_shape_[i]); + assert(out.raw_tensor(i) == in.raw_tensor(i)); + assert(out.tensor_shape(i) == output_shape_[i]); } out.SetLayout(layout); } diff --git a/dali/operators/generic/shapes.h b/dali/operators/generic/shapes.h index 3280fb8d545..9d57229214f 100644 --- a/dali/operators/generic/shapes.h +++ b/dali/operators/generic/shapes.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -77,7 +77,7 @@ class Shapes : public Operator { int n = out.num_samples(); assert(n == shape.num_samples()); for (int i = 0; i < n; i++) { - type *data = out[i].mutable_data(); + type *data = out.mutable_tensor(i); auto sample_shape = shape.tensor_shape_span(i); for (int j = 0; j < shape.sample_dim(); j++) data[j] = sample_shape[j]; diff --git a/dali/operators/generic/slice/slice_base.cc b/dali/operators/generic/slice/slice_base.cc index 38f2d69798f..e0b69e4bcd4 100644 --- a/dali/operators/generic/slice/slice_base.cc +++ b/dali/operators/generic/slice/slice_base.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. 
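// PermuteBatch above replaces output[i].Copy(input[src]) with a batch-level sample copy;
// the metadata is carried over explicitly. Sketch of the calls as used in this patch:
output.SetMeta(i, input.GetMeta(i));
output.UnsafeCopySample(i, input, src);  // copy sample `src` of `input` into sample `i` of `output`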
All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -58,7 +58,6 @@ bool SliceBaseCpu::SetupImpl(std::vector(input); for (int i = 0; i < nsamples; i++) { auto in_view = view(input[i]); auto req = Kernel().Setup(ctx, in_view, args_[i]); diff --git a/dali/operators/generic/transpose/transpose.cc b/dali/operators/generic/transpose/transpose.cc index 294407f05e8..cb28194ad25 100644 --- a/dali/operators/generic/transpose/transpose.cc +++ b/dali/operators/generic/transpose/transpose.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -46,8 +46,8 @@ class TransposeCPU : public Transpose { TensorShape<> src_ts = input.shape()[i]; auto dst_ts = permute(src_ts, perm_); kernels::TransposeGrouped( - TensorView{output[i].mutable_data(), dst_ts}, - TensorView{input[i].data(), src_ts}, make_cspan(perm_)); + TensorView{output.mutable_tensor(i), dst_ts}, + TensorView{input.tensor(i), src_ts}, make_cspan(perm_)); }, out_shape.tensor_size(i)); } ), DALI_FAIL(make_string("Unsupported input type: ", input_type))); // NOLINT diff --git a/dali/operators/geometry/coord_flip.cc b/dali/operators/geometry/coord_flip.cc index 2e9ee69ef8f..929810fc61f 100644 --- a/dali/operators/geometry/coord_flip.cc +++ b/dali/operators/geometry/coord_flip.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -77,11 +77,11 @@ void CoordFlipCPU::RunImpl(workspace_t &ws) { mirrored_origin[y_dim_] = 2.0f * spec_.GetArgument("center_y", &ws, sample_id); mirrored_origin[z_dim_] = 2.0f * spec_.GetArgument("center_z", &ws, sample_id); - auto in_size = volume(input[sample_id].shape()); + auto in_size = volume(input.tensor_shape(sample_id)); thread_pool.AddWork( [this, &input, in_size, &output, sample_id, flip_dim, mirrored_origin](int thread_id) { - const auto *in = input[sample_id].data(); - auto *out = output[sample_id].mutable_data(); + const auto *in = input.tensor(sample_id); + auto *out = output.mutable_tensor(sample_id); int d = 0; int64_t i = 0; for (; i < in_size; i++, d++) { diff --git a/dali/operators/geometry/mt_transform_attr_test.cc b/dali/operators/geometry/mt_transform_attr_test.cc index 12d71dbeb90..2d3b03e3652 100644 --- a/dali/operators/geometry/mt_transform_attr_test.cc +++ b/dali/operators/geometry/mt_transform_attr_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -155,11 +155,11 @@ TEST(MTTransformAttr, MInputTInput) { Minp->Resize(Mtls, DALI_FLOAT); Tinp->Resize(Ttls, DALI_FLOAT); for (int i = 0; i < N; i++) { - float *data = (*Minp)[i].mutable_data(); + float *data = Minp->mutable_tensor(i); for (int j = 0; j < volume(Mtls[i]); j++) data[j] = 1 + j + i * 10; - data = (*Tinp)[i].mutable_data(); + data = Tinp->mutable_tensor(i); for (int j = 0; j < volume(Ttls[i]); j++) data[j] = 10 + j * 10 + i * 100; } @@ -198,9 +198,9 @@ TEST(MTTransformAttr, MScalarInputTScalarInput) { Minp->Resize(tls, DALI_FLOAT); Tinp->Resize(tls, DALI_FLOAT); for (int i = 0; i < N; i++) { - float *data = (*Minp)[i].mutable_data(); + float *data = Minp->mutable_tensor(i); data[0] = i + 10; - data = (*Tinp)[i].mutable_data(); + data = Tinp->mutable_tensor(i); data[0] = i + 100; } @@ -233,7 +233,7 @@ TEST(MTTransformAttr, MTInput) { int N = tls.num_samples();; MTinp->Resize(tls, DALI_FLOAT); for (int i = 0; i < N; i++) { - auto *data = (*MTinp)[i].mutable_data(); + auto *data = MTinp->mutable_tensor(i); for (int j = 0; j < volume(tls[i]); j++) data[j] = 1 + j + i * 10; } @@ -342,7 +342,7 @@ TEST(MTTransformAttr, MTInput_ErrorSize) { int N = tls.num_samples();; MTinp->Resize(tls, DALI_FLOAT); for (int i = 0; i < N; i++) { - auto *data = (*MTinp)[i].mutable_data(); + auto *data = MTinp->mutable_tensor(i); for (int j = 0; j < volume(tls[i]); j++) data[j] = 1 + j + i * 10; } diff --git a/dali/operators/image/convolution/gaussian_blur.cc b/dali/operators/image/convolution/gaussian_blur.cc index 60042b28c78..37cae5a7446 100644 --- a/dali/operators/image/convolution/gaussian_blur.cc +++ b/dali/operators/image/convolution/gaussian_blur.cc @@ -108,10 +108,10 @@ class GaussianBlurOpCpu : public OpImplBase { params_[i] = ObtainSampleParams(i, spec_, ws); windows_[i].PrepareWindows(params_[i]); // We take only last `ndim` siginificant dimensions to handle sequences as well - auto elem_shape = input[i].shape().template last(); + auto elem_shape = input.tensor_shape(i).template last(); auto& req = kmgr_.Setup(i, ctx_, elem_shape, params_[i].window_sizes); // The shape of data stays untouched - output_desc[0].shape.set_tensor_shape(i, input[i].shape()); + output_desc[0].shape.set_tensor_shape(i, input.tensor_shape(i)); } return true; } @@ -125,7 +125,7 @@ class GaussianBlurOpCpu : public OpImplBase { int nsamples = input.shape().num_samples(); for (int sample_idx = 0; sample_idx < nsamples; sample_idx++) { - const auto& shape = input[sample_idx].shape(); + const auto& shape = input.tensor_shape(sample_idx); auto elem_volume = volume(shape.begin() + dim_desc_.usable_axes_start, shape.end()); int seq_elements = 1; @@ -138,11 +138,11 @@ class GaussianBlurOpCpu : public OpImplBase { thread_pool.AddWork( [this, &input, &output, sample_idx, elem_idx, stride](int thread_id) { auto gaussian_windows = windows_[sample_idx].GetWindows(); - auto elem_shape = input[sample_idx].shape().template last(); + auto elem_shape = input.tensor_shape(sample_idx).template last(); auto in_view = TensorView{ - input[sample_idx].template data() + stride * elem_idx, elem_shape}; + input.template tensor(sample_idx) + stride * elem_idx, elem_shape}; auto out_view = TensorView{ - output[sample_idx].template mutable_data() + stride * elem_idx, elem_shape}; + output.template mutable_tensor(sample_idx) + stride * elem_idx, elem_shape}; // I need a context for that particular run (or rather matching the thread & // scratchpad) auto ctx = ctx_; diff --git a/dali/operators/image/convolution/laplacian.cc 
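// Test hunks above fill argument batches through the batch handle after a single Resize,
// instead of touching (*batch)[i]. Sketch with illustrative values:
Minp->Resize(Mtls, DALI_FLOAT);
for (int i = 0; i < Mtls.num_samples(); i++) {
  float *data = Minp->mutable_tensor<float>(i);  // was: (*Minp)[i].mutable_data<float>()
  for (int j = 0; j < volume(Mtls[i]); j++)
    data[j] = 1 + j + i * 10;
}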
b/dali/operators/image/convolution/laplacian.cc index 106d662d796..6cec3a3879f 100644 --- a/dali/operators/image/convolution/laplacian.cc +++ b/dali/operators/image/convolution/laplacian.cc @@ -141,10 +141,10 @@ class LaplacianOpCpu : public OpImplBase { kmgr_.template Resize(nsamples); for (int sample_idx = 0; sample_idx < nsamples; sample_idx++) { // We take only last `ndim` siginificant dimensions to handle sequences as well - auto elem_shape = input[sample_idx].shape().template last(); + auto elem_shape = input.shape()[sample_idx].template last(); kmgr_.Setup(sample_idx, ctx_, elem_shape, args.GetWindowSizes(sample_idx)); // The shape of data stays untouched - output_desc[0].shape.set_tensor_shape(sample_idx, input[sample_idx].shape()); + output_desc[0].shape.set_tensor_shape(sample_idx, input.tensor_shape(sample_idx)); } return true; } @@ -158,7 +158,7 @@ class LaplacianOpCpu : public OpImplBase { int nsamples = input.shape().num_samples(); for (int sample_idx = 0; sample_idx < nsamples; sample_idx++) { - const auto& shape = input[sample_idx].shape(); + const auto& shape = input.tensor_shape(sample_idx); auto elem_volume = volume(shape.begin() + dim_desc_.usable_axes_start, shape.end()); auto priority = elem_volume * args.GetTotalWindowSizes(sample_idx); int seq_elements = volume(shape.begin(), shape.begin() + dim_desc_.usable_axes_start); @@ -170,9 +170,9 @@ class LaplacianOpCpu : public OpImplBase { const auto& scales = args.GetScales(sample_idx); auto elem_shape = input[sample_idx].shape().template last(); auto in_view = TensorView{ - input[sample_idx].template data() + stride * elem_idx, elem_shape}; + input.template tensor(sample_idx) + stride * elem_idx, elem_shape}; auto out_view = TensorView{ - output[sample_idx].template mutable_data() + stride * elem_idx, elem_shape}; + output.template mutable_tensor(sample_idx) + stride * elem_idx, elem_shape}; // Copy context so that the kernel instance can modify scratchpad auto ctx = ctx_; kmgr_.Run(sample_idx, ctx, out_view, in_view, windows_[sample_idx], diff --git a/dali/operators/image/distortion/jpeg_compression_distortion_op_gpu.cu b/dali/operators/image/distortion/jpeg_compression_distortion_op_gpu.cu index d9a377e2cfc..0bd2c886e0b 100644 --- a/dali/operators/image/distortion/jpeg_compression_distortion_op_gpu.cu +++ b/dali/operators/image/distortion/jpeg_compression_distortion_op_gpu.cu @@ -71,7 +71,7 @@ void JpegCompressionDistortionGPU::RunImpl(workspace_t &ws) { // Set quality argument for an image from samples if (is_sequence) { for (int i = 0; i < nsamples; i++) { - auto nframes = input.tensor_shape_span(i)[0]; + auto nframes = input.shape().tensor_shape_span(i)[0]; for (int j = 0; j < nframes; ++j) { quality_.push_back(quality_arg_[i].data[0]); } diff --git a/dali/operators/image/peek_shape/peek_image_shape.h b/dali/operators/image/peek_shape/peek_image_shape.h index 7b15129182c..9a97122293d 100644 --- a/dali/operators/image/peek_shape/peek_image_shape.h +++ b/dali/operators/image/peek_shape/peek_image_shape.h @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -16,8 +16,10 @@ #define DALI_OPERATORS_IMAGE_PEEK_SHAPE_PEEK_IMAGE_SHAPE_H_ #include +#include "dali/core/backend_tags.h" #include "dali/image/image_factory.h" #include "dali/core/tensor_shape.h" +#include "dali/pipeline/data/types.h" #include "dali/pipeline/operator/operator.h" #include "dali/core/static_switch.h" @@ -62,10 +64,9 @@ class PeekImageShape : public Operator { } template - void WriteShape(Tensor &out, const TensorShape<3> &shape) { - type *data = out.mutable_data(); + void WriteShape(TensorView out, const TensorShape<3> &shape) { for (int i = 0; i < 3; ++i) { - data[i] = shape[i]; + out.data[i] = shape[i]; } } @@ -74,20 +75,22 @@ class PeekImageShape : public Operator { const auto &input = ws.template Input(0); auto &output = ws.template Output(0); size_t batch_size = input.num_samples(); + DALI_ENFORCE(input.type() == DALI_UINT8, "Input must be stored as uint8 data."); for (size_t sample_id = 0; sample_id < batch_size; ++sample_id) { thread_pool.AddWork([sample_id, &input, &output, this] (int tid) { const auto& image = input[sample_id]; // Verify input - DALI_ENFORCE(image.ndim() == 1, + // TODO(klecki): Move the checks to scope above + DALI_ENFORCE(image.shape().sample_dim() == 1, "Input must be 1D encoded jpeg string."); - DALI_ENFORCE(IsType(image.type()), - "Input must be stored as uint8 data."); - auto img = ImageFactory::CreateImage(image.data(), image.size(), {}); + DALI_ENFORCE(image.type() == DALI_UINT8, "Input must be stored as uint8 data."); + auto img = + ImageFactory::CreateImage(image._data(), image.shape().num_elements(), {}); auto shape = img->PeekShape(); TYPE_SWITCH(output_type_, type2id, type, (int32_t, uint32_t, int64_t, uint64_t, float, double), - (WriteShape(output[sample_id], shape);), + (WriteShape(view(output[sample_id]), shape);), (DALI_FAIL(make_string("Unsupported type for Shapes: ", output_type_)))); }, 0); // the amount of work depends on the image format and exact sample which is unknown here diff --git a/dali/operators/image/remap/displacement_filter_impl_cpu.h b/dali/operators/image/remap/displacement_filter_impl_cpu.h index 15a272672b6..afe2ac823c6 100644 --- a/dali/operators/image/remap/displacement_filter_impl_cpu.h +++ b/dali/operators/image/remap/displacement_filter_impl_cpu.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
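// PeekImageShape above now writes the result through a TensorView obtained with view()
// rather than through a Tensor reference. Sketch; the template arguments on TensorView
// and view() are assumptions (they are not visible in the hunk above), and `type` is one
// of the types handled by the surrounding TYPE_SWITCH:
template <typename type>
void WriteShape(TensorView<StorageCPU, type> out, const TensorShape<3> &shape) {
  for (int i = 0; i < 3; ++i)
    out.data[i] = shape[i];  // TensorView exposes its raw pointer as .data
}
// call site: WriteShape(view<type>(output[sample_id]), shape);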
@@ -21,6 +21,7 @@ #include "dali/core/common.h" #include "dali/operators/image/remap/displacement_filter.h" +#include "dali/pipeline/data/sample_view.h" #include "dali/pipeline/data/views.h" #include "dali/kernels/kernel_params.h" #include "dali/kernels/imgproc/sampler.h" @@ -90,7 +91,7 @@ class DisplacementFilter } template - void RunWarp(Tensor &output, const Tensor &input, int thread_idx) { + void RunWarp(SampleView output, ConstSampleView input, int thread_idx) { auto &displace = displace_[thread_idx]; In fill[1024]; auto in = view(input); @@ -109,9 +110,9 @@ class DisplacementFilter PrepareDisplacement(ws, sample_idx, thread_idx); - if (!has_mask_ || (*mask_)[sample_idx].data()[0]) { - const auto &in_tensor = input[sample_idx]; - auto &out_tensor = output[sample_idx]; + if (!has_mask_ || mask_->tensor(sample_idx)[0]) { + auto in_tensor = input[sample_idx]; + auto out_tensor = output[sample_idx]; switch (interp_type_) { case DALI_INTERP_NN: @@ -138,9 +139,7 @@ class DisplacementFilter " only NN and LINEAR are supported for this operation"); } } else { - const auto &in_tensor = input[sample_idx]; - auto &out_tensor = output[sample_idx]; - out_tensor.Copy(in_tensor); + output.UnsafeCopySample(sample_idx, input, sample_idx); } } diff --git a/dali/operators/image/remap/displacement_filter_impl_gpu.cuh b/dali/operators/image/remap/displacement_filter_impl_gpu.cuh index 7064e327b23..915d1955493 100644 --- a/dali/operators/image/remap/displacement_filter_impl_gpu.cuh +++ b/dali/operators/image/remap/displacement_filter_impl_gpu.cuh @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -325,7 +325,7 @@ class DisplacementFilter(sample_idx); sample.raw_params = GetDisplacementParams(sample_idx); sample.shape = shape.tensor_shape(sample_idx); - sample.mask = has_mask_ ? ws.ArgumentInput("mask")[sample_idx].data()[0] : true; + sample.mask = has_mask_ ? (ws.ArgumentInput("mask").tensor(sample_idx))[0] : true; } samples_dev_.from_host(samples_, stream); diff --git a/dali/operators/image/remap/warp_affine_params.h b/dali/operators/image/remap/warp_affine_params.h index cb6baeae248..0d53de20d1e 100644 --- a/dali/operators/image/remap/warp_affine_params.h +++ b/dali/operators/image/remap/warp_affine_params.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -139,9 +139,9 @@ class WarpAffineParamProvider auto *params = this->template AllocParams(); for (int i = 0; i < num_samples_; i++) { if (invert) { - params[i] = static_cast(input[i].raw_data())->inv(); + params[i] = static_cast(input.raw_tensor(i))->inv(); } else { - params[i] = *static_cast(input[i].raw_data()); + params[i] = *static_cast(input.raw_tensor(i)); } } } diff --git a/dali/operators/image/remap/warp_param_provider.h b/dali/operators/image/remap/warp_param_provider.h index abf9a61b292..44c79e5ff34 100644 --- a/dali/operators/image/remap/warp_param_provider.h +++ b/dali/operators/image/remap/warp_param_provider.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. 
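// Several helpers above change their signatures from Tensor<Backend>& to SampleView /
// ConstSampleView, which are small non-owning handles passed by value; views into a batch
// are obtained by indexing it. Simplified sketch of the convention used in this patch:
// new signature:  void RunWarp(SampleView<CPUBackend> output, ConstSampleView<CPUBackend> input, int thread_idx);
auto in_tensor  = input[sample_idx];   // ConstSampleView into the input batch
auto out_tensor = output[sample_idx];  // SampleView into the output batch
RunWarp(out_tensor, in_tensor, thread_idx);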
All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -47,7 +47,7 @@ class InterpTypeProvider { "interp_type must be a single value or contain one value per sample"); interp_types_.resize(n); for (int i = 0; i < n; i++) - interp_types_[i] = tensor_vector[i].data()[0]; + interp_types_[i] = tensor_vector.tensor(i)[0]; } else { interp_types_.resize(1, spec.template GetArgument("interp_type")); } diff --git a/dali/operators/math/expressions/expression_impl_factory.h b/dali/operators/math/expressions/expression_impl_factory.h index 25a920df659..a52215a9a61 100644 --- a/dali/operators/math/expressions/expression_impl_factory.h +++ b/dali/operators/math/expressions/expression_impl_factory.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -130,7 +130,7 @@ struct ExprImplTask { }; inline OutputSamplePtr GetOutputSamplePointer(HostWorkspace &ws, int output_idx, int sample_idx) { - return ws.template Output(output_idx)[sample_idx].raw_mutable_data(); + return ws.template Output(output_idx).raw_mutable_tensor(sample_idx); } inline OutputSamplePtr GetOutputSamplePointer(DeviceWorkspace &ws, int output_idx, int sample_idx) { @@ -138,7 +138,7 @@ inline OutputSamplePtr GetOutputSamplePointer(DeviceWorkspace &ws, int output_id } inline InputSamplePtr GetInputSamplePointer(HostWorkspace &ws, int input_idx, int sample_idx) { - return ws.template Input(input_idx)[sample_idx].raw_data(); + return ws.template Input(input_idx).raw_tensor(sample_idx); } inline InputSamplePtr GetInputSamplePointer(DeviceWorkspace &ws, int input_idx, int sample_idx) { diff --git a/dali/operators/numba_function/numba_func.cc b/dali/operators/numba_function/numba_func.cc index 159d2d4e220..6fc514f17af 100644 --- a/dali/operators/numba_function/numba_func.cc +++ b/dali/operators/numba_function/numba_func.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -260,13 +260,13 @@ void NumbaFuncImpl::RunImpl(workspace_t &ws) { for (size_t out_id = 0; out_id < out_types_.size(); out_id++) { auto& out = ws.Output(out_id); for (int i = 0; i < N; i++) { - out_ptrs[N * out_id + i] = reinterpret_cast(out[i].raw_mutable_data()); + out_ptrs[N * out_id + i] = reinterpret_cast(out.raw_mutable_tensor(i)); } } for (size_t in_id = 0; in_id < in_types_.size(); in_id++) { auto& in = ws.Input(in_id); for (int i = 0; i < N; i++) { - in_ptrs[N * in_id + i] = reinterpret_cast(in[i].raw_data()); + in_ptrs[N * in_id + i] = reinterpret_cast(in.raw_tensor(i)); } } diff --git a/dali/operators/python_function/dltensor_function.cc b/dali/operators/python_function/dltensor_function.cc index 0adab7da4fb..bc2d75d6b2b 100644 --- a/dali/operators/python_function/dltensor_function.cc +++ b/dali/operators/python_function/dltensor_function.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -77,9 +77,9 @@ py::list PrepareDLTensorInputs(HostWorkspace &ws) { py::list input_tuple; for (Index idx = 0; idx < ws.NumInput(); ++idx) { py::list dl_tensor_list; + auto &tvec = ws.UnsafeMutableInput(idx); for (Index i = 0; i < ws.GetInputBatchSize(idx); ++i) { - auto &t = ws.UnsafeMutableInput(idx)[i]; - auto dl_capsule = TensorToDLPackView(t); + auto dl_capsule = TensorToDLPackView(tvec[i], tvec.device_id()); dl_tensor_list.append(dl_capsule); } input_tuple.append(dl_tensor_list); @@ -106,8 +106,8 @@ py::list PrepareDLTensorInputsPerSample(HostWorkspace &ws) { for (Index s = 0; s < batch_size; ++s) { py::list tuple; for (Index idx = 0; idx < ws.NumInput(); ++idx) { - auto &t = ws.UnsafeMutableInput(idx)[s]; - auto dl_capsule = TensorToDLPackView(t); + auto &tvec = ws.UnsafeMutableInput(idx); + auto dl_capsule = TensorToDLPackView(tvec[s], tvec.device_id()); tuple.append(dl_capsule); } input_tuples.append(tuple); @@ -148,7 +148,7 @@ void CopyOutputData(TensorVector &output, std::vector auto out_shape = output.shape(); for (int i = 0; i < batch_size; ++i) { thread_pool.AddWork([&, i](int) { - CopyDlTensor(output[i].raw_mutable_data(), dl_tensors[i]); + CopyDlTensor(output.raw_mutable_tensor(i), dl_tensors[i]); }, out_shape.tensor_size(i)); } thread_pool.RunAll(); diff --git a/dali/operators/reader/loader/nemo_asr_loader.cc b/dali/operators/reader/loader/nemo_asr_loader.cc index c3fe9d57801..d547a7d0745 100644 --- a/dali/operators/reader/loader/nemo_asr_loader.cc +++ b/dali/operators/reader/loader/nemo_asr_loader.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -108,7 +108,7 @@ void NemoAsrLoader::PrepareEmpty(AsrSample &sample) { } template -void NemoAsrLoader::ReadAudio(Tensor &audio, +void NemoAsrLoader::ReadAudio(SampleView audio, const AudioMetadata &audio_meta, const NemoAsrEntry &entry, AudioDecoderBase &decoder, @@ -170,7 +170,7 @@ void NemoAsrLoader::ReadSample(AsrSample& sample) { TYPE_SWITCH(dtype_, type2id, OutputType, (int16_t, int32_t, float), ( // Audio decoding will be run in the prefetch function, once the batch is formed - sample.decode_f_ = [this, &sample, &entry, offset](Tensor &audio, int tid) { + sample.decode_f_ = [this, &sample, &entry, offset](SampleView audio, int tid) { sample.decoder().OpenFromFile(entry.audio_filepath); if (offset > 0) sample.decoder().SeekFrames(offset); diff --git a/dali/operators/reader/loader/nemo_asr_loader.h b/dali/operators/reader/loader/nemo_asr_loader.h index 218573cd2e1..e1cf137e82b 100644 --- a/dali/operators/reader/loader/nemo_asr_loader.h +++ b/dali/operators/reader/loader/nemo_asr_loader.h @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
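// The DLPack hunks above build capsules from batch sample views plus an explicit device
// id taken from the batch, instead of from individual Tensor objects. Sketch following
// the calls above:
auto &tvec = ws.UnsafeMutableInput(idx);
for (Index i = 0; i < ws.GetInputBatchSize(idx); ++i) {
  auto dl_capsule = TensorToDLPackView(tvec[i], tvec.device_id());  // sample view + device id
  dl_tensor_list.append(dl_capsule);
}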
@@ -65,7 +65,7 @@ class AsrSample { return shape_; } - void decode_audio(Tensor& audio, int tid) { + void decode_audio(SampleView audio, int tid) { decode_f_(audio, tid); } @@ -87,7 +87,7 @@ class AsrSample { std::string audio_filepath_; // for tensor metadata purposes TensorShape<> shape_; - std::function&, int)> decode_f_; + std::function, int)> decode_f_; std::unique_ptr decoder_; }; @@ -158,7 +158,7 @@ class DLL_PUBLIC NemoAsrLoader : public Loader { private: template - void ReadAudio(Tensor &audio, + void ReadAudio(SampleView audio, const AudioMetadata &audio_meta, const NemoAsrEntry &entry, AudioDecoderBase &decoder, diff --git a/dali/operators/reader/loader/nemo_asr_loader_test.cc b/dali/operators/reader/loader/nemo_asr_loader_test.cc index fd37584b59b..9adb476628b 100644 --- a/dali/operators/reader/loader/nemo_asr_loader_test.cc +++ b/dali/operators/reader/loader/nemo_asr_loader_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #include #include #include +#include "dali/pipeline/data/backend.h" #include "dali/test/dali_test_config.h" #include "dali/pipeline/data/views.h" #include "dali/test/tensor_test_utils.h" @@ -219,7 +220,8 @@ TEST(NemoAsrLoaderTest, ReadSample) { Tensor sample_audio; loader.ReadSample(sample); sample_audio.Resize(sample.shape(), DALI_INT16); - sample.decode_audio(sample_audio, 0); + auto sample_audio_view = sample_view(sample_audio); + sample.decode_audio(sample_audio_view, 0); ASSERT_EQ(sample.index(), 0); TensorView ref(ref_data.data(), {ref_samples, 2}); Check(ref, view(sample_audio)); @@ -246,7 +248,8 @@ TEST(NemoAsrLoaderTest, ReadSample) { Tensor sample_audio; loader.ReadSample(sample); sample_audio.Resize(sample.shape(), DALI_FLOAT); - sample.decode_audio(sample_audio, 0); + auto sample_audio_view = sample_view(sample_audio); + sample.decode_audio(sample_audio_view, 0); ASSERT_EQ(sample.index(), 0); TensorView ref(downmixed.data(), {ref_samples}); Check(ref, view(sample_audio), EqualEpsRel(1e-5, 1e-5)); @@ -271,7 +274,8 @@ TEST(NemoAsrLoaderTest, ReadSample) { loader.PrepareMetadata(); loader.ReadSample(sample); sample_audio.Resize(sample.shape(), DALI_FLOAT); - sample.decode_audio(sample_audio, 0); + auto sample_audio_view = sample_view(sample_audio); + sample.decode_audio(sample_audio_view, 0); } int64_t downsampled_len = @@ -302,7 +306,10 @@ TEST(NemoAsrLoaderTest, ReadSample) { loader.PrepareMetadata(); loader.ReadSample(sample_int16); sample_int16_audio.Resize(sample_int16.shape(), DALI_INT16); - sample_int16.decode_audio(sample_int16_audio, 0); + SampleView sample_audio_view(sample_int16_audio.raw_mutable_data(), + sample_int16_audio.shape(), + sample_int16_audio.type()); + sample_int16.decode_audio(sample_audio_view, 0); } ASSERT_EQ(volume(sample_audio.shape()), volume(sample_int16_audio.shape())); @@ -381,7 +388,8 @@ TEST(NemoAsrLoaderTest, ReadSample_OffsetAndDuration) { TensorShape<> expected_sh{length, 2}; ASSERT_EQ(expected_sh, sample.shape()); sample_audio.Resize(sample.shape(), DALI_INT16); - sample.decode_audio(sample_audio, 0); + auto sample_audio_view = sample_view(sample_audio); + sample.decode_audio(sample_audio_view, 0); TensorView ref(ref_data.data() + offset * 2, expected_sh); Check(ref, view(sample_audio)); diff --git 
a/dali/operators/reader/nemo_asr_reader_op.cc b/dali/operators/reader/nemo_asr_reader_op.cc index 8aad58802ad..4d98793b9c0 100755 --- a/dali/operators/reader/nemo_asr_reader_op.cc +++ b/dali/operators/reader/nemo_asr_reader_op.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,6 +13,8 @@ // limitations under the License. #include "dali/operators/reader/nemo_asr_reader_op.h" +#include "dali/pipeline/data/backend.h" +#include "dali/pipeline/data/types.h" namespace dali { @@ -163,8 +165,8 @@ void NemoAsrReader::Prefetch() { // Waiting until all the audio samples are ready to be consumed decoded_map_.clear(); for (int i = 0; i < nsamples; i++) { - auto &sample = *curr_batch[i]; - auto &audio = audio_batch[i]; + AsrSample &sample = *curr_batch[i]; + SampleView audio = audio_batch[i]; if (decoded_map_.find(&sample) != decoded_map_.end()) continue; @@ -173,7 +175,7 @@ void NemoAsrReader::Prefetch() { const auto &audio_meta = sample.audio_meta(); int64_t priority = audio_meta.length * audio_meta.channels; thread_pool_.AddWork( - [&audio, &sample](int tid) { + [audio, &sample](int tid) { sample.decode_audio(audio, tid); }, priority); } @@ -183,7 +185,7 @@ void NemoAsrReader::Prefetch() { for (int i = 0; i < nsamples; i++) { auto it = decoded_map_.find(curr_batch[i].get()); if (it != decoded_map_.end() && it->second != i) { - audio_batch[i].Copy(audio_batch[it->second]); + audio_batch.UnsafeCopySample(i, audio_batch, it->second); } } } @@ -191,10 +193,13 @@ void NemoAsrReader::Prefetch() { void NemoAsrReader::RunImpl(SampleWorkspace &ws) { const auto &sample = GetSample(ws.data_idx()); - const auto &sample_audio = GetDecodedAudioSample(ws.data_idx()); + auto sample_audio = GetDecodedAudioSample(ws.data_idx()); auto &audio = ws.Output(0); - audio.Copy(sample_audio); + audio.Resize(sample_audio.shape(), sample_audio.type()); + std::memcpy( + audio.raw_mutable_data(), sample_audio._raw_data(), + sample_audio.shape().num_elements() * TypeTable::GetTypeInfo(sample_audio.type()).size()); DALIMeta meta; meta.SetSourceInfo(sample.audio_filepath()); @@ -226,8 +231,8 @@ void NemoAsrReader::RunImpl(SampleWorkspace &ws) { } } -Tensor& NemoAsrReader::GetDecodedAudioSample(int sample_idx) { - auto &curr_batch = *prefetched_decoded_audio_[curr_batch_consumer_]; +ConstSampleView NemoAsrReader::GetDecodedAudioSample(int sample_idx) { + const auto &curr_batch = *prefetched_decoded_audio_[curr_batch_consumer_]; return curr_batch[sample_idx]; } diff --git a/dali/operators/reader/nemo_asr_reader_op.h b/dali/operators/reader/nemo_asr_reader_op.h index 3e864261fe0..c8d00546a50 100644 --- a/dali/operators/reader/nemo_asr_reader_op.h +++ b/dali/operators/reader/nemo_asr_reader_op.h @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
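// The reader hunk above replaces audio.Copy(sample_audio) with an explicit resize plus
// memcpy, since the source is now a non-owning ConstSampleView; the byte count is
// elements times element size, as in the code above (sketch):
audio.Resize(sample_audio.shape(), sample_audio.type());
std::memcpy(audio.raw_mutable_data(), sample_audio.raw_data(),
            sample_audio.shape().num_elements() *
                TypeTable::GetTypeInfo(sample_audio.type()).size());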
@@ -38,7 +38,7 @@ class NemoAsrReader : public DataReader { void RunImpl(SampleWorkspace &ws) override; private: - Tensor& GetDecodedAudioSample(int sample_idx); + ConstSampleView GetDecodedAudioSample(int sample_idx); bool read_sr_; bool read_text_; diff --git a/dali/operators/reader/numpy_reader_gpu_op.cc b/dali/operators/reader/numpy_reader_gpu_op.cc index 5e3eb39a78e..c9dbe03e2d9 100644 --- a/dali/operators/reader/numpy_reader_gpu_op.cc +++ b/dali/operators/reader/numpy_reader_gpu_op.cc @@ -113,9 +113,9 @@ void NumpyReaderGPU::Prefetch() { } curr_tensor_list.Resize(tmp_shapes, ref_type); - size_t chunk_size = static_cast( \ - div_ceil(static_cast(curr_tensor_list.nbytes()), - static_cast(thread_pool_.NumThreads()))); + size_t chunk_size = + static_cast(div_ceil(static_cast(curr_tensor_list.total_nbytes()), + static_cast(thread_pool_.NumThreads()))); // read the data for (size_t data_idx = 0; data_idx < curr_tensor_list.num_samples(); ++data_idx) { diff --git a/dali/operators/reader/numpy_reader_op.cc b/dali/operators/reader/numpy_reader_op.cc index 26ba6865241..683c45ed096 100644 --- a/dali/operators/reader/numpy_reader_op.cc +++ b/dali/operators/reader/numpy_reader_op.cc @@ -14,20 +14,22 @@ #include +#include "dali/core/backend_tags.h" #include "dali/kernels/slice/slice_cpu.h" #include "dali/kernels/slice/slice_flip_normalize_permute_pad_cpu.h" #include "dali/kernels/transpose/transpose.h" #include "dali/core/static_switch.h" #include "dali/operators/reader/numpy_reader_op.h" +#include "dali/pipeline/data/backend.h" namespace dali { -static void CopyHelper(Tensor &output, const Tensor &input, +static void CopyHelper(SampleView output, ConstSampleView input, ThreadPool &thread_pool, int min_blk_sz, int req_nblocks) { - auto out_ptr = static_cast(output.raw_mutable_data()); - auto in_ptr = static_cast(input.raw_data()); - auto nelements = volume(input.shape()); - auto nbytes = input.nbytes(); + auto *out_ptr = static_cast(output._raw_mutable_data()); + const auto *in_ptr = static_cast(input._raw_data()); + auto nelements = input.shape().num_elements(); + auto nbytes = nelements * TypeTable::GetTypeInfo(input.type()).size(); if (nelements <= min_blk_sz) { thread_pool.AddWork([=](int tid) { std::memcpy(out_ptr, in_ptr, nbytes); @@ -45,7 +47,7 @@ static void CopyHelper(Tensor &output, const Tensor &inp } } -static void TransposeHelper(Tensor &output, const Tensor &input) { +static void TransposeHelper(SampleView output, ConstSampleView input) { int n_dims = input.shape().sample_dim(); SmallVector perm; perm.resize(n_dims); @@ -56,7 +58,7 @@ static void TransposeHelper(Tensor &output, const Tensor ), DALI_FAIL(make_string("Unsupported input type: ", input.type()))); // NOLINT } -static void SliceHelper(Tensor &output, const Tensor &input, +static void SliceHelper(SampleView output, ConstSampleView input, const CropWindow &roi, float fill_value, ThreadPool &thread_pool, int min_blk_sz, int req_nblocks) { int ndim = input.shape().sample_dim(); @@ -77,7 +79,7 @@ static void SliceHelper(Tensor &output, const Tensor &in ), DALI_FAIL(make_string("Unsupported number of dimensions: ", ndim));); // NOLINT } -static void SlicePermuteHelper(Tensor &output, const Tensor &input, +static void SlicePermuteHelper(SampleView output, ConstSampleView input, const CropWindow &roi, float fill_value, ThreadPool &thread_pool, int min_blk_sz, int req_nblocks) { const auto &in_shape = input.shape(); @@ -245,19 +247,20 @@ void NumpyReaderCPU::RunImpl(HostWorkspace &ws) { const auto& file_i = GetSample(i); const 
auto& file_sh = file_i.get_shape(); int64_t sample_sz = volume(file_i.get_shape()); + auto input_sample = const_sample_view(file_i.data); if (need_slice_[i] && need_transpose_[i]) { - SlicePermuteHelper(output[i], file_i.data, rois_[i], fill_value_, thread_pool, kThreshold, + SlicePermuteHelper(output[i], input_sample, rois_[i], fill_value_, thread_pool, kThreshold, blocks_per_sample); } else if (need_slice_[i]) { - SliceHelper(output[i], file_i.data, rois_[i], fill_value_, thread_pool, kThreshold, + SliceHelper(output[i], input_sample, rois_[i], fill_value_, thread_pool, kThreshold, blocks_per_sample); } else if (need_transpose_[i]) { // TODO(janton): Parallelize when Transpose supports tiling - thread_pool.AddWork([&, i](int tid) { - TransposeHelper(output[i], file_i.data); + thread_pool.AddWork([&, i, input_sample](int tid) { + TransposeHelper(output[i], input_sample); }, sample_sz * 8); // 8 x (heuristic) } else { - CopyHelper(output[i], file_i.data, thread_pool, kThreshold, blocks_per_sample); + CopyHelper(output[i], input_sample, thread_pool, kThreshold, blocks_per_sample); } } thread_pool.RunAll(); diff --git a/dali/operators/reader/video_reader_op.cc b/dali/operators/reader/video_reader_op.cc index 81dd2040526..a652ed84c0c 100644 --- a/dali/operators/reader/video_reader_op.cc +++ b/dali/operators/reader/video_reader_op.cc @@ -47,7 +47,7 @@ void VideoReader::Prefetch() { auto &sample = curr_batch[data_idx]; // TODO(klecki): Rework this with proper sample-based tensor batch data structure auto sample_shared_ptr = unsafe_sample_owner(curr_tensor_list, data_idx); - sample->sequence.ShareData(sample_shared_ptr, curr_tensor_list.capacity(), + sample->sequence.ShareData(sample_shared_ptr, curr_tensor_list.total_capacity(), curr_tensor_list.is_pinned(), curr_tensor_list.shape()[data_idx], curr_tensor_list.type(), curr_tensor_list.order()); sample->sequence.set_device_id(curr_tensor_list.device_id()); diff --git a/dali/operators/reader/webdataset_reader_op.cc b/dali/operators/reader/webdataset_reader_op.cc index 2883fb4a23e..7840aecaae9 100644 --- a/dali/operators/reader/webdataset_reader_op.cc +++ b/dali/operators/reader/webdataset_reader_op.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -52,8 +52,8 @@ void WebdatasetReader::RunImpl(HostWorkspace& ws) { auto& sample = GetSample(data_idx); ThreadPool::Work copy_task = [output_idx = output_idx, data_idx = data_idx, &output, &sample](int) { - output[data_idx].SetMeta(sample[output_idx].GetMeta()); - std::memcpy(output[data_idx].raw_mutable_data(), sample[output_idx].raw_data(), + output.SetMeta(data_idx, sample[output_idx].GetMeta()); + std::memcpy(output.raw_mutable_tensor(data_idx), sample[output_idx].raw_data(), sample[output_idx].nbytes()); }; if (threaded) { diff --git a/dali/operators/segmentation/random_object_bbox.cc b/dali/operators/segmentation/random_object_bbox.cc index c4e7138187c..5864b46d57c 100644 --- a/dali/operators/segmentation/random_object_bbox.cc +++ b/dali/operators/segmentation/random_object_bbox.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
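// The NumpyReader hunk above wraps each loaded Tensor in a ConstSampleView via
// const_sample_view() before dispatching to the slice/transpose/copy helpers, and the
// view is captured by value in thread-pool work. Condensed sketch of that dispatch:
auto input_sample = const_sample_view(file_i.data);   // non-owning view of the loaded tensor
if (need_transpose_[i]) {
  thread_pool.AddWork([&, i, input_sample](int tid) {
    TransposeHelper(output[i], input_sample);
  }, sample_sz * 8);
} else {
  CopyHelper(output[i], input_sample, thread_pool, kThreshold, blocks_per_sample);
}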
// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -521,9 +521,9 @@ bool RandomObjectBBox::PickForegroundBox( template bool RandomObjectBBox::PickForegroundBox(SampleContext &context) { bool ret = false; - TYPE_SWITCH(context.input->type(), type2id, T, INPUT_TYPES, - (ret = PickForegroundBox(context, view(*context.input));), - (DALI_FAIL(make_string("Unsupported input type: ", context.input->type()))) + TYPE_SWITCH(context.input.type(), type2id, T, INPUT_TYPES, + (ret = PickForegroundBox(context, view(context.input));), + (DALI_FAIL(make_string("Unsupported input type: ", context.input.type()))) ); // NOLINT return ret; } @@ -533,7 +533,7 @@ void RandomObjectBBox::AllocateTempStorage(const TensorVector &input int64_t max_filtered_bytes = 0; int N = input.num_samples(); for (int i = 0; i < N; i++) { - int64_t vol = input[i].size(); + int64_t vol = input[i].shape().num_elements(); int label_size = vol > 0x80000000 ? 8 : 4; int64_t blob_bytes = vol * label_size; if (blob_bytes > max_blob_bytes) @@ -592,10 +592,10 @@ void RandomObjectBBox::RunImpl(HostWorkspace &ws) { // We want to limit the size of this auxiliary storage to limit memory traffic. // To that end, when the indices fit in int32_t, we use that type for the labels, // otherwise we fall back to int64_t. - auto blob_label = (input[i].size() > 0x80000000) ? DALI_INT64 : DALI_INT32; + auto blob_label = (input[i].shape().num_elements() > 0x80000000) ? DALI_INT64 : DALI_INT32; TYPE_SWITCH(blob_label, type2id, BlobLabel, (int32_t, int64_t), ( auto &ctx = GetContext(BlobLabel()); - ctx.Init(i, &input[i], &tp, tmp_filtered_storage_, tmp_blob_storage_); + ctx.Init(i, input[i], &tp, tmp_filtered_storage_, tmp_blob_storage_); ctx.out1 = out1[i]; if (out2.num_samples() > 0) ctx.out2 = out2[i]; diff --git a/dali/operators/segmentation/random_object_bbox.h b/dali/operators/segmentation/random_object_bbox.h index e6e4574dcb3..c7c94dbe374 100644 --- a/dali/operators/segmentation/random_object_bbox.h +++ b/dali/operators/segmentation/random_object_bbox.h @@ -1,4 +1,4 @@ -// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -175,12 +175,12 @@ class RandomObjectBBox : public Operator { template struct SampleContext { - void Init(int sample_idx, const Tensor *in, ThreadPool *tp, + void Init(int sample_idx, ConstSampleView in, ThreadPool *tp, Tensor &tmp_filtered, Tensor &tmp_blob) { this->sample_idx = sample_idx; thread_pool = tp; input = in; - auto &shape = input->shape(); + auto &shape = input.shape(); tmp_filtered.Resize(shape, DALI_UINT8); tmp_blob.Resize(shape, TypeTable::GetTypeId()); filtered = view(tmp_filtered); @@ -192,7 +192,7 @@ class RandomObjectBBox : public Operator { ThreadPool *thread_pool = nullptr; TensorView out1, out2; - const Tensor *input = nullptr; + ConstSampleView input = {}; int sample_idx; int class_idx; diff --git a/dali/operators/sequence/sequence_rearrange.cc b/dali/operators/sequence/sequence_rearrange.cc index 70d4a05b654..fffad1a80b7 100644 --- a/dali/operators/sequence/sequence_rearrange.cc +++ b/dali/operators/sequence/sequence_rearrange.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -79,9 +79,9 @@ void SequenceRearrange::RunImpl(workspace_t &ws) { for (int sample_idx = 0; sample_idx < curr_batch_size; ++sample_idx) { thread_pool.AddWork([this, &ws, &input, &output, sample_idx](int tid) { const TypeInfo &type = input.type_info(); - const auto *in_sample = reinterpret_cast(input[sample_idx].raw_data()); - auto *out_sample = reinterpret_cast(output[sample_idx].raw_mutable_data()); - const auto &in_shape = input[sample_idx].shape(); + const auto *in_sample = reinterpret_cast(input.raw_tensor(sample_idx)); + auto *out_sample = reinterpret_cast(output.raw_mutable_tensor(sample_idx)); + const auto &in_shape = input.tensor_shape(sample_idx); auto element_sizeof = volume(in_shape.last(in_shape.sample_dim() - 1)) * type.size(); TensorView new_order = {}; diff --git a/dali/operators/signal/fft/spectrogram.cc b/dali/operators/signal/fft/spectrogram.cc index 9b5be5617c5..0aa841377f3 100644 --- a/dali/operators/signal/fft/spectrogram.cc +++ b/dali/operators/signal/fft/spectrogram.cc @@ -227,8 +227,8 @@ bool SpectrogramImplCpu::SetupImpl(std::vector &out_desc auto view_window_fn = make_tensor_cpu<1>(window_fn_.data(), window_length_); for (int i = 0; i < nsamples; i++) { - auto view_signal_1d = - make_tensor_cpu<1>(input[i].template data(), {input[i].size()}); + auto view_signal_1d = make_tensor_cpu<1>(input.template tensor(i), + {input.tensor_shape(i).num_elements()}); auto &windows_req = kmgr_window_.Setup( @@ -269,8 +269,8 @@ void SpectrogramImplCpu::RunImpl(workspace_t &ws) { win_out.set_type(); win_out.Resize(window_out_desc_[0].shape.tensor_shape(i)); - auto view_signal_1d = - make_tensor_cpu<1>(input[i].data(), {input[i].size()}); + auto view_signal_1d = make_tensor_cpu<1>(input.tensor(i), + {input.tensor_shape(i).num_elements()}); kmgr_window_.Run( i, ctx, view(win_out), diff --git a/dali/operators/util/property.cc b/dali/operators/util/property.cc index 442bb26437b..898be949387 100644 --- a/dali/operators/util/property.cc +++ b/dali/operators/util/property.cc @@ -24,7 +24,7 @@ void SourceInfo::FillOutput(workspace_t& ws) { auto& output = ws.template Output(0); for (size_t sample_id = 0; sample_id < input.num_samples(); sample_id++) { auto si = GetSourceInfo(input, sample_id); - output[sample_id].Copy(make_cspan((const uint8_t*)si.c_str(), si.length())); + std::memcpy(output.mutable_tensor(sample_id), si.c_str(), si.length()); } } @@ -34,8 +34,7 @@ void Layout::FillOutput(workspace_t& ws) { auto& output = ws.template Output(0); for (size_t sample_id = 0; sample_id < input.num_samples(); sample_id++) { auto layout = GetLayout(input, sample_id); - output[sample_id].Copy( - make_cspan(reinterpret_cast(layout.c_str()), layout.size())); + std::memcpy(output.mutable_tensor(sample_id), layout.c_str(), layout.size()); } } diff --git a/dali/operators/util/property.h b/dali/operators/util/property.h index 9e7f53aa55a..272d0eaaed9 100644 --- a/dali/operators/util/property.h +++ b/dali/operators/util/property.h @@ -1,4 +1,4 @@ -// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
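The hunks above (sequence_rearrange, spectrogram, property) all follow the same migration: per-sample access through the batch index operator (input[i].data<T>(), output[i].raw_mutable_data(), input[i].shape()) is replaced by batch-level accessors (input.tensor<T>(i), output.raw_mutable_tensor(i), input.tensor_shape(i)). A minimal sketch of the new access pattern, not part of the patch; it assumes the TensorVector accessors introduced here and uses an illustrative helper name:

#include "dali/pipeline/data/tensor_vector.h"

namespace dali {

// Illustrative only: fill each sample of a float CPU batch with 0, 1, 2, ...
// using the batch-level accessors instead of indexing into per-sample Tensors.
// Assumes `batch` was already Resize()-ed with DALI_FLOAT elements.
inline void FillIota(TensorVector<CPUBackend> &batch) {
  for (int i = 0; i < static_cast<int>(batch.num_samples()); i++) {
    float *out = batch.mutable_tensor<float>(i);        // was: batch[i].mutable_data<float>()
    int64_t n = batch.tensor_shape(i).num_elements();   // was: batch[i].size()
    for (int64_t j = 0; j < n; j++)
      out[j] = static_cast<float>(j);
  }
}

}  // namespace dali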
@@ -26,7 +26,7 @@ namespace tensor_property { namespace detail { inline const DALIMeta& GetMeta(const TensorVector& batch, int tensor_idx) { - return batch[tensor_idx].GetMeta(); + return batch.GetMeta(tensor_idx); } inline const DALIMeta& GetMeta(const TensorList& batch, int tensor_idx) { diff --git a/dali/pipeline/data/dltensor.h b/dali/pipeline/data/dltensor.h index b75cec6e390..1509a02a4cf 100644 --- a/dali/pipeline/data/dltensor.h +++ b/dali/pipeline/data/dltensor.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,10 +16,12 @@ #define DALI_PIPELINE_DATA_DLTENSOR_H_ #include -#include #include +#include #include "third_party/dlpack/include/dlpack/dlpack.h" -#include "dali/pipeline/data/tensor.h" + +#include "dali/pipeline/data/sample_view.h" +#include "dali/pipeline/data/tensor_list.h" namespace dali { @@ -48,11 +50,11 @@ DLL_PUBLIC DLMTensorPtr MakeDLTensor(void *data, DALIDataType type, std::unique_ptr resource); template -DLMTensorPtr GetDLTensorView(Tensor &tensor) { - return MakeDLTensor(tensor.raw_mutable_data(), +DLMTensorPtr GetDLTensorView(SampleView tensor, int device_id) { + return MakeDLTensor(tensor._raw_mutable_data(), tensor.type(), std::is_same::value, - tensor.device_id(), + device_id, std::make_unique(tensor.shape())); } diff --git a/dali/pipeline/data/dltensor_test.cc b/dali/pipeline/data/dltensor_test.cc index ade6ef6cec6..5c6af7c4129 100644 --- a/dali/pipeline/data/dltensor_test.cc +++ b/dali/pipeline/data/dltensor_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
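GetDLTensorView now takes a type-erased SampleView plus an explicit device id instead of a Tensor, since the view no longer carries the device. A minimal call-site sketch, not part of the patch and assuming the headers touched above (it mirrors the updated test that follows); the wrapper name is illustrative:

#include "dali/pipeline/data/dltensor.h"
#include "dali/pipeline/data/sample_view.h"
#include "dali/pipeline/data/tensor.h"

namespace dali {

// Illustrative wrapper: expose a CPU Tensor as a DLPack tensor via SampleView.
inline DLMTensorPtr WrapAsDLPack(Tensor<CPUBackend> &t) {
  // Non-owning view over the tensor's allocation, shape and runtime type.
  SampleView<CPUBackend> sv{t.raw_mutable_data(), t.shape(), t.type()};
  // The device id is now passed explicitly, as it is no longer part of the view.
  return GetDLTensorView(sv, t.device_id());
}

}  // namespace dali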
@@ -14,19 +14,24 @@ #include #include + +#include "dali/pipeline/data/backend.h" #include "dali/pipeline/data/dltensor.h" +#include "dali/pipeline/data/sample_view.h" +#include "dali/pipeline/data/tensor.h" namespace dali { TEST(DLMTensorPtr, CPU) { Tensor tensor; tensor.Resize({100, 50, 3}, DALI_FLOAT); - DLMTensorPtr dlm_tensor = GetDLTensorView(tensor); + SampleView sv{tensor.raw_mutable_data(), tensor.shape(), tensor.type()}; + DLMTensorPtr dlm_tensor = GetDLTensorView(sv, tensor.device_id()); ASSERT_EQ(dlm_tensor->dl_tensor.ndim, 3); ASSERT_EQ(dlm_tensor->dl_tensor.shape[0], 100); ASSERT_EQ(dlm_tensor->dl_tensor.shape[1], 50); ASSERT_EQ(dlm_tensor->dl_tensor.shape[2], 3); - ASSERT_EQ(dlm_tensor->dl_tensor.data, tensor.raw_data()); + ASSERT_EQ(dlm_tensor->dl_tensor.data, sv._raw_data()); ASSERT_EQ(dlm_tensor->dl_tensor.dtype.code, kDLFloat); ASSERT_EQ(dlm_tensor->dl_tensor.dtype.bits, sizeof(float) * 8); ASSERT_EQ(dlm_tensor->dl_tensor.device.device_type, kDLCPU); @@ -36,12 +41,13 @@ TEST(DLMTensorPtr, CPU) { TEST(DLMTensorPtr, GPU) { Tensor tensor; tensor.Resize({100, 50, 1}, DALI_INT32); - DLMTensorPtr dlm_tensor = GetDLTensorView(tensor); + SampleView sv{tensor.raw_mutable_data(), tensor.shape(), tensor.type()}; + DLMTensorPtr dlm_tensor = GetDLTensorView(sv, tensor.device_id()); ASSERT_EQ(dlm_tensor->dl_tensor.ndim, 3); ASSERT_EQ(dlm_tensor->dl_tensor.shape[0], 100); ASSERT_EQ(dlm_tensor->dl_tensor.shape[1], 50); ASSERT_EQ(dlm_tensor->dl_tensor.shape[2], 1); - ASSERT_EQ(dlm_tensor->dl_tensor.data, tensor.raw_data()); + ASSERT_EQ(dlm_tensor->dl_tensor.data, sv._raw_data()); ASSERT_EQ(dlm_tensor->dl_tensor.dtype.code, kDLInt); ASSERT_EQ(dlm_tensor->dl_tensor.dtype.bits, sizeof(int) * 8); ASSERT_EQ(dlm_tensor->dl_tensor.device.device_type, kDLCUDA); diff --git a/dali/pipeline/data/sample_view.h b/dali/pipeline/data/sample_view.h index 057b08a5463..0d404be8c92 100644 --- a/dali/pipeline/data/sample_view.h +++ b/dali/pipeline/data/sample_view.h @@ -33,8 +33,8 @@ namespace dali { * convenient `view(SampleView)` conversion to TensorView, but doesn't break the batch * object encapsulation and doesn't allow to adjust the allocation. */ -template -class SampleView { +template +class SampleViewBase { public: /** * @name Get the underlying pointer to data @@ -43,14 +43,15 @@ class SampleView { /** * @brief Return an un-typed pointer to the underlying storage. */ - void *raw_mutable_data() { + template + std::enable_if_t::value, void *> _raw_mutable_data() { return data_; } /** * @brief Return a const, un-typed pointer to the underlying storage. */ - const void *raw_data() const { + const void *_raw_data() const { return data_; } @@ -58,8 +59,8 @@ class SampleView { * @brief Returns a typed pointer to the underlying storage. * The calling type must match the underlying type of the buffer. */ - template - inline T *mutable_data() { + template + inline std::enable_if_t::value, T *> _mutable_data() { DALI_ENFORCE( type() == TypeTable::GetTypeId(), make_string( @@ -74,14 +75,14 @@ class SampleView { * The calling type must match the underlying type of the buffer. */ template - inline const T *data() const { + inline const T *_data() const { DALI_ENFORCE( type() == TypeTable::GetTypeId(), make_string( "Calling type does not match buffer data type, requested type: ", TypeTable::GetTypeId(), " current buffer type: ", type(), ". 
To set type for the Buffer use 'set_type()' or Resize(shape, type) first.")); - return static_cast(data_); + return static_cast(data_); } //@} @@ -100,16 +101,16 @@ class SampleView { } - SampleView() = default; + SampleViewBase() = default; - SampleView(const SampleView &) = default; - SampleView &operator=(const SampleView &) = default; + SampleViewBase(const SampleViewBase &) = default; + SampleViewBase &operator=(const SampleViewBase &) = default; - SampleView(SampleView &&other) { + SampleViewBase(SampleViewBase &&other) { *this = std::move(other); } - SampleView &operator=(SampleView &&other) { + SampleViewBase &operator=(SampleViewBase &&other) { if (this != &other) { data_ = other.data_; other.data_ = nullptr; @@ -125,24 +126,79 @@ class SampleView { * @brief Construct the view inferring the type_id from the pointer value. */ template - SampleView(T *data, TensorShape<> shape) - : data_(data), shape_(std::move(shape)), type_id_(TypeTable::GetTypeId()) {} + SampleViewBase(T *data, TensorShape<> shape) + : data_(data), + shape_(std::move(shape)), + type_id_(TypeTable::GetTypeId>()) {} /** * @brief Construct the view with explicitly provided type_id. */ - SampleView(void *data, const TensorShape<> shape, DALIDataType type_id) + SampleViewBase(ptr_t data, const TensorShape<> shape, DALIDataType type_id) : data_(data), shape_(std::move(shape)), type_id_(type_id) {} - private: + protected: // TODO(klecki): The view is introduced with no co-owning pointer, it will be evaluated // if the usage of shared_ptr is possbile and adjusted if necessary. // Using shared_ptr might allow for sample exchange between two batches using operator[] - void *data_ = nullptr; + ptr_t data_ = nullptr; TensorShape<> shape_ = {0}; DALIDataType type_id_ = DALI_NO_TYPE; }; + +template +class SampleView : public SampleViewBase { + public: + using Base = SampleViewBase; + using Base::Base; + + private: + using Base::data_; + using Base::shape_; + using Base::type_id_; +}; + + +template +class ConstSampleView : public SampleViewBase { + public: + using Base = SampleViewBase; + using Base::Base; + + explicit ConstSampleView(const SampleView &other) + : Base(other._raw_data(), other.shape(), other.type()) {} + + ConstSampleView &operator=(const SampleView &other) { + data_ = other.data(); + shape_ = other.shape(); + type_id_ = other.type(); + return *this; + } + + explicit ConstSampleView(SampleView &&other) { + *this = std::move(other); + } + + ConstSampleView &operator=(SampleView &&other) { + if (this != &other) { + data_ = other.data_; + other.data_ = nullptr; + shape_ = std::move(other.shape_); + other.shape_ = {0}; + type_id_ = other.type_id_; + other.type_id_ = DALI_NO_TYPE; + } + return *this; + } + + private: + using Base::data_; + using Base::shape_; + using Base::type_id_; +}; + + } // namespace dali #endif // DALI_PIPELINE_DATA_SAMPLE_VIEW_H_ diff --git a/dali/pipeline/data/sample_view_test.cc b/dali/pipeline/data/sample_view_test.cc index 7a674e44718..f4d078dabd3 100644 --- a/dali/pipeline/data/sample_view_test.cc +++ b/dali/pipeline/data/sample_view_test.cc @@ -26,11 +26,10 @@ namespace dali { - template void compare(const SampleView &sv, const void *ptr, const TensorShape<> &shape, DALIDataType dtype) { - EXPECT_EQ(sv.raw_data(), ptr); + EXPECT_EQ(sv._raw_data(), ptr); EXPECT_EQ(sv.shape(), shape); EXPECT_EQ(sv.type(), dtype); } @@ -53,13 +52,45 @@ TEST(SampleView, Constructors) { SampleView from_void_ptr{reinterpret_cast(42), {1, 2, 3}, DALI_FLOAT}; compare(from_void_ptr, reinterpret_cast(42), {1, 2, 
3}, DALI_FLOAT); + + const int32_t cdata{}; + ConstSampleView const_from_ptr{&cdata, {1, 2, 3}}; + compare(const_from_ptr, &cdata, {1, 2, 3}, DALI_INT32); + + ConstSampleView const_from_void_ptr{ + reinterpret_cast(42), {1, 2, 3}, DALI_FLOAT}; + compare(const_from_void_ptr, reinterpret_cast(42), {1, 2, 3}, DALI_FLOAT); + + ConstSampleView const_from_nonconst{from_ptr}; + compare(const_from_nonconst, &data, {1, 2, 3}, DALI_INT32); +} + + +TEST(SampleView, FromTensor) { + Tensor tensor; + tensor.Resize({1, 2, 3}, DALI_INT32); + + auto sv = sample_view(tensor); + auto csv = const_sample_view(tensor); + + compare(sv, tensor.raw_data(), {1, 2, 3}, DALI_INT32); + compare(csv, tensor.raw_data(), {1, 2, 3}, DALI_INT32); + + Tensor scalar_tensor; + scalar_tensor.Resize({}, DALI_FLOAT); + + auto scalar_sv = sample_view(scalar_tensor); + auto scalar_csv = const_sample_view(scalar_tensor); + + compare(scalar_sv, scalar_tensor.raw_data(), {}, DALI_FLOAT); + compare(scalar_csv, scalar_tensor.raw_data(), {}, DALI_FLOAT); } TEST(SampleView, ViewConversion) { int32_t data{}; SampleView sample_view{&data, {1, 2, 3}}; - const SampleView const_sample_view{&data, {1, 2, 3}}; + ConstSampleView const_sample_view{&data, {1, 2, 3}}; compare(view(sample_view), TensorView{&data, {1, 2, 3}}); compare(view(sample_view), TensorView{&data, {1, 2, 3}}); @@ -77,10 +108,11 @@ TEST(SampleView, ViewConversion) { TEST(SampleView, ViewConversionError) { int32_t data{}; SampleView sample_view{&data, {1, 2, 3}}; - const SampleView const_sample_view{&data, {1, 2, 3}}; + ConstSampleView const_sample_view{&data, {1, 2, 3}}; EXPECT_THROW(view(sample_view), std::runtime_error); EXPECT_THROW(view(sample_view), std::runtime_error); EXPECT_THROW(view(const_sample_view), std::runtime_error); } + } // namespace dali diff --git a/dali/pipeline/data/tensor.h b/dali/pipeline/data/tensor.h index aa86bf5dafb..c38c8b51bc8 100644 --- a/dali/pipeline/data/tensor.h +++ b/dali/pipeline/data/tensor.h @@ -422,6 +422,10 @@ class Tensor : public Buffer { return *this; } + DALIMeta &GetMeta() { + return meta_; + } + const DALIMeta &GetMeta() const { return meta_; } diff --git a/dali/pipeline/data/tensor_list.h b/dali/pipeline/data/tensor_list.h index 67e440c3c3f..2f8c1ce7ca2 100644 --- a/dali/pipeline/data/tensor_list.h +++ b/dali/pipeline/data/tensor_list.h @@ -113,8 +113,8 @@ class DLL_PUBLIC TensorList { template DLL_PUBLIC inline void Copy(const TensorVector &other, AccessOrder order = {}, bool use_copy_kernel = false) { - auto type = other[0].type(); - auto layout = other[0].GetLayout(); + auto type = other.type(); + auto layout = other.GetLayout(); int dim = other[0].shape().sample_dim(); TensorListShape<> new_shape(other.num_samples(), dim); @@ -124,7 +124,7 @@ class DLL_PUBLIC TensorList { + std::to_string(i) + " expected Tensor with dim = " + to_string(dim) + " found Tensor with dim = " + to_string(other[i].shape().sample_dim())); assert(type == other[i].type()); - assert(layout == other[i].GetLayout()); + assert(layout == other.GetMeta(i).GetLayout()); new_shape.set_tensor_shape(i, other[i].shape()); } @@ -145,10 +145,9 @@ class DLL_PUBLIC TensorList { sizes.reserve(nsamples); for (size_t i = 0; i < nsamples; i++) { dsts.emplace_back(this->raw_mutable_tensor(i)); - srcs.emplace_back(other[i].raw_data()); - sizes.emplace_back(other[i].size()); - this->meta_[i].SetSourceInfo(other[i].GetSourceInfo()); - this->meta_[i].SetSkipSample(other[i].ShouldSkipSample()); + srcs.emplace_back(other[i]._raw_data()); + 
sizes.emplace_back(other[i].shape().num_elements()); + this->meta_[i] = other.GetMeta(i); } use_copy_kernel &= (std::is_same::value || other.is_pinned()) && @@ -605,6 +604,10 @@ class DLL_PUBLIC TensorList { return meta_[idx].ShouldSkipSample(); } + inline DALIMeta &GetMeta(int idx) { + return meta_[idx]; + } + inline const DALIMeta &GetMeta(int idx) const { return meta_[idx]; } @@ -636,17 +639,31 @@ class DLL_PUBLIC TensorList { /** * @brief Returns the size in bytes of the underlying data */ - size_t nbytes() const { + size_t total_nbytes() const { return data_.nbytes(); } /** * @brief Returns the real size of the allocation */ - size_t capacity() const { + size_t total_capacity() const { return data_.capacity(); } + /** + * @brief Returns the size in bytes of the underlying data + */ + std::vector nbytes() const { + return {data_.nbytes()}; + } + + /** + * @brief Returns the real size of the allocation + */ + std::vector capacity() const { + return {data_.capacity()}; + } + /** * @brief Set the type of the TensorList. The type needs to be set before calling * the Resize function that gives the shape. Type can be changed, if the current storage diff --git a/dali/pipeline/data/tensor_list_test.cc b/dali/pipeline/data/tensor_list_test.cc index e13df466127..699a431b778 100644 --- a/dali/pipeline/data/tensor_list_test.cc +++ b/dali/pipeline/data/tensor_list_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -107,7 +107,7 @@ TYPED_TEST(TensorListTest, TestGetTypeSizeBytes) { tl.template set_type(); ASSERT_EQ(tl._num_elements(), 0); - ASSERT_EQ(tl.nbytes(), 0); + ASSERT_EQ(tl.total_nbytes(), 0); ASSERT_FALSE(tl.has_data()); // Give the tensor list a size. 
This @@ -127,7 +127,7 @@ TYPED_TEST(TensorListTest, TestGetTypeSizeBytes) { ASSERT_TRUE(tl.has_data()); ASSERT_EQ(tl.num_samples(), num_tensor); ASSERT_EQ(tl._num_elements(), size); - ASSERT_EQ(tl.nbytes(), size*sizeof(float)); + ASSERT_EQ(tl.total_nbytes(), size*sizeof(float)); ASSERT_TRUE(IsType(tl.type())); tl.reserve(shape.num_elements() * sizeof(float)); @@ -146,8 +146,8 @@ TYPED_TEST(TensorListTest, TestReserveResize) { ASSERT_THROW(tl.set_pinned(true), std::runtime_error); ASSERT_TRUE(tl.has_data()); - ASSERT_EQ(tl.capacity(), shape.num_elements() * sizeof(float)); - ASSERT_EQ(tl.nbytes(), 0); + ASSERT_EQ(tl.total_capacity(), shape.num_elements() * sizeof(float)); + ASSERT_EQ(tl.total_nbytes(), 0); ASSERT_EQ(tl._num_elements(), 0); ASSERT_NE(unsafe_raw_data(tl), nullptr); @@ -155,7 +155,7 @@ TYPED_TEST(TensorListTest, TestReserveResize) { tl.template set_type(); ASSERT_EQ(tl._num_elements(), 0); - ASSERT_EQ(tl.nbytes(), 0); + ASSERT_EQ(tl.total_nbytes(), 0); ASSERT_TRUE(tl.has_data()); // We already had the allocation, just give it a shape and a type @@ -173,7 +173,7 @@ TYPED_TEST(TensorListTest, TestReserveResize) { ASSERT_TRUE(tl.has_data()); ASSERT_EQ(tl.num_samples(), num_tensor); ASSERT_EQ(tl._num_elements(), size); - ASSERT_EQ(tl.nbytes(), size*sizeof(float)); + ASSERT_EQ(tl.total_nbytes(), size*sizeof(float)); ASSERT_TRUE(IsType(tl.type())); @@ -214,7 +214,7 @@ TYPED_TEST(TensorListTest, TestGetContiguousPointer) { // Verify the internals ASSERT_EQ(tl._num_elements(), volume); ASSERT_EQ(tl.num_samples(), num_tensor); - ASSERT_EQ(tl.nbytes(), volume * sizeof(uint32_t)); + ASSERT_EQ(tl.total_nbytes(), volume * sizeof(uint32_t)); ASSERT_EQ(tl.type(), DALI_UINT32); ASSERT_TRUE(tl.IsContiguous()); ASSERT_NE(unsafe_raw_data(tl), nullptr); @@ -244,7 +244,7 @@ TYPED_TEST(TensorListTest, TestGetBytesThenNoAlloc) { ASSERT_EQ(tl.raw_tensor(i), sharer.raw_tensor(i)); } ASSERT_EQ(tl._num_elements(), size); - ASSERT_EQ(tl.nbytes(), size*sizeof(float)); + ASSERT_EQ(tl.total_nbytes(), size*sizeof(float)); ASSERT_EQ(tl.type(), sharer.type()); ASSERT_EQ(tl.num_samples(), num_tensor); ASSERT_TRUE(tl.shares_data()); @@ -279,7 +279,7 @@ TYPED_TEST(TensorListTest, TestGetBytesThenAlloc) { ASSERT_EQ(tl.raw_tensor(i), sharer.raw_tensor(i)); } ASSERT_EQ(tl._num_elements(), size); - ASSERT_EQ(tl.nbytes(), size*sizeof(float)); + ASSERT_EQ(tl.total_nbytes(), size*sizeof(float)); ASSERT_EQ(tl.type(), sharer.type()); ASSERT_EQ(tl.num_samples(), num_tensor); ASSERT_TRUE(tl.shares_data()); @@ -298,7 +298,7 @@ TYPED_TEST(TensorListTest, TestZeroSizeResize) { tensor_list.Resize(shape); ASSERT_FALSE(tensor_list.has_data()); - ASSERT_EQ(tensor_list.nbytes(), 0); + ASSERT_EQ(tensor_list.total_nbytes(), 0); ASSERT_EQ(tensor_list._num_elements(), 0); ASSERT_FALSE(tensor_list.shares_data()); } @@ -311,7 +311,7 @@ TYPED_TEST(TensorListTest, TestMultipleZeroSizeResize) { tensor_list.Resize(shape, DALI_FLOAT); ASSERT_FALSE(tensor_list.has_data()); - ASSERT_EQ(tensor_list.nbytes(), 0); + ASSERT_EQ(tensor_list.total_nbytes(), 0); ASSERT_EQ(tensor_list.num_samples(), num_tensor); ASSERT_EQ(tensor_list._num_elements(), 0); ASSERT_FALSE(tensor_list.shares_data()); @@ -333,7 +333,7 @@ TYPED_TEST(TensorListTest, TestFakeScalarResize) { tensor_list.Resize(shape); ASSERT_TRUE(tensor_list.has_data()); - ASSERT_EQ(tensor_list.nbytes(), num_scalar*sizeof(float)); + ASSERT_EQ(tensor_list.total_nbytes(), num_scalar*sizeof(float)); ASSERT_EQ(tensor_list._num_elements(), num_scalar); ASSERT_FALSE(tensor_list.shares_data()); @@ -353,7 
+353,7 @@ TYPED_TEST(TensorListTest, TestTrueScalarResize) { tensor_list.Resize(shape); ASSERT_TRUE(tensor_list.has_data()); - ASSERT_EQ(tensor_list.nbytes(), num_scalar*sizeof(float)); + ASSERT_EQ(tensor_list.total_nbytes(), num_scalar*sizeof(float)); ASSERT_EQ(tensor_list._num_elements(), num_scalar); ASSERT_FALSE(tensor_list.shares_data()); @@ -456,7 +456,7 @@ TYPED_TEST(TensorListTest, TestTypeChangeSameSize) { for (size_t i = 0; i < tensor_list.num_samples(); i++) { ptrs.push_back(tensor_list.raw_tensor(i)); } - size_t nbytes = tensor_list.nbytes(); + size_t nbytes = tensor_list.total_nbytes(); // Change the data type tensor_list.template set_type(); @@ -470,7 +470,7 @@ TYPED_TEST(TensorListTest, TestTypeChangeSameSize) { } // No memory allocation should have occurred - ASSERT_EQ(nbytes, tensor_list.nbytes()); + ASSERT_EQ(nbytes, tensor_list.total_nbytes()); } TYPED_TEST(TensorListTest, TestTypeChangeSmaller) { @@ -482,7 +482,7 @@ TYPED_TEST(TensorListTest, TestTypeChangeSmaller) { this->SetupTensorList(&tensor_list, shape, &offsets); - size_t nbytes = tensor_list.nbytes(); + size_t nbytes = tensor_list.total_nbytes(); const auto *base_ptr = unsafe_raw_data(tensor_list); // Change the data type to something smaller @@ -497,7 +497,7 @@ TYPED_TEST(TensorListTest, TestTypeChangeSmaller) { } // nbytes should have reduced by a factor of 4 - ASSERT_EQ(nbytes / sizeof(float) * sizeof(uint8), tensor_list.nbytes()); + ASSERT_EQ(nbytes / sizeof(float) * sizeof(uint8), tensor_list.total_nbytes()); } TYPED_TEST(TensorListTest, TestTypeChangeLarger) { @@ -509,7 +509,7 @@ TYPED_TEST(TensorListTest, TestTypeChangeLarger) { this->SetupTensorList(&tensor_list, shape, &offsets); - size_t nbytes = tensor_list.nbytes(); + size_t nbytes = tensor_list.total_nbytes(); // Change the data type to something larger tensor_list.template set_type(); @@ -522,7 +522,7 @@ TYPED_TEST(TensorListTest, TestTypeChangeLarger) { } // nbytes should have increased by a factor of 2 - ASSERT_EQ(nbytes / sizeof(float) * sizeof(double), tensor_list.nbytes()); + ASSERT_EQ(nbytes / sizeof(float) * sizeof(double), tensor_list.total_nbytes()); } TYPED_TEST(TensorListTest, TestShareData) { @@ -559,7 +559,7 @@ TYPED_TEST(TensorListTest, TestShareData) { // Check the internals ASSERT_TRUE(tensor_list2.shares_data()); - ASSERT_EQ(tensor_list2.nbytes(), tensor_list.nbytes()); + ASSERT_EQ(tensor_list2.total_nbytes(), tensor_list.total_nbytes()); ASSERT_EQ(tensor_list2.num_samples(), tensor_list.num_samples()); ASSERT_EQ(tensor_list2._num_elements(), tensor_list._num_elements()); for (size_t i = 0; i < tensor_list.num_samples(); ++i) { @@ -575,7 +575,7 @@ TYPED_TEST(TensorListTest, TestShareData) { // Check the internals ASSERT_EQ(tensor_list2._num_elements(), 0); - ASSERT_EQ(tensor_list2.nbytes(), 0); + ASSERT_EQ(tensor_list2.total_nbytes(), 0); ASSERT_EQ(tensor_list2.num_samples(), 0); ASSERT_EQ(tensor_list2.shape(), TensorListShape<>()); } diff --git a/dali/pipeline/data/tensor_test.cc b/dali/pipeline/data/tensor_test.cc index 44baaf1b560..537118517d1 100644 --- a/dali/pipeline/data/tensor_test.cc +++ b/dali/pipeline/data/tensor_test.cc @@ -361,7 +361,8 @@ TYPED_TEST(TensorTest, TestShareData) { for (int i = 0; i < num_tensor; ++i) { // TODO(klecki): Rework this with proper sample-based tensor batch data structure auto sample_shared_ptr = unsafe_sample_owner(tl, i); - tensor.ShareData(sample_shared_ptr, tl.capacity(), tl.is_pinned(), tl.shape()[i], tl.type()); + tensor.ShareData(sample_shared_ptr, tl.total_capacity(), tl.is_pinned(), 
tl.shape()[i], + tl.type()); tensor.set_device_id(tl.device_id()); tensor.SetMeta(tl.GetMeta(i)); @@ -379,11 +380,11 @@ TYPED_TEST(TensorTest, TestShareData) { TYPED_TEST(TensorTest, TestCopyToTensorList) { TensorVector tensors(16); - for (auto& t : tensors) { - auto shape = this->GetRandShape(4, 4); - t->template set_type(); - t->Resize(shape); + TensorListShape<4> shape(16); + for (int i = 0; i < 16; i++) { + shape.set_tensor_shape(i, this->GetRandShape(4, 4)); } + tensors.Resize(shape, DALI_FLOAT); TensorList tl; tl.Copy(tensors); @@ -394,8 +395,8 @@ TYPED_TEST(TensorTest, TestCopyToTensorList) { ASSERT_EQ(tensors[i].type(), tl.type()); ASSERT_EQ(tensors[i].shape(), tl.tensor_shape(i)); Index size = volume(tl.tensor_shape(i)); - ASSERT_EQ(tensors[i].size(), size); - ASSERT_EQ(tensors[i].nbytes(), size*sizeof(float)); + ASSERT_EQ(tensors[i].shape().num_elements(), size); + ASSERT_EQ(tensors[i].shape().num_elements() * tensors.type_info().size(), size*sizeof(float)); } } diff --git a/dali/pipeline/data/tensor_vector.cc b/dali/pipeline/data/tensor_vector.cc index 8740a97bfb2..1b78a90529d 100644 --- a/dali/pipeline/data/tensor_vector.cc +++ b/dali/pipeline/data/tensor_vector.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "dali/pipeline/data/tensor_vector.h" +#include "dali/core/common.h" namespace dali { @@ -62,11 +63,112 @@ TensorVector::TensorVector(TensorVector &&other) noexcept { other.tensors_.clear(); } - template -size_t TensorVector::nbytes() const noexcept { +void TensorVector::UnsafeSetSample(int dst, const TensorVector &owner, int src) { + // TODO(klecki): more consistency checks, contiguous -> non-contiguous removes shares_data from + // samples + if (type() == DALI_NO_TYPE && owner.type() != DALI_NO_TYPE) { + set_type(owner.type()); + } + if (!order()) { + set_order(owner.order()); + } + // Bounds check + assert(dst >= 0 && dst < static_cast(curr_tensors_size_)); + assert(src >= 0 && src < static_cast(owner.curr_tensors_size_)); + DALI_ENFORCE(type() == owner.type(), + make_string("Sample must have the same type as a target batch, current: ", type(), + " new: ", owner.type(), " for ", dst, " <- ", src, ".")); + DALI_ENFORCE(tensor_shape(dst) == TensorShape<>{0} || sample_dim() == owner.shape().sample_dim(), + make_string("Sample must have the same dimensionality as a target batch, current: ", + sample_dim(), " new: ", owner.shape().sample_dim(), " for ", dst, " <- ", + src, ".")); + DALI_ENFORCE(this->order() == owner.order(), "Sample must have the same order as a target batch"); + DALI_ENFORCE( + GetLayout() == "" || GetLayout() == owner.GetLayout(), + make_string("Sample must have the same layout as a target batch current: ", GetLayout(), + " new: ", owner.GetLayout(), " for ", dst, " <- ", src, ".")); + + SetContiguous(false); + // Setting a new share overwrites the previous one - so we can safely assume that even if + // we had a sample sharing into TL, it will be overwritten + tensors_[dst]->ShareData(*owner.tensors_[src]); + tl_->Reset(); +} + +template +void TensorVector::UnsafeSetSample(int dst, const Tensor &owner) { + // TODO(klecki): more consistency checks, contiguous -> non-contiguous removes shares_data from + // samples + if (type() == DALI_NO_TYPE && owner.type() != DALI_NO_TYPE) { + set_type(owner.type()); + } + if (!order()) { + set_order(owner.order()); + } + // Bounds check + assert(dst >= 0 && dst < static_cast(curr_tensors_size_)); + DALI_ENFORCE(type() == owner.type(), + make_string("Sample must have the same type as a target batch, 
current: ", type(), + " new: ", owner.type(), " for ", dst, " <-.")); + DALI_ENFORCE( + tensor_shape(dst) == TensorShape<>{0} || sample_dim() == owner.shape().sample_dim(), + make_string("Sample must have the same dimensionality as a target batch, current: ", + sample_dim(), " new: ", owner.shape().sample_dim(), " for ", dst, " <-.")); + DALI_ENFORCE(this->order() == owner.order(), "Sample must have the same order as a target batch"); + DALI_ENFORCE( + GetLayout() == "" || GetLayout() == owner.GetLayout(), + make_string("Sample must have the same layout as a target batch current: ", GetLayout(), + " new: ", owner.GetLayout(), " for ", dst, " <-.")); + SetContiguous(false); + // Setting a new share overwrites the previous one - so we can safely assume that even if + // we had a sample sharing into TL, it will be overwritten + tensors_[dst]->ShareData(owner); + tl_->Reset(); +} + +template +void TensorVector::UnsafeCopySample(int dst, const TensorVector &data, int src, + AccessOrder order) { + // TODO(klecki): more consistency checks, contiguous -> non-contiguous removes shares_data from + // samples + if (type() == DALI_NO_TYPE && data.type() != DALI_NO_TYPE) { + set_type(data.type()); + } + // Bounds check + assert(dst >= 0 && dst < static_cast(curr_tensors_size_)); + assert(src >= 0 && src < static_cast(data.curr_tensors_size_)); + DALI_ENFORCE(type() == data.type(), + make_string("Sample must have the same type as a target batch, current: ", type(), + " new: ", data.type(), " for ", dst, " <- ", src, ".")); + DALI_ENFORCE(tensor_shape(dst) == TensorShape<>{0} || sample_dim() == data.shape().sample_dim(), + make_string("Sample must have the same dimensionality as a target batch, current: ", + sample_dim(), " new: ", data.shape().sample_dim(), " for ", dst, " <- ", + src, ".")); + DALI_ENFORCE( + GetLayout() == "" || GetLayout() == data.GetLayout(), + make_string("Sample must have the same layout as a target batch current: ", GetLayout(), + " new: ", data.GetLayout(), " for ", dst, " <- ", src, ".")); + + // Either the shape matches and we can copy data as is or the target is just an individual sample + bool can_copy = + tensors_[dst]->shape() == data.tensors_[src]->shape() || + (!tl_->has_data() && state_ == State::noncontiguous); + + DALI_ENFORCE( + can_copy, + "Copying samples into TensorVector can happen either for exact shape match or when the " + "TensorVector is truly non contiguous. 
Either Resize first to the desired shape or reset the " + "TensorVector and SetSize for desired number of samples in non-contiguous mode."); + + tensors_[dst]->Copy(*data.tensors_[src], order); +} + + +template +size_t TensorVector::total_nbytes() const noexcept { if (state_ == State::contiguous) { - return tl_->nbytes(); + return tl_->total_nbytes(); } // else size_t total_nbytes = 0; @@ -78,9 +180,9 @@ size_t TensorVector::nbytes() const noexcept { template -size_t TensorVector::capacity() const noexcept { +size_t TensorVector::total_capacity() const noexcept { if (state_ == State::contiguous) { - return tl_->capacity(); + return tl_->total_capacity(); } // else size_t total_capacity = 0; @@ -90,6 +192,33 @@ size_t TensorVector::capacity() const noexcept { return total_capacity; } +template +std::vector TensorVector::nbytes() const noexcept { + if (state_ == State::contiguous) { + return {tl_->nbytes()}; + } + // else + std::vector result(tensors_.size()); + for (size_t i = 0; i < tensors_.size(); i++) { + result[i] = tensors_[i]->nbytes(); + } + return result; +} + + +template +std::vector TensorVector::capacity() const noexcept { + if (state_ == State::contiguous) { + return {tl_->capacity()}; + } + // else + std::vector result(tensors_.size()); + for (size_t i = 0; i < tensors_.size(); i++) { + result[i] = tensors_[i]->capacity(); + } + return result; +} + template TensorListShape<> TensorVector::shape() const { @@ -125,6 +254,7 @@ void TensorVector::set_order(AccessOrder order, bool synchronize) { tl_->set_order(order, false); for (auto &t : tensors_) t->set_order(order, false); + order_ = order; } template @@ -223,6 +353,11 @@ TensorLayout TensorVector::GetLayout() const { return {}; } +template +DALIMeta &TensorVector::GetMeta(int idx) { + assert(static_cast(idx) < curr_tensors_size_); + return tensors_[idx]->GetMeta(); +} template const DALIMeta &TensorVector::GetMeta(int idx) const { @@ -264,6 +399,17 @@ bool TensorVector::is_pinned() const { } +template +int TensorVector::device_id() const { + if (IsContiguous()) { + return tl_->device_id(); + } else if (!tensors_.empty()) { + return tensors_[0]->device_id(); + } + return CPU_ONLY_DEVICE_ID; +} + + template void TensorVector::reserve(size_t total_bytes) { if (state_ == State::noncontiguous) { @@ -432,6 +578,7 @@ void TensorVector::resize_tensors(int new_size) { if (!tensors_[i]) { tensors_[i] = std::make_shared>(); tensors_[i]->set_pinned(is_pinned()); + tensors_[i]->set_order(order()); } } } else if (static_cast(new_size) < curr_tensors_size_) { @@ -440,10 +587,41 @@ void TensorVector::resize_tensors(int new_size) { tensors_[i]->Reset(); } } + // TODO(klecki): Do not keep the invalidated tensors - this prevents memory hogging but + // also gets rid of reserved memory. + // tensors_.resize(new_size); } curr_tensors_size_ = new_size; } +template +void TensorVector::PropagateUp(bool contiguous) { + // TODO(klecki): This is mostly simple consistency check, but most of the metadata will be moved + // to the batch object for consitency and easier use in checks. It should allow for shape() + // to be ready to use as well as easy verification for SetSample/CopySample. + SetContiguous(contiguous); + // assume that the curr_tensors_size_ is valid + DALI_ENFORCE(curr_tensors_size_ > 0, "Unexpected empty output of operator. 
Internal DALI error."); + type_ = tensors_[0]->type_info(); + pinned_ = tensors_[0]->is_pinned(); + order_ = tensors_[0]->order(); + tl_->set_order(order_); + for (size_t i = 0; i < curr_tensors_size_; i++) { + DALI_ENFORCE(type() == tensors_[i]->type(), + make_string("Samples must have the same type, expected: ", type(), + " got: ", tensors_[i]->type(), " at ", i, ".")); + DALI_ENFORCE(sample_dim() == tensors_[i]->shape().sample_dim(), + make_string("Samples must have the same dimensionality, expected: ", sample_dim(), + " got: ", tensors_[i]->shape().sample_dim(), " at ", i, ".")); + DALI_ENFORCE(order() == tensors_[i]->order(), + make_string("Samples must have the same order, expected: ", order().get(), " ", + order().device_id(), " got: ", tensors_[i]->order().get(), " ", + tensors_[i]->order().device_id(), " at ", i, ".")); + DALI_ENFORCE(GetLayout() == tensors_[i]->GetLayout(), + make_string("Samples must have the same layout, expected: ", GetLayout(), + " got: ", tensors_[i]->GetLayout(), " at ", i, ".")); + } +} template void TensorVector::update_view(int idx) { diff --git a/dali/pipeline/data/tensor_vector.h b/dali/pipeline/data/tensor_vector.h index 4adc94de9c5..98bd7230eca 100644 --- a/dali/pipeline/data/tensor_vector.h +++ b/dali/pipeline/data/tensor_vector.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -21,11 +21,13 @@ #include #include +#include "dali/core/access_order.h" +#include "dali/core/tensor_shape.h" #include "dali/pipeline/data/backend.h" +#include "dali/pipeline/data/sample_view.h" #include "dali/pipeline/data/tensor.h" #include "dali/pipeline/data/tensor_list.h" -#include "dali/core/tensor_shape.h" namespace dali { @@ -35,6 +37,13 @@ namespace dali { * * Propagates Buffer calls to every tensor uniformly * + * TODO(klecki): Expected improvements to TensorVector + * 1. Remove superfluous indirection via shared_ptr to samples. + * 2. Keep metadata (shape, sample_dim, layout, order) at batch level like we already do with type + * 3. Detect and convert between contiguous and non-contiguous when possible: + * a. CopySample of bigger size + * b. Resize with coalesce option + * 4. Contiguity check * @tparam Backend */ template @@ -60,17 +69,17 @@ class DLL_PUBLIC TensorVector { DLL_PUBLIC TensorVector(TensorVector &&other) noexcept; AccessOrder order() const { - return tl_->order(); + return order_; } void set_order(AccessOrder order, bool synchronize = true); - Tensor &operator[](size_t pos) { - return *(tensors_[pos]); + SampleView operator[](size_t pos) { + return {tensors_[pos]->raw_mutable_data(), tensors_[pos]->shape(), tensors_[pos]->type()}; } - const Tensor &operator[](size_t pos) const { - return *(tensors_[pos]); + ConstSampleView operator[](size_t pos) const { + return {tensors_[pos]->raw_data(), tensors_[pos]->shape(), tensors_[pos]->type()}; } auto tensor_handle(size_t pos) { @@ -113,9 +122,13 @@ class DLL_PUBLIC TensorVector { return IsContiguous() ? tl_->sample_dim() : num_samples() ? 
tensors_[0]->shape().size() : 0; } - size_t nbytes() const noexcept; + size_t total_nbytes() const noexcept; + + size_t total_capacity() const noexcept; - size_t capacity() const noexcept; + std::vector nbytes() const noexcept; + + std::vector capacity() const noexcept; TensorListShape<> shape() const; @@ -123,14 +136,83 @@ class DLL_PUBLIC TensorVector { return tensors_[idx]->shape(); } - const void *raw_tensor(int idx) const { - return tensors_[idx]->raw_data(); + /** + * @brief Returns a typed pointer to the tensor with the given index. + */ + template + DLL_PUBLIC inline T* mutable_tensor(int idx) { + return tensors_[idx]->template mutable_data(); } - void* raw_mutable_tensor(int idx) { + /** + * @brief Returns a const typed pointer to the tensor with the given index. + */ + template + DLL_PUBLIC inline const T* tensor(int idx) const { + return tensors_[idx]->template data(); + } + + /** + * @brief Returns a raw pointer to the tensor with the given index. + */ + DLL_PUBLIC inline void* raw_mutable_tensor(int idx) { return tensors_[idx]->raw_mutable_data(); } + /** + * @brief Returns a const raw pointer to the tensor with the given index. + */ + DLL_PUBLIC inline const void* raw_tensor(int idx) const { + return tensors_[idx]->raw_data(); + } + + /** + * @brief Analogue of TensorVector[dst].ShareData(owner[src]); + * + * The target TensorVector (this) must have enough samples for this to work (see SetSize()). + * After this operation the TensorVector is converted into non-contiguous. + * + * Warning: If the TensorVector was contiguous, the samples that weren't overwritten by this + * function would still report that they are sharing data. It is assumed that all samples are + * replaced this way - TODO(klecki): this might be adjusted in follow-up. + * + * @param dst index of sample to be set + * @param owner owner of source sample + * @param src index of source sample in owner. + */ + DLL_PUBLIC void UnsafeSetSample(int dst, const TensorVector &owner, int src); + + /** + * @brief Analogue of TensorVector[dst].ShareData(owner); + * + * The target TensorVector (this) must have enough samples for this to work (see SetSize()). + * After this operation the TensorVector is converted into non-contiguous. + * + * Warning: If the TensorVector was contiguous, the samples that weren't overwritten by this + * function would still report that they are sharing data. It is assumed that all samples are + * replaced this way - TODO(klecki): this might be adjusted in follow-up. + * + * @param dst index of sample to be set + * @param owner sample owner + */ + DLL_PUBLIC void UnsafeSetSample(int dst, const Tensor &owner); + + /** + * @brief Analogue of TensorVector[dst].Copy(data[src]); + * + * The target TensorVector (this) must have enough samples for this to work (see SetSize()). + * It must either be already non-contiguous or the shapes of copied samples must match exactly. + * + * Warning: It is assumed that the TensorVector is either first resized to desired shape, + * or all samples are copied over. Automatically converting to non-contiguous container from + * contiguous one by invoking copy of non-matching size is not supported yet. 
+ * + * @param dst index of sample to be set + * @param owner sample owner + */ + DLL_PUBLIC void UnsafeCopySample(int dst, const TensorVector &data, int src, + AccessOrder order = {}); + DLL_PUBLIC void Resize(const TensorListShape<> &new_shape) { DALI_ENFORCE(IsValidType(type()), "TensorVector has no type, 'set_type()' or Resize(shape, type) must be called " @@ -163,6 +245,7 @@ class DLL_PUBLIC TensorVector { TensorLayout GetLayout() const; + DALIMeta &GetMeta(int idx); const DALIMeta &GetMeta(int idx) const; void SetMeta(int idx, const DALIMeta &meta); @@ -171,6 +254,8 @@ class DLL_PUBLIC TensorVector { bool is_pinned() const; + int device_id() const; + /** * @brief Reserve as contiguous tensor list internally */ @@ -213,6 +298,21 @@ class DLL_PUBLIC TensorVector { private: enum class State { contiguous, noncontiguous }; + // Forward declarations in signature, beware + friend void MakeSampleView(class SampleWorkspace &sample, class HostWorkspace &batch, + int data_idx, int thread_idx); + friend void EnforceCorrectness(class HostWorkspace &ws, bool contiguous); + + /** + * @brief After RunImpl(SampleWorkspace&) operated on individual samples without propagating + * the allocation metadata back to the the batch structure, take that metadata from the samples + * and update it in TensorVector. + * + * @param contiguous if the Tensor was previously preallocated and should remain contiguous + * or be treated as non-contiguous set of individual samples. + */ + void PropagateUp(bool contiguous); + struct ViewRefDeleter { void operator()(void*) { --*ref; } std::atomic *ref; @@ -230,6 +330,7 @@ class DLL_PUBLIC TensorVector { // pinned status and type info should be uniform bool pinned_ = true; TypeInfo type_{}; + AccessOrder order_; // So we can access the members of other TensorVectors // with different template types diff --git a/dali/pipeline/data/tensor_vector_test.cc b/dali/pipeline/data/tensor_vector_test.cc index 0fd3a88078b..c1950579283 100644 --- a/dali/pipeline/data/tensor_vector_test.cc +++ b/dali/pipeline/data/tensor_vector_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -52,6 +52,8 @@ TYPED_TEST_SUITE(TensorVectorSuite, Backends); // behaves as it is supposed to - that is set_type always first, set_type before Reshape, // reserve can be without type. 
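The UnsafeSetSample/UnsafeCopySample documentation above replaces the former per-sample idiom dst[i].ShareData(...) / dst[i].Copy(...). A minimal sketch of both calls, not part of the patch; it assumes a destination batch that already has enough samples and a matching type and dimensionality, as the checks in tensor_vector.cc require:

#include "dali/core/access_order.h"
#include "dali/pipeline/data/tensor_vector.h"

namespace dali {

// Illustrative only: share one sample and copy another between CPU batches.
inline void ShareAndCopySamples(TensorVector<CPUBackend> &dst,
                                const TensorVector<CPUBackend> &src) {
  // Analogue of the old dst[0].ShareData(src[0]); converts dst to the
  // non-contiguous state and overwrites any previous sharing of sample 0.
  dst.UnsafeSetSample(0, src, 0);

  // Analogue of the old dst[1].Copy(src[1], AccessOrder::host()); requires an
  // exact shape match or a truly non-contiguous destination.
  dst.UnsafeCopySample(1, src, 1, AccessOrder::host());
}

}  // namespace dali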
+// TODO(klecki): reverse pinned and capacity tests + TYPED_TEST(TensorVectorSuite, PinnedAfterReserveThrows) { TensorVector tv_0, tv_1; tv_0.reserve(100); @@ -74,10 +76,8 @@ TYPED_TEST(TensorVectorSuite, PinnedAfterResizeThrows) { EXPECT_EQ(tv.shape(), TensorListShape<>({{2, 4}, {4, 2}})); EXPECT_EQ(tv[0].shape(), TensorShape<>(2, 4)); EXPECT_EQ(tv[1].shape(), TensorShape<>(4, 2)); - EXPECT_EQ(tv[0].nbytes(), 4 * 2 * sizeof(int32_t)); - EXPECT_EQ(tv[1].nbytes(), 4 * 2 * sizeof(int32_t)); - EXPECT_EQ(tv[0].capacity(), 4 * 2 * sizeof(int32_t)); - EXPECT_EQ(tv[1].capacity(), 4 * 2 * sizeof(int32_t)); + EXPECT_EQ(tv[0].type(), DALI_INT32); + EXPECT_EQ(tv[1].type(), DALI_INT32); ASSERT_THROW(tv.set_pinned(false), std::runtime_error); } @@ -92,10 +92,9 @@ TYPED_TEST(TensorVectorSuite, PinnedBeforeResizeContiguous) { EXPECT_EQ(tv.shape(), TensorListShape<>({{2, 4}, {4, 2}})); EXPECT_EQ(tv[0].shape(), TensorShape<>(2, 4)); EXPECT_EQ(tv[1].shape(), TensorShape<>(4, 2)); + EXPECT_EQ(tv.is_pinned(), false); for (auto &t : tv) { - EXPECT_EQ(t->nbytes(), 4 * 2 * sizeof(int32_t)); - EXPECT_EQ(t->capacity(), 4 * 2 * sizeof(int32_t)); - EXPECT_EQ(t->is_pinned(), false); + EXPECT_EQ(t->type(), DALI_INT32); } } @@ -109,10 +108,9 @@ TYPED_TEST(TensorVectorSuite, PinnedBeforeResizeNoncontiguous) { EXPECT_EQ(tv.shape(), TensorListShape<>({{2, 4}, {4, 2}})); EXPECT_EQ(tv[0].shape(), TensorShape<>(2, 4)); EXPECT_EQ(tv[1].shape(), TensorShape<>(4, 2)); + EXPECT_EQ(tv.is_pinned(), false); for (auto &t : tv) { - EXPECT_EQ(t->nbytes(), 4 * 2 * sizeof(int32_t)); - EXPECT_EQ(t->capacity(), 50); - EXPECT_EQ(t->is_pinned(), false); + EXPECT_EQ(t->type(), DALI_INT32); } } @@ -123,9 +121,9 @@ TYPED_TEST(TensorVectorSuite, BatchResize) { tv.reserve(200); tv.template set_type(); tv.Resize(uniform_list_shape(5, {10, 20})); - for (auto &t : tv) { - EXPECT_TRUE(t->shares_data()); - } + // for (auto &t : tv) { + // EXPECT_TRUE(t->shares_data()); + // } } TYPED_TEST(TensorVectorSuite, VariableBatchResizeDown) { @@ -150,7 +148,7 @@ TYPED_TEST(TensorVectorSuite, EmptyShareContiguous) { TensorListShape<> shape = {{100, 0, 0}, {42, 0, 0}}; tv.Resize(shape, DALI_UINT8); for (int i = 0; i < shape.num_samples(); i++) { - ASSERT_EQ(tv[i].raw_data(), nullptr); + ASSERT_EQ(tv.raw_tensor(i), nullptr); } TensorVector target; @@ -160,8 +158,8 @@ TYPED_TEST(TensorVectorSuite, EmptyShareContiguous) { ASSERT_EQ(target.shape(), shape); ASSERT_TRUE(target.IsContiguous()); for (int i = 0; i < shape.num_samples(); i++) { - ASSERT_EQ(target[i].raw_data(), nullptr); - ASSERT_EQ(target[i].raw_data(), tv[i].raw_data()); + ASSERT_EQ(target.raw_tensor(i), nullptr); + ASSERT_EQ(target.raw_tensor(i), tv.raw_tensor(i)); } } @@ -171,7 +169,7 @@ TYPED_TEST(TensorVectorSuite, EmptyShareNonContiguous) { TensorListShape<> shape = {{100, 0, 0}, {42, 0, 0}}; tv.Resize(shape, DALI_UINT8); for (int i = 0; i < shape.num_samples(); i++) { - ASSERT_EQ(tv[i].raw_data(), nullptr); + ASSERT_EQ(tv.raw_tensor(i), nullptr); } TensorVector target; @@ -181,8 +179,8 @@ TYPED_TEST(TensorVectorSuite, EmptyShareNonContiguous) { ASSERT_EQ(target.shape(), shape); ASSERT_FALSE(target.IsContiguous()); for (int i = 0; i < shape.num_samples(); i++) { - ASSERT_EQ(target[i].raw_data(), nullptr); - ASSERT_EQ(target[i].raw_data(), tv[i].raw_data()); + ASSERT_EQ(target.raw_tensor(i), nullptr); + ASSERT_EQ(target.raw_tensor(i), tv.raw_tensor(i)); } } diff --git a/dali/pipeline/data/view_test.cc b/dali/pipeline/data/view_test.cc index 1fbebc99290..d863d1a1a32 100644 --- 
a/dali/pipeline/data/view_test.cc +++ b/dali/pipeline/data/view_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -66,8 +66,12 @@ TEST(TensorVector, View) { TensorVector tvec(10); tvec.set_type(); std::mt19937_64 rng; + TensorListShape<3> shape(10); + for (int i = 0; i < 10; i++) { + shape.set_tensor_shape(i, TensorShape<3>(100+i, 40+i, 3+i)); + } + tvec.Resize(shape); for (int i = 0; i < 10; i++) { - tvec[i].Resize(TensorShape<3>(100+i, 40+i, 3+i)); UniformRandomFill(view(tvec[i]), rng, 0, 10000); } @@ -79,8 +83,8 @@ TEST(TensorVector, View) { EXPECT_EQ(tv_shape, tlv.shape); EXPECT_EQ(tlv2.shape, tlv.shape); for (int i = 0; i < 10; i++) { - EXPECT_EQ(tlv[i].data, tvec[i].data()); - EXPECT_EQ(tlv2[i].data, tvec[i].data()); + EXPECT_EQ(tlv[i].data, tvec.tensor(i)); + EXPECT_EQ(tlv2[i].data, tvec.tensor(i)); Check(tlv[i], view(tvec[i])); } } @@ -89,11 +93,14 @@ TEST(TensorVector, ReinterpretView) { TensorVector tvec(10); tvec.set_type(); std::mt19937_64 rng; + TensorListShape<3> shape(10); + for (int i = 0; i < 10; i++) { + shape.set_tensor_shape(i, TensorShape<3>(100+i, 40+i, 3+i)); + } + tvec.Resize(shape); for (int i = 0; i < 10; i++) { - tvec[i].Resize(TensorShape<3>(100+i, 40+i, 3+i)); UniformRandomFill(view(tvec[i]), rng, 0, 10000); } - auto tlv = view(tvec); auto tlv_i16 = reinterpret_view(tvec); const auto& ctvec = tvec; diff --git a/dali/pipeline/data/views.h b/dali/pipeline/data/views.h index 390b4bd509c..1da0e747552 100644 --- a/dali/pipeline/data/views.h +++ b/dali/pipeline/data/views.h @@ -82,8 +82,6 @@ TensorShape get_tensor_shape(const TensorList &tl) { template TensorView, T, ndim> view(Tensor &data) { - if (data.shape().empty()) - return {}; using U = std::remove_const_t; detail::enforce_dim_in_view(data.shape()); return { data.template mutable_data(), convert_dim(data.shape()) }; @@ -96,8 +94,6 @@ view(const Tensor &data) { static_assert(std::is_const::value, "Cannot create a non-const view of a `const Tensor<>`. " "Missing `const` in T?"); - if (data.shape().empty()) - return {}; using U = std::remove_const_t; detail::enforce_dim_in_view(data.shape()); return { data.template data(), convert_dim(data.shape()) }; @@ -107,23 +103,21 @@ view(const Tensor &data) { /** * @name Convert from SampleView carrying runtime type information to statically typed TensorView. */ -// @{ template -TensorView, T, ndim> view(SampleView &data) { +TensorView, T, ndim> view(SampleView data) { using U = std::remove_const_t; detail::enforce_dim_in_view(data.shape()); - return {data.template mutable_data(), data.shape()}; + return {data.template _mutable_data(), data.shape()}; } - template -TensorView, T, ndim> view(const SampleView &data) { +TensorView, T, ndim> view(ConstSampleView data) { static_assert(std::is_const::value, "Cannot create a non-const view of a `const Tensor<>`. 
" "Missing `const` in T?"); using U = std::remove_const_t; detail::enforce_dim_in_view(data.shape()); - return {data.template data(), data.shape()}; + return {data.template _data(), data.shape()}; } // @} @@ -176,7 +170,7 @@ view(TensorVector &data) { std::vector ptrs(shape.num_samples()); for (int i = 0; i < shape.num_samples(); i++) { - ptrs[i] = data[i].template mutable_data(); + ptrs[i] = data.template mutable_tensor(i); } return { std::move(ptrs), convert_dim(shape) }; } @@ -196,7 +190,7 @@ view(const TensorVector &data) { std::vector ptrs(shape.num_samples()); for (int i = 0; i < shape.num_samples(); i++) { - ptrs[i] = data[i].template data(); + ptrs[i] = data.template tensor(i); } return { std::move(ptrs), convert_dim(shape) }; } @@ -214,7 +208,7 @@ reinterpret_view(TensorVector &data) { assert(data.type_info().size() >= sizeof(T)); assert(data.type_info().size() % sizeof(T) == 0); for (int i = 0; i < ret.shape.num_samples(); i++) { - ret.data[i] = static_cast(data[i].raw_mutable_data()); + ret.data[i] = static_cast(data.raw_mutable_tensor(i)); } // If reinterpreting to a smaller type, adjust the inner extent if (data.type_info().size() > sizeof(T)) { @@ -243,7 +237,7 @@ reinterpret_view(const TensorVector &data) { assert(data.type_info().size() >= sizeof(T)); assert(data.type_info().size() % sizeof(T) == 0); for (int i = 0; i < ret.shape.num_samples(); i++) { - ret.data[i] = static_cast(data[i].raw_data()); + ret.data[i] = static_cast(data.raw_tensor(i)); } // If reinterpreting to a smaller type, adjust the inner extent if (data.type_info().size() > sizeof(T)) { @@ -256,6 +250,20 @@ reinterpret_view(const TensorVector &data) { return ret; } + +template +SampleView +sample_view(Tensor &data) { + return { data.raw_mutable_data(), data.shape(), data.type() }; +} + + +template +ConstSampleView +const_sample_view(const Tensor &data) { + return { data.raw_data(), data.shape(), data.type() }; +} + } // namespace dali #endif // DALI_PIPELINE_DATA_VIEWS_H_ diff --git a/dali/pipeline/executor/executor.h b/dali/pipeline/executor/executor.h index d0eedb81806..3593572c33e 100644 --- a/dali/pipeline/executor/executor.h +++ b/dali/pipeline/executor/executor.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -147,31 +147,37 @@ class DLL_PUBLIC Executor : public ExecutorBase, public QueuePolicy { DLL_PUBLIC void RunGPUImpl(); DLL_PUBLIC void SyncDevice(); - template + template inline void GetMaxSizesCont(T &in, size_t &max_out_size, size_t &max_reserved_size) { - auto out_size = in.nbytes(); - auto reserved_size = in.capacity(); + auto out_size = in.total_nbytes(); + auto reserved_size = in.total_capacity(); max_out_size = std::max(std::ceil((out_size * 1.0) / in.num_samples()), max_out_size); max_reserved_size = std::max(std::ceil((reserved_size * 1.0) / in.num_samples()), max_reserved_size); } - template + template inline void GetMaxSizesNonCont(T &in, size_t &max_out_size, size_t &max_reserved_size) { - for (size_t j = 0; j < in.num_samples(); ++j) { - max_out_size = std::max(in[j].nbytes(), max_out_size); - max_reserved_size = std::max(in[j].capacity(), max_reserved_size); + const auto &nbytes = in.nbytes(); + const auto &capacity = in.capacity(); + max_out_size = 0; + max_reserved_size = 0; + for (auto &elem : nbytes) { + max_out_size = std::max(max_out_size, elem); + } + for (auto &elem : capacity) { + max_reserved_size = std::max(max_reserved_size, elem); } } - template - inline void GetMaxSizes(TensorList &in, size_t &max_out_size, + template + inline void GetMaxSizes(TensorList &in, size_t &max_out_size, size_t &max_reserved_size) { GetMaxSizesCont(in, max_out_size, max_reserved_size); } - template - inline void GetMaxSizes(TensorVector &in, size_t &max_out_size, + template + inline void GetMaxSizes(TensorVector &in, size_t &max_out_size, size_t &max_reserved_size) { if (in.IsContiguous()) { GetMaxSizesCont(in, max_out_size, max_reserved_size); @@ -199,13 +205,13 @@ class DLL_PUBLIC Executor : public ExecutorBase, public QueuePolicy { max_reserved_size = 0; if (ws.template OutputIsType(i)) { auto &out = ws.template Output(i); - out_size = out.nbytes(); - reserved_size = out.capacity(); + out_size = out.total_nbytes(); + reserved_size = out.total_capacity(); GetMaxSizes(out, max_out_size, max_reserved_size); } else { auto &out = ws.template Output(i); - out_size = out.nbytes(); - reserved_size = out.capacity(); + out_size = out.total_nbytes(); + reserved_size = out.total_capacity(); GetMaxSizes(out, max_out_size, max_reserved_size); } stats[i].real_size = std::max(out_size, stats[i].real_size); diff --git a/dali/pipeline/operator/arg_helper_test.cc b/dali/pipeline/operator/arg_helper_test.cc index c4df97deaa1..5ca69cacead 100644 --- a/dali/pipeline/operator/arg_helper_test.cc +++ b/dali/pipeline/operator/arg_helper_test.cc @@ -38,7 +38,7 @@ void SetupData(TensorVector &tv, tv.set_pinned(false); tv.Resize(sh, DALI_FLOAT); for (size_t i = 0; i < tv.num_samples(); i++) { - float *data = tv[i].mutable_data(); + float *data = tv.mutable_tensor(i); for (int j = 0; j < volume(sh[i]); j++) { data[j] = 100 * i + j; } @@ -63,7 +63,7 @@ void ArgValueTestTensorInput(TensorListShape ts, AcquireArgs... 
args) { auto sh = ts[i]; ASSERT_EQ(sh, arg[i].shape); for (int j = 0; j < volume(sh); j++) { - float *ptr = (*arg_data)[i].mutable_data(); + float *ptr = arg_data->mutable_tensor(i); ASSERT_EQ(ptr[j], arg[i].data[j]); } } diff --git a/dali/pipeline/operator/builtin/external_source.cc b/dali/pipeline/operator/builtin/external_source.cc index a35265729cd..4f47ae6dd88 100644 --- a/dali/pipeline/operator/builtin/external_source.cc +++ b/dali/pipeline/operator/builtin/external_source.cc @@ -33,11 +33,12 @@ void ExternalSource::RunImpl(HostWorkspace &ws) { auto curr_batch_size = shapes.num_samples(); output.Resize(shapes, tensor_vector_elm.front()->type()); + for (int sample_id = 0; sample_id < curr_batch_size; ++sample_id) { thread_pool.AddWork( - [&ws, sample_id, &tensor_vector_elm](int tid) { - Tensor &output_tensor = ws.Output(0)[sample_id]; - output_tensor.Copy((*tensor_vector_elm.front())[sample_id], AccessOrder::host()); + [&output, sample_id, &tensor_vector_elm](int tid) { + output.UnsafeCopySample(sample_id, *tensor_vector_elm.front(), sample_id, + AccessOrder::host()); }, shapes.tensor_size(sample_id)); } diff --git a/dali/pipeline/operator/builtin/external_source.h b/dali/pipeline/operator/builtin/external_source.h index c0cff6a2400..59db76d61df 100644 --- a/dali/pipeline/operator/builtin/external_source.h +++ b/dali/pipeline/operator/builtin/external_source.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -255,7 +255,7 @@ class ExternalSource : public Operator, virtual public BatchSizeProvide DomainTimeRange tr("[DALI][ExternalSource] SetDataSource", DomainTimeRange::kViolet); TensorVector tv(vect_of_tensors.size()); for (size_t i = 0; i < tv.num_samples(); ++i) { - tv[i].ShareData(const_cast &>(vect_of_tensors[i])); + tv.UnsafeSetSample(i, const_cast &>(vect_of_tensors[i])); } SetDataSourceHelper(tv, order, ext_src_setting_mode); } diff --git a/dali/pipeline/operator/builtin/external_source_test.cc b/dali/pipeline/operator/builtin/external_source_test.cc index f8d32a444df..c3ab4d9d6ff 100644 --- a/dali/pipeline/operator/builtin/external_source_test.cc +++ b/dali/pipeline/operator/builtin/external_source_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -202,7 +202,7 @@ class ExternalSourceTest : public::testing::WithParamInterface, CUDA_CALL(cudaStreamSynchronize(ws.has_stream() ? 
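The external-source hunks above replace per-tensor Copy/ShareData with the batch-level UnsafeCopySample and UnsafeSetSample calls. A small sketch of the zero-copy path, using UnsafeSetSample as it appears in this patch; the helper itself is hypothetical.

#include <vector>
#include "dali/pipeline/data/tensor_vector.h"

namespace dali {

// Wraps already-allocated CPU tensors as one batch without copying their data.
TensorVector<CPUBackend> WrapAsBatch(std::vector<Tensor<CPUBackend>> &samples) {
  TensorVector<CPUBackend> batch(samples.size());
  for (size_t i = 0; i < samples.size(); ++i)
    batch.UnsafeSetSample(i, samples[i]);  // shares each sample's allocation, no copy
  return batch;
}

}  // namespace dali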
ws.stream() : 0)); for (int j = 0; j < this->batch_size_; ++j) { - auto data = tensor_cpu_list.template mutable_tensor(j); + const auto *data = tensor_cpu_list.template tensor(j); for (int i = 0; i < volume(tensor_cpu_list.tensor_shape(j)); ++i) { if (data[i] != check_counter_) { return false; diff --git a/dali/pipeline/operator/builtin/make_contiguous.cc b/dali/pipeline/operator/builtin/make_contiguous.cc index 2e05a687ec3..ac9d802931f 100644 --- a/dali/pipeline/operator/builtin/make_contiguous.cc +++ b/dali/pipeline/operator/builtin/make_contiguous.cc @@ -26,7 +26,7 @@ void MakeContiguousCPU::RunImpl(HostWorkspace &ws) { auto &thread_pool = ws.GetThreadPool(); for (int sample_id = 0; sample_id < batch_size; ++sample_id) { thread_pool.AddWork([sample_id, &input, &output] (int tid) { - output[sample_id].Copy(input[sample_id], AccessOrder::host()); + output.UnsafeCopySample(sample_id, input, sample_id, AccessOrder::host()); }, shapes.tensor_size(sample_id)); } thread_pool.RunAll(); diff --git a/dali/pipeline/operator/builtin/make_contiguous.cu b/dali/pipeline/operator/builtin/make_contiguous.cu index f2767eddc02..01735a5b62e 100644 --- a/dali/pipeline/operator/builtin/make_contiguous.cu +++ b/dali/pipeline/operator/builtin/make_contiguous.cu @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -22,10 +22,11 @@ void MakeContiguousMixed::Run(MixedWorkspace &ws) { int sample_dim = input[0].shape().sample_dim(); size_t batch_size = input.num_samples(); DALIDataType type = input.type(); + size_t type_size = input.type_info().size(); for (size_t i = 0; i < input.num_samples(); ++i) { - auto &sample = ws.Input(0)[i]; - size_t sample_bytes = sample.nbytes(); + auto sample = ws.Input(0)[i]; + size_t sample_bytes = sample.shape().num_elements() * type_size; if (coalesced && sample_bytes > COALESCE_THRESHOLD) coalesced = false; DALI_ENFORCE(type == sample.type(), "Inconsistent types in " diff --git a/dali/pipeline/operator/common.h b/dali/pipeline/operator/common.h index 6aacdd7f87d..de7fb63ce50 100644 --- a/dali/pipeline/operator/common.h +++ b/dali/pipeline/operator/common.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -60,7 +60,7 @@ void GetPerSampleArgument(std::vector &output, const std::string &argument_na batch_size, ") tensor list. 
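GetPerSampleArgument/GetGeneralizedArg above now read argument inputs through tensor<T>(sample_idx) and tensor_shape(sample_idx) instead of indexing individual tensors. A sketch of gathering a scalar-per-sample float argument in the same style (the helper and the float type are illustrative assumptions).

#include <vector>
#include "dali/pipeline/data/tensor_vector.h"

namespace dali {

// Collects one value per sample; each sample is expected to hold a single element.
std::vector<float> ReadScalarArg(const TensorVector<CPUBackend> &arg) {
  int n = arg.num_samples();
  std::vector<float> out(n);
  for (int i = 0; i < n; ++i)
    out[i] = arg.tensor<float>(i)[0];
  return out;
}

}  // namespace dali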
Got: ", shape)); output.resize(batch_size); - auto *data = arg[0].template data(); + auto *data = arg.template tensor(0); for (int i = 0; i < batch_size; i++) { output[i] = data[i]; @@ -75,7 +75,7 @@ void GetPerSampleArgument(std::vector &output, const std::string &argument_na output.resize(batch_size); for (int i = 0; i < batch_size; i++) { - output[i] = arg[i].template data()[0]; + output[i] = arg.template tensor(i)[0]; } } } else { @@ -103,8 +103,7 @@ void GetGeneralizedArg(span result, const std::string &name, int sample_idx, int argument_length = result.size(); if (spec.HasTensorArgument(name)) { const auto& tv = ws.ArgumentInput(name); - const auto& tensor = tv[sample_idx]; - const auto& shape = tensor.shape(); + const auto& shape = tv.tensor_shape(sample_idx); auto vol = volume(shape); if (shape.size() != 0) { DALI_ENFORCE(shape.size() == 1, @@ -118,10 +117,10 @@ void GetGeneralizedArg(span result, const std::string &name, int sample_idx, } if (vol == 1) { for (int i = 0; i < argument_length; i++) { - result[i] = tensor.data()[0]; + result[i] = tv.tensor(sample_idx)[0]; } } else { - memcpy(result.data(), tensor.data(), sizeof(T) * argument_length); + memcpy(result.data(), tv.tensor(sample_idx), sizeof(T) * argument_length); } return; } diff --git a/dali/pipeline/operator/op_spec.h b/dali/pipeline/operator/op_spec.h index a2ac5185787..64fa2ec68ff 100644 --- a/dali/pipeline/operator/op_spec.h +++ b/dali/pipeline/operator/op_spec.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -494,7 +494,7 @@ inline T OpSpec::GetArgumentImpl( DALI_ENFORCE(IsType(value.type()), make_string( "Unexpected type of argument \"", name, "\". Expected ", TypeTable::GetTypeName(), " and got ", value.type())); - return static_cast(value[idx].data()[0]); + return static_cast(value.tensor(idx)[0]); } // Search for the argument locally auto arg_it = arguments_.find(name); @@ -524,7 +524,7 @@ inline bool OpSpec::TryGetArgumentImpl( } if (!IsType(value.type())) return false; - result = value[idx].data()[0]; + result = value.tensor(idx)[0]; return true; } // Search for the argument locally diff --git a/dali/pipeline/operator/op_spec_test.cc b/dali/pipeline/operator/op_spec_test.cc index 6fd0c9a410d..3cfb67f7473 100644 --- a/dali/pipeline/operator/op_spec_test.cc +++ b/dali/pipeline/operator/op_spec_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -74,7 +74,7 @@ TEST(OpSpecTest, GetArgumentTensorSet) { auto tv = std::make_shared>(2); tv->Resize(TensorListShape<0>(2), DALI_INT32); for (int i = 0; i < 2; i++) { - tv->tensor_handle(i)->mutable_data()[0] = 42 + i; + view((*tv)[i]).data[0] = 42 + i; } ws0.AddArgumentInput(arg_name, tv); auto spec0 = OpSpec("DummyOpForSpecTest") @@ -325,18 +325,18 @@ class TestArgumentInput_Producer : public Operator { // Initialize all the data with a 0, 1, 2 .... 
sequence auto &out0 = ws.Output(0); for (int i = 0; i < out0.shape().num_samples(); i++) { - *out0[i].mutable_data() = i; + *out0.mutable_tensor(i) = i; } auto &out1 = ws.Output(1); for (int i = 0; i < out1.shape().num_samples(); i++) { - *out1[i].mutable_data() = i; + *out1.mutable_tensor(i) = i; } auto &out2 = ws.Output(2); for (int i = 0; i < out2.shape().num_samples(); i++) { for (int j = 0; j < 2; j++) { - out2[i].mutable_data()[j] = i; + out2.mutable_tensor(i)[j] = i; } } } @@ -379,7 +379,7 @@ class TestArgumentInput_Consumer : public Operator { ASSERT_TRUE(is_uniform(ref_1.shape())); ASSERT_EQ(ref_1.shape()[0], TensorShape<>(1)); for (int i = 0; i < ref_1.shape().num_samples(); i++) { - EXPECT_EQ(ref_1[i].data()[0], i); + EXPECT_EQ(ref_1.tensor(i)[0], i); } auto &ref_2 = ws.ArgumentInput("arg2"); @@ -388,7 +388,7 @@ class TestArgumentInput_Consumer : public Operator { ASSERT_EQ(ref_2.shape()[0], TensorShape<>(1, 2)); for (int i = 0; i < ref_2.shape().num_samples(); i++) { for (int j = 0; j < 2; j++) { - EXPECT_EQ(ref_2[i].data()[j], i); + EXPECT_EQ(ref_2.tensor(i)[j], i); } } } diff --git a/dali/pipeline/operator/operator.h b/dali/pipeline/operator/operator.h index 2e82760b4d5..01908726145 100644 --- a/dali/pipeline/operator/operator.h +++ b/dali/pipeline/operator/operator.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -337,7 +337,11 @@ class Operator : public OperatorBase { this->RunImpl(sample); }, -data_idx); // -data_idx for FIFO order } + // Run all tasks and wait for them to finish thread_pool.RunAll(); + // Propagate metadata from individual samples to the whole batch as working with SampleWorkspace + // breaks metadata consistency - it sets it only to samples + EnforceCorrectness(ws, CanInferOutputs()); } /** diff --git a/dali/pipeline/pipeline_test.cc b/dali/pipeline/pipeline_test.cc index d941c25be8a..bcfc57d88a7 100644 --- a/dali/pipeline/pipeline_test.cc +++ b/dali/pipeline/pipeline_test.cc @@ -301,13 +301,13 @@ class DummyPresizeOpCPU : public Operator { const auto &input = ws.Input(0); int num_samples = input.shape().num_samples(); auto &output = ws.Output(0); - auto tmp_size = output.capacity(); + auto tmp_size = output.total_capacity(); output.set_type(); output.Resize(uniform_list_shape(num_samples, std::vector{2})); for (int sample_idx = 0; sample_idx < num_samples; sample_idx++) { - auto *out = output[sample_idx].mutable_data(); + auto *out = output.mutable_tensor(sample_idx); out[0] = tmp_size; - out[1] = input.capacity(); + out[1] = input.total_capacity(); } } }; @@ -327,7 +327,7 @@ class DummyPresizeOpGPU : public Operator { int num_samples = input.shape().num_samples(); auto &output = ws.Output(0); output.set_type(); - size_t tmp_size[2] = {output.capacity(), input.capacity()}; + size_t tmp_size[2] = {output.total_capacity(), input.total_capacity()}; output.Resize(uniform_list_shape(num_samples, std::vector{2})); for (int sample_idx = 0; sample_idx < num_samples; sample_idx++) { auto *out = output.mutable_tensor(sample_idx); @@ -353,7 +353,7 @@ class DummyPresizeOpMixed : public Operator { int num_samples = input.shape().num_samples(); auto &output = ws.Output(0); output.set_type(); - size_t tmp_size[2] = {output.capacity(), input.capacity()}; + size_t tmp_size[2] = 
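The Operator RunImpl hunk above adds a metadata-propagation step after the per-sample tasks finish, because SampleWorkspace-based execution only sets metadata on individual samples. A sketch of that scheduling plus propagation pattern; ThreadPool/HostWorkspace usage and the EnforceCorrectness() helper are assumed to behave as elsewhere in this patch.

#include "dali/pipeline/workspace/sample_workspace.h"

namespace dali {

void RunSamplewise(HostWorkspace &ws, ThreadPool &thread_pool, int batch_size,
                   bool outputs_contiguous) {
  for (int data_idx = 0; data_idx < batch_size; ++data_idx) {
    thread_pool.AddWork([data_idx](int tid) {
      // Per-sample work through a SampleWorkspace view would go here.
      (void)data_idx;
      (void)tid;
    }, -data_idx);  // negative priority preserves FIFO order
  }
  thread_pool.RunAll();  // run all tasks and wait for them to finish
  // Lift sample-level shape/type/layout metadata back up to the batch outputs.
  EnforceCorrectness(ws, outputs_contiguous);
}

}  // namespace dali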
{output.total_capacity(), input.total_capacity()}; output.Resize(uniform_list_shape(num_samples, std::vector{2})); for (int sample_idx = 0; sample_idx < num_samples; sample_idx++) { auto *out = output.mutable_tensor(sample_idx); diff --git a/dali/pipeline/workspace/sample_workspace.cc b/dali/pipeline/workspace/sample_workspace.cc index 7e75bc7c725..6d82eb9d6cf 100644 --- a/dali/pipeline/workspace/sample_workspace.cc +++ b/dali/pipeline/workspace/sample_workspace.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -25,10 +25,10 @@ void MakeSampleView(SampleWorkspace& sample, HostWorkspace& batch, int data_idx, for (int i = 0; i < num_inputs; i++) { if (batch.InputIsType(i)) { auto &input_ref = batch.UnsafeMutableInput(i); - sample.AddInput(&input_ref[data_idx]); + sample.AddInput(input_ref.tensor_handle(data_idx).get()); } else { auto &input_ref = batch.UnsafeMutableInput(i); - sample.AddInput(&input_ref[data_idx]); + sample.AddInput(input_ref.tensor_handle(data_idx).get()); } } @@ -36,10 +36,10 @@ void MakeSampleView(SampleWorkspace& sample, HostWorkspace& batch, int data_idx, for (int i = 0; i < num_outputs; i++) { if (batch.OutputIsType(i)) { auto &output_ref = batch.Output(i); - sample.AddOutput(&output_ref[data_idx]); + sample.AddOutput(output_ref.tensor_handle(data_idx).get()); } else { auto &output_ref = batch.Output(i); - sample.AddOutput(&output_ref[data_idx]); + sample.AddOutput(output_ref.tensor_handle(data_idx).get()); } } for (auto& arg_pair : batch) { @@ -48,4 +48,10 @@ void MakeSampleView(SampleWorkspace& sample, HostWorkspace& batch, int data_idx, } } +void EnforceCorrectness(HostWorkspace& ws, bool contiguous) { + for (int i = 0; i < ws.NumOutput(); i++) { + ws.Output(i).PropagateUp(contiguous); + } +} + } // namespace dali diff --git a/dali/pipeline/workspace/sample_workspace.h b/dali/pipeline/workspace/sample_workspace.h index 9e281db5437..102a9d269b4 100644 --- a/dali/pipeline/workspace/sample_workspace.h +++ b/dali/pipeline/workspace/sample_workspace.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
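MakeSampleView above now obtains the per-sample Tensor through tensor_handle(data_idx).get() instead of taking the address of an operator[] result. A sketch of borrowing a sample pointer in the same way, assuming tensor_handle() returns a shared_ptr as the hunk implies.

#include "dali/pipeline/data/tensor_vector.h"

namespace dali {

Tensor<CPUBackend> *BorrowSample(TensorVector<CPUBackend> &batch, int data_idx) {
  // Ownership stays with the batch; the raw pointer is only valid while the
  // batch (and its sample handle) remains alive.
  return batch.tensor_handle(data_idx).get();
}

}  // namespace dali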
@@ -134,6 +134,17 @@ class DLL_PUBLIC SampleWorkspace : public WorkspaceBase > TensorListGetItemImpl(TensorList &t, auto ptr = std::make_unique>(); // TODO(klecki): Rework this with proper sample-based tensor batch data structure auto sample_shared_ptr = unsafe_sample_owner(t, id); - ptr->ShareData(sample_shared_ptr, t.capacity(), t.is_pinned(), t.shape()[id], t.type()); + ptr->ShareData(sample_shared_ptr, t.total_capacity(), t.is_pinned(), t.shape()[id], t.type()); ptr->set_device_id(t.device_id()); ptr->SetMeta(t.GetMeta(id)); return ptr; @@ -623,7 +623,7 @@ std::shared_ptr> TensorListFromListOfTensors(py::list &list_ cur_type, "' expected to have type '", DALIDataType(expected_type), "'.")); } - tv[i].ShareData(t); + tv.UnsafeSetSample(i, t); } catch (const py::type_error &) { throw; } catch (const std::runtime_error &) { @@ -1269,7 +1269,8 @@ void FeedPipeline(Pipeline *p, const string &name, py::list list, AccessOrder or TensorVector tv(list.size()); for (size_t i = 0; i < list.size(); ++i) { auto &t = list[i].cast&>(); - tv[i] = std::move(t); + tv.UnsafeSetSample(i, t); + // TODO(klecki): tv[i] = std::move(t); } p->SetExternalInput(name, tv, order, sync, use_copy_kernel); } diff --git a/dali/test/dali_test.h b/dali/test/dali_test.h index 917f3c8dbc0..dbddb54c443 100644 --- a/dali/test/dali_test.h +++ b/dali/test/dali_test.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -186,7 +186,7 @@ class DALITest : public ::testing::Test { for (int i = 0; i < n; ++i) { std::memcpy(tl->template mutable_tensor(i), data[i % nImgs], data_sizes[i % nImgs]); - tl->SetSourceInfo(i, imgs.filenames_[i % nImgs]); + tl->GetMeta(i).SetSourceInfo(imgs.filenames_[i % nImgs]); } } diff --git a/dali/test/dali_test_decoder.h b/dali/test/dali_test_decoder.h index 8f3cdd75509..977c56fb7f5 100644 --- a/dali/test/dali_test_decoder.h +++ b/dali/test/dali_test_decoder.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
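In the dali_test.h hunk above, per-sample source info is now set through GetMeta(i) rather than a dedicated TensorList setter. A sketch of filling a batch and tagging provenance in that style, assuming GetMeta(i) hands back a mutable per-sample metadata reference as the hunk relies on; buffer and name vectors are illustrative.

#include <cstdint>
#include <cstring>
#include <string>
#include <vector>
#include "dali/pipeline/data/tensor_list.h"

namespace dali {

// `tl` must already be resized to matching shapes with a uint8 element type.
void FillBatch(TensorList<CPUBackend> *tl,
               const std::vector<const uint8_t *> &buffers,
               const std::vector<size_t> &sizes,
               const std::vector<std::string> &names) {
  for (size_t i = 0; i < buffers.size(); ++i) {
    std::memcpy(tl->mutable_tensor<uint8_t>(i), buffers[i], sizes[i]);
    tl->GetMeta(i).SetSourceInfo(names[i]);  // record where this sample came from
  }
}

}  // namespace dali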
@@ -32,7 +32,8 @@ class GenericDecoderTest : public DALISingleOpTest { // single input - encoded images // single output - decoded images - TensorVector out(inputs[0]->num_samples()); + TensorVector out(inputs[0]->num_samples()); + std::vector> tmp_out(inputs[0]->num_samples()); const TensorList &encoded_data = *inputs[0]; @@ -41,7 +42,16 @@ class GenericDecoderTest : public DALISingleOpTest { auto *data = encoded_data.tensor(i); auto data_size = volume(encoded_data.tensor_shape(i)); - this->DecodeImage(data, data_size, c, this->ImageType(), &out[i]); + this->DecodeImage(data, data_size, c, this->ImageType(), &tmp_out[i]); + } + + TensorListShape<> out_shape(inputs[0]->num_samples(), 3); + for (size_t i = 0; i < encoded_data.num_samples(); ++i) { + out_shape.set_tensor_shape(i, tmp_out[i].shape()); + } + out.Resize(out_shape, DALI_UINT8); + for (size_t i = 0; i < encoded_data.num_samples(); ++i) { + out.UnsafeSetSample(i, tmp_out[i]); } vector>> outputs; diff --git a/dali/test/dali_test_resize.h b/dali/test/dali_test_resize.h index 5e63a07b3b0..5a5abee77f4 100755 --- a/dali/test/dali_test_resize.h +++ b/dali/test/dali_test_resize.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. #ifndef DALI_TEST_DALI_TEST_RESIZE_H_ #define DALI_TEST_DALI_TEST_RESIZE_H_ @@ -28,6 +28,7 @@ class GenericResizeTest : public DALISingleOpTest { // single input - encoded images // single output - decoded images TensorVector out(inputs[0]->num_samples()); + std::vector> tmp_out(inputs[0]->num_samples()); const TensorList& image_data = *inputs[0]; const uint32_t resizeOptions = getResizeOptions(); @@ -175,12 +176,23 @@ class GenericResizeTest : public DALISingleOpTest { finalImg = &mirror_img; } - out[i].Resize({finalImg->rows, finalImg->cols, c}, DALI_UINT8); - auto *out_data = out[i].mutable_data(); + tmp_out[i].Resize({finalImg->rows, finalImg->cols, c}, DALI_UINT8); + auto *out_data = tmp_out[i].mutable_data(); std::memcpy(out_data, finalImg->ptr(), finalImg->rows * finalImg->cols * c); } + TensorListShape<> shape(tmp_out.size(), tmp_out[0].shape().sample_dim()); + for (size_t i = 0; i < image_data.num_samples(); ++i) { + shape.set_tensor_shape(i, tmp_out[i].shape()); + } + // TODO(klecki): If sharing we do not need to resize, we just need to enforce that we have + // enough samples + out.Resize(shape, tmp_out[0].type()); + for (size_t i = 0; i < image_data.num_samples(); ++i) { + out.UnsafeSetSample(i, tmp_out[i]); + } + vector>> outputs; outputs.push_back(std::make_shared>()); outputs[0]->Copy(out); diff --git a/dali/util/pybind.h b/dali/util/pybind.h index b5400808b2f..790000b7dcd 100644 --- a/dali/util/pybind.h +++ b/dali/util/pybind.h @@ -203,8 +203,8 @@ static py::capsule DLTensorToCapsule(DLMTensorPtr dl_tensor) { } template -py::capsule TensorToDLPackView(Tensor &tensor) { - DLMTensorPtr dl_tensor = GetDLTensorView(tensor); +py::capsule TensorToDLPackView(SampleView tensor, int device_id) { + DLMTensorPtr dl_tensor = GetDLTensorView(tensor, device_id); return DLTensorToCapsule(std::move(dl_tensor)); }
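The decoder/resize test hunks above switch to producing each sample into a temporary Tensor, gathering the shapes into a TensorListShape, resizing the batch once, and then attaching the samples with UnsafeSetSample. A condensed sketch of that assembly pattern, using the calls as they appear in this patch; the standalone helper is hypothetical.

#include <vector>
#include "dali/core/tensor_shape.h"
#include "dali/pipeline/data/tensor_vector.h"

namespace dali {

TensorVector<CPUBackend> AssembleBatch(std::vector<Tensor<CPUBackend>> &tmp_out) {
  TensorVector<CPUBackend> out(tmp_out.size());
  TensorListShape<> shape(tmp_out.size(), tmp_out[0].shape().sample_dim());
  for (size_t i = 0; i < tmp_out.size(); ++i)
    shape.set_tensor_shape(i, tmp_out[i].shape());
  out.Resize(shape, tmp_out[0].type());   // establish batch shape and element type
  for (size_t i = 0; i < tmp_out.size(); ++i)
    out.UnsafeSetSample(i, tmp_out[i]);   // share each sample's allocation into the batch
  return out;
}

}  // namespace dali

Resizing before sharing keeps the batch-level shape and type metadata consistent with the attached samples, which is the same ordering the resize test fixture above follows.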