diff --git a/dali/benchmark/displacement_cpu_bench.cc b/dali/benchmark/displacement_cpu_bench.cc index 4be5e3b6393..9185f63a50d 100644 --- a/dali/benchmark/displacement_cpu_bench.cc +++ b/dali/benchmark/displacement_cpu_bench.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -88,7 +88,7 @@ void DisplacementBench(benchmark::State& st) {//NOLINT // tensor out is resized by operator itself in DisplacementFilter::DataDependentSetup() // TODO(klecki) Accomodate to use different inputs from test data - auto *ptr = (*tensor_in)[0].template mutable_data(); + auto *ptr = (*tensor_in).template mutable_tensor(0); for (int i = 0; i < N; i++) { ptr[i] = i; } diff --git a/dali/benchmark/operator_bench.h b/dali/benchmark/operator_bench.h index 47e819ad94c..c69303bc5da 100644 --- a/dali/benchmark/operator_bench.h +++ b/dali/benchmark/operator_bench.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -54,16 +54,13 @@ class OperatorBench : public DALIBenchmark { auto op_ptr = InstantiateOperator(op_spec); auto data_in = std::make_shared>(batch_size); - for (auto &in_ptr : *data_in) { - in_ptr = std::make_shared>(); - in_ptr->set_type(); - in_ptr->Resize({H, W, C}); - in_ptr->SetLayout("HWC"); - } + data_in->set_type(); + data_in->Resize(uniform_list_shape(batch_size, TensorShape<>{H, W, C})); + data_in->SetLayout("HWC"); if (fill_in_data) { - for (auto &in_ptr : *data_in) { - auto *ptr = in_ptr->template mutable_data(); + for (int sample_id = 0; sample_id < batch_size; sample_id++) { + auto *ptr = data_in->template mutable_tensor(sample_id); for (int i = 0; i < N; i++) { ptr[i] = static_cast(i); } diff --git a/dali/c_api/c_api.cc b/dali/c_api/c_api.cc index 12acc45626f..6e571e5cdf6 100644 --- a/dali/c_api/c_api.cc +++ b/dali/c_api/c_api.cc @@ -143,11 +143,14 @@ void SetExternalInputTensors(daliPipelineHandle *pipe_handle, const char *name, // We cast away the const from data_ptr, as there is no other way of passing it to the // Tensor as we must also set the shape and type metadata. // The vector that we pass to pipeline is const. 
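// The hunks above replace per-sample Tensor setup and the (*batch)[i].mutable_data<T>()
// accessor with batch-level TensorVector calls. A minimal sketch of the new pattern,
// using the API names introduced in this patch (uniform_list_shape, mutable_tensor);
// batch_size, H, W, C and the float element type are illustrative only:
auto data_in = std::make_shared<TensorVector<CPUBackend>>(batch_size);
data_in->set_type<float>();
data_in->Resize(uniform_list_shape(batch_size, TensorShape<>{H, W, C}));
data_in->SetLayout("HWC");
for (int sample_id = 0; sample_id < batch_size; sample_id++) {
  float *ptr = data_in->mutable_tensor<float>(sample_id);  // typed pointer to one sample
  for (int i = 0; i < H * W * C; i++)
    ptr[i] = static_cast<float>(i);
}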
- data[i].set_pinned(flags & DALI_ext_pinned); - data[i].set_order(order); - data[i].ShareData(const_cast(data_ptr[i]), tl_shape[i].num_elements() * elem_sizeof); - data[i].Resize(tl_shape[i], type_id); - data[i].SetLayout(layout); + dali::Tensor tmp; + tmp.set_order(order); + std::shared_ptr ptr(const_cast(data_ptr[i]), [](void *){}); // no deleter + tmp.set_backing_allocation(ptr, + tl_shape[i].num_elements() * elem_sizeof, flags & DALI_ext_pinned, + type_id, tl_shape[i].num_elements()); + tmp.Resize(tl_shape[i], type_id); + data.UnsafeSetSample(i, tmp); } pipeline->SetExternalInput(name, data, order, flags & DALI_ext_force_sync, @@ -429,7 +432,7 @@ size_t daliNumElements(daliPipelineHandle* pipe_handle, int n) { template static size_t daliTensorSizeHelper(dali::DeviceWorkspace* ws, int n) { - return ws->Output(n).nbytes(); + return ws->Output(n).total_nbytes(); } size_t daliTensorSize(daliPipelineHandle* pipe_handle, int n) { diff --git a/dali/operators/audio/nonsilence_op.h b/dali/operators/audio/nonsilence_op.h index 45b9c689c6d..cfe0a01ac9a 100644 --- a/dali/operators/audio/nonsilence_op.h +++ b/dali/operators/audio/nonsilence_op.h @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -228,8 +228,8 @@ class NonsilenceOperatorCpu : public NonsilenceOperator { args.reset_interval = reset_interval_; auto res = DetectNonsilenceRegion(intermediate_buffers_[thread_id], args); - auto beg_ptr = output_begin[sample_id].mutable_data(); - auto len_ptr = output_length[sample_id].mutable_data(); + auto beg_ptr = output_begin.mutable_tensor(sample_id); + auto len_ptr = output_length.mutable_tensor(sample_id); *beg_ptr = res.first; *len_ptr = res.second; }, in_shape.tensor_size(sample_id)); diff --git a/dali/operators/audio/preemphasis_filter_op.cc b/dali/operators/audio/preemphasis_filter_op.cc index fc319a4b1bc..106780a4cda 100644 --- a/dali/operators/audio/preemphasis_filter_op.cc +++ b/dali/operators/audio/preemphasis_filter_op.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
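// A condensed view of the external-input path rewritten above: the caller's buffer is
// wrapped in a non-owning shared_ptr (no-op deleter), attached to a temporary Tensor via
// set_backing_allocation, and placed into the batch with UnsafeSetSample. Sketch only;
// the argument order of set_backing_allocation follows the call above:
dali::Tensor<CPUBackend> tmp;
tmp.set_order(order);
std::shared_ptr<void> ptr(const_cast<void *>(data_ptr[i]), [](void *) {});  // never frees
tmp.set_backing_allocation(ptr,
                           tl_shape[i].num_elements() * elem_sizeof,   // size in bytes
                           flags & DALI_ext_pinned,                    // pinned flag
                           type_id,
                           tl_shape[i].num_elements());                // size in elements
tmp.Resize(tl_shape[i], type_id);
data.UnsafeSetSample(i, tmp);  // the batch now references tmp's (borrowed) allocation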
@@ -65,11 +65,11 @@ void PreemphasisFilterCPU::RunImplTyped(workspace_t &ws) { for (int sample_id = 0; sample_id < nsamples; sample_id++) { tp.AddWork( [this, &output, &input, sample_id](int thread_id) { - const auto in_ptr = input[sample_id].data(); - auto out_ptr = output[sample_id].mutable_data(); - DALI_ENFORCE(input[sample_id].shape() == output[sample_id].shape(), + const auto in_ptr = input.tensor(sample_id); + auto out_ptr = output.mutable_tensor(sample_id); + DALI_ENFORCE(input.tensor_shape(sample_id) == output.tensor_shape(sample_id), "Input and output shapes don't match"); - auto n = volume(output[sample_id].shape()); + auto n = volume(output.tensor_shape(sample_id)); auto coeff = preemph_coeff_[sample_id]; if (coeff == 0.0f) { for (int64_t j = 0; j < n; j++) { diff --git a/dali/operators/decoder/audio/audio_decoder_op.cc b/dali/operators/decoder/audio/audio_decoder_op.cc index 1e07d6fedff..6cc1759d9df 100644 --- a/dali/operators/decoder/audio/audio_decoder_op.cc +++ b/dali/operators/decoder/audio/audio_decoder_op.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -88,13 +88,13 @@ AudioDecoderCpu::SetupImpl(std::vector &output_desc, const workspace for (int i = 0; i < batch_size; i++) { auto &meta = sample_meta_[i] = - decoders_[i]->Open({static_cast(input[i].raw_data()), - input[i].shape().num_elements()}); + decoders_[i]->Open({static_cast(input.raw_tensor(i)), + input.tensor_shape(i).num_elements()}); TensorShape<> data_sample_shape = DecodedAudioShape( meta, use_resampling_ ? target_sample_rates_[i] : -1.0f, downmix_); shape_data.set_tensor_shape(i, data_sample_shape); shape_rate.set_tensor_shape(i, {}); - files_names_[i] = input[i].GetSourceInfo(); + files_names_[i] = input.GetMeta(i).GetSourceInfo(); } output_desc[0] = { shape_data, output_type_ }; diff --git a/dali/operators/decoder/decoder_test.h b/dali/operators/decoder/decoder_test.h index d9ecd0efad9..18531e8835e 100644 --- a/dali/operators/decoder/decoder_test.h +++ b/dali/operators/decoder/decoder_test.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
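// The accessor migration applied throughout this patch, shown side by side. float and
// sample_id are placeholders; the typed calls are the batch-level API used above:
const float *in_ptr = input.tensor<float>(sample_id);           // was: input[sample_id].data<float>()
float *out_ptr      = output.mutable_tensor<float>(sample_id);  // was: output[sample_id].mutable_data<float>()
auto sample_shape   = input.tensor_shape(sample_id);            // was: input[sample_id].shape()
int64_t n           = volume(sample_shape);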
@@ -18,6 +18,7 @@ #include #include #include +#include "dali/pipeline/data/types.h" #include "dali/test/dali_test_decoder.h" namespace dali { @@ -64,6 +65,7 @@ class DecodeTestBase : public GenericDecoderTest { // single input - encoded images // single output - decoded images TensorVector out(inputs[0]->num_samples()); + std::vector> tmp_out(inputs[0]->num_samples()); const TensorList &encoded_data = *inputs[0]; const int c = this->GetNumColorComp(); @@ -72,7 +74,16 @@ class DecodeTestBase : public GenericDecoderTest { auto data_size = volume(encoded_data.tensor_shape(i)); this->DecodeImage( data, data_size, c, this->ImageType(), - &out[i], GetCropWindowGenerator(i)); + &tmp_out[i], GetCropWindowGenerator(i)); + } + + TensorListShape<> out_shape(inputs[0]->num_samples(), 3); + for (size_t i = 0; i < encoded_data.num_samples(); ++i) { + out_shape.set_tensor_shape(i, tmp_out[i].shape()); + } + out.Resize(out_shape, DALI_UINT8); + for (size_t i = 0; i < encoded_data.num_samples(); ++i) { + out.UnsafeSetSample(i, tmp_out[i]); } vector>> outputs; diff --git a/dali/operators/decoder/nvjpeg/nvjpeg_decoder_decoupled_api.h b/dali/operators/decoder/nvjpeg/nvjpeg_decoder_decoupled_api.h index 713f2d8f49e..bf50a3d0ba9 100644 --- a/dali/operators/decoder/nvjpeg/nvjpeg_decoder_decoupled_api.h +++ b/dali/operators/decoder/nvjpeg/nvjpeg_decoder_decoupled_api.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -554,15 +554,16 @@ class nvJPEGDecoder : public Operator, CachedDecoderImpl { samples_jpeg2k_.clear(); #endif // NVJPEG2K_ENABLED + const auto &input = ws.Input(0); for (int i = 0; i < curr_batch_size; i++) { - const auto &in = ws.Input(0)[i]; - const auto in_size = in.size(); - thread_pool_.AddWork([this, i, &in, in_size](int tid) { - auto *input_data = in.data(); + auto *input_data = input.tensor(i); + const auto in_size = input.tensor_shape(i).num_elements(); + const auto &source_info = input.GetMeta(i).GetSourceInfo(); + thread_pool_.AddWork([this, i, input_data, in_size, source_info](int tid) { SampleData &data = sample_data_[i]; data.clear(); data.sample_idx = i; - data.file_name = in.GetSourceInfo(); + data.file_name = source_info; data.encoded_length = in_size; auto cached_shape = CacheImageShape(data.file_name); @@ -704,15 +705,17 @@ class nvJPEGDecoder : public Operator, CachedDecoderImpl { void ProcessImagesCuda(MixedWorkspace &ws) { auto& output = ws.Output(0); + const auto &input = ws.Input(0); for (auto *sample : samples_single_) { assert(sample); auto i = sample->sample_idx; auto *output_data = output.mutable_tensor(i); - const auto &in = ws.Input(0)[i]; + const auto *in_data = input.tensor(i); + const auto in_size = input.tensor_shape(i).num_elements(); thread_pool_.AddWork( - [this, sample, &in, output_data](int tid) { - SampleWorker(sample->sample_idx, sample->file_name, in.size(), tid, - in.data(), output_data, streams_[tid]); + [this, sample, in_data, in_size, output_data](int tid) { + SampleWorker(sample->sample_idx, sample->file_name, in_size, tid, + in_data, output_data, streams_[tid]); }, task_priority_seq_--); // FIFO order, since the samples were already ordered } } @@ -808,15 +811,17 @@ class nvJPEGDecoder : public Operator, CachedDecoderImpl { } void ProcessImagesHost(MixedWorkspace &ws) { + const auto 
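// Pattern from the decoder_test.h hunk above: samples are first decoded into standalone
// Tensors, then the output TensorVector is resized to the collected shapes and each
// sample is attached with UnsafeSetSample, which shares the allocation instead of copying.
// Sketch under the same API; nsamples and the uint8 type mirror the test code above:
std::vector<Tensor<CPUBackend>> tmp_out(nsamples);
// ... decode into tmp_out[i] ...
TensorListShape<> out_shape(nsamples, 3);
for (int i = 0; i < nsamples; ++i)
  out_shape.set_tensor_shape(i, tmp_out[i].shape());
out.Resize(out_shape, DALI_UINT8);
for (int i = 0; i < nsamples; ++i)
  out.UnsafeSetSample(i, tmp_out[i]);  // out sample i now aliases tmp_out[i]'s buffer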
&input = ws.Input(0); auto& output = ws.Output(0); for (auto *sample : samples_host_) { auto i = sample->sample_idx; + const auto *input_data = input.tensor(i); + auto in_size = input.tensor_shape(i).num_elements(); auto *output_data = output.mutable_tensor(i); - const auto &in = ws.Input(0)[i]; ImageCache::ImageShape shape = output_shape_[i].to_static<3>(); thread_pool_.AddWork( - [this, sample, &in, output_data, shape](int tid) { - HostFallback(in.data(), in.size(), output_image_type_, output_data, + [this, sample, input_data, in_size, output_data, shape](int tid) { + HostFallback(input_data, in_size, output_image_type_, output_data, streams_[tid], sample->file_name, sample->roi, use_fast_idct_); CacheStore(sample->file_name, output_data, shape, streams_[tid]); }, task_priority_seq_--); // FIFO order, since the samples were already ordered @@ -846,13 +851,13 @@ class nvJPEGDecoder : public Operator, CachedDecoderImpl { int j = 0; TensorVector tv(samples_hw_batched_.size()); + const auto &input = ws.Input(0); for (auto *sample : samples_hw_batched_) { int i = sample->sample_idx; - const auto &in = ws.Input(0)[i]; const auto &out_shape = output_shape_.tensor_shape(i); - tv[j].ShareData(const_cast &>(in)); - in_lengths_[j] = in.size(); + tv.UnsafeSetSample(j, input, i); + in_lengths_[j] = input.tensor_shape(i).num_elements(); nvjpeg_destinations_[j].channel[0] = output.mutable_tensor(i); nvjpeg_destinations_[j].pitch[0] = out_shape[1] * out_shape[2]; nvjpeg_params_[j] = sample->params; diff --git a/dali/operators/generic/cast.cc b/dali/operators/generic/cast.cc index 8850c0bce3f..22090bbb9c9 100644 --- a/dali/operators/generic/cast.cc +++ b/dali/operators/generic/cast.cc @@ -51,8 +51,8 @@ void CastCPU::RunImpl(HostWorkspace &ws) { TYPE_SWITCH(itype, type2id, IType, CAST_ALLOWED_TYPES, ( for (int sample_id = 0; sample_id < num_samples; sample_id++) { - auto *out = output[sample_id].mutable_data(); - const auto *in = input[sample_id].data(); + auto *out = output.mutable_tensor(sample_id); + const auto *in = input.tensor(sample_id); auto size = input_shape.tensor_size(sample_id); tp.AddWork([out, in, size](int thread_id) { CpuHelper(out, in, size); }, size); diff --git a/dali/operators/generic/constant.cc b/dali/operators/generic/constant.cc index d01a4f48537..199ca70618a 100644 --- a/dali/operators/generic/constant.cc +++ b/dali/operators/generic/constant.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
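// The nvJPEG hunks above stop capturing per-sample Tensor references in thread-pool
// lambdas; raw pointers, sizes and metadata are resolved up front and captured by value.
// Sketch of that convention (DecodeOne is a hypothetical stand-in for SampleWorker /
// HostFallback):
const auto &input = ws.Input<CPUBackend>(0);
const uint8_t *in_data = input.tensor<uint8_t>(i);
auto in_size = input.tensor_shape(i).num_elements();
auto source_info = input.GetMeta(i).GetSourceInfo();
thread_pool_.AddWork([=](int tid) {
  DecodeOne(in_data, in_size, source_info, tid);  // no Tensor object crosses the thread boundary
});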
@@ -80,7 +80,7 @@ void FillTensorVector( assert(is_uniform(shape)); int64_t n = shape[0].num_elements(); assert(src.size() == static_cast(n) || src.size() == 1); - Dst *out = dst[0].mutable_data(); + Dst *out = dst.mutable_tensor(0); if (src.size() == 1) { Dst val = ConvertSat(src[0]); for (int64_t i = 0; i < n; i++) { @@ -92,7 +92,7 @@ void FillTensorVector( } } for (int i = 1; i < shape.num_samples(); i++) { - dst[i].ShareData(dst[0]); + dst.UnsafeSetSample(i, dst, 0); } } } // namespace @@ -116,7 +116,7 @@ void Constant::RunImpl(HostWorkspace &ws) { out.Resize(output_shape_); int N = output_shape_.num_samples(); for (int i = 0; i < N; i++) { - assert(out[i].raw_data() == output_[i].raw_data()); + assert(out.raw_tensor(i) == output_.raw_tensor(i)); } out.SetLayout(layout_); } diff --git a/dali/operators/generic/erase/erase_utils.h b/dali/operators/generic/erase/erase_utils.h index bdbc7ffa9b0..9ac20e82642 100644 --- a/dali/operators/generic/erase/erase_utils.h +++ b/dali/operators/generic/erase/erase_utils.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -95,17 +95,17 @@ std::vector> GetEraseArgs(const OpSpec &spec, for (int i = 0; i < nsamples; i++) { if (has_tensor_roi_anchor) { - const auto& anchor = ws.ArgumentInput("anchor")[i]; - assert(anchor.size() > 0); - roi_anchor.resize(anchor.size()); - std::memcpy(roi_anchor.data(), anchor.data(), sizeof(float) * roi_anchor.size()); + const auto& anchor = view(ws.ArgumentInput("anchor")[i]); + assert(anchor.shape.num_elements() > 0); + roi_anchor.resize(anchor.shape.num_elements()); + std::memcpy(roi_anchor.data(), anchor.data, sizeof(float) * roi_anchor.size()); } if (has_tensor_roi_shape) { - const auto& shape = ws.ArgumentInput("shape")[i]; - assert(shape.size() > 0); - roi_shape.resize(shape.size()); - std::memcpy(roi_shape.data(), shape.data(), sizeof(float) * roi_shape.size()); + const auto& shape = view(ws.ArgumentInput("shape")[i]); + assert(shape.shape.num_elements() > 0); + roi_shape.resize(shape.num_elements()); + std::memcpy(roi_shape.data(), shape.data, sizeof(float) * roi_shape.size()); } DALI_ENFORCE(roi_anchor.size() == roi_shape.size()); diff --git a/dali/operators/generic/lookup_table.cc b/dali/operators/generic/lookup_table.cc index ce66d5d8365..ca62c4440a0 100644 --- a/dali/operators/generic/lookup_table.cc +++ b/dali/operators/generic/lookup_table.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
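// The Constant hunk above fills a single physical sample and aliases it into the rest of
// the batch, replacing per-sample ShareData. Sketch; Dst stands for the concrete output
// type selected by the surrounding TYPE_SWITCH:
Dst *out = dst.mutable_tensor<Dst>(0);     // write sample 0 once
// ... fill out[0 .. n) ...
for (int i = 1; i < dst.num_samples(); i++)
  dst.UnsafeSetSample(i, dst, 0);          // sample i points at sample 0's data, no copy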
@@ -25,8 +25,8 @@ void LookupValuesImpl(ThreadPool &tp, TensorVector &output, const Output *lookup_table, const Output default_value) { for (int sample_idx = 0; sample_idx < shape.num_samples(); sample_idx++) { auto data_size = shape.tensor_size(sample_idx); - auto *out_data = output[sample_idx].mutable_data(); - const auto *in_data = input[sample_idx].data(); + auto *out_data = output.mutable_tensor(sample_idx); + const auto *in_data = input.tensor(sample_idx); tp.AddWork( [=](int thread_id) { for (int64_t i = 0; i < data_size; i++) { diff --git a/dali/operators/generic/permute_batch.cc b/dali/operators/generic/permute_batch.cc index dacfefae0eb..ddb7961a409 100644 --- a/dali/operators/generic/permute_batch.cc +++ b/dali/operators/generic/permute_batch.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -45,7 +45,8 @@ void PermuteBatch::RunImpl(HostWorkspace &ws) { int src = indices_[i]; tp.AddWork([&, i, src](int tid) { output.SetMeta(i, input.GetMeta(i)); - output[i].Copy(input[src]); + // TODO(klecki): SetSample + output.UnsafeCopySample(i, input, src); }, size); } tp.RunAll(); diff --git a/dali/operators/generic/reshape.cc b/dali/operators/generic/reshape.cc index ad5446d76c8..bb3361f6c49 100644 --- a/dali/operators/generic/reshape.cc +++ b/dali/operators/generic/reshape.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -393,8 +393,8 @@ void Reshape::RunImpl(HostWorkspace &ws) { out.Resize(output_shape_, output_type_->id()); int N = output_shape_.num_samples(); for (int i = 0; i < N; i++) { - assert(out[i].raw_data() == in[i].raw_data()); - assert(out[i].shape() == output_shape_[i]); + assert(out.raw_tensor(i) == in.raw_tensor(i)); + assert(out.tensor_shape(i) == output_shape_[i]); } out.SetLayout(layout); } diff --git a/dali/operators/generic/shapes.h b/dali/operators/generic/shapes.h index 3280fb8d545..9d57229214f 100644 --- a/dali/operators/generic/shapes.h +++ b/dali/operators/generic/shapes.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -77,7 +77,7 @@ class Shapes : public Operator { int n = out.num_samples(); assert(n == shape.num_samples()); for (int i = 0; i < n; i++) { - type *data = out[i].mutable_data(); + type *data = out.mutable_tensor(i); auto sample_shape = shape.tensor_shape_span(i); for (int j = 0; j < shape.sample_dim(); j++) data[j] = sample_shape[j]; diff --git a/dali/operators/generic/slice/slice_base.cc b/dali/operators/generic/slice/slice_base.cc index 38f2d69798f..e0b69e4bcd4 100644 --- a/dali/operators/generic/slice/slice_base.cc +++ b/dali/operators/generic/slice/slice_base.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. 
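// PermuteBatch above replaces output[i].Copy(input[src]) with a batch-level sample copy;
// the metadata is carried over explicitly. Sketch of the calls as used in this patch:
output.SetMeta(i, input.GetMeta(i));
output.UnsafeCopySample(i, input, src);  // copy sample `src` of `input` into sample `i` of `output`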
All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -58,7 +58,6 @@ bool SliceBaseCpu::SetupImpl(std::vector(input); for (int i = 0; i < nsamples; i++) { auto in_view = view(input[i]); auto req = Kernel().Setup(ctx, in_view, args_[i]); diff --git a/dali/operators/generic/transpose/transpose.cc b/dali/operators/generic/transpose/transpose.cc index 294407f05e8..cb28194ad25 100644 --- a/dali/operators/generic/transpose/transpose.cc +++ b/dali/operators/generic/transpose/transpose.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -46,8 +46,8 @@ class TransposeCPU : public Transpose { TensorShape<> src_ts = input.shape()[i]; auto dst_ts = permute(src_ts, perm_); kernels::TransposeGrouped( - TensorView{output[i].mutable_data(), dst_ts}, - TensorView{input[i].data(), src_ts}, make_cspan(perm_)); + TensorView{output.mutable_tensor(i), dst_ts}, + TensorView{input.tensor(i), src_ts}, make_cspan(perm_)); }, out_shape.tensor_size(i)); } ), DALI_FAIL(make_string("Unsupported input type: ", input_type))); // NOLINT diff --git a/dali/operators/geometry/coord_flip.cc b/dali/operators/geometry/coord_flip.cc index 2e9ee69ef8f..929810fc61f 100644 --- a/dali/operators/geometry/coord_flip.cc +++ b/dali/operators/geometry/coord_flip.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -77,11 +77,11 @@ void CoordFlipCPU::RunImpl(workspace_t &ws) { mirrored_origin[y_dim_] = 2.0f * spec_.GetArgument("center_y", &ws, sample_id); mirrored_origin[z_dim_] = 2.0f * spec_.GetArgument("center_z", &ws, sample_id); - auto in_size = volume(input[sample_id].shape()); + auto in_size = volume(input.tensor_shape(sample_id)); thread_pool.AddWork( [this, &input, in_size, &output, sample_id, flip_dim, mirrored_origin](int thread_id) { - const auto *in = input[sample_id].data(); - auto *out = output[sample_id].mutable_data(); + const auto *in = input.tensor(sample_id); + auto *out = output.mutable_tensor(sample_id); int d = 0; int64_t i = 0; for (; i < in_size; i++, d++) { diff --git a/dali/operators/geometry/mt_transform_attr_test.cc b/dali/operators/geometry/mt_transform_attr_test.cc index 12d71dbeb90..2d3b03e3652 100644 --- a/dali/operators/geometry/mt_transform_attr_test.cc +++ b/dali/operators/geometry/mt_transform_attr_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -155,11 +155,11 @@ TEST(MTTransformAttr, MInputTInput) { Minp->Resize(Mtls, DALI_FLOAT); Tinp->Resize(Ttls, DALI_FLOAT); for (int i = 0; i < N; i++) { - float *data = (*Minp)[i].mutable_data(); + float *data = Minp->mutable_tensor(i); for (int j = 0; j < volume(Mtls[i]); j++) data[j] = 1 + j + i * 10; - data = (*Tinp)[i].mutable_data(); + data = Tinp->mutable_tensor(i); for (int j = 0; j < volume(Ttls[i]); j++) data[j] = 10 + j * 10 + i * 100; } @@ -198,9 +198,9 @@ TEST(MTTransformAttr, MScalarInputTScalarInput) { Minp->Resize(tls, DALI_FLOAT); Tinp->Resize(tls, DALI_FLOAT); for (int i = 0; i < N; i++) { - float *data = (*Minp)[i].mutable_data(); + float *data = Minp->mutable_tensor(i); data[0] = i + 10; - data = (*Tinp)[i].mutable_data(); + data = Tinp->mutable_tensor(i); data[0] = i + 100; } @@ -233,7 +233,7 @@ TEST(MTTransformAttr, MTInput) { int N = tls.num_samples();; MTinp->Resize(tls, DALI_FLOAT); for (int i = 0; i < N; i++) { - auto *data = (*MTinp)[i].mutable_data(); + auto *data = MTinp->mutable_tensor(i); for (int j = 0; j < volume(tls[i]); j++) data[j] = 1 + j + i * 10; } @@ -342,7 +342,7 @@ TEST(MTTransformAttr, MTInput_ErrorSize) { int N = tls.num_samples();; MTinp->Resize(tls, DALI_FLOAT); for (int i = 0; i < N; i++) { - auto *data = (*MTinp)[i].mutable_data(); + auto *data = MTinp->mutable_tensor(i); for (int j = 0; j < volume(tls[i]); j++) data[j] = 1 + j + i * 10; } diff --git a/dali/operators/image/convolution/gaussian_blur.cc b/dali/operators/image/convolution/gaussian_blur.cc index 60042b28c78..37cae5a7446 100644 --- a/dali/operators/image/convolution/gaussian_blur.cc +++ b/dali/operators/image/convolution/gaussian_blur.cc @@ -108,10 +108,10 @@ class GaussianBlurOpCpu : public OpImplBase { params_[i] = ObtainSampleParams(i, spec_, ws); windows_[i].PrepareWindows(params_[i]); // We take only last `ndim` siginificant dimensions to handle sequences as well - auto elem_shape = input[i].shape().template last(); + auto elem_shape = input.tensor_shape(i).template last(); auto& req = kmgr_.Setup(i, ctx_, elem_shape, params_[i].window_sizes); // The shape of data stays untouched - output_desc[0].shape.set_tensor_shape(i, input[i].shape()); + output_desc[0].shape.set_tensor_shape(i, input.tensor_shape(i)); } return true; } @@ -125,7 +125,7 @@ class GaussianBlurOpCpu : public OpImplBase { int nsamples = input.shape().num_samples(); for (int sample_idx = 0; sample_idx < nsamples; sample_idx++) { - const auto& shape = input[sample_idx].shape(); + const auto& shape = input.tensor_shape(sample_idx); auto elem_volume = volume(shape.begin() + dim_desc_.usable_axes_start, shape.end()); int seq_elements = 1; @@ -138,11 +138,11 @@ class GaussianBlurOpCpu : public OpImplBase { thread_pool.AddWork( [this, &input, &output, sample_idx, elem_idx, stride](int thread_id) { auto gaussian_windows = windows_[sample_idx].GetWindows(); - auto elem_shape = input[sample_idx].shape().template last(); + auto elem_shape = input.tensor_shape(sample_idx).template last(); auto in_view = TensorView{ - input[sample_idx].template data() + stride * elem_idx, elem_shape}; + input.template tensor(sample_idx) + stride * elem_idx, elem_shape}; auto out_view = TensorView{ - output[sample_idx].template mutable_data() + stride * elem_idx, elem_shape}; + output.template mutable_tensor(sample_idx) + stride * elem_idx, elem_shape}; // I need a context for that particular run (or rather matching the thread & // scratchpad) auto ctx = ctx_; diff --git a/dali/operators/image/convolution/laplacian.cc 
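// Test hunks above fill argument batches through the batch handle after a single Resize,
// instead of touching (*batch)[i]. Sketch with illustrative values:
Minp->Resize(Mtls, DALI_FLOAT);
for (int i = 0; i < Mtls.num_samples(); i++) {
  float *data = Minp->mutable_tensor<float>(i);  // was: (*Minp)[i].mutable_data<float>()
  for (int j = 0; j < volume(Mtls[i]); j++)
    data[j] = 1 + j + i * 10;
}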
b/dali/operators/image/convolution/laplacian.cc index 106d662d796..6cec3a3879f 100644 --- a/dali/operators/image/convolution/laplacian.cc +++ b/dali/operators/image/convolution/laplacian.cc @@ -141,10 +141,10 @@ class LaplacianOpCpu : public OpImplBase { kmgr_.template Resize(nsamples); for (int sample_idx = 0; sample_idx < nsamples; sample_idx++) { // We take only last `ndim` siginificant dimensions to handle sequences as well - auto elem_shape = input[sample_idx].shape().template last(); + auto elem_shape = input.shape()[sample_idx].template last(); kmgr_.Setup(sample_idx, ctx_, elem_shape, args.GetWindowSizes(sample_idx)); // The shape of data stays untouched - output_desc[0].shape.set_tensor_shape(sample_idx, input[sample_idx].shape()); + output_desc[0].shape.set_tensor_shape(sample_idx, input.tensor_shape(sample_idx)); } return true; } @@ -158,7 +158,7 @@ class LaplacianOpCpu : public OpImplBase { int nsamples = input.shape().num_samples(); for (int sample_idx = 0; sample_idx < nsamples; sample_idx++) { - const auto& shape = input[sample_idx].shape(); + const auto& shape = input.tensor_shape(sample_idx); auto elem_volume = volume(shape.begin() + dim_desc_.usable_axes_start, shape.end()); auto priority = elem_volume * args.GetTotalWindowSizes(sample_idx); int seq_elements = volume(shape.begin(), shape.begin() + dim_desc_.usable_axes_start); @@ -170,9 +170,9 @@ class LaplacianOpCpu : public OpImplBase { const auto& scales = args.GetScales(sample_idx); auto elem_shape = input[sample_idx].shape().template last(); auto in_view = TensorView{ - input[sample_idx].template data() + stride * elem_idx, elem_shape}; + input.template tensor(sample_idx) + stride * elem_idx, elem_shape}; auto out_view = TensorView{ - output[sample_idx].template mutable_data() + stride * elem_idx, elem_shape}; + output.template mutable_tensor(sample_idx) + stride * elem_idx, elem_shape}; // Copy context so that the kernel instance can modify scratchpad auto ctx = ctx_; kmgr_.Run(sample_idx, ctx, out_view, in_view, windows_[sample_idx], diff --git a/dali/operators/image/distortion/jpeg_compression_distortion_op_gpu.cu b/dali/operators/image/distortion/jpeg_compression_distortion_op_gpu.cu index d9a377e2cfc..0bd2c886e0b 100644 --- a/dali/operators/image/distortion/jpeg_compression_distortion_op_gpu.cu +++ b/dali/operators/image/distortion/jpeg_compression_distortion_op_gpu.cu @@ -71,7 +71,7 @@ void JpegCompressionDistortionGPU::RunImpl(workspace_t &ws) { // Set quality argument for an image from samples if (is_sequence) { for (int i = 0; i < nsamples; i++) { - auto nframes = input.tensor_shape_span(i)[0]; + auto nframes = input.shape().tensor_shape_span(i)[0]; for (int j = 0; j < nframes; ++j) { quality_.push_back(quality_arg_[i].data[0]); } diff --git a/dali/operators/image/peek_shape/peek_image_shape.h b/dali/operators/image/peek_shape/peek_image_shape.h index 7b15129182c..9a97122293d 100644 --- a/dali/operators/image/peek_shape/peek_image_shape.h +++ b/dali/operators/image/peek_shape/peek_image_shape.h @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -16,8 +16,10 @@ #define DALI_OPERATORS_IMAGE_PEEK_SHAPE_PEEK_IMAGE_SHAPE_H_ #include +#include "dali/core/backend_tags.h" #include "dali/image/image_factory.h" #include "dali/core/tensor_shape.h" +#include "dali/pipeline/data/types.h" #include "dali/pipeline/operator/operator.h" #include "dali/core/static_switch.h" @@ -62,10 +64,9 @@ class PeekImageShape : public Operator { } template - void WriteShape(Tensor &out, const TensorShape<3> &shape) { - type *data = out.mutable_data(); + void WriteShape(TensorView out, const TensorShape<3> &shape) { for (int i = 0; i < 3; ++i) { - data[i] = shape[i]; + out.data[i] = shape[i]; } } @@ -74,20 +75,22 @@ class PeekImageShape : public Operator { const auto &input = ws.template Input(0); auto &output = ws.template Output(0); size_t batch_size = input.num_samples(); + DALI_ENFORCE(input.type() == DALI_UINT8, "Input must be stored as uint8 data."); for (size_t sample_id = 0; sample_id < batch_size; ++sample_id) { thread_pool.AddWork([sample_id, &input, &output, this] (int tid) { const auto& image = input[sample_id]; // Verify input - DALI_ENFORCE(image.ndim() == 1, + // TODO(klecki): Move the checks to scope above + DALI_ENFORCE(image.shape().sample_dim() == 1, "Input must be 1D encoded jpeg string."); - DALI_ENFORCE(IsType(image.type()), - "Input must be stored as uint8 data."); - auto img = ImageFactory::CreateImage(image.data(), image.size(), {}); + DALI_ENFORCE(image.type() == DALI_UINT8, "Input must be stored as uint8 data."); + auto img = + ImageFactory::CreateImage(image._data(), image.shape().num_elements(), {}); auto shape = img->PeekShape(); TYPE_SWITCH(output_type_, type2id, type, (int32_t, uint32_t, int64_t, uint64_t, float, double), - (WriteShape(output[sample_id], shape);), + (WriteShape(view(output[sample_id]), shape);), (DALI_FAIL(make_string("Unsupported type for Shapes: ", output_type_)))); }, 0); // the amount of work depends on the image format and exact sample which is unknown here diff --git a/dali/operators/image/remap/displacement_filter_impl_cpu.h b/dali/operators/image/remap/displacement_filter_impl_cpu.h index 15a272672b6..afe2ac823c6 100644 --- a/dali/operators/image/remap/displacement_filter_impl_cpu.h +++ b/dali/operators/image/remap/displacement_filter_impl_cpu.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
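// PeekImageShape above now writes the result through a TensorView obtained with view()
// rather than through a Tensor reference. Sketch; the template arguments on TensorView
// and view() are assumptions (they are not visible in the hunk above), and `type` is one
// of the types handled by the surrounding TYPE_SWITCH:
template <typename type>
void WriteShape(TensorView<StorageCPU, type> out, const TensorShape<3> &shape) {
  for (int i = 0; i < 3; ++i)
    out.data[i] = shape[i];  // TensorView exposes its raw pointer as .data
}
// call site: WriteShape(view<type>(output[sample_id]), shape);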
@@ -21,6 +21,7 @@ #include "dali/core/common.h" #include "dali/operators/image/remap/displacement_filter.h" +#include "dali/pipeline/data/sample_view.h" #include "dali/pipeline/data/views.h" #include "dali/kernels/kernel_params.h" #include "dali/kernels/imgproc/sampler.h" @@ -90,7 +91,7 @@ class DisplacementFilter } template - void RunWarp(Tensor &output, const Tensor &input, int thread_idx) { + void RunWarp(SampleView output, ConstSampleView input, int thread_idx) { auto &displace = displace_[thread_idx]; In fill[1024]; auto in = view(input); @@ -109,9 +110,9 @@ class DisplacementFilter PrepareDisplacement(ws, sample_idx, thread_idx); - if (!has_mask_ || (*mask_)[sample_idx].data()[0]) { - const auto &in_tensor = input[sample_idx]; - auto &out_tensor = output[sample_idx]; + if (!has_mask_ || mask_->tensor(sample_idx)[0]) { + auto in_tensor = input[sample_idx]; + auto out_tensor = output[sample_idx]; switch (interp_type_) { case DALI_INTERP_NN: @@ -138,9 +139,7 @@ class DisplacementFilter " only NN and LINEAR are supported for this operation"); } } else { - const auto &in_tensor = input[sample_idx]; - auto &out_tensor = output[sample_idx]; - out_tensor.Copy(in_tensor); + output.UnsafeCopySample(sample_idx, input, sample_idx); } } diff --git a/dali/operators/image/remap/displacement_filter_impl_gpu.cuh b/dali/operators/image/remap/displacement_filter_impl_gpu.cuh index 7064e327b23..915d1955493 100644 --- a/dali/operators/image/remap/displacement_filter_impl_gpu.cuh +++ b/dali/operators/image/remap/displacement_filter_impl_gpu.cuh @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -325,7 +325,7 @@ class DisplacementFilter(sample_idx); sample.raw_params = GetDisplacementParams(sample_idx); sample.shape = shape.tensor_shape(sample_idx); - sample.mask = has_mask_ ? ws.ArgumentInput("mask")[sample_idx].data()[0] : true; + sample.mask = has_mask_ ? (ws.ArgumentInput("mask").tensor(sample_idx))[0] : true; } samples_dev_.from_host(samples_, stream); diff --git a/dali/operators/image/remap/warp_affine_params.h b/dali/operators/image/remap/warp_affine_params.h index cb6baeae248..0d53de20d1e 100644 --- a/dali/operators/image/remap/warp_affine_params.h +++ b/dali/operators/image/remap/warp_affine_params.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -139,9 +139,9 @@ class WarpAffineParamProvider auto *params = this->template AllocParams(); for (int i = 0; i < num_samples_; i++) { if (invert) { - params[i] = static_cast(input[i].raw_data())->inv(); + params[i] = static_cast(input.raw_tensor(i))->inv(); } else { - params[i] = *static_cast(input[i].raw_data()); + params[i] = *static_cast(input.raw_tensor(i)); } } } diff --git a/dali/operators/image/remap/warp_param_provider.h b/dali/operators/image/remap/warp_param_provider.h index abf9a61b292..44c79e5ff34 100644 --- a/dali/operators/image/remap/warp_param_provider.h +++ b/dali/operators/image/remap/warp_param_provider.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. 
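// Several helpers above change their signatures from Tensor<Backend>& to SampleView /
// ConstSampleView, which are small non-owning handles passed by value; views into a batch
// are obtained by indexing it. Simplified sketch of the convention used in this patch:
// new signature:  void RunWarp(SampleView<CPUBackend> output, ConstSampleView<CPUBackend> input, int thread_idx);
auto in_tensor  = input[sample_idx];   // ConstSampleView into the input batch
auto out_tensor = output[sample_idx];  // SampleView into the output batch
RunWarp(out_tensor, in_tensor, thread_idx);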
All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -47,7 +47,7 @@ class InterpTypeProvider { "interp_type must be a single value or contain one value per sample"); interp_types_.resize(n); for (int i = 0; i < n; i++) - interp_types_[i] = tensor_vector[i].data()[0]; + interp_types_[i] = tensor_vector.tensor(i)[0]; } else { interp_types_.resize(1, spec.template GetArgument("interp_type")); } diff --git a/dali/operators/math/expressions/expression_impl_factory.h b/dali/operators/math/expressions/expression_impl_factory.h index 25a920df659..a52215a9a61 100644 --- a/dali/operators/math/expressions/expression_impl_factory.h +++ b/dali/operators/math/expressions/expression_impl_factory.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -130,7 +130,7 @@ struct ExprImplTask { }; inline OutputSamplePtr GetOutputSamplePointer(HostWorkspace &ws, int output_idx, int sample_idx) { - return ws.template Output(output_idx)[sample_idx].raw_mutable_data(); + return ws.template Output(output_idx).raw_mutable_tensor(sample_idx); } inline OutputSamplePtr GetOutputSamplePointer(DeviceWorkspace &ws, int output_idx, int sample_idx) { @@ -138,7 +138,7 @@ inline OutputSamplePtr GetOutputSamplePointer(DeviceWorkspace &ws, int output_id } inline InputSamplePtr GetInputSamplePointer(HostWorkspace &ws, int input_idx, int sample_idx) { - return ws.template Input(input_idx)[sample_idx].raw_data(); + return ws.template Input(input_idx).raw_tensor(sample_idx); } inline InputSamplePtr GetInputSamplePointer(DeviceWorkspace &ws, int input_idx, int sample_idx) { diff --git a/dali/operators/numba_function/numba_func.cc b/dali/operators/numba_function/numba_func.cc index 159d2d4e220..6fc514f17af 100644 --- a/dali/operators/numba_function/numba_func.cc +++ b/dali/operators/numba_function/numba_func.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -260,13 +260,13 @@ void NumbaFuncImpl::RunImpl(workspace_t &ws) { for (size_t out_id = 0; out_id < out_types_.size(); out_id++) { auto& out = ws.Output(out_id); for (int i = 0; i < N; i++) { - out_ptrs[N * out_id + i] = reinterpret_cast(out[i].raw_mutable_data()); + out_ptrs[N * out_id + i] = reinterpret_cast(out.raw_mutable_tensor(i)); } } for (size_t in_id = 0; in_id < in_types_.size(); in_id++) { auto& in = ws.Input(in_id); for (int i = 0; i < N; i++) { - in_ptrs[N * in_id + i] = reinterpret_cast(in[i].raw_data()); + in_ptrs[N * in_id + i] = reinterpret_cast(in.raw_tensor(i)); } } diff --git a/dali/operators/python_function/dltensor_function.cc b/dali/operators/python_function/dltensor_function.cc index 0adab7da4fb..bc2d75d6b2b 100644 --- a/dali/operators/python_function/dltensor_function.cc +++ b/dali/operators/python_function/dltensor_function.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -77,9 +77,9 @@ py::list PrepareDLTensorInputs(HostWorkspace &ws) { py::list input_tuple; for (Index idx = 0; idx < ws.NumInput(); ++idx) { py::list dl_tensor_list; + auto &tvec = ws.UnsafeMutableInput(idx); for (Index i = 0; i < ws.GetInputBatchSize(idx); ++i) { - auto &t = ws.UnsafeMutableInput(idx)[i]; - auto dl_capsule = TensorToDLPackView(t); + auto dl_capsule = TensorToDLPackView(tvec[i], tvec.device_id()); dl_tensor_list.append(dl_capsule); } input_tuple.append(dl_tensor_list); @@ -106,8 +106,8 @@ py::list PrepareDLTensorInputsPerSample(HostWorkspace &ws) { for (Index s = 0; s < batch_size; ++s) { py::list tuple; for (Index idx = 0; idx < ws.NumInput(); ++idx) { - auto &t = ws.UnsafeMutableInput(idx)[s]; - auto dl_capsule = TensorToDLPackView(t); + auto &tvec = ws.UnsafeMutableInput(idx); + auto dl_capsule = TensorToDLPackView(tvec[s], tvec.device_id()); tuple.append(dl_capsule); } input_tuples.append(tuple); @@ -148,7 +148,7 @@ void CopyOutputData(TensorVector &output, std::vector auto out_shape = output.shape(); for (int i = 0; i < batch_size; ++i) { thread_pool.AddWork([&, i](int) { - CopyDlTensor(output[i].raw_mutable_data(), dl_tensors[i]); + CopyDlTensor(output.raw_mutable_tensor(i), dl_tensors[i]); }, out_shape.tensor_size(i)); } thread_pool.RunAll(); diff --git a/dali/operators/reader/loader/nemo_asr_loader.cc b/dali/operators/reader/loader/nemo_asr_loader.cc index c3fe9d57801..d547a7d0745 100644 --- a/dali/operators/reader/loader/nemo_asr_loader.cc +++ b/dali/operators/reader/loader/nemo_asr_loader.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -108,7 +108,7 @@ void NemoAsrLoader::PrepareEmpty(AsrSample &sample) { } template -void NemoAsrLoader::ReadAudio(Tensor &audio, +void NemoAsrLoader::ReadAudio(SampleView audio, const AudioMetadata &audio_meta, const NemoAsrEntry &entry, AudioDecoderBase &decoder, @@ -170,7 +170,7 @@ void NemoAsrLoader::ReadSample(AsrSample& sample) { TYPE_SWITCH(dtype_, type2id, OutputType, (int16_t, int32_t, float), ( // Audio decoding will be run in the prefetch function, once the batch is formed - sample.decode_f_ = [this, &sample, &entry, offset](Tensor &audio, int tid) { + sample.decode_f_ = [this, &sample, &entry, offset](SampleView audio, int tid) { sample.decoder().OpenFromFile(entry.audio_filepath); if (offset > 0) sample.decoder().SeekFrames(offset); diff --git a/dali/operators/reader/loader/nemo_asr_loader.h b/dali/operators/reader/loader/nemo_asr_loader.h index 218573cd2e1..e1cf137e82b 100644 --- a/dali/operators/reader/loader/nemo_asr_loader.h +++ b/dali/operators/reader/loader/nemo_asr_loader.h @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
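// The DLPack hunks above build capsules from batch sample views plus an explicit device
// id taken from the batch, instead of from individual Tensor objects. Sketch following
// the calls above:
auto &tvec = ws.UnsafeMutableInput(idx);
for (Index i = 0; i < ws.GetInputBatchSize(idx); ++i) {
  auto dl_capsule = TensorToDLPackView(tvec[i], tvec.device_id());  // sample view + device id
  dl_tensor_list.append(dl_capsule);
}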
@@ -65,7 +65,7 @@ class AsrSample { return shape_; } - void decode_audio(Tensor& audio, int tid) { + void decode_audio(SampleView audio, int tid) { decode_f_(audio, tid); } @@ -87,7 +87,7 @@ class AsrSample { std::string audio_filepath_; // for tensor metadata purposes TensorShape<> shape_; - std::function&, int)> decode_f_; + std::function, int)> decode_f_; std::unique_ptr decoder_; }; @@ -158,7 +158,7 @@ class DLL_PUBLIC NemoAsrLoader : public Loader { private: template - void ReadAudio(Tensor &audio, + void ReadAudio(SampleView audio, const AudioMetadata &audio_meta, const NemoAsrEntry &entry, AudioDecoderBase &decoder, diff --git a/dali/operators/reader/loader/nemo_asr_loader_test.cc b/dali/operators/reader/loader/nemo_asr_loader_test.cc index fd37584b59b..9adb476628b 100644 --- a/dali/operators/reader/loader/nemo_asr_loader_test.cc +++ b/dali/operators/reader/loader/nemo_asr_loader_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #include #include #include +#include "dali/pipeline/data/backend.h" #include "dali/test/dali_test_config.h" #include "dali/pipeline/data/views.h" #include "dali/test/tensor_test_utils.h" @@ -219,7 +220,8 @@ TEST(NemoAsrLoaderTest, ReadSample) { Tensor sample_audio; loader.ReadSample(sample); sample_audio.Resize(sample.shape(), DALI_INT16); - sample.decode_audio(sample_audio, 0); + auto sample_audio_view = sample_view(sample_audio); + sample.decode_audio(sample_audio_view, 0); ASSERT_EQ(sample.index(), 0); TensorView ref(ref_data.data(), {ref_samples, 2}); Check(ref, view(sample_audio)); @@ -246,7 +248,8 @@ TEST(NemoAsrLoaderTest, ReadSample) { Tensor sample_audio; loader.ReadSample(sample); sample_audio.Resize(sample.shape(), DALI_FLOAT); - sample.decode_audio(sample_audio, 0); + auto sample_audio_view = sample_view(sample_audio); + sample.decode_audio(sample_audio_view, 0); ASSERT_EQ(sample.index(), 0); TensorView ref(downmixed.data(), {ref_samples}); Check(ref, view(sample_audio), EqualEpsRel(1e-5, 1e-5)); @@ -271,7 +274,8 @@ TEST(NemoAsrLoaderTest, ReadSample) { loader.PrepareMetadata(); loader.ReadSample(sample); sample_audio.Resize(sample.shape(), DALI_FLOAT); - sample.decode_audio(sample_audio, 0); + auto sample_audio_view = sample_view(sample_audio); + sample.decode_audio(sample_audio_view, 0); } int64_t downsampled_len = @@ -302,7 +306,10 @@ TEST(NemoAsrLoaderTest, ReadSample) { loader.PrepareMetadata(); loader.ReadSample(sample_int16); sample_int16_audio.Resize(sample_int16.shape(), DALI_INT16); - sample_int16.decode_audio(sample_int16_audio, 0); + SampleView sample_audio_view(sample_int16_audio.raw_mutable_data(), + sample_int16_audio.shape(), + sample_int16_audio.type()); + sample_int16.decode_audio(sample_audio_view, 0); } ASSERT_EQ(volume(sample_audio.shape()), volume(sample_int16_audio.shape())); @@ -381,7 +388,8 @@ TEST(NemoAsrLoaderTest, ReadSample_OffsetAndDuration) { TensorShape<> expected_sh{length, 2}; ASSERT_EQ(expected_sh, sample.shape()); sample_audio.Resize(sample.shape(), DALI_INT16); - sample.decode_audio(sample_audio, 0); + auto sample_audio_view = sample_view(sample_audio); + sample.decode_audio(sample_audio_view, 0); TensorView ref(ref_data.data() + offset * 2, expected_sh); Check(ref, view(sample_audio)); diff --git 
a/dali/operators/reader/nemo_asr_reader_op.cc b/dali/operators/reader/nemo_asr_reader_op.cc index 8aad58802ad..4d98793b9c0 100755 --- a/dali/operators/reader/nemo_asr_reader_op.cc +++ b/dali/operators/reader/nemo_asr_reader_op.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,6 +13,8 @@ // limitations under the License. #include "dali/operators/reader/nemo_asr_reader_op.h" +#include "dali/pipeline/data/backend.h" +#include "dali/pipeline/data/types.h" namespace dali { @@ -163,8 +165,8 @@ void NemoAsrReader::Prefetch() { // Waiting until all the audio samples are ready to be consumed decoded_map_.clear(); for (int i = 0; i < nsamples; i++) { - auto &sample = *curr_batch[i]; - auto &audio = audio_batch[i]; + AsrSample &sample = *curr_batch[i]; + SampleView audio = audio_batch[i]; if (decoded_map_.find(&sample) != decoded_map_.end()) continue; @@ -173,7 +175,7 @@ void NemoAsrReader::Prefetch() { const auto &audio_meta = sample.audio_meta(); int64_t priority = audio_meta.length * audio_meta.channels; thread_pool_.AddWork( - [&audio, &sample](int tid) { + [audio, &sample](int tid) { sample.decode_audio(audio, tid); }, priority); } @@ -183,7 +185,7 @@ void NemoAsrReader::Prefetch() { for (int i = 0; i < nsamples; i++) { auto it = decoded_map_.find(curr_batch[i].get()); if (it != decoded_map_.end() && it->second != i) { - audio_batch[i].Copy(audio_batch[it->second]); + audio_batch.UnsafeCopySample(i, audio_batch, it->second); } } } @@ -191,10 +193,13 @@ void NemoAsrReader::Prefetch() { void NemoAsrReader::RunImpl(SampleWorkspace &ws) { const auto &sample = GetSample(ws.data_idx()); - const auto &sample_audio = GetDecodedAudioSample(ws.data_idx()); + auto sample_audio = GetDecodedAudioSample(ws.data_idx()); auto &audio = ws.Output(0); - audio.Copy(sample_audio); + audio.Resize(sample_audio.shape(), sample_audio.type()); + std::memcpy( + audio.raw_mutable_data(), sample_audio._raw_data(), + sample_audio.shape().num_elements() * TypeTable::GetTypeInfo(sample_audio.type()).size()); DALIMeta meta; meta.SetSourceInfo(sample.audio_filepath()); @@ -226,8 +231,8 @@ void NemoAsrReader::RunImpl(SampleWorkspace &ws) { } } -Tensor& NemoAsrReader::GetDecodedAudioSample(int sample_idx) { - auto &curr_batch = *prefetched_decoded_audio_[curr_batch_consumer_]; +ConstSampleView NemoAsrReader::GetDecodedAudioSample(int sample_idx) { + const auto &curr_batch = *prefetched_decoded_audio_[curr_batch_consumer_]; return curr_batch[sample_idx]; } diff --git a/dali/operators/reader/nemo_asr_reader_op.h b/dali/operators/reader/nemo_asr_reader_op.h index 3e864261fe0..c8d00546a50 100644 --- a/dali/operators/reader/nemo_asr_reader_op.h +++ b/dali/operators/reader/nemo_asr_reader_op.h @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
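// The reader hunk above replaces audio.Copy(sample_audio) with an explicit resize plus
// memcpy, since the source is now a non-owning ConstSampleView; the byte count is
// elements times element size, as in the code above (sketch):
audio.Resize(sample_audio.shape(), sample_audio.type());
std::memcpy(audio.raw_mutable_data(), sample_audio.raw_data(),
            sample_audio.shape().num_elements() *
                TypeTable::GetTypeInfo(sample_audio.type()).size());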
@@ -38,7 +38,7 @@ class NemoAsrReader : public DataReader { void RunImpl(SampleWorkspace &ws) override; private: - Tensor& GetDecodedAudioSample(int sample_idx); + ConstSampleView GetDecodedAudioSample(int sample_idx); bool read_sr_; bool read_text_; diff --git a/dali/operators/reader/numpy_reader_gpu_op.cc b/dali/operators/reader/numpy_reader_gpu_op.cc index 5e3eb39a78e..c9dbe03e2d9 100644 --- a/dali/operators/reader/numpy_reader_gpu_op.cc +++ b/dali/operators/reader/numpy_reader_gpu_op.cc @@ -113,9 +113,9 @@ void NumpyReaderGPU::Prefetch() { } curr_tensor_list.Resize(tmp_shapes, ref_type); - size_t chunk_size = static_cast( \ - div_ceil(static_cast(curr_tensor_list.nbytes()), - static_cast(thread_pool_.NumThreads()))); + size_t chunk_size = + static_cast(div_ceil(static_cast(curr_tensor_list.total_nbytes()), + static_cast(thread_pool_.NumThreads()))); // read the data for (size_t data_idx = 0; data_idx < curr_tensor_list.num_samples(); ++data_idx) { diff --git a/dali/operators/reader/numpy_reader_op.cc b/dali/operators/reader/numpy_reader_op.cc index 26ba6865241..683c45ed096 100644 --- a/dali/operators/reader/numpy_reader_op.cc +++ b/dali/operators/reader/numpy_reader_op.cc @@ -14,20 +14,22 @@ #include +#include "dali/core/backend_tags.h" #include "dali/kernels/slice/slice_cpu.h" #include "dali/kernels/slice/slice_flip_normalize_permute_pad_cpu.h" #include "dali/kernels/transpose/transpose.h" #include "dali/core/static_switch.h" #include "dali/operators/reader/numpy_reader_op.h" +#include "dali/pipeline/data/backend.h" namespace dali { -static void CopyHelper(Tensor &output, const Tensor &input, +static void CopyHelper(SampleView output, ConstSampleView input, ThreadPool &thread_pool, int min_blk_sz, int req_nblocks) { - auto out_ptr = static_cast(output.raw_mutable_data()); - auto in_ptr = static_cast(input.raw_data()); - auto nelements = volume(input.shape()); - auto nbytes = input.nbytes(); + auto *out_ptr = static_cast(output._raw_mutable_data()); + const auto *in_ptr = static_cast(input._raw_data()); + auto nelements = input.shape().num_elements(); + auto nbytes = nelements * TypeTable::GetTypeInfo(input.type()).size(); if (nelements <= min_blk_sz) { thread_pool.AddWork([=](int tid) { std::memcpy(out_ptr, in_ptr, nbytes); @@ -45,7 +47,7 @@ static void CopyHelper(Tensor &output, const Tensor &inp } } -static void TransposeHelper(Tensor &output, const Tensor &input) { +static void TransposeHelper(SampleView output, ConstSampleView input) { int n_dims = input.shape().sample_dim(); SmallVector perm; perm.resize(n_dims); @@ -56,7 +58,7 @@ static void TransposeHelper(Tensor &output, const Tensor ), DALI_FAIL(make_string("Unsupported input type: ", input.type()))); // NOLINT } -static void SliceHelper(Tensor &output, const Tensor &input, +static void SliceHelper(SampleView output, ConstSampleView input, const CropWindow &roi, float fill_value, ThreadPool &thread_pool, int min_blk_sz, int req_nblocks) { int ndim = input.shape().sample_dim(); @@ -77,7 +79,7 @@ static void SliceHelper(Tensor &output, const Tensor &in ), DALI_FAIL(make_string("Unsupported number of dimensions: ", ndim));); // NOLINT } -static void SlicePermuteHelper(Tensor &output, const Tensor &input, +static void SlicePermuteHelper(SampleView output, ConstSampleView input, const CropWindow &roi, float fill_value, ThreadPool &thread_pool, int min_blk_sz, int req_nblocks) { const auto &in_shape = input.shape(); @@ -245,19 +247,20 @@ void NumpyReaderCPU::RunImpl(HostWorkspace &ws) { const auto& file_i = GetSample(i); const 
auto& file_sh = file_i.get_shape(); int64_t sample_sz = volume(file_i.get_shape()); + auto input_sample = const_sample_view(file_i.data); if (need_slice_[i] && need_transpose_[i]) { - SlicePermuteHelper(output[i], file_i.data, rois_[i], fill_value_, thread_pool, kThreshold, + SlicePermuteHelper(output[i], input_sample, rois_[i], fill_value_, thread_pool, kThreshold, blocks_per_sample); } else if (need_slice_[i]) { - SliceHelper(output[i], file_i.data, rois_[i], fill_value_, thread_pool, kThreshold, + SliceHelper(output[i], input_sample, rois_[i], fill_value_, thread_pool, kThreshold, blocks_per_sample); } else if (need_transpose_[i]) { // TODO(janton): Parallelize when Transpose supports tiling - thread_pool.AddWork([&, i](int tid) { - TransposeHelper(output[i], file_i.data); + thread_pool.AddWork([&, i, input_sample](int tid) { + TransposeHelper(output[i], input_sample); }, sample_sz * 8); // 8 x (heuristic) } else { - CopyHelper(output[i], file_i.data, thread_pool, kThreshold, blocks_per_sample); + CopyHelper(output[i], input_sample, thread_pool, kThreshold, blocks_per_sample); } } thread_pool.RunAll(); diff --git a/dali/operators/reader/video_reader_op.cc b/dali/operators/reader/video_reader_op.cc index 81dd2040526..a652ed84c0c 100644 --- a/dali/operators/reader/video_reader_op.cc +++ b/dali/operators/reader/video_reader_op.cc @@ -47,7 +47,7 @@ void VideoReader::Prefetch() { auto &sample = curr_batch[data_idx]; // TODO(klecki): Rework this with proper sample-based tensor batch data structure auto sample_shared_ptr = unsafe_sample_owner(curr_tensor_list, data_idx); - sample->sequence.ShareData(sample_shared_ptr, curr_tensor_list.capacity(), + sample->sequence.ShareData(sample_shared_ptr, curr_tensor_list.total_capacity(), curr_tensor_list.is_pinned(), curr_tensor_list.shape()[data_idx], curr_tensor_list.type(), curr_tensor_list.order()); sample->sequence.set_device_id(curr_tensor_list.device_id()); diff --git a/dali/operators/reader/webdataset_reader_op.cc b/dali/operators/reader/webdataset_reader_op.cc index 2883fb4a23e..7840aecaae9 100644 --- a/dali/operators/reader/webdataset_reader_op.cc +++ b/dali/operators/reader/webdataset_reader_op.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -52,8 +52,8 @@ void WebdatasetReader::RunImpl(HostWorkspace& ws) { auto& sample = GetSample(data_idx); ThreadPool::Work copy_task = [output_idx = output_idx, data_idx = data_idx, &output, &sample](int) { - output[data_idx].SetMeta(sample[output_idx].GetMeta()); - std::memcpy(output[data_idx].raw_mutable_data(), sample[output_idx].raw_data(), + output.SetMeta(data_idx, sample[output_idx].GetMeta()); + std::memcpy(output.raw_mutable_tensor(data_idx), sample[output_idx].raw_data(), sample[output_idx].nbytes()); }; if (threaded) { diff --git a/dali/operators/segmentation/random_object_bbox.cc b/dali/operators/segmentation/random_object_bbox.cc index c4e7138187c..5864b46d57c 100644 --- a/dali/operators/segmentation/random_object_bbox.cc +++ b/dali/operators/segmentation/random_object_bbox.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
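// The NumpyReader hunk above wraps each loaded Tensor in a ConstSampleView via
// const_sample_view() before dispatching to the slice/transpose/copy helpers, and the
// view is captured by value in thread-pool work. Condensed sketch of that dispatch:
auto input_sample = const_sample_view(file_i.data);   // non-owning view of the loaded tensor
if (need_transpose_[i]) {
  thread_pool.AddWork([&, i, input_sample](int tid) {
    TransposeHelper(output[i], input_sample);
  }, sample_sz * 8);
} else {
  CopyHelper(output[i], input_sample, thread_pool, kThreshold, blocks_per_sample);
}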
// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -521,9 +521,9 @@ bool RandomObjectBBox::PickForegroundBox( template bool RandomObjectBBox::PickForegroundBox(SampleContext &context) { bool ret = false; - TYPE_SWITCH(context.input->type(), type2id, T, INPUT_TYPES, - (ret = PickForegroundBox(context, view(*context.input));), - (DALI_FAIL(make_string("Unsupported input type: ", context.input->type()))) + TYPE_SWITCH(context.input.type(), type2id, T, INPUT_TYPES, + (ret = PickForegroundBox(context, view(context.input));), + (DALI_FAIL(make_string("Unsupported input type: ", context.input.type()))) ); // NOLINT return ret; } @@ -533,7 +533,7 @@ void RandomObjectBBox::AllocateTempStorage(const TensorVector &input int64_t max_filtered_bytes = 0; int N = input.num_samples(); for (int i = 0; i < N; i++) { - int64_t vol = input[i].size(); + int64_t vol = input[i].shape().num_elements(); int label_size = vol > 0x80000000 ? 8 : 4; int64_t blob_bytes = vol * label_size; if (blob_bytes > max_blob_bytes) @@ -592,10 +592,10 @@ void RandomObjectBBox::RunImpl(HostWorkspace &ws) { // We want to limit the size of this auxiliary storage to limit memory traffic. // To that end, when the indices fit in int32_t, we use that type for the labels, // otherwise we fall back to int64_t. - auto blob_label = (input[i].size() > 0x80000000) ? DALI_INT64 : DALI_INT32; + auto blob_label = (input[i].shape().num_elements() > 0x80000000) ? DALI_INT64 : DALI_INT32; TYPE_SWITCH(blob_label, type2id, BlobLabel, (int32_t, int64_t), ( auto &ctx = GetContext(BlobLabel()); - ctx.Init(i, &input[i], &tp, tmp_filtered_storage_, tmp_blob_storage_); + ctx.Init(i, input[i], &tp, tmp_filtered_storage_, tmp_blob_storage_); ctx.out1 = out1[i]; if (out2.num_samples() > 0) ctx.out2 = out2[i]; diff --git a/dali/operators/segmentation/random_object_bbox.h b/dali/operators/segmentation/random_object_bbox.h index e6e4574dcb3..c7c94dbe374 100644 --- a/dali/operators/segmentation/random_object_bbox.h +++ b/dali/operators/segmentation/random_object_bbox.h @@ -1,4 +1,4 @@ -// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -175,12 +175,12 @@ class RandomObjectBBox : public Operator { template struct SampleContext { - void Init(int sample_idx, const Tensor *in, ThreadPool *tp, + void Init(int sample_idx, ConstSampleView in, ThreadPool *tp, Tensor &tmp_filtered, Tensor &tmp_blob) { this->sample_idx = sample_idx; thread_pool = tp; input = in; - auto &shape = input->shape(); + auto &shape = input.shape(); tmp_filtered.Resize(shape, DALI_UINT8); tmp_blob.Resize(shape, TypeTable::GetTypeId()); filtered = view(tmp_filtered); @@ -192,7 +192,7 @@ class RandomObjectBBox : public Operator { ThreadPool *thread_pool = nullptr; TensorView out1, out2; - const Tensor *input = nullptr; + ConstSampleView input = {}; int sample_idx; int class_idx; diff --git a/dali/operators/sequence/sequence_rearrange.cc b/dali/operators/sequence/sequence_rearrange.cc index 70d4a05b654..fffad1a80b7 100644 --- a/dali/operators/sequence/sequence_rearrange.cc +++ b/dali/operators/sequence/sequence_rearrange.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -79,9 +79,9 @@ void SequenceRearrange::RunImpl(workspace_t &ws) { for (int sample_idx = 0; sample_idx < curr_batch_size; ++sample_idx) { thread_pool.AddWork([this, &ws, &input, &output, sample_idx](int tid) { const TypeInfo &type = input.type_info(); - const auto *in_sample = reinterpret_cast(input[sample_idx].raw_data()); - auto *out_sample = reinterpret_cast(output[sample_idx].raw_mutable_data()); - const auto &in_shape = input[sample_idx].shape(); + const auto *in_sample = reinterpret_cast(input.raw_tensor(sample_idx)); + auto *out_sample = reinterpret_cast(output.raw_mutable_tensor(sample_idx)); + const auto &in_shape = input.tensor_shape(sample_idx); auto element_sizeof = volume(in_shape.last(in_shape.sample_dim() - 1)) * type.size(); TensorView new_order = {}; diff --git a/dali/operators/signal/fft/spectrogram.cc b/dali/operators/signal/fft/spectrogram.cc index 9b5be5617c5..0aa841377f3 100644 --- a/dali/operators/signal/fft/spectrogram.cc +++ b/dali/operators/signal/fft/spectrogram.cc @@ -227,8 +227,8 @@ bool SpectrogramImplCpu::SetupImpl(std::vector &out_desc auto view_window_fn = make_tensor_cpu<1>(window_fn_.data(), window_length_); for (int i = 0; i < nsamples; i++) { - auto view_signal_1d = - make_tensor_cpu<1>(input[i].template data(), {input[i].size()}); + auto view_signal_1d = make_tensor_cpu<1>(input.template tensor(i), + {input.tensor_shape(i).num_elements()}); auto &windows_req = kmgr_window_.Setup( @@ -269,8 +269,8 @@ void SpectrogramImplCpu::RunImpl(workspace_t &ws) { win_out.set_type(); win_out.Resize(window_out_desc_[0].shape.tensor_shape(i)); - auto view_signal_1d = - make_tensor_cpu<1>(input[i].data(), {input[i].size()}); + auto view_signal_1d = make_tensor_cpu<1>(input.tensor(i), + {input.tensor_shape(i).num_elements()}); kmgr_window_.Run( i, ctx, view(win_out), diff --git a/dali/operators/util/property.cc b/dali/operators/util/property.cc index 442bb26437b..898be949387 100644 --- a/dali/operators/util/property.cc +++ b/dali/operators/util/property.cc @@ -24,7 +24,7 @@ void SourceInfo::FillOutput(workspace_t& ws) { auto& output = ws.template Output(0); for (size_t sample_id = 0; sample_id < input.num_samples(); sample_id++) { auto si = GetSourceInfo(input, sample_id); - output[sample_id].Copy(make_cspan((const uint8_t*)si.c_str(), si.length())); + std::memcpy(output.mutable_tensor(sample_id), si.c_str(), si.length()); } } @@ -34,8 +34,7 @@ void Layout::FillOutput(workspace_t& ws) { auto& output = ws.template Output(0); for (size_t sample_id = 0; sample_id < input.num_samples(); sample_id++) { auto layout = GetLayout(input, sample_id); - output[sample_id].Copy( - make_cspan(reinterpret_cast(layout.c_str()), layout.size())); + std::memcpy(output.mutable_tensor(sample_id), layout.c_str(), layout.size()); } } diff --git a/dali/operators/util/property.h b/dali/operators/util/property.h index 9e7f53aa55a..272d0eaaed9 100644 --- a/dali/operators/util/property.h +++ b/dali/operators/util/property.h @@ -1,4 +1,4 @@ -// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
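The hunks above (sequence_rearrange, spectrogram, property) all follow the same migration: per-sample access through the batch index operator (input[i].data<T>(), output[i].raw_mutable_data(), input[i].shape()) is replaced by batch-level accessors (input.tensor<T>(i), output.raw_mutable_tensor(i), input.tensor_shape(i)). A minimal sketch of the new access pattern, not part of the patch; it assumes the TensorVector accessors introduced here and uses an illustrative helper name:

#include "dali/pipeline/data/tensor_vector.h"

namespace dali {

// Illustrative only: fill each sample of a float CPU batch with 0, 1, 2, ...
// using the batch-level accessors instead of indexing into per-sample Tensors.
// Assumes `batch` was already Resize()-ed with DALI_FLOAT elements.
inline void FillIota(TensorVector<CPUBackend> &batch) {
  for (int i = 0; i < static_cast<int>(batch.num_samples()); i++) {
    float *out = batch.mutable_tensor<float>(i);        // was: batch[i].mutable_data<float>()
    int64_t n = batch.tensor_shape(i).num_elements();   // was: batch[i].size()
    for (int64_t j = 0; j < n; j++)
      out[j] = static_cast<float>(j);
  }
}

}  // namespace dali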
@@ -26,7 +26,7 @@ namespace tensor_property { namespace detail { inline const DALIMeta& GetMeta(const TensorVector& batch, int tensor_idx) { - return batch[tensor_idx].GetMeta(); + return batch.GetMeta(tensor_idx); } inline const DALIMeta& GetMeta(const TensorList& batch, int tensor_idx) { diff --git a/dali/pipeline/data/dltensor.h b/dali/pipeline/data/dltensor.h index b75cec6e390..1509a02a4cf 100644 --- a/dali/pipeline/data/dltensor.h +++ b/dali/pipeline/data/dltensor.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,10 +16,12 @@ #define DALI_PIPELINE_DATA_DLTENSOR_H_ #include -#include #include +#include #include "third_party/dlpack/include/dlpack/dlpack.h" -#include "dali/pipeline/data/tensor.h" + +#include "dali/pipeline/data/sample_view.h" +#include "dali/pipeline/data/tensor_list.h" namespace dali { @@ -48,11 +50,11 @@ DLL_PUBLIC DLMTensorPtr MakeDLTensor(void *data, DALIDataType type, std::unique_ptr resource); template -DLMTensorPtr GetDLTensorView(Tensor &tensor) { - return MakeDLTensor(tensor.raw_mutable_data(), +DLMTensorPtr GetDLTensorView(SampleView tensor, int device_id) { + return MakeDLTensor(tensor._raw_mutable_data(), tensor.type(), std::is_same::value, - tensor.device_id(), + device_id, std::make_unique(tensor.shape())); } diff --git a/dali/pipeline/data/dltensor_test.cc b/dali/pipeline/data/dltensor_test.cc index ade6ef6cec6..5c6af7c4129 100644 --- a/dali/pipeline/data/dltensor_test.cc +++ b/dali/pipeline/data/dltensor_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
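GetDLTensorView now takes a type-erased SampleView plus an explicit device id instead of a Tensor, since the view no longer carries the device. A minimal call-site sketch, not part of the patch and assuming the headers touched above (it mirrors the updated test that follows); the wrapper name is illustrative:

#include "dali/pipeline/data/dltensor.h"
#include "dali/pipeline/data/sample_view.h"
#include "dali/pipeline/data/tensor.h"

namespace dali {

// Illustrative wrapper: expose a CPU Tensor as a DLPack tensor via SampleView.
inline DLMTensorPtr WrapAsDLPack(Tensor<CPUBackend> &t) {
  // Non-owning view over the tensor's allocation, shape and runtime type.
  SampleView<CPUBackend> sv{t.raw_mutable_data(), t.shape(), t.type()};
  // The device id is now passed explicitly, as it is no longer part of the view.
  return GetDLTensorView(sv, t.device_id());
}

}  // namespace dali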
@@ -14,19 +14,24 @@ #include #include + +#include "dali/pipeline/data/backend.h" #include "dali/pipeline/data/dltensor.h" +#include "dali/pipeline/data/sample_view.h" +#include "dali/pipeline/data/tensor.h" namespace dali { TEST(DLMTensorPtr, CPU) { Tensor tensor; tensor.Resize({100, 50, 3}, DALI_FLOAT); - DLMTensorPtr dlm_tensor = GetDLTensorView(tensor); + SampleView sv{tensor.raw_mutable_data(), tensor.shape(), tensor.type()}; + DLMTensorPtr dlm_tensor = GetDLTensorView(sv, tensor.device_id()); ASSERT_EQ(dlm_tensor->dl_tensor.ndim, 3); ASSERT_EQ(dlm_tensor->dl_tensor.shape[0], 100); ASSERT_EQ(dlm_tensor->dl_tensor.shape[1], 50); ASSERT_EQ(dlm_tensor->dl_tensor.shape[2], 3); - ASSERT_EQ(dlm_tensor->dl_tensor.data, tensor.raw_data()); + ASSERT_EQ(dlm_tensor->dl_tensor.data, sv._raw_data()); ASSERT_EQ(dlm_tensor->dl_tensor.dtype.code, kDLFloat); ASSERT_EQ(dlm_tensor->dl_tensor.dtype.bits, sizeof(float) * 8); ASSERT_EQ(dlm_tensor->dl_tensor.device.device_type, kDLCPU); @@ -36,12 +41,13 @@ TEST(DLMTensorPtr, CPU) { TEST(DLMTensorPtr, GPU) { Tensor tensor; tensor.Resize({100, 50, 1}, DALI_INT32); - DLMTensorPtr dlm_tensor = GetDLTensorView(tensor); + SampleView sv{tensor.raw_mutable_data(), tensor.shape(), tensor.type()}; + DLMTensorPtr dlm_tensor = GetDLTensorView(sv, tensor.device_id()); ASSERT_EQ(dlm_tensor->dl_tensor.ndim, 3); ASSERT_EQ(dlm_tensor->dl_tensor.shape[0], 100); ASSERT_EQ(dlm_tensor->dl_tensor.shape[1], 50); ASSERT_EQ(dlm_tensor->dl_tensor.shape[2], 1); - ASSERT_EQ(dlm_tensor->dl_tensor.data, tensor.raw_data()); + ASSERT_EQ(dlm_tensor->dl_tensor.data, sv._raw_data()); ASSERT_EQ(dlm_tensor->dl_tensor.dtype.code, kDLInt); ASSERT_EQ(dlm_tensor->dl_tensor.dtype.bits, sizeof(int) * 8); ASSERT_EQ(dlm_tensor->dl_tensor.device.device_type, kDLCUDA); diff --git a/dali/pipeline/data/sample_view.h b/dali/pipeline/data/sample_view.h index 057b08a5463..0d404be8c92 100644 --- a/dali/pipeline/data/sample_view.h +++ b/dali/pipeline/data/sample_view.h @@ -33,8 +33,8 @@ namespace dali { * convenient `view(SampleView)` conversion to TensorView, but doesn't break the batch * object encapsulation and doesn't allow to adjust the allocation. */ -template -class SampleView { +template +class SampleViewBase { public: /** * @name Get the underlying pointer to data @@ -43,14 +43,15 @@ class SampleView { /** * @brief Return an un-typed pointer to the underlying storage. */ - void *raw_mutable_data() { + template + std::enable_if_t::value, void *> _raw_mutable_data() { return data_; } /** * @brief Return a const, un-typed pointer to the underlying storage. */ - const void *raw_data() const { + const void *_raw_data() const { return data_; } @@ -58,8 +59,8 @@ class SampleView { * @brief Returns a typed pointer to the underlying storage. * The calling type must match the underlying type of the buffer. */ - template - inline T *mutable_data() { + template + inline std::enable_if_t::value, T *> _mutable_data() { DALI_ENFORCE( type() == TypeTable::GetTypeId(), make_string( @@ -74,14 +75,14 @@ class SampleView { * The calling type must match the underlying type of the buffer. */ template - inline const T *data() const { + inline const T *_data() const { DALI_ENFORCE( type() == TypeTable::GetTypeId(), make_string( "Calling type does not match buffer data type, requested type: ", TypeTable::GetTypeId(), " current buffer type: ", type(), ". 
To set type for the Buffer use 'set_type()' or Resize(shape, type) first.")); - return static_cast(data_); + return static_cast(data_); } //@} @@ -100,16 +101,16 @@ class SampleView { } - SampleView() = default; + SampleViewBase() = default; - SampleView(const SampleView &) = default; - SampleView &operator=(const SampleView &) = default; + SampleViewBase(const SampleViewBase &) = default; + SampleViewBase &operator=(const SampleViewBase &) = default; - SampleView(SampleView &&other) { + SampleViewBase(SampleViewBase &&other) { *this = std::move(other); } - SampleView &operator=(SampleView &&other) { + SampleViewBase &operator=(SampleViewBase &&other) { if (this != &other) { data_ = other.data_; other.data_ = nullptr; @@ -125,24 +126,79 @@ class SampleView { * @brief Construct the view inferring the type_id from the pointer value. */ template - SampleView(T *data, TensorShape<> shape) - : data_(data), shape_(std::move(shape)), type_id_(TypeTable::GetTypeId()) {} + SampleViewBase(T *data, TensorShape<> shape) + : data_(data), + shape_(std::move(shape)), + type_id_(TypeTable::GetTypeId>()) {} /** * @brief Construct the view with explicitly provided type_id. */ - SampleView(void *data, const TensorShape<> shape, DALIDataType type_id) + SampleViewBase(ptr_t data, const TensorShape<> shape, DALIDataType type_id) : data_(data), shape_(std::move(shape)), type_id_(type_id) {} - private: + protected: // TODO(klecki): The view is introduced with no co-owning pointer, it will be evaluated // if the usage of shared_ptr is possbile and adjusted if necessary. // Using shared_ptr might allow for sample exchange between two batches using operator[] - void *data_ = nullptr; + ptr_t data_ = nullptr; TensorShape<> shape_ = {0}; DALIDataType type_id_ = DALI_NO_TYPE; }; + +template +class SampleView : public SampleViewBase { + public: + using Base = SampleViewBase; + using Base::Base; + + private: + using Base::data_; + using Base::shape_; + using Base::type_id_; +}; + + +template +class ConstSampleView : public SampleViewBase { + public: + using Base = SampleViewBase; + using Base::Base; + + explicit ConstSampleView(const SampleView &other) + : Base(other._raw_data(), other.shape(), other.type()) {} + + ConstSampleView &operator=(const SampleView &other) { + data_ = other.data(); + shape_ = other.shape(); + type_id_ = other.type(); + return *this; + } + + explicit ConstSampleView(SampleView &&other) { + *this = std::move(other); + } + + ConstSampleView &operator=(SampleView &&other) { + if (this != &other) { + data_ = other.data_; + other.data_ = nullptr; + shape_ = std::move(other.shape_); + other.shape_ = {0}; + type_id_ = other.type_id_; + other.type_id_ = DALI_NO_TYPE; + } + return *this; + } + + private: + using Base::data_; + using Base::shape_; + using Base::type_id_; +}; + + } // namespace dali #endif // DALI_PIPELINE_DATA_SAMPLE_VIEW_H_ diff --git a/dali/pipeline/data/sample_view_test.cc b/dali/pipeline/data/sample_view_test.cc index 7a674e44718..f4d078dabd3 100644 --- a/dali/pipeline/data/sample_view_test.cc +++ b/dali/pipeline/data/sample_view_test.cc @@ -26,11 +26,10 @@ namespace dali { - template void compare(const SampleView &sv, const void *ptr, const TensorShape<> &shape, DALIDataType dtype) { - EXPECT_EQ(sv.raw_data(), ptr); + EXPECT_EQ(sv._raw_data(), ptr); EXPECT_EQ(sv.shape(), shape); EXPECT_EQ(sv.type(), dtype); } @@ -53,13 +52,45 @@ TEST(SampleView, Constructors) { SampleView from_void_ptr{reinterpret_cast(42), {1, 2, 3}, DALI_FLOAT}; compare(from_void_ptr, reinterpret_cast(42), {1, 2, 
3}, DALI_FLOAT); + + const int32_t cdata{}; + ConstSampleView const_from_ptr{&cdata, {1, 2, 3}}; + compare(const_from_ptr, &cdata, {1, 2, 3}, DALI_INT32); + + ConstSampleView const_from_void_ptr{ + reinterpret_cast(42), {1, 2, 3}, DALI_FLOAT}; + compare(const_from_void_ptr, reinterpret_cast(42), {1, 2, 3}, DALI_FLOAT); + + ConstSampleView const_from_nonconst{from_ptr}; + compare(const_from_nonconst, &data, {1, 2, 3}, DALI_INT32); +} + + +TEST(SampleView, FromTensor) { + Tensor tensor; + tensor.Resize({1, 2, 3}, DALI_INT32); + + auto sv = sample_view(tensor); + auto csv = const_sample_view(tensor); + + compare(sv, tensor.raw_data(), {1, 2, 3}, DALI_INT32); + compare(csv, tensor.raw_data(), {1, 2, 3}, DALI_INT32); + + Tensor scalar_tensor; + scalar_tensor.Resize({}, DALI_FLOAT); + + auto scalar_sv = sample_view(scalar_tensor); + auto scalar_csv = const_sample_view(scalar_tensor); + + compare(scalar_sv, scalar_tensor.raw_data(), {}, DALI_FLOAT); + compare(scalar_csv, scalar_tensor.raw_data(), {}, DALI_FLOAT); } TEST(SampleView, ViewConversion) { int32_t data{}; SampleView sample_view{&data, {1, 2, 3}}; - const SampleView const_sample_view{&data, {1, 2, 3}}; + ConstSampleView const_sample_view{&data, {1, 2, 3}}; compare(view(sample_view), TensorView{&data, {1, 2, 3}}); compare(view(sample_view), TensorView{&data, {1, 2, 3}}); @@ -77,10 +108,11 @@ TEST(SampleView, ViewConversion) { TEST(SampleView, ViewConversionError) { int32_t data{}; SampleView sample_view{&data, {1, 2, 3}}; - const SampleView const_sample_view{&data, {1, 2, 3}}; + ConstSampleView const_sample_view{&data, {1, 2, 3}}; EXPECT_THROW(view(sample_view), std::runtime_error); EXPECT_THROW(view(sample_view), std::runtime_error); EXPECT_THROW(view(const_sample_view), std::runtime_error); } + } // namespace dali diff --git a/dali/pipeline/data/tensor.h b/dali/pipeline/data/tensor.h index aa86bf5dafb..c38c8b51bc8 100644 --- a/dali/pipeline/data/tensor.h +++ b/dali/pipeline/data/tensor.h @@ -422,6 +422,10 @@ class Tensor : public Buffer { return *this; } + DALIMeta &GetMeta() { + return meta_; + } + const DALIMeta &GetMeta() const { return meta_; } diff --git a/dali/pipeline/data/tensor_list.h b/dali/pipeline/data/tensor_list.h index 67e440c3c3f..2f8c1ce7ca2 100644 --- a/dali/pipeline/data/tensor_list.h +++ b/dali/pipeline/data/tensor_list.h @@ -113,8 +113,8 @@ class DLL_PUBLIC TensorList { template DLL_PUBLIC inline void Copy(const TensorVector &other, AccessOrder order = {}, bool use_copy_kernel = false) { - auto type = other[0].type(); - auto layout = other[0].GetLayout(); + auto type = other.type(); + auto layout = other.GetLayout(); int dim = other[0].shape().sample_dim(); TensorListShape<> new_shape(other.num_samples(), dim); @@ -124,7 +124,7 @@ class DLL_PUBLIC TensorList { + std::to_string(i) + " expected Tensor with dim = " + to_string(dim) + " found Tensor with dim = " + to_string(other[i].shape().sample_dim())); assert(type == other[i].type()); - assert(layout == other[i].GetLayout()); + assert(layout == other.GetMeta(i).GetLayout()); new_shape.set_tensor_shape(i, other[i].shape()); } @@ -145,10 +145,9 @@ class DLL_PUBLIC TensorList { sizes.reserve(nsamples); for (size_t i = 0; i < nsamples; i++) { dsts.emplace_back(this->raw_mutable_tensor(i)); - srcs.emplace_back(other[i].raw_data()); - sizes.emplace_back(other[i].size()); - this->meta_[i].SetSourceInfo(other[i].GetSourceInfo()); - this->meta_[i].SetSkipSample(other[i].ShouldSkipSample()); + srcs.emplace_back(other[i]._raw_data()); + 
sizes.emplace_back(other[i].shape().num_elements()); + this->meta_[i] = other.GetMeta(i); } use_copy_kernel &= (std::is_same::value || other.is_pinned()) && @@ -605,6 +604,10 @@ class DLL_PUBLIC TensorList { return meta_[idx].ShouldSkipSample(); } + inline DALIMeta &GetMeta(int idx) { + return meta_[idx]; + } + inline const DALIMeta &GetMeta(int idx) const { return meta_[idx]; } @@ -636,17 +639,31 @@ class DLL_PUBLIC TensorList { /** * @brief Returns the size in bytes of the underlying data */ - size_t nbytes() const { + size_t total_nbytes() const { return data_.nbytes(); } /** * @brief Returns the real size of the allocation */ - size_t capacity() const { + size_t total_capacity() const { return data_.capacity(); } + /** + * @brief Returns the size in bytes of the underlying data + */ + std::vector nbytes() const { + return {data_.nbytes()}; + } + + /** + * @brief Returns the real size of the allocation + */ + std::vector capacity() const { + return {data_.capacity()}; + } + /** * @brief Set the type of the TensorList. The type needs to be set before calling * the Resize function that gives the shape. Type can be changed, if the current storage diff --git a/dali/pipeline/data/tensor_list_test.cc b/dali/pipeline/data/tensor_list_test.cc index e13df466127..699a431b778 100644 --- a/dali/pipeline/data/tensor_list_test.cc +++ b/dali/pipeline/data/tensor_list_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -107,7 +107,7 @@ TYPED_TEST(TensorListTest, TestGetTypeSizeBytes) { tl.template set_type(); ASSERT_EQ(tl._num_elements(), 0); - ASSERT_EQ(tl.nbytes(), 0); + ASSERT_EQ(tl.total_nbytes(), 0); ASSERT_FALSE(tl.has_data()); // Give the tensor list a size. 
This @@ -127,7 +127,7 @@ TYPED_TEST(TensorListTest, TestGetTypeSizeBytes) { ASSERT_TRUE(tl.has_data()); ASSERT_EQ(tl.num_samples(), num_tensor); ASSERT_EQ(tl._num_elements(), size); - ASSERT_EQ(tl.nbytes(), size*sizeof(float)); + ASSERT_EQ(tl.total_nbytes(), size*sizeof(float)); ASSERT_TRUE(IsType(tl.type())); tl.reserve(shape.num_elements() * sizeof(float)); @@ -146,8 +146,8 @@ TYPED_TEST(TensorListTest, TestReserveResize) { ASSERT_THROW(tl.set_pinned(true), std::runtime_error); ASSERT_TRUE(tl.has_data()); - ASSERT_EQ(tl.capacity(), shape.num_elements() * sizeof(float)); - ASSERT_EQ(tl.nbytes(), 0); + ASSERT_EQ(tl.total_capacity(), shape.num_elements() * sizeof(float)); + ASSERT_EQ(tl.total_nbytes(), 0); ASSERT_EQ(tl._num_elements(), 0); ASSERT_NE(unsafe_raw_data(tl), nullptr); @@ -155,7 +155,7 @@ TYPED_TEST(TensorListTest, TestReserveResize) { tl.template set_type(); ASSERT_EQ(tl._num_elements(), 0); - ASSERT_EQ(tl.nbytes(), 0); + ASSERT_EQ(tl.total_nbytes(), 0); ASSERT_TRUE(tl.has_data()); // We already had the allocation, just give it a shape and a type @@ -173,7 +173,7 @@ TYPED_TEST(TensorListTest, TestReserveResize) { ASSERT_TRUE(tl.has_data()); ASSERT_EQ(tl.num_samples(), num_tensor); ASSERT_EQ(tl._num_elements(), size); - ASSERT_EQ(tl.nbytes(), size*sizeof(float)); + ASSERT_EQ(tl.total_nbytes(), size*sizeof(float)); ASSERT_TRUE(IsType(tl.type())); @@ -214,7 +214,7 @@ TYPED_TEST(TensorListTest, TestGetContiguousPointer) { // Verify the internals ASSERT_EQ(tl._num_elements(), volume); ASSERT_EQ(tl.num_samples(), num_tensor); - ASSERT_EQ(tl.nbytes(), volume * sizeof(uint32_t)); + ASSERT_EQ(tl.total_nbytes(), volume * sizeof(uint32_t)); ASSERT_EQ(tl.type(), DALI_UINT32); ASSERT_TRUE(tl.IsContiguous()); ASSERT_NE(unsafe_raw_data(tl), nullptr); @@ -244,7 +244,7 @@ TYPED_TEST(TensorListTest, TestGetBytesThenNoAlloc) { ASSERT_EQ(tl.raw_tensor(i), sharer.raw_tensor(i)); } ASSERT_EQ(tl._num_elements(), size); - ASSERT_EQ(tl.nbytes(), size*sizeof(float)); + ASSERT_EQ(tl.total_nbytes(), size*sizeof(float)); ASSERT_EQ(tl.type(), sharer.type()); ASSERT_EQ(tl.num_samples(), num_tensor); ASSERT_TRUE(tl.shares_data()); @@ -279,7 +279,7 @@ TYPED_TEST(TensorListTest, TestGetBytesThenAlloc) { ASSERT_EQ(tl.raw_tensor(i), sharer.raw_tensor(i)); } ASSERT_EQ(tl._num_elements(), size); - ASSERT_EQ(tl.nbytes(), size*sizeof(float)); + ASSERT_EQ(tl.total_nbytes(), size*sizeof(float)); ASSERT_EQ(tl.type(), sharer.type()); ASSERT_EQ(tl.num_samples(), num_tensor); ASSERT_TRUE(tl.shares_data()); @@ -298,7 +298,7 @@ TYPED_TEST(TensorListTest, TestZeroSizeResize) { tensor_list.Resize(shape); ASSERT_FALSE(tensor_list.has_data()); - ASSERT_EQ(tensor_list.nbytes(), 0); + ASSERT_EQ(tensor_list.total_nbytes(), 0); ASSERT_EQ(tensor_list._num_elements(), 0); ASSERT_FALSE(tensor_list.shares_data()); } @@ -311,7 +311,7 @@ TYPED_TEST(TensorListTest, TestMultipleZeroSizeResize) { tensor_list.Resize(shape, DALI_FLOAT); ASSERT_FALSE(tensor_list.has_data()); - ASSERT_EQ(tensor_list.nbytes(), 0); + ASSERT_EQ(tensor_list.total_nbytes(), 0); ASSERT_EQ(tensor_list.num_samples(), num_tensor); ASSERT_EQ(tensor_list._num_elements(), 0); ASSERT_FALSE(tensor_list.shares_data()); @@ -333,7 +333,7 @@ TYPED_TEST(TensorListTest, TestFakeScalarResize) { tensor_list.Resize(shape); ASSERT_TRUE(tensor_list.has_data()); - ASSERT_EQ(tensor_list.nbytes(), num_scalar*sizeof(float)); + ASSERT_EQ(tensor_list.total_nbytes(), num_scalar*sizeof(float)); ASSERT_EQ(tensor_list._num_elements(), num_scalar); ASSERT_FALSE(tensor_list.shares_data()); @@ -353,7 
+353,7 @@ TYPED_TEST(TensorListTest, TestTrueScalarResize) { tensor_list.Resize(shape); ASSERT_TRUE(tensor_list.has_data()); - ASSERT_EQ(tensor_list.nbytes(), num_scalar*sizeof(float)); + ASSERT_EQ(tensor_list.total_nbytes(), num_scalar*sizeof(float)); ASSERT_EQ(tensor_list._num_elements(), num_scalar); ASSERT_FALSE(tensor_list.shares_data()); @@ -456,7 +456,7 @@ TYPED_TEST(TensorListTest, TestTypeChangeSameSize) { for (size_t i = 0; i < tensor_list.num_samples(); i++) { ptrs.push_back(tensor_list.raw_tensor(i)); } - size_t nbytes = tensor_list.nbytes(); + size_t nbytes = tensor_list.total_nbytes(); // Change the data type tensor_list.template set_type(); @@ -470,7 +470,7 @@ TYPED_TEST(TensorListTest, TestTypeChangeSameSize) { } // No memory allocation should have occurred - ASSERT_EQ(nbytes, tensor_list.nbytes()); + ASSERT_EQ(nbytes, tensor_list.total_nbytes()); } TYPED_TEST(TensorListTest, TestTypeChangeSmaller) { @@ -482,7 +482,7 @@ TYPED_TEST(TensorListTest, TestTypeChangeSmaller) { this->SetupTensorList(&tensor_list, shape, &offsets); - size_t nbytes = tensor_list.nbytes(); + size_t nbytes = tensor_list.total_nbytes(); const auto *base_ptr = unsafe_raw_data(tensor_list); // Change the data type to something smaller @@ -497,7 +497,7 @@ TYPED_TEST(TensorListTest, TestTypeChangeSmaller) { } // nbytes should have reduced by a factor of 4 - ASSERT_EQ(nbytes / sizeof(float) * sizeof(uint8), tensor_list.nbytes()); + ASSERT_EQ(nbytes / sizeof(float) * sizeof(uint8), tensor_list.total_nbytes()); } TYPED_TEST(TensorListTest, TestTypeChangeLarger) { @@ -509,7 +509,7 @@ TYPED_TEST(TensorListTest, TestTypeChangeLarger) { this->SetupTensorList(&tensor_list, shape, &offsets); - size_t nbytes = tensor_list.nbytes(); + size_t nbytes = tensor_list.total_nbytes(); // Change the data type to something larger tensor_list.template set_type(); @@ -522,7 +522,7 @@ TYPED_TEST(TensorListTest, TestTypeChangeLarger) { } // nbytes should have increased by a factor of 2 - ASSERT_EQ(nbytes / sizeof(float) * sizeof(double), tensor_list.nbytes()); + ASSERT_EQ(nbytes / sizeof(float) * sizeof(double), tensor_list.total_nbytes()); } TYPED_TEST(TensorListTest, TestShareData) { @@ -559,7 +559,7 @@ TYPED_TEST(TensorListTest, TestShareData) { // Check the internals ASSERT_TRUE(tensor_list2.shares_data()); - ASSERT_EQ(tensor_list2.nbytes(), tensor_list.nbytes()); + ASSERT_EQ(tensor_list2.total_nbytes(), tensor_list.total_nbytes()); ASSERT_EQ(tensor_list2.num_samples(), tensor_list.num_samples()); ASSERT_EQ(tensor_list2._num_elements(), tensor_list._num_elements()); for (size_t i = 0; i < tensor_list.num_samples(); ++i) { @@ -575,7 +575,7 @@ TYPED_TEST(TensorListTest, TestShareData) { // Check the internals ASSERT_EQ(tensor_list2._num_elements(), 0); - ASSERT_EQ(tensor_list2.nbytes(), 0); + ASSERT_EQ(tensor_list2.total_nbytes(), 0); ASSERT_EQ(tensor_list2.num_samples(), 0); ASSERT_EQ(tensor_list2.shape(), TensorListShape<>()); } diff --git a/dali/pipeline/data/tensor_test.cc b/dali/pipeline/data/tensor_test.cc index 44baaf1b560..537118517d1 100644 --- a/dali/pipeline/data/tensor_test.cc +++ b/dali/pipeline/data/tensor_test.cc @@ -361,7 +361,8 @@ TYPED_TEST(TensorTest, TestShareData) { for (int i = 0; i < num_tensor; ++i) { // TODO(klecki): Rework this with proper sample-based tensor batch data structure auto sample_shared_ptr = unsafe_sample_owner(tl, i); - tensor.ShareData(sample_shared_ptr, tl.capacity(), tl.is_pinned(), tl.shape()[i], tl.type()); + tensor.ShareData(sample_shared_ptr, tl.total_capacity(), tl.is_pinned(), 
tl.shape()[i], + tl.type()); tensor.set_device_id(tl.device_id()); tensor.SetMeta(tl.GetMeta(i)); @@ -379,11 +380,11 @@ TYPED_TEST(TensorTest, TestShareData) { TYPED_TEST(TensorTest, TestCopyToTensorList) { TensorVector tensors(16); - for (auto& t : tensors) { - auto shape = this->GetRandShape(4, 4); - t->template set_type(); - t->Resize(shape); + TensorListShape<4> shape(16); + for (int i = 0; i < 16; i++) { + shape.set_tensor_shape(i, this->GetRandShape(4, 4)); } + tensors.Resize(shape, DALI_FLOAT); TensorList tl; tl.Copy(tensors); @@ -394,8 +395,8 @@ TYPED_TEST(TensorTest, TestCopyToTensorList) { ASSERT_EQ(tensors[i].type(), tl.type()); ASSERT_EQ(tensors[i].shape(), tl.tensor_shape(i)); Index size = volume(tl.tensor_shape(i)); - ASSERT_EQ(tensors[i].size(), size); - ASSERT_EQ(tensors[i].nbytes(), size*sizeof(float)); + ASSERT_EQ(tensors[i].shape().num_elements(), size); + ASSERT_EQ(tensors[i].shape().num_elements() * tensors.type_info().size(), size*sizeof(float)); } } diff --git a/dali/pipeline/data/tensor_vector.cc b/dali/pipeline/data/tensor_vector.cc index 8740a97bfb2..1b78a90529d 100644 --- a/dali/pipeline/data/tensor_vector.cc +++ b/dali/pipeline/data/tensor_vector.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "dali/pipeline/data/tensor_vector.h" +#include "dali/core/common.h" namespace dali { @@ -62,11 +63,112 @@ TensorVector::TensorVector(TensorVector &&other) noexcept { other.tensors_.clear(); } - template -size_t TensorVector::nbytes() const noexcept { +void TensorVector::UnsafeSetSample(int dst, const TensorVector &owner, int src) { + // TODO(klecki): more consistency checks, contiguous -> non-contiguous removes shares_data from + // samples + if (type() == DALI_NO_TYPE && owner.type() != DALI_NO_TYPE) { + set_type(owner.type()); + } + if (!order()) { + set_order(owner.order()); + } + // Bounds check + assert(dst >= 0 && dst < static_cast(curr_tensors_size_)); + assert(src >= 0 && src < static_cast(owner.curr_tensors_size_)); + DALI_ENFORCE(type() == owner.type(), + make_string("Sample must have the same type as a target batch, current: ", type(), + " new: ", owner.type(), " for ", dst, " <- ", src, ".")); + DALI_ENFORCE(tensor_shape(dst) == TensorShape<>{0} || sample_dim() == owner.shape().sample_dim(), + make_string("Sample must have the same dimensionality as a target batch, current: ", + sample_dim(), " new: ", owner.shape().sample_dim(), " for ", dst, " <- ", + src, ".")); + DALI_ENFORCE(this->order() == owner.order(), "Sample must have the same order as a target batch"); + DALI_ENFORCE( + GetLayout() == "" || GetLayout() == owner.GetLayout(), + make_string("Sample must have the same layout as a target batch current: ", GetLayout(), + " new: ", owner.GetLayout(), " for ", dst, " <- ", src, ".")); + + SetContiguous(false); + // Setting a new share overwrites the previous one - so we can safely assume that even if + // we had a sample sharing into TL, it will be overwritten + tensors_[dst]->ShareData(*owner.tensors_[src]); + tl_->Reset(); +} + +template +void TensorVector::UnsafeSetSample(int dst, const Tensor &owner) { + // TODO(klecki): more consistency checks, contiguous -> non-contiguous removes shares_data from + // samples + if (type() == DALI_NO_TYPE && owner.type() != DALI_NO_TYPE) { + set_type(owner.type()); + } + if (!order()) { + set_order(owner.order()); + } + // Bounds check + assert(dst >= 0 && dst < static_cast(curr_tensors_size_)); + DALI_ENFORCE(type() == owner.type(), + make_string("Sample must have the same type as a target batch, 
current: ", type(), + " new: ", owner.type(), " for ", dst, " <-.")); + DALI_ENFORCE( + tensor_shape(dst) == TensorShape<>{0} || sample_dim() == owner.shape().sample_dim(), + make_string("Sample must have the same dimensionality as a target batch, current: ", + sample_dim(), " new: ", owner.shape().sample_dim(), " for ", dst, " <-.")); + DALI_ENFORCE(this->order() == owner.order(), "Sample must have the same order as a target batch"); + DALI_ENFORCE( + GetLayout() == "" || GetLayout() == owner.GetLayout(), + make_string("Sample must have the same layout as a target batch current: ", GetLayout(), + " new: ", owner.GetLayout(), " for ", dst, " <-.")); + SetContiguous(false); + // Setting a new share overwrites the previous one - so we can safely assume that even if + // we had a sample sharing into TL, it will be overwritten + tensors_[dst]->ShareData(owner); + tl_->Reset(); +} + +template +void TensorVector::UnsafeCopySample(int dst, const TensorVector &data, int src, + AccessOrder order) { + // TODO(klecki): more consistency checks, contiguous -> non-contiguous removes shares_data from + // samples + if (type() == DALI_NO_TYPE && data.type() != DALI_NO_TYPE) { + set_type(data.type()); + } + // Bounds check + assert(dst >= 0 && dst < static_cast(curr_tensors_size_)); + assert(src >= 0 && src < static_cast(data.curr_tensors_size_)); + DALI_ENFORCE(type() == data.type(), + make_string("Sample must have the same type as a target batch, current: ", type(), + " new: ", data.type(), " for ", dst, " <- ", src, ".")); + DALI_ENFORCE(tensor_shape(dst) == TensorShape<>{0} || sample_dim() == data.shape().sample_dim(), + make_string("Sample must have the same dimensionality as a target batch, current: ", + sample_dim(), " new: ", data.shape().sample_dim(), " for ", dst, " <- ", + src, ".")); + DALI_ENFORCE( + GetLayout() == "" || GetLayout() == data.GetLayout(), + make_string("Sample must have the same layout as a target batch current: ", GetLayout(), + " new: ", data.GetLayout(), " for ", dst, " <- ", src, ".")); + + // Either the shape matches and we can copy data as is or the target is just an individual sample + bool can_copy = + tensors_[dst]->shape() == data.tensors_[src]->shape() || + (!tl_->has_data() && state_ == State::noncontiguous); + + DALI_ENFORCE( + can_copy, + "Copying samples into TensorVector can happen either for exact shape match or when the " + "TensorVector is truly non contiguous. 
Either Resize first to the desired shape or reset the " + "TensorVector and SetSize for desired number of samples in non-contiguous mode."); + + tensors_[dst]->Copy(*data.tensors_[src], order); +} + + +template +size_t TensorVector::total_nbytes() const noexcept { if (state_ == State::contiguous) { - return tl_->nbytes(); + return tl_->total_nbytes(); } // else size_t total_nbytes = 0; @@ -78,9 +180,9 @@ size_t TensorVector::nbytes() const noexcept { template -size_t TensorVector::capacity() const noexcept { +size_t TensorVector::total_capacity() const noexcept { if (state_ == State::contiguous) { - return tl_->capacity(); + return tl_->total_capacity(); } // else size_t total_capacity = 0; @@ -90,6 +192,33 @@ size_t TensorVector::capacity() const noexcept { return total_capacity; } +template +std::vector TensorVector::nbytes() const noexcept { + if (state_ == State::contiguous) { + return {tl_->nbytes()}; + } + // else + std::vector result(tensors_.size()); + for (size_t i = 0; i < tensors_.size(); i++) { + result[i] = tensors_[i]->nbytes(); + } + return result; +} + + +template +std::vector TensorVector::capacity() const noexcept { + if (state_ == State::contiguous) { + return {tl_->capacity()}; + } + // else + std::vector result(tensors_.size()); + for (size_t i = 0; i < tensors_.size(); i++) { + result[i] = tensors_[i]->capacity(); + } + return result; +} + template TensorListShape<> TensorVector::shape() const { @@ -125,6 +254,7 @@ void TensorVector::set_order(AccessOrder order, bool synchronize) { tl_->set_order(order, false); for (auto &t : tensors_) t->set_order(order, false); + order_ = order; } template @@ -223,6 +353,11 @@ TensorLayout TensorVector::GetLayout() const { return {}; } +template +DALIMeta &TensorVector::GetMeta(int idx) { + assert(static_cast(idx) < curr_tensors_size_); + return tensors_[idx]->GetMeta(); +} template const DALIMeta &TensorVector::GetMeta(int idx) const { @@ -264,6 +399,17 @@ bool TensorVector::is_pinned() const { } +template +int TensorVector::device_id() const { + if (IsContiguous()) { + return tl_->device_id(); + } else if (!tensors_.empty()) { + return tensors_[0]->device_id(); + } + return CPU_ONLY_DEVICE_ID; +} + + template void TensorVector::reserve(size_t total_bytes) { if (state_ == State::noncontiguous) { @@ -432,6 +578,7 @@ void TensorVector::resize_tensors(int new_size) { if (!tensors_[i]) { tensors_[i] = std::make_shared>(); tensors_[i]->set_pinned(is_pinned()); + tensors_[i]->set_order(order()); } } } else if (static_cast(new_size) < curr_tensors_size_) { @@ -440,10 +587,41 @@ void TensorVector::resize_tensors(int new_size) { tensors_[i]->Reset(); } } + // TODO(klecki): Do not keep the invalidated tensors - this prevents memory hogging but + // also gets rid of reserved memory. + // tensors_.resize(new_size); } curr_tensors_size_ = new_size; } +template +void TensorVector::PropagateUp(bool contiguous) { + // TODO(klecki): This is mostly simple consistency check, but most of the metadata will be moved + // to the batch object for consitency and easier use in checks. It should allow for shape() + // to be ready to use as well as easy verification for SetSample/CopySample. + SetContiguous(contiguous); + // assume that the curr_tensors_size_ is valid + DALI_ENFORCE(curr_tensors_size_ > 0, "Unexpected empty output of operator. 
Internal DALI error."); + type_ = tensors_[0]->type_info(); + pinned_ = tensors_[0]->is_pinned(); + order_ = tensors_[0]->order(); + tl_->set_order(order_); + for (size_t i = 0; i < curr_tensors_size_; i++) { + DALI_ENFORCE(type() == tensors_[i]->type(), + make_string("Samples must have the same type, expected: ", type(), + " got: ", tensors_[i]->type(), " at ", i, ".")); + DALI_ENFORCE(sample_dim() == tensors_[i]->shape().sample_dim(), + make_string("Samples must have the same dimensionality, expected: ", sample_dim(), + " got: ", tensors_[i]->shape().sample_dim(), " at ", i, ".")); + DALI_ENFORCE(order() == tensors_[i]->order(), + make_string("Samples must have the same order, expected: ", order().get(), " ", + order().device_id(), " got: ", tensors_[i]->order().get(), " ", + tensors_[i]->order().device_id(), " at ", i, ".")); + DALI_ENFORCE(GetLayout() == tensors_[i]->GetLayout(), + make_string("Samples must have the same layout, expected: ", GetLayout(), + " got: ", tensors_[i]->GetLayout(), " at ", i, ".")); + } +} template void TensorVector::update_view(int idx) { diff --git a/dali/pipeline/data/tensor_vector.h b/dali/pipeline/data/tensor_vector.h index 4adc94de9c5..98bd7230eca 100644 --- a/dali/pipeline/data/tensor_vector.h +++ b/dali/pipeline/data/tensor_vector.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -21,11 +21,13 @@ #include #include +#include "dali/core/access_order.h" +#include "dali/core/tensor_shape.h" #include "dali/pipeline/data/backend.h" +#include "dali/pipeline/data/sample_view.h" #include "dali/pipeline/data/tensor.h" #include "dali/pipeline/data/tensor_list.h" -#include "dali/core/tensor_shape.h" namespace dali { @@ -35,6 +37,13 @@ namespace dali { * * Propagates Buffer calls to every tensor uniformly * + * TODO(klecki): Expected improvements to TensorVector + * 1. Remove superfluous indirection via shared_ptr to samples. + * 2. Keep metadata (shape, sample_dim, layout, order) at batch level like we already do with type + * 3. Detect and convert between contiguous and non-contiguous when possible: + * a. CopySample of bigger size + * b. Resize with coalesce option + * 4. Contiguity check * @tparam Backend */ template @@ -60,17 +69,17 @@ class DLL_PUBLIC TensorVector { DLL_PUBLIC TensorVector(TensorVector &&other) noexcept; AccessOrder order() const { - return tl_->order(); + return order_; } void set_order(AccessOrder order, bool synchronize = true); - Tensor &operator[](size_t pos) { - return *(tensors_[pos]); + SampleView operator[](size_t pos) { + return {tensors_[pos]->raw_mutable_data(), tensors_[pos]->shape(), tensors_[pos]->type()}; } - const Tensor &operator[](size_t pos) const { - return *(tensors_[pos]); + ConstSampleView operator[](size_t pos) const { + return {tensors_[pos]->raw_data(), tensors_[pos]->shape(), tensors_[pos]->type()}; } auto tensor_handle(size_t pos) { @@ -113,9 +122,13 @@ class DLL_PUBLIC TensorVector { return IsContiguous() ? tl_->sample_dim() : num_samples() ? 
tensors_[0]->shape().size() : 0; } - size_t nbytes() const noexcept; + size_t total_nbytes() const noexcept; + + size_t total_capacity() const noexcept; - size_t capacity() const noexcept; + std::vector nbytes() const noexcept; + + std::vector capacity() const noexcept; TensorListShape<> shape() const; @@ -123,14 +136,83 @@ class DLL_PUBLIC TensorVector { return tensors_[idx]->shape(); } - const void *raw_tensor(int idx) const { - return tensors_[idx]->raw_data(); + /** + * @brief Returns a typed pointer to the tensor with the given index. + */ + template + DLL_PUBLIC inline T* mutable_tensor(int idx) { + return tensors_[idx]->template mutable_data(); } - void* raw_mutable_tensor(int idx) { + /** + * @brief Returns a const typed pointer to the tensor with the given index. + */ + template + DLL_PUBLIC inline const T* tensor(int idx) const { + return tensors_[idx]->template data(); + } + + /** + * @brief Returns a raw pointer to the tensor with the given index. + */ + DLL_PUBLIC inline void* raw_mutable_tensor(int idx) { return tensors_[idx]->raw_mutable_data(); } + /** + * @brief Returns a const raw pointer to the tensor with the given index. + */ + DLL_PUBLIC inline const void* raw_tensor(int idx) const { + return tensors_[idx]->raw_data(); + } + + /** + * @brief Analogue of TensorVector[dst].ShareData(owner[src]); + * + * The target TensorVector (this) must have enough samples for this to work (see SetSize()). + * After this operation the TensorVector is converted into non-contiguous. + * + * Warning: If the TensorVector was contiguous, the samples that weren't overwritten by this + * function would still report that they are sharing data. It is assumed that all samples are + * replaced this way - TODO(klecki): this might be adjusted in follow-up. + * + * @param dst index of sample to be set + * @param owner owner of source sample + * @param src index of source sample in owner. + */ + DLL_PUBLIC void UnsafeSetSample(int dst, const TensorVector &owner, int src); + + /** + * @brief Analogue of TensorVector[dst].ShareData(owner); + * + * The target TensorVector (this) must have enough samples for this to work (see SetSize()). + * After this operation the TensorVector is converted into non-contiguous. + * + * Warning: If the TensorVector was contiguous, the samples that weren't overwritten by this + * function would still report that they are sharing data. It is assumed that all samples are + * replaced this way - TODO(klecki): this might be adjusted in follow-up. + * + * @param dst index of sample to be set + * @param owner sample owner + */ + DLL_PUBLIC void UnsafeSetSample(int dst, const Tensor &owner); + + /** + * @brief Analogue of TensorVector[dst].Copy(data[src]); + * + * The target TensorVector (this) must have enough samples for this to work (see SetSize()). + * It must either be already non-contiguous or the shapes of copied samples must match exactly. + * + * Warning: It is assumed that the TensorVector is either first resized to desired shape, + * or all samples are copied over. Automatically converting to non-contiguous container from + * contiguous one by invoking copy of non-matching size is not supported yet. 
+ * + * @param dst index of sample to be set + * @param owner sample owner + */ + DLL_PUBLIC void UnsafeCopySample(int dst, const TensorVector &data, int src, + AccessOrder order = {}); + DLL_PUBLIC void Resize(const TensorListShape<> &new_shape) { DALI_ENFORCE(IsValidType(type()), "TensorVector has no type, 'set_type()' or Resize(shape, type) must be called " @@ -163,6 +245,7 @@ class DLL_PUBLIC TensorVector { TensorLayout GetLayout() const; + DALIMeta &GetMeta(int idx); const DALIMeta &GetMeta(int idx) const; void SetMeta(int idx, const DALIMeta &meta); @@ -171,6 +254,8 @@ class DLL_PUBLIC TensorVector { bool is_pinned() const; + int device_id() const; + /** * @brief Reserve as contiguous tensor list internally */ @@ -213,6 +298,21 @@ class DLL_PUBLIC TensorVector { private: enum class State { contiguous, noncontiguous }; + // Forward declarations in signature, beware + friend void MakeSampleView(class SampleWorkspace &sample, class HostWorkspace &batch, + int data_idx, int thread_idx); + friend void EnforceCorrectness(class HostWorkspace &ws, bool contiguous); + + /** + * @brief After RunImpl(SampleWorkspace&) operated on individual samples without propagating + * the allocation metadata back to the the batch structure, take that metadata from the samples + * and update it in TensorVector. + * + * @param contiguous if the Tensor was previously preallocated and should remain contiguous + * or be treated as non-contiguous set of individual samples. + */ + void PropagateUp(bool contiguous); + struct ViewRefDeleter { void operator()(void*) { --*ref; } std::atomic *ref; @@ -230,6 +330,7 @@ class DLL_PUBLIC TensorVector { // pinned status and type info should be uniform bool pinned_ = true; TypeInfo type_{}; + AccessOrder order_; // So we can access the members of other TensorVectors // with different template types diff --git a/dali/pipeline/data/tensor_vector_test.cc b/dali/pipeline/data/tensor_vector_test.cc index 0fd3a88078b..c1950579283 100644 --- a/dali/pipeline/data/tensor_vector_test.cc +++ b/dali/pipeline/data/tensor_vector_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -52,6 +52,8 @@ TYPED_TEST_SUITE(TensorVectorSuite, Backends); // behaves as it is supposed to - that is set_type always first, set_type before Reshape, // reserve can be without type. 
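The UnsafeSetSample/UnsafeCopySample documentation above replaces the former per-sample idiom dst[i].ShareData(...) / dst[i].Copy(...). A minimal sketch of both calls, not part of the patch; it assumes a destination batch that already has enough samples and a matching type and dimensionality, as the checks in tensor_vector.cc require:

#include "dali/core/access_order.h"
#include "dali/pipeline/data/tensor_vector.h"

namespace dali {

// Illustrative only: share one sample and copy another between CPU batches.
inline void ShareAndCopySamples(TensorVector<CPUBackend> &dst,
                                const TensorVector<CPUBackend> &src) {
  // Analogue of the old dst[0].ShareData(src[0]); converts dst to the
  // non-contiguous state and overwrites any previous sharing of sample 0.
  dst.UnsafeSetSample(0, src, 0);

  // Analogue of the old dst[1].Copy(src[1], AccessOrder::host()); requires an
  // exact shape match or a truly non-contiguous destination.
  dst.UnsafeCopySample(1, src, 1, AccessOrder::host());
}

}  // namespace dali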
+// TODO(klecki): reverse pinned and capacity tests + TYPED_TEST(TensorVectorSuite, PinnedAfterReserveThrows) { TensorVector tv_0, tv_1; tv_0.reserve(100); @@ -74,10 +76,8 @@ TYPED_TEST(TensorVectorSuite, PinnedAfterResizeThrows) { EXPECT_EQ(tv.shape(), TensorListShape<>({{2, 4}, {4, 2}})); EXPECT_EQ(tv[0].shape(), TensorShape<>(2, 4)); EXPECT_EQ(tv[1].shape(), TensorShape<>(4, 2)); - EXPECT_EQ(tv[0].nbytes(), 4 * 2 * sizeof(int32_t)); - EXPECT_EQ(tv[1].nbytes(), 4 * 2 * sizeof(int32_t)); - EXPECT_EQ(tv[0].capacity(), 4 * 2 * sizeof(int32_t)); - EXPECT_EQ(tv[1].capacity(), 4 * 2 * sizeof(int32_t)); + EXPECT_EQ(tv[0].type(), DALI_INT32); + EXPECT_EQ(tv[1].type(), DALI_INT32); ASSERT_THROW(tv.set_pinned(false), std::runtime_error); } @@ -92,10 +92,9 @@ TYPED_TEST(TensorVectorSuite, PinnedBeforeResizeContiguous) { EXPECT_EQ(tv.shape(), TensorListShape<>({{2, 4}, {4, 2}})); EXPECT_EQ(tv[0].shape(), TensorShape<>(2, 4)); EXPECT_EQ(tv[1].shape(), TensorShape<>(4, 2)); + EXPECT_EQ(tv.is_pinned(), false); for (auto &t : tv) { - EXPECT_EQ(t->nbytes(), 4 * 2 * sizeof(int32_t)); - EXPECT_EQ(t->capacity(), 4 * 2 * sizeof(int32_t)); - EXPECT_EQ(t->is_pinned(), false); + EXPECT_EQ(t->type(), DALI_INT32); } } @@ -109,10 +108,9 @@ TYPED_TEST(TensorVectorSuite, PinnedBeforeResizeNoncontiguous) { EXPECT_EQ(tv.shape(), TensorListShape<>({{2, 4}, {4, 2}})); EXPECT_EQ(tv[0].shape(), TensorShape<>(2, 4)); EXPECT_EQ(tv[1].shape(), TensorShape<>(4, 2)); + EXPECT_EQ(tv.is_pinned(), false); for (auto &t : tv) { - EXPECT_EQ(t->nbytes(), 4 * 2 * sizeof(int32_t)); - EXPECT_EQ(t->capacity(), 50); - EXPECT_EQ(t->is_pinned(), false); + EXPECT_EQ(t->type(), DALI_INT32); } } @@ -123,9 +121,9 @@ TYPED_TEST(TensorVectorSuite, BatchResize) { tv.reserve(200); tv.template set_type(); tv.Resize(uniform_list_shape(5, {10, 20})); - for (auto &t : tv) { - EXPECT_TRUE(t->shares_data()); - } + // for (auto &t : tv) { + // EXPECT_TRUE(t->shares_data()); + // } } TYPED_TEST(TensorVectorSuite, VariableBatchResizeDown) { @@ -150,7 +148,7 @@ TYPED_TEST(TensorVectorSuite, EmptyShareContiguous) { TensorListShape<> shape = {{100, 0, 0}, {42, 0, 0}}; tv.Resize(shape, DALI_UINT8); for (int i = 0; i < shape.num_samples(); i++) { - ASSERT_EQ(tv[i].raw_data(), nullptr); + ASSERT_EQ(tv.raw_tensor(i), nullptr); } TensorVector target; @@ -160,8 +158,8 @@ TYPED_TEST(TensorVectorSuite, EmptyShareContiguous) { ASSERT_EQ(target.shape(), shape); ASSERT_TRUE(target.IsContiguous()); for (int i = 0; i < shape.num_samples(); i++) { - ASSERT_EQ(target[i].raw_data(), nullptr); - ASSERT_EQ(target[i].raw_data(), tv[i].raw_data()); + ASSERT_EQ(target.raw_tensor(i), nullptr); + ASSERT_EQ(target.raw_tensor(i), tv.raw_tensor(i)); } } @@ -171,7 +169,7 @@ TYPED_TEST(TensorVectorSuite, EmptyShareNonContiguous) { TensorListShape<> shape = {{100, 0, 0}, {42, 0, 0}}; tv.Resize(shape, DALI_UINT8); for (int i = 0; i < shape.num_samples(); i++) { - ASSERT_EQ(tv[i].raw_data(), nullptr); + ASSERT_EQ(tv.raw_tensor(i), nullptr); } TensorVector target; @@ -181,8 +179,8 @@ TYPED_TEST(TensorVectorSuite, EmptyShareNonContiguous) { ASSERT_EQ(target.shape(), shape); ASSERT_FALSE(target.IsContiguous()); for (int i = 0; i < shape.num_samples(); i++) { - ASSERT_EQ(target[i].raw_data(), nullptr); - ASSERT_EQ(target[i].raw_data(), tv[i].raw_data()); + ASSERT_EQ(target.raw_tensor(i), nullptr); + ASSERT_EQ(target.raw_tensor(i), tv.raw_tensor(i)); } } diff --git a/dali/pipeline/data/view_test.cc b/dali/pipeline/data/view_test.cc index 1fbebc99290..d863d1a1a32 100644 --- 
a/dali/pipeline/data/view_test.cc +++ b/dali/pipeline/data/view_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -66,8 +66,12 @@ TEST(TensorVector, View) { TensorVector tvec(10); tvec.set_type(); std::mt19937_64 rng; + TensorListShape<3> shape(10); + for (int i = 0; i < 10; i++) { + shape.set_tensor_shape(i, TensorShape<3>(100+i, 40+i, 3+i)); + } + tvec.Resize(shape); for (int i = 0; i < 10; i++) { - tvec[i].Resize(TensorShape<3>(100+i, 40+i, 3+i)); UniformRandomFill(view(tvec[i]), rng, 0, 10000); } @@ -79,8 +83,8 @@ TEST(TensorVector, View) { EXPECT_EQ(tv_shape, tlv.shape); EXPECT_EQ(tlv2.shape, tlv.shape); for (int i = 0; i < 10; i++) { - EXPECT_EQ(tlv[i].data, tvec[i].data()); - EXPECT_EQ(tlv2[i].data, tvec[i].data()); + EXPECT_EQ(tlv[i].data, tvec.tensor(i)); + EXPECT_EQ(tlv2[i].data, tvec.tensor(i)); Check(tlv[i], view(tvec[i])); } } @@ -89,11 +93,14 @@ TEST(TensorVector, ReinterpretView) { TensorVector tvec(10); tvec.set_type(); std::mt19937_64 rng; + TensorListShape<3> shape(10); + for (int i = 0; i < 10; i++) { + shape.set_tensor_shape(i, TensorShape<3>(100+i, 40+i, 3+i)); + } + tvec.Resize(shape); for (int i = 0; i < 10; i++) { - tvec[i].Resize(TensorShape<3>(100+i, 40+i, 3+i)); UniformRandomFill(view(tvec[i]), rng, 0, 10000); } - auto tlv = view(tvec); auto tlv_i16 = reinterpret_view(tvec); const auto& ctvec = tvec; diff --git a/dali/pipeline/data/views.h b/dali/pipeline/data/views.h index 390b4bd509c..1da0e747552 100644 --- a/dali/pipeline/data/views.h +++ b/dali/pipeline/data/views.h @@ -82,8 +82,6 @@ TensorShape get_tensor_shape(const TensorList &tl) { template TensorView, T, ndim> view(Tensor &data) { - if (data.shape().empty()) - return {}; using U = std::remove_const_t; detail::enforce_dim_in_view(data.shape()); return { data.template mutable_data(), convert_dim(data.shape()) }; @@ -96,8 +94,6 @@ view(const Tensor &data) { static_assert(std::is_const::value, "Cannot create a non-const view of a `const Tensor<>`. " "Missing `const` in T?"); - if (data.shape().empty()) - return {}; using U = std::remove_const_t; detail::enforce_dim_in_view(data.shape()); return { data.template data(), convert_dim(data.shape()) }; @@ -107,23 +103,21 @@ view(const Tensor &data) { /** * @name Convert from SampleView carrying runtime type information to statically typed TensorView. */ -// @{ template -TensorView, T, ndim> view(SampleView &data) { +TensorView, T, ndim> view(SampleView data) { using U = std::remove_const_t; detail::enforce_dim_in_view(data.shape()); - return {data.template mutable_data(), data.shape()}; + return {data.template _mutable_data(), data.shape()}; } - template -TensorView, T, ndim> view(const SampleView &data) { +TensorView, T, ndim> view(ConstSampleView data) { static_assert(std::is_const::value, "Cannot create a non-const view of a `const Tensor<>`. 
" "Missing `const` in T?"); using U = std::remove_const_t; detail::enforce_dim_in_view(data.shape()); - return {data.template data(), data.shape()}; + return {data.template _data(), data.shape()}; } // @} @@ -176,7 +170,7 @@ view(TensorVector &data) { std::vector ptrs(shape.num_samples()); for (int i = 0; i < shape.num_samples(); i++) { - ptrs[i] = data[i].template mutable_data(); + ptrs[i] = data.template mutable_tensor(i); } return { std::move(ptrs), convert_dim(shape) }; } @@ -196,7 +190,7 @@ view(const TensorVector &data) { std::vector ptrs(shape.num_samples()); for (int i = 0; i < shape.num_samples(); i++) { - ptrs[i] = data[i].template data(); + ptrs[i] = data.template tensor(i); } return { std::move(ptrs), convert_dim(shape) }; } @@ -214,7 +208,7 @@ reinterpret_view(TensorVector &data) { assert(data.type_info().size() >= sizeof(T)); assert(data.type_info().size() % sizeof(T) == 0); for (int i = 0; i < ret.shape.num_samples(); i++) { - ret.data[i] = static_cast(data[i].raw_mutable_data()); + ret.data[i] = static_cast(data.raw_mutable_tensor(i)); } // If reinterpreting to a smaller type, adjust the inner extent if (data.type_info().size() > sizeof(T)) { @@ -243,7 +237,7 @@ reinterpret_view(const TensorVector &data) { assert(data.type_info().size() >= sizeof(T)); assert(data.type_info().size() % sizeof(T) == 0); for (int i = 0; i < ret.shape.num_samples(); i++) { - ret.data[i] = static_cast(data[i].raw_data()); + ret.data[i] = static_cast(data.raw_tensor(i)); } // If reinterpreting to a smaller type, adjust the inner extent if (data.type_info().size() > sizeof(T)) { @@ -256,6 +250,20 @@ reinterpret_view(const TensorVector &data) { return ret; } + +template +SampleView +sample_view(Tensor &data) { + return { data.raw_mutable_data(), data.shape(), data.type() }; +} + + +template +ConstSampleView +const_sample_view(const Tensor &data) { + return { data.raw_data(), data.shape(), data.type() }; +} + } // namespace dali #endif // DALI_PIPELINE_DATA_VIEWS_H_ diff --git a/dali/pipeline/executor/executor.h b/dali/pipeline/executor/executor.h index d0eedb81806..3593572c33e 100644 --- a/dali/pipeline/executor/executor.h +++ b/dali/pipeline/executor/executor.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -147,31 +147,37 @@ class DLL_PUBLIC Executor : public ExecutorBase, public QueuePolicy { DLL_PUBLIC void RunGPUImpl(); DLL_PUBLIC void SyncDevice(); - template + template inline void GetMaxSizesCont(T &in, size_t &max_out_size, size_t &max_reserved_size) { - auto out_size = in.nbytes(); - auto reserved_size = in.capacity(); + auto out_size = in.total_nbytes(); + auto reserved_size = in.total_capacity(); max_out_size = std::max(std::ceil((out_size * 1.0) / in.num_samples()), max_out_size); max_reserved_size = std::max(std::ceil((reserved_size * 1.0) / in.num_samples()), max_reserved_size); } - template + template inline void GetMaxSizesNonCont(T &in, size_t &max_out_size, size_t &max_reserved_size) { - for (size_t j = 0; j < in.num_samples(); ++j) { - max_out_size = std::max(in[j].nbytes(), max_out_size); - max_reserved_size = std::max(in[j].capacity(), max_reserved_size); + const auto &nbytes = in.nbytes(); + const auto &capacity = in.capacity(); + max_out_size = 0; + max_reserved_size = 0; + for (auto &elem : nbytes) { + max_out_size = std::max(max_out_size, elem); + } + for (auto &elem : capacity) { + max_reserved_size = std::max(max_reserved_size, elem); } } - template - inline void GetMaxSizes(TensorList &in, size_t &max_out_size, + template + inline void GetMaxSizes(TensorList &in, size_t &max_out_size, size_t &max_reserved_size) { GetMaxSizesCont(in, max_out_size, max_reserved_size); } - template - inline void GetMaxSizes(TensorVector &in, size_t &max_out_size, + template + inline void GetMaxSizes(TensorVector &in, size_t &max_out_size, size_t &max_reserved_size) { if (in.IsContiguous()) { GetMaxSizesCont(in, max_out_size, max_reserved_size); @@ -199,13 +205,13 @@ class DLL_PUBLIC Executor : public ExecutorBase, public QueuePolicy { max_reserved_size = 0; if (ws.template OutputIsType(i)) { auto &out = ws.template Output(i); - out_size = out.nbytes(); - reserved_size = out.capacity(); + out_size = out.total_nbytes(); + reserved_size = out.total_capacity(); GetMaxSizes(out, max_out_size, max_reserved_size); } else { auto &out = ws.template Output(i); - out_size = out.nbytes(); - reserved_size = out.capacity(); + out_size = out.total_nbytes(); + reserved_size = out.total_capacity(); GetMaxSizes(out, max_out_size, max_reserved_size); } stats[i].real_size = std::max(out_size, stats[i].real_size); diff --git a/dali/pipeline/operator/arg_helper_test.cc b/dali/pipeline/operator/arg_helper_test.cc index c4df97deaa1..5ca69cacead 100644 --- a/dali/pipeline/operator/arg_helper_test.cc +++ b/dali/pipeline/operator/arg_helper_test.cc @@ -38,7 +38,7 @@ void SetupData(TensorVector &tv, tv.set_pinned(false); tv.Resize(sh, DALI_FLOAT); for (size_t i = 0; i < tv.num_samples(); i++) { - float *data = tv[i].mutable_data(); + float *data = tv.mutable_tensor(i); for (int j = 0; j < volume(sh[i]); j++) { data[j] = 100 * i + j; } @@ -63,7 +63,7 @@ void ArgValueTestTensorInput(TensorListShape ts, AcquireArgs... 
args) { auto sh = ts[i]; ASSERT_EQ(sh, arg[i].shape); for (int j = 0; j < volume(sh); j++) { - float *ptr = (*arg_data)[i].mutable_data(); + float *ptr = arg_data->mutable_tensor(i); ASSERT_EQ(ptr[j], arg[i].data[j]); } } diff --git a/dali/pipeline/operator/builtin/external_source.cc b/dali/pipeline/operator/builtin/external_source.cc index a35265729cd..4f47ae6dd88 100644 --- a/dali/pipeline/operator/builtin/external_source.cc +++ b/dali/pipeline/operator/builtin/external_source.cc @@ -33,11 +33,12 @@ void ExternalSource::RunImpl(HostWorkspace &ws) { auto curr_batch_size = shapes.num_samples(); output.Resize(shapes, tensor_vector_elm.front()->type()); + for (int sample_id = 0; sample_id < curr_batch_size; ++sample_id) { thread_pool.AddWork( - [&ws, sample_id, &tensor_vector_elm](int tid) { - Tensor &output_tensor = ws.Output(0)[sample_id]; - output_tensor.Copy((*tensor_vector_elm.front())[sample_id], AccessOrder::host()); + [&output, sample_id, &tensor_vector_elm](int tid) { + output.UnsafeCopySample(sample_id, *tensor_vector_elm.front(), sample_id, + AccessOrder::host()); }, shapes.tensor_size(sample_id)); } diff --git a/dali/pipeline/operator/builtin/external_source.h b/dali/pipeline/operator/builtin/external_source.h index c0cff6a2400..59db76d61df 100644 --- a/dali/pipeline/operator/builtin/external_source.h +++ b/dali/pipeline/operator/builtin/external_source.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -255,7 +255,7 @@ class ExternalSource : public Operator, virtual public BatchSizeProvide DomainTimeRange tr("[DALI][ExternalSource] SetDataSource", DomainTimeRange::kViolet); TensorVector tv(vect_of_tensors.size()); for (size_t i = 0; i < tv.num_samples(); ++i) { - tv[i].ShareData(const_cast &>(vect_of_tensors[i])); + tv.UnsafeSetSample(i, const_cast &>(vect_of_tensors[i])); } SetDataSourceHelper(tv, order, ext_src_setting_mode); } diff --git a/dali/pipeline/operator/builtin/external_source_test.cc b/dali/pipeline/operator/builtin/external_source_test.cc index f8d32a444df..c3ab4d9d6ff 100644 --- a/dali/pipeline/operator/builtin/external_source_test.cc +++ b/dali/pipeline/operator/builtin/external_source_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -202,7 +202,7 @@ class ExternalSourceTest : public::testing::WithParamInterface, CUDA_CALL(cudaStreamSynchronize(ws.has_stream() ? 
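The external-source hunks above replace per-tensor Copy/ShareData with the batch-level UnsafeCopySample and UnsafeSetSample calls. A small sketch of the zero-copy path, using UnsafeSetSample as it appears in this patch; the helper itself is hypothetical.

#include <vector>
#include "dali/pipeline/data/tensor_vector.h"

namespace dali {

// Wraps already-allocated CPU tensors as one batch without copying their data.
TensorVector<CPUBackend> WrapAsBatch(std::vector<Tensor<CPUBackend>> &samples) {
  TensorVector<CPUBackend> batch(samples.size());
  for (size_t i = 0; i < samples.size(); ++i)
    batch.UnsafeSetSample(i, samples[i]);  // shares each sample's allocation, no copy
  return batch;
}

}  // namespace dali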
ws.stream() : 0)); for (int j = 0; j < this->batch_size_; ++j) { - auto data = tensor_cpu_list.template mutable_tensor(j); + const auto *data = tensor_cpu_list.template tensor(j); for (int i = 0; i < volume(tensor_cpu_list.tensor_shape(j)); ++i) { if (data[i] != check_counter_) { return false; diff --git a/dali/pipeline/operator/builtin/make_contiguous.cc b/dali/pipeline/operator/builtin/make_contiguous.cc index 2e05a687ec3..ac9d802931f 100644 --- a/dali/pipeline/operator/builtin/make_contiguous.cc +++ b/dali/pipeline/operator/builtin/make_contiguous.cc @@ -26,7 +26,7 @@ void MakeContiguousCPU::RunImpl(HostWorkspace &ws) { auto &thread_pool = ws.GetThreadPool(); for (int sample_id = 0; sample_id < batch_size; ++sample_id) { thread_pool.AddWork([sample_id, &input, &output] (int tid) { - output[sample_id].Copy(input[sample_id], AccessOrder::host()); + output.UnsafeCopySample(sample_id, input, sample_id, AccessOrder::host()); }, shapes.tensor_size(sample_id)); } thread_pool.RunAll(); diff --git a/dali/pipeline/operator/builtin/make_contiguous.cu b/dali/pipeline/operator/builtin/make_contiguous.cu index f2767eddc02..01735a5b62e 100644 --- a/dali/pipeline/operator/builtin/make_contiguous.cu +++ b/dali/pipeline/operator/builtin/make_contiguous.cu @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -22,10 +22,11 @@ void MakeContiguousMixed::Run(MixedWorkspace &ws) { int sample_dim = input[0].shape().sample_dim(); size_t batch_size = input.num_samples(); DALIDataType type = input.type(); + size_t type_size = input.type_info().size(); for (size_t i = 0; i < input.num_samples(); ++i) { - auto &sample = ws.Input(0)[i]; - size_t sample_bytes = sample.nbytes(); + auto sample = ws.Input(0)[i]; + size_t sample_bytes = sample.shape().num_elements() * type_size; if (coalesced && sample_bytes > COALESCE_THRESHOLD) coalesced = false; DALI_ENFORCE(type == sample.type(), "Inconsistent types in " diff --git a/dali/pipeline/operator/common.h b/dali/pipeline/operator/common.h index 6aacdd7f87d..de7fb63ce50 100644 --- a/dali/pipeline/operator/common.h +++ b/dali/pipeline/operator/common.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -60,7 +60,7 @@ void GetPerSampleArgument(std::vector &output, const std::string &argument_na batch_size, ") tensor list. 
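GetPerSampleArgument/GetGeneralizedArg above now read argument inputs through tensor<T>(sample_idx) and tensor_shape(sample_idx) instead of indexing individual tensors. A sketch of gathering a scalar-per-sample float argument in the same style (the helper and the float type are illustrative assumptions).

#include <vector>
#include "dali/pipeline/data/tensor_vector.h"

namespace dali {

// Collects one value per sample; each sample is expected to hold a single element.
std::vector<float> ReadScalarArg(const TensorVector<CPUBackend> &arg) {
  int n = arg.num_samples();
  std::vector<float> out(n);
  for (int i = 0; i < n; ++i)
    out[i] = arg.tensor<float>(i)[0];
  return out;
}

}  // namespace dali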
Got: ", shape)); output.resize(batch_size); - auto *data = arg[0].template data(); + auto *data = arg.template tensor(0); for (int i = 0; i < batch_size; i++) { output[i] = data[i]; @@ -75,7 +75,7 @@ void GetPerSampleArgument(std::vector &output, const std::string &argument_na output.resize(batch_size); for (int i = 0; i < batch_size; i++) { - output[i] = arg[i].template data()[0]; + output[i] = arg.template tensor(i)[0]; } } } else { @@ -103,8 +103,7 @@ void GetGeneralizedArg(span result, const std::string &name, int sample_idx, int argument_length = result.size(); if (spec.HasTensorArgument(name)) { const auto& tv = ws.ArgumentInput(name); - const auto& tensor = tv[sample_idx]; - const auto& shape = tensor.shape(); + const auto& shape = tv.tensor_shape(sample_idx); auto vol = volume(shape); if (shape.size() != 0) { DALI_ENFORCE(shape.size() == 1, @@ -118,10 +117,10 @@ void GetGeneralizedArg(span result, const std::string &name, int sample_idx, } if (vol == 1) { for (int i = 0; i < argument_length; i++) { - result[i] = tensor.data()[0]; + result[i] = tv.tensor(sample_idx)[0]; } } else { - memcpy(result.data(), tensor.data(), sizeof(T) * argument_length); + memcpy(result.data(), tv.tensor(sample_idx), sizeof(T) * argument_length); } return; } diff --git a/dali/pipeline/operator/op_spec.h b/dali/pipeline/operator/op_spec.h index a2ac5185787..64fa2ec68ff 100644 --- a/dali/pipeline/operator/op_spec.h +++ b/dali/pipeline/operator/op_spec.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -494,7 +494,7 @@ inline T OpSpec::GetArgumentImpl( DALI_ENFORCE(IsType(value.type()), make_string( "Unexpected type of argument \"", name, "\". Expected ", TypeTable::GetTypeName(), " and got ", value.type())); - return static_cast(value[idx].data()[0]); + return static_cast(value.tensor(idx)[0]); } // Search for the argument locally auto arg_it = arguments_.find(name); @@ -524,7 +524,7 @@ inline bool OpSpec::TryGetArgumentImpl( } if (!IsType(value.type())) return false; - result = value[idx].data()[0]; + result = value.tensor(idx)[0]; return true; } // Search for the argument locally diff --git a/dali/pipeline/operator/op_spec_test.cc b/dali/pipeline/operator/op_spec_test.cc index 6fd0c9a410d..3cfb67f7473 100644 --- a/dali/pipeline/operator/op_spec_test.cc +++ b/dali/pipeline/operator/op_spec_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -74,7 +74,7 @@ TEST(OpSpecTest, GetArgumentTensorSet) { auto tv = std::make_shared>(2); tv->Resize(TensorListShape<0>(2), DALI_INT32); for (int i = 0; i < 2; i++) { - tv->tensor_handle(i)->mutable_data()[0] = 42 + i; + view((*tv)[i]).data[0] = 42 + i; } ws0.AddArgumentInput(arg_name, tv); auto spec0 = OpSpec("DummyOpForSpecTest") @@ -325,18 +325,18 @@ class TestArgumentInput_Producer : public Operator { // Initialize all the data with a 0, 1, 2 .... 
sequence auto &out0 = ws.Output(0); for (int i = 0; i < out0.shape().num_samples(); i++) { - *out0[i].mutable_data() = i; + *out0.mutable_tensor(i) = i; } auto &out1 = ws.Output(1); for (int i = 0; i < out1.shape().num_samples(); i++) { - *out1[i].mutable_data() = i; + *out1.mutable_tensor(i) = i; } auto &out2 = ws.Output(2); for (int i = 0; i < out2.shape().num_samples(); i++) { for (int j = 0; j < 2; j++) { - out2[i].mutable_data()[j] = i; + out2.mutable_tensor(i)[j] = i; } } } @@ -379,7 +379,7 @@ class TestArgumentInput_Consumer : public Operator { ASSERT_TRUE(is_uniform(ref_1.shape())); ASSERT_EQ(ref_1.shape()[0], TensorShape<>(1)); for (int i = 0; i < ref_1.shape().num_samples(); i++) { - EXPECT_EQ(ref_1[i].data()[0], i); + EXPECT_EQ(ref_1.tensor(i)[0], i); } auto &ref_2 = ws.ArgumentInput("arg2"); @@ -388,7 +388,7 @@ class TestArgumentInput_Consumer : public Operator { ASSERT_EQ(ref_2.shape()[0], TensorShape<>(1, 2)); for (int i = 0; i < ref_2.shape().num_samples(); i++) { for (int j = 0; j < 2; j++) { - EXPECT_EQ(ref_2[i].data()[j], i); + EXPECT_EQ(ref_2.tensor(i)[j], i); } } } diff --git a/dali/pipeline/operator/operator.h b/dali/pipeline/operator/operator.h index 2e82760b4d5..01908726145 100644 --- a/dali/pipeline/operator/operator.h +++ b/dali/pipeline/operator/operator.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -337,7 +337,11 @@ class Operator : public OperatorBase { this->RunImpl(sample); }, -data_idx); // -data_idx for FIFO order } + // Run all tasks and wait for them to finish thread_pool.RunAll(); + // Propagate metadata from individual samples to the whole batch as working with SampleWorkspace + // breaks metadata consistency - it sets it only to samples + EnforceCorrectness(ws, CanInferOutputs()); } /** diff --git a/dali/pipeline/pipeline_test.cc b/dali/pipeline/pipeline_test.cc index d941c25be8a..bcfc57d88a7 100644 --- a/dali/pipeline/pipeline_test.cc +++ b/dali/pipeline/pipeline_test.cc @@ -301,13 +301,13 @@ class DummyPresizeOpCPU : public Operator { const auto &input = ws.Input(0); int num_samples = input.shape().num_samples(); auto &output = ws.Output(0); - auto tmp_size = output.capacity(); + auto tmp_size = output.total_capacity(); output.set_type(); output.Resize(uniform_list_shape(num_samples, std::vector{2})); for (int sample_idx = 0; sample_idx < num_samples; sample_idx++) { - auto *out = output[sample_idx].mutable_data(); + auto *out = output.mutable_tensor(sample_idx); out[0] = tmp_size; - out[1] = input.capacity(); + out[1] = input.total_capacity(); } } }; @@ -327,7 +327,7 @@ class DummyPresizeOpGPU : public Operator { int num_samples = input.shape().num_samples(); auto &output = ws.Output(0); output.set_type(); - size_t tmp_size[2] = {output.capacity(), input.capacity()}; + size_t tmp_size[2] = {output.total_capacity(), input.total_capacity()}; output.Resize(uniform_list_shape(num_samples, std::vector{2})); for (int sample_idx = 0; sample_idx < num_samples; sample_idx++) { auto *out = output.mutable_tensor(sample_idx); @@ -353,7 +353,7 @@ class DummyPresizeOpMixed : public Operator { int num_samples = input.shape().num_samples(); auto &output = ws.Output(0); output.set_type(); - size_t tmp_size[2] = {output.capacity(), input.capacity()}; + size_t tmp_size[2] = 
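The Operator RunImpl hunk above adds a metadata-propagation step after the per-sample tasks finish, because SampleWorkspace-based execution only sets metadata on individual samples. A sketch of that scheduling plus propagation pattern; ThreadPool/HostWorkspace usage and the EnforceCorrectness() helper are assumed to behave as elsewhere in this patch.

#include "dali/pipeline/workspace/sample_workspace.h"

namespace dali {

void RunSamplewise(HostWorkspace &ws, ThreadPool &thread_pool, int batch_size,
                   bool outputs_contiguous) {
  for (int data_idx = 0; data_idx < batch_size; ++data_idx) {
    thread_pool.AddWork([data_idx](int tid) {
      // Per-sample work through a SampleWorkspace view would go here.
      (void)data_idx;
      (void)tid;
    }, -data_idx);  // negative priority preserves FIFO order
  }
  thread_pool.RunAll();  // run all tasks and wait for them to finish
  // Lift sample-level shape/type/layout metadata back up to the batch outputs.
  EnforceCorrectness(ws, outputs_contiguous);
}

}  // namespace dali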
{output.total_capacity(), input.total_capacity()}; output.Resize(uniform_list_shape(num_samples, std::vector{2})); for (int sample_idx = 0; sample_idx < num_samples; sample_idx++) { auto *out = output.mutable_tensor(sample_idx); diff --git a/dali/pipeline/workspace/sample_workspace.cc b/dali/pipeline/workspace/sample_workspace.cc index 7e75bc7c725..6d82eb9d6cf 100644 --- a/dali/pipeline/workspace/sample_workspace.cc +++ b/dali/pipeline/workspace/sample_workspace.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -25,10 +25,10 @@ void MakeSampleView(SampleWorkspace& sample, HostWorkspace& batch, int data_idx, for (int i = 0; i < num_inputs; i++) { if (batch.InputIsType(i)) { auto &input_ref = batch.UnsafeMutableInput(i); - sample.AddInput(&input_ref[data_idx]); + sample.AddInput(input_ref.tensor_handle(data_idx).get()); } else { auto &input_ref = batch.UnsafeMutableInput(i); - sample.AddInput(&input_ref[data_idx]); + sample.AddInput(input_ref.tensor_handle(data_idx).get()); } } @@ -36,10 +36,10 @@ void MakeSampleView(SampleWorkspace& sample, HostWorkspace& batch, int data_idx, for (int i = 0; i < num_outputs; i++) { if (batch.OutputIsType(i)) { auto &output_ref = batch.Output(i); - sample.AddOutput(&output_ref[data_idx]); + sample.AddOutput(output_ref.tensor_handle(data_idx).get()); } else { auto &output_ref = batch.Output(i); - sample.AddOutput(&output_ref[data_idx]); + sample.AddOutput(output_ref.tensor_handle(data_idx).get()); } } for (auto& arg_pair : batch) { @@ -48,4 +48,10 @@ void MakeSampleView(SampleWorkspace& sample, HostWorkspace& batch, int data_idx, } } +void EnforceCorrectness(HostWorkspace& ws, bool contiguous) { + for (int i = 0; i < ws.NumOutput(); i++) { + ws.Output(i).PropagateUp(contiguous); + } +} + } // namespace dali diff --git a/dali/pipeline/workspace/sample_workspace.h b/dali/pipeline/workspace/sample_workspace.h index 9e281db5437..102a9d269b4 100644 --- a/dali/pipeline/workspace/sample_workspace.h +++ b/dali/pipeline/workspace/sample_workspace.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
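MakeSampleView above now obtains the per-sample Tensor through tensor_handle(data_idx).get() instead of taking the address of an operator[] result. A sketch of borrowing a sample pointer in the same way, assuming tensor_handle() returns a shared_ptr as the hunk implies.

#include "dali/pipeline/data/tensor_vector.h"

namespace dali {

Tensor<CPUBackend> *BorrowSample(TensorVector<CPUBackend> &batch, int data_idx) {
  // Ownership stays with the batch; the raw pointer is only valid while the
  // batch (and its sample handle) remains alive.
  return batch.tensor_handle(data_idx).get();
}

}  // namespace dali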
@@ -134,6 +134,17 @@ class DLL_PUBLIC SampleWorkspace : public WorkspaceBase > TensorListGetItemImpl(TensorList &t, auto ptr = std::make_unique>(); // TODO(klecki): Rework this with proper sample-based tensor batch data structure auto sample_shared_ptr = unsafe_sample_owner(t, id); - ptr->ShareData(sample_shared_ptr, t.capacity(), t.is_pinned(), t.shape()[id], t.type()); + ptr->ShareData(sample_shared_ptr, t.total_capacity(), t.is_pinned(), t.shape()[id], t.type()); ptr->set_device_id(t.device_id()); ptr->SetMeta(t.GetMeta(id)); return ptr; @@ -623,7 +623,7 @@ std::shared_ptr> TensorListFromListOfTensors(py::list &list_ cur_type, "' expected to have type '", DALIDataType(expected_type), "'.")); } - tv[i].ShareData(t); + tv.UnsafeSetSample(i, t); } catch (const py::type_error &) { throw; } catch (const std::runtime_error &) { @@ -1269,7 +1269,8 @@ void FeedPipeline(Pipeline *p, const string &name, py::list list, AccessOrder or TensorVector tv(list.size()); for (size_t i = 0; i < list.size(); ++i) { auto &t = list[i].cast&>(); - tv[i] = std::move(t); + tv.UnsafeSetSample(i, t); + // TODO(klecki): tv[i] = std::move(t); } p->SetExternalInput(name, tv, order, sync, use_copy_kernel); } diff --git a/dali/test/dali_test.h b/dali/test/dali_test.h index 917f3c8dbc0..dbddb54c443 100644 --- a/dali/test/dali_test.h +++ b/dali/test/dali_test.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -186,7 +186,7 @@ class DALITest : public ::testing::Test { for (int i = 0; i < n; ++i) { std::memcpy(tl->template mutable_tensor(i), data[i % nImgs], data_sizes[i % nImgs]); - tl->SetSourceInfo(i, imgs.filenames_[i % nImgs]); + tl->GetMeta(i).SetSourceInfo(imgs.filenames_[i % nImgs]); } } diff --git a/dali/test/dali_test_decoder.h b/dali/test/dali_test_decoder.h index 8f3cdd75509..977c56fb7f5 100644 --- a/dali/test/dali_test_decoder.h +++ b/dali/test/dali_test_decoder.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
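In the dali_test.h hunk above, per-sample source info is now set through GetMeta(i) rather than a dedicated TensorList setter. A sketch of filling a batch and tagging provenance in that style, assuming GetMeta(i) hands back a mutable per-sample metadata reference as the hunk relies on; buffer and name vectors are illustrative.

#include <cstdint>
#include <cstring>
#include <string>
#include <vector>
#include "dali/pipeline/data/tensor_list.h"

namespace dali {

// `tl` must already be resized to matching shapes with a uint8 element type.
void FillBatch(TensorList<CPUBackend> *tl,
               const std::vector<const uint8_t *> &buffers,
               const std::vector<size_t> &sizes,
               const std::vector<std::string> &names) {
  for (size_t i = 0; i < buffers.size(); ++i) {
    std::memcpy(tl->mutable_tensor<uint8_t>(i), buffers[i], sizes[i]);
    tl->GetMeta(i).SetSourceInfo(names[i]);  // record where this sample came from
  }
}

}  // namespace dali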
@@ -32,7 +32,8 @@ class GenericDecoderTest : public DALISingleOpTest { // single input - encoded images // single output - decoded images - TensorVector out(inputs[0]->num_samples()); + TensorVector out(inputs[0]->num_samples()); + std::vector> tmp_out(inputs[0]->num_samples()); const TensorList &encoded_data = *inputs[0]; @@ -41,7 +42,16 @@ class GenericDecoderTest : public DALISingleOpTest { auto *data = encoded_data.tensor(i); auto data_size = volume(encoded_data.tensor_shape(i)); - this->DecodeImage(data, data_size, c, this->ImageType(), &out[i]); + this->DecodeImage(data, data_size, c, this->ImageType(), &tmp_out[i]); + } + + TensorListShape<> out_shape(inputs[0]->num_samples(), 3); + for (size_t i = 0; i < encoded_data.num_samples(); ++i) { + out_shape.set_tensor_shape(i, tmp_out[i].shape()); + } + out.Resize(out_shape, DALI_UINT8); + for (size_t i = 0; i < encoded_data.num_samples(); ++i) { + out.UnsafeSetSample(i, tmp_out[i]); } vector>> outputs; diff --git a/dali/test/dali_test_resize.h b/dali/test/dali_test_resize.h index 5e63a07b3b0..5a5abee77f4 100755 --- a/dali/test/dali_test_resize.h +++ b/dali/test/dali_test_resize.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. #ifndef DALI_TEST_DALI_TEST_RESIZE_H_ #define DALI_TEST_DALI_TEST_RESIZE_H_ @@ -28,6 +28,7 @@ class GenericResizeTest : public DALISingleOpTest { // single input - encoded images // single output - decoded images TensorVector out(inputs[0]->num_samples()); + std::vector> tmp_out(inputs[0]->num_samples()); const TensorList& image_data = *inputs[0]; const uint32_t resizeOptions = getResizeOptions(); @@ -175,12 +176,23 @@ class GenericResizeTest : public DALISingleOpTest { finalImg = &mirror_img; } - out[i].Resize({finalImg->rows, finalImg->cols, c}, DALI_UINT8); - auto *out_data = out[i].mutable_data(); + tmp_out[i].Resize({finalImg->rows, finalImg->cols, c}, DALI_UINT8); + auto *out_data = tmp_out[i].mutable_data(); std::memcpy(out_data, finalImg->ptr(), finalImg->rows * finalImg->cols * c); } + TensorListShape<> shape(tmp_out.size(), tmp_out[0].shape().sample_dim()); + for (size_t i = 0; i < image_data.num_samples(); ++i) { + shape.set_tensor_shape(i, tmp_out[i].shape()); + } + // TODO(klecki): If sharing we do not need to resize, we just need to enforce that we have + // enough samples + out.Resize(shape, tmp_out[0].type()); + for (size_t i = 0; i < image_data.num_samples(); ++i) { + out.UnsafeSetSample(i, tmp_out[i]); + } + vector>> outputs; outputs.push_back(std::make_shared>()); outputs[0]->Copy(out); diff --git a/dali/util/pybind.h b/dali/util/pybind.h index b5400808b2f..790000b7dcd 100644 --- a/dali/util/pybind.h +++ b/dali/util/pybind.h @@ -203,8 +203,8 @@ static py::capsule DLTensorToCapsule(DLMTensorPtr dl_tensor) { } template -py::capsule TensorToDLPackView(Tensor &tensor) { - DLMTensorPtr dl_tensor = GetDLTensorView(tensor); +py::capsule TensorToDLPackView(SampleView tensor, int device_id) { + DLMTensorPtr dl_tensor = GetDLTensorView(tensor, device_id); return DLTensorToCapsule(std::move(dl_tensor)); }
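The decoder/resize test hunks above switch to producing each sample into a temporary Tensor, gathering the shapes into a TensorListShape, resizing the batch once, and then attaching the samples with UnsafeSetSample. A condensed sketch of that assembly pattern, using the calls as they appear in this patch; the standalone helper is hypothetical.

#include <vector>
#include "dali/core/tensor_shape.h"
#include "dali/pipeline/data/tensor_vector.h"

namespace dali {

TensorVector<CPUBackend> AssembleBatch(std::vector<Tensor<CPUBackend>> &tmp_out) {
  TensorVector<CPUBackend> out(tmp_out.size());
  TensorListShape<> shape(tmp_out.size(), tmp_out[0].shape().sample_dim());
  for (size_t i = 0; i < tmp_out.size(); ++i)
    shape.set_tensor_shape(i, tmp_out[i].shape());
  out.Resize(shape, tmp_out[0].type());   // establish batch shape and element type
  for (size_t i = 0; i < tmp_out.size(); ++i)
    out.UnsafeSetSample(i, tmp_out[i]);   // share each sample's allocation into the batch
  return out;
}

}  // namespace dali

Resizing before sharing keeps the batch-level shape and type metadata consistent with the attached samples, which is the same ordering the resize test fixture above follows.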