/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
* Copyright (c) 2015 by Contributors
* \file ndarray.h
* \brief NDArray interface that handles array arithmetic.
*/
#ifndef MXNET_NDARRAY_H_
#define MXNET_NDARRAY_H_
#include <dmlc/base.h>
#include <dmlc/logging.h>
#include <dmlc/io.h>
#include <dmlc/type_traits.h>
#include <dmlc/registry.h>
#include <nnvm/node.h>
#include <vector>
#include <map>
#include <string>
#include <algorithm>
#include <memory>
#if MXNET_USE_MKLDNN == 1
#include <mkldnn.hpp>
#endif
#include "./base.h"
#include "./storage.h"
#include "./engine.h"
// check c++11
#if DMLC_USE_CXX11 == 0
#error "cxx11 was required for ndarray module"
#endif
namespace mxnet {
// enum for storage types
namespace csr {
enum CSRAuxType {kIndPtr, kIdx};
}
namespace rowsparse {
enum RowSparseAuxType {kIdx};
}
enum NDArrayStorageType {
kUndefinedStorage = -1, // undefined storage
kDefaultStorage, // dense
kRowSparseStorage, // row sparse
kCSRStorage, // csr
};
enum NDArrayFormatErr {
kNormalErr, // normal
kCSRShapeErr, // shape mismatch for csr
kCSRIndPtrErr, // indptr error for csr
kCSRIdxErr, // idx error for csr
kRSPShapeErr, // shape mismatch for row sparse
kRSPIdxErr, // indices error for row sparse
};
class MKLDNNMemory;
/*!
* \brief ndarray interface
*/
class NDArray {
public:
/*! \brief default constructor */
NDArray() {
}
/*!
* \brief constructs a new dynamic NDArray
* \param shape the shape of array
* \param ctx context of NDArray
* \param delay_alloc whether delay the allocation
* \param dtype data type of this ndarray
*/
NDArray(const TShape &shape, Context ctx,
bool delay_alloc = false, int dtype = mshadow::default_type_flag)
: ptr_(std::make_shared<Chunk>(shape, ctx, delay_alloc, dtype)),
shape_(shape), dtype_(dtype), storage_type_(kDefaultStorage),
entry_({nullptr, 0, 0}) {
}
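// Usage sketch (illustrative only, not part of the API docs): constructing a
// dense 2x3 float32 NDArray on CPU with immediate allocation and filling it
// with a scalar via operator=(real_t).
//
//   mxnet::NDArray a(mxnet::TShape({2, 3}), mxnet::Context::CPU(),
//                    /*delay_alloc=*/false, mshadow::kFloat32);
//   a = 1.0f;        // set every element to 1
//   a.WaitToRead();  // block until the pending write has finished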
/*! \brief constructor for NDArray with storage type
*/
NDArray(const NDArrayStorageType stype, const TShape &shape, Context ctx,
bool delay_alloc = true, int dtype = mshadow::default_type_flag,
std::vector<int> aux_types = {}, std::vector<TShape> aux_shapes = {},
TShape storage_shape = TShape(mshadow::Shape1(0)));
/*!
* \brief constructing a static NDArray that shares data with TBlob
* Use with caution: allocate ONLY ONE NDArray for each TBlob,
* make sure the memory region remains available throughout the lifetime of the NDArray
* \param data the memory content of static data
* \param dev_id the device id this tensor sits at
*/
NDArray(const TBlob &data, int dev_id)
: ptr_(std::make_shared<Chunk>(data, dev_id)), shape_(data.shape_),
dtype_(data.type_flag_), storage_type_(kDefaultStorage),
entry_({nullptr, 0, 0}) {
}
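// Usage sketch (illustrative only): wrapping an existing CPU buffer without
// copying it. The buffer must outlive the NDArray, as the warning above notes;
// the TBlob constructor used here is the (dptr, shape, dev_mask) form from
// tensor_blob.h.
//
//   float buf[6] = {0, 1, 2, 3, 4, 5};
//   mxnet::TBlob blob(buf, mxnet::TShape({2, 3}), mxnet::cpu::kDevMask);
//   mxnet::NDArray view(blob, /*dev_id=*/0);  // shares memory with buf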
/*! \brief create ndarray from shared memory */
NDArray(int shared_pid, int shared_id, const TShape& shape, int dtype)
: ptr_(std::make_shared<Chunk>(shared_pid, shared_id, shape, dtype)), shape_(shape),
dtype_(dtype), storage_type_(kDefaultStorage), entry_({nullptr, 0, 0}) {
}
/*!
* \brief constructing a static NDArray of non-default storage that shares data with TBlob
* Use with caution: allocate ONLY ONE NDArray for each TBlob,
* make sure the memory region remains available throughout the lifetime of the NDArray
* \param stype the storage type of NDArray
* \param shape the shape of NDArray
* \param data the memory content of static data
* \param aux_data the memory content of static aux data
* \param dev_id the device id this tensor sits at
*/
NDArray(const NDArrayStorageType stype, const TShape &shape,
const TBlob &data, const std::vector<TBlob> &aux_data, int dev_id)
: ptr_(std::make_shared<Chunk>(stype, data, aux_data, dev_id)), shape_(shape),
dtype_(data.type_flag_), storage_type_(stype), entry_({nullptr, 0, 0}) {
}
/*
* This indicates whether an array is a view of another array (created by
* reshape or slice). If an array is a view and the data is stored in
* MKLDNN format, we need to convert the data to the default format when
* data in the view is accessed.
*/
inline bool IsView() const {
// View only works on the default storage
if (storage_type() != kDefaultStorage)
return false;
// If the array reuses memory, its shape may be different from the storage
// shape. However, we shouldn't consider it as a view.
if (reuse_)
return false;
return byte_offset_ > 0 || shape() != ptr_->storage_shape;
}
/*!
* \return the shape of current NDArray.
*/
inline const TShape& shape() const {
return shape_;
}
/*!
* \return the shape of underlying chunk which stores the NDArray data/value.
* It is only intended for non-default storage. For row-sparse storage, it is the shape of
* the tensor which stores the non-zero values.
*/
inline const TShape &storage_shape() const {
CHECK(ptr_ != nullptr);
CHECK_NE(storage_type(), kDefaultStorage)
<< "storage_shape() is not intended for kDefaultStorage.";
return ptr_->storage_shape;
}
/*!
* \brief get the shape of aux_data(index)
* \param index the index of the aux data
* \return the shape of aux data at given index
*/
inline const TShape& aux_shape(size_t index) const {
CHECK_NE(storage_type(), kDefaultStorage)
<< "aux_shape() is not intended for kDefaultStorage.";
return ptr_->aux_shapes[index];
}
/* \return the shapes of all aux data */
const std::vector<TShape>& aux_shapes() const {
CHECK_NE(storage_type(), kDefaultStorage)
<< "aux_shapes() is not intended for kDefaultStorage.";
return ptr_->aux_shapes;
}
/*! returns the dtypes of all aux data */
const std::vector<int>& aux_types() const {
CHECK_NE(storage_type(), kDefaultStorage)
<< "aux_types() is not intended for kDefaultStorage.";
return ptr_->aux_types;
}
/*!
* \brief For a sparse operation on a CSR matrix, for example,
* the size of the column index array
* is initially an estimate, used to allocate enough capacity
* for the final result. After the operation is done, the exact size of
* the shape is known and needs to be reset using this function.
*/
inline void set_aux_shape(size_t index, const TShape& shape) const {
ptr_->set_aux_shape(index, shape);
}
/*!
* \return the data TBlob
*/
inline const TBlob& data() const {
if (storage_type() == kDefaultStorage) CheckAndAlloc();
SetTBlob();
return tblob_;
}
/*!
* \return the gradient ndarray.
*/
NDArray grad() const;
/*!
* \return the aux TBlob
*/
inline TBlob aux_data(size_t i) const {
auto stype = storage_type();
TBlob res;
auto shape = aux_shape(i);
auto type = aux_type(i);
MSHADOW_TYPE_SWITCH(type, DType, {
auto dptr = static_cast<DType*>(ptr_->aux_handles[i].dptr);
CHECK(stype == kRowSparseStorage || stype == kCSRStorage)
<< "Unexpected storage type: " << stype;
res = TBlob(dptr, shape, ptr_->aux_handles[i].ctx.dev_mask(), type);
});
return res;
}
/*!
* \return the context of the NDArray; this function is only valid when the NDArray is not empty
*/
inline Context ctx() const {
CHECK(!is_none());
return ptr_->shandle.ctx;
}
/*!
* \return the data type of the NDArray; this function is only valid when the NDArray is not empty
*/
inline int dtype() const {
return dtype_;
}
inline int aux_type(size_t i) const {
CHECK(!is_none());
return ptr_->aux_types[i];
}
inline NDArrayStorageType storage_type() const {
return storage_type_;
}
/*! \return whether this ndarray is not initialized */
inline bool is_none() const {
return ptr_.get() == nullptr;
}
/*! \return updated grad state in entry_ */
bool fresh_out_grad() const;
/*! \return updated grad state in entry_ */
void set_fresh_out_grad(bool state) const;
/*! \brief Returns true if a sparse ndarray's aux_data and storage are initialized
* Throws an exception if the indices array shape is inconsistent
* Returns false if the indices array is empty (nnz = 0) for csr/row_sparse
*/
inline bool storage_initialized() const {
if (is_none()) return false;
auto stype = storage_type();
CHECK_NE(stype, kDefaultStorage)
<< "storage_initialized() is not intended for kDefaultStorage.";
if (stype == kRowSparseStorage) {
CHECK_EQ(aux_shape(rowsparse::kIdx)[0], storage_shape()[0])
<< "inconsistent storage shape " << storage_shape()
<< " vs. aux shape " << aux_shape(rowsparse::kIdx);
return aux_shape(rowsparse::kIdx).Size() != 0;
} else if (stype == kCSRStorage) {
CHECK_EQ(aux_shape(csr::kIdx)[0], storage_shape()[0])
<< "inconsistent storage shape " << storage_shape()
<< " vs. aux shape " << aux_shape(csr::kIdx);
return aux_shape(csr::kIdx).Size() != 0;
} else {
LOG(FATAL) << "Unknown storage type";
}
return true;
}
/*! \brief get storage handle */
inline Storage::Handle storage_handle() const {
CHECK(!is_none());
CHECK_EQ(storage_type(), kDefaultStorage);
CheckAndAlloc();
return ptr_->shandle;
}
/*!
* \brief Block until all the pending write operations with respect
* to current NDArray are finished, and read can be performed.
*/
inline void WaitToRead() const {
if (is_none()) return;
Engine::Get()->WaitForVar(ptr_->var);
}
/*!
* \brief Block until all the pending read/write operations with respect
* to current NDArray are finished, and write can be performed.
*/
inline void WaitToWrite() const {
if (is_none()) return;
/*!
* Push an empty mutable function to flush all preceding reads to the
* variable.
*/
Engine::Get()->PushAsync(
[](RunContext, Engine::CallbackOnComplete on_complete) {
on_complete();
}, Context{}, {}, {ptr_->var});
Engine::Get()->WaitForVar(ptr_->var);
}
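// Usage sketch (illustrative only): how these synchronization calls are
// typically paired with raw-pointer access. Given an NDArray `arr` produced by
// asynchronous operators, wait before touching its memory directly on the host:
//
//   arr.WaitToRead();
//   const float *rptr = arr.data().dptr<float>();  // safe to read now
//
//   arr.WaitToWrite();
//   float *wptr = arr.data().dptr<float>();        // safe to overwrite now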
/*! \return the associated variable of the ndarray.*/
inline Engine::VarHandle var() const {
return ptr_->var;
}
/*! \return byte offset in chunk of the ndarray*/
inline size_t byte_offset() const {
return byte_offset_;
}
/*!
* \brief save the content into binary stream
* \param strm the output stream
*/
void Save(dmlc::Stream *strm) const;
/*!
* \brief load ndarrays saved in the legacy format, before sparse ndarrays were supported
* \param strm the input stream
* \param magic the magic number used for version control
*/
bool LegacyLoad(dmlc::Stream *strm, const uint32_t magic);
/*!
* \brief load the content from binary stream
* \param strm the input stream
* \return whether the load is successful
*/
bool Load(dmlc::Stream *strm);
/*!
* \brief set all the elements in ndarray to be scalar
* \param scalar the scalar to set
* \return reference of self
*/
NDArray &operator=(real_t scalar);
/*!
* \brief elementwise add to current space
* this mutates the current NDArray
* \param src the data to add
* \return reference of self
*/
NDArray &operator+=(const NDArray &src);
/*!
* \brief elementwise add to current space
* this mutates the current NDArray
* \param src the data to add
* \return reference of self
*/
NDArray &operator+=(const real_t &src);
/*!
* \brief elementwise subtract from current ndarray
* this mutates the current NDArray
* \param src the data to subtract
* \return reference of self
*/
NDArray &operator-=(const NDArray &src);
/*!
* \brief elementwise subtract from current ndarray
* this mutates the current NDArray
* \param src the data to subtract
* \return reference of self
*/
NDArray &operator-=(const real_t &src);
/*!
* \brief elementwise multiplication to current ndarray
* this mutates the current NDArray
* \param src the data to multiply by
* \return reference of self
*/
NDArray &operator*=(const NDArray &src);
/*!
* \brief elementwise multiplication to current ndarray
* this mutates the current NDArray
* \param src the data to multiply by
* \return reference of self
*/
NDArray &operator*=(const real_t &src);
/*!
* \brief elementwise division from current ndarray
* this mutates the current NDArray
* \param src the data to divide by
* \return reference of self
*/
NDArray &operator/=(const NDArray &src);
/*!
* \brief elementwise division from current ndarray
* this mutates the current NDArray
* \param src the data to divide by
* \return reference of self
*/
NDArray &operator/=(const real_t &src);
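// Usage sketch (illustrative only) of the in-place arithmetic operators declared
// above; every operation mutates the left-hand array and is scheduled through the
// dependency engine.
//
//   mxnet::NDArray a(mxnet::TShape({2, 2}), mxnet::Context::CPU());
//   mxnet::NDArray b(mxnet::TShape({2, 2}), mxnet::Context::CPU());
//   a = 2.0f;
//   b = 3.0f;
//   a += b;     // elementwise add:      every element of a becomes 5
//   a *= 2.0f;  // scalar multiply:      every element of a becomes 10
//   a /= b;     // elementwise division: every element of a becomes 10/3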
/*!
* \brief return a new copy of this NDArray
* \param ctx the new context of this NDArray
* \return the new copy
*/
NDArray Copy(Context ctx) const;
/*!
* \brief Do a synchronous copy from a contiguous CPU memory region.
*
* This function will call WaitToWrite before the copy is performed.
* This is useful for copying data from an existing memory region that is
* not wrapped by an NDArray (thus its dependencies are not tracked).
*
* \param data the data source to copy from.
* \param size the size of the source array, measured in DType elements, not raw bytes.
*/
void SyncCopyFromCPU(const void *data, size_t size) const;
/*!
* \brief Copy from src.data()/aux_data(i) to this->data()/aux_data(j)
*/
void SyncCopyFromNDArray(const NDArray &src, int i = -1, int j = -1);
/*!
* \brief Do a synchronous copy to a contiguous CPU memory region.
*
* This function will call WaitToRead before the copy is performed.
* This is useful for copying data to an existing memory region that is
* not wrapped by an NDArray (thus its dependencies are not tracked).
*
* \param data the destination buffer to copy into.
* \param size the amount of memory to copy, measured in DType elements, not raw bytes.
*/
void SyncCopyToCPU(void *data, size_t size) const;
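// Round-trip sketch (illustrative only): copying host data into an NDArray and
// back. Note that `size` is measured in elements, not bytes, and both calls
// synchronize with the dependency engine internally.
//
//   std::vector<float> src = {1, 2, 3, 4, 5, 6};
//   mxnet::NDArray arr(mxnet::TShape({2, 3}), mxnet::Context::CPU());
//   arr.SyncCopyFromCPU(src.data(), src.size());
//   std::vector<float> dst(6);
//   arr.SyncCopyToCPU(dst.data(), dst.size());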
/*!
* \brief check whether the NDArray format is valid
* \param full_check if `True`, rigorous check, O(N) operations
* Otherwise basic check, O(1) operations
*/
void SyncCheckFormat(const bool full_check) const;
/*!
* \brief Slice a NDArray
* \param begin begin index in first dim (inclusive)
* \param end end index in first dim (exclusive)
* \return sliced NDArray
*/
NDArray Slice(index_t begin, index_t end) const;
/*!
* \brief Slice a NDArray. Supports recording with autograd
* \param begin begin index in first dim (inclusive)
* \param end end index in first dim (exclusive)
* \return sliced NDArray
*/
NDArray SliceWithRecord(index_t begin, index_t end);
/*!
* \brief Index a NDArray
* \param idx the index
* \return idx-th sub array NDArray
*/
NDArray At(index_t idx) const;
/*!
* \brief Index a NDArray
* \param idx the index
* \return idx-th sub array NDArray
*/
NDArray AtWithRecord(index_t idx);
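// Brief sketch (illustrative only) of Slice() and At(): both operate on the
// first dimension and return arrays that share memory with the source.
//
//   mxnet::NDArray arr(mxnet::TShape({4, 3}), mxnet::Context::CPU());
//   arr = 0.0f;
//   mxnet::NDArray rows = arr.Slice(1, 3);  // shape (2, 3): rows 1 and 2
//   mxnet::NDArray row  = arr.At(2);        // shape (3,): the third row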
/*!
* \brief Generate a deep copy of aux_data(i) returned as
* a default storage type NDArray
*/
NDArray aux_ndarray(size_t i) const;
/*!
* \brief Generate a deep copy of data() returned as a
* default storage type NDArray
*/
NDArray data_ndarray() const;
/*!
* \brief Create a NDArray that shares memory with current one
* The new array must not require more memory than the current array.
* \param shape new shape
* \param dtype The data type.
* \return NDArray in new shape and type.
*/
inline NDArray AsArray(const TShape &shape, int dtype) const {
CHECK_EQ(storage_type(), kDefaultStorage)
<< "AsArray is intended only for kDefaultStorage.";
CHECK_GE(ptr_->shandle.size,
shape.Size() * mshadow::mshadow_sizeof(dtype))
<< "NDArray.AsArray: target memory size is bigger";
// We can't reuse memory in a view.
CHECK(!IsView());
NDArray ret = *this;
ret.shape_ = shape;
ret.dtype_ = dtype;
ret.reuse_ = true;
return ret;
}
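// Small sketch (illustrative only) of AsArray(): reinterpreting the existing
// buffer with a new shape and dtype, provided the requested view does not need
// more memory than is already allocated.
//
//   mxnet::NDArray f32(mxnet::TShape({2, 4}), mxnet::Context::CPU(),
//                      /*delay_alloc=*/false, mshadow::kFloat32);
//   // 8 float32 values occupy 32 bytes, enough to hold 8 int32 values as well.
//   mxnet::NDArray i32 = f32.AsArray(mxnet::TShape({8}), mshadow::kInt32);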
/*!
* \brief Update ndarray chunk storage handles using existing ndarray storage handles
* Also update the aux_handle, aux_shapes and aux_types.
* This is specifically used for custom op to update the inputs and outputs from
* the temporary ndarray which stores intermediate custom op results.
* Should be used with caution elsewhere. Supports only CSR and RSP formats.
*/
inline void SparseUpdateChunk(const NDArray &arr) const {
CHECK(shape_ == arr.shape_) << "ndarray shape is different from the target";
CHECK(dtype_ == arr.dtype_) << "ndarray dtype is different from the target";
auto stype = arr.storage_type();
CHECK(stype == kCSRStorage || stype == kRowSparseStorage)
<< "Only to be used with CSR and RSP storage types";
// swap shandles between src and dst
Storage::Handle shandle_dst = arr.ptr_->shandle;
arr.ptr_->shandle = ptr_->shandle;
ptr_->shandle = shandle_dst;
ptr_->storage_shape = arr.ptr_->storage_shape;
ptr_->storage_type = arr.ptr_->storage_type;
ptr_->ctx = arr.ptr_->ctx;
// swap aux_handles between src and dst
size_t aux_idx = 0;
CHECK(ptr_->aux_handles.size() == arr.ptr_->aux_handles.size())
<< "ndarray number of aux_handles is different from target";
for (auto &aux_handle : arr.ptr_->aux_handles) {
Storage::Handle aux_dst = ptr_->aux_handles[aux_idx];
ptr_->aux_handles[aux_idx] = aux_handle;
aux_handle = aux_dst;
aux_idx++;
}
ptr_->aux_types = arr.ptr_->aux_types;
ptr_->aux_shapes = arr.ptr_->aux_shapes;
}
/*!
* \brief Get a reshaped NDArray
* \param shape new shape
* \return NDArray in new shape
*/
NDArray Reshape(const TShape &shape) const;
/*!
* \brief Get a reshaped NDArray. Supports autograd recording
* \param shape new shape
* \return NDArray in new shape
*/
NDArray ReshapeWithRecord(const TShape &shape);
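// Short sketch (illustrative only) of Reshape(): the returned array shares data
// with the original but presents a different shape; the requested shape must not
// contain more elements than the original.
//
//   mxnet::NDArray arr(mxnet::TShape({2, 3}), mxnet::Context::CPU());
//   arr = 1.0f;
//   mxnet::NDArray flat = arr.Reshape(mxnet::TShape({6}));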
/*!
* \brief Return a copy of this NDArray without autograd history
*/
NDArray Detach() const {
NDArray ret(*this);
ret.entry_ = nnvm::NodeEntry{nullptr, 0, 0};
return ret;
}
nnvm::Symbol get_autograd_symbol() const;
/*!
* \brief Allocate the space if allocation has been delayed.
* This is an internal function used by the system; normal users should not call it.
*/
inline void CheckAndAlloc() const {
CHECK_EQ(storage_type(), kDefaultStorage);
ptr_->CheckAndAlloc();
}
/*!
* \brief Allocate the space if the allocation has been delayed
* or the requested size is bigger than the available one.
* This function can only be called by ndarray of default
* storage type and effectively changes the ndarray's shape_.
* Note: This function is named this way to avoid an overload conflict
* with CheckAndAlloc(const std::vector<TShape> &aux_shapes), since
* TShape tmp = some_shape is equivalent to TShape tmp = {some_shape}.
*/
void ReshapeAndAlloc(const TShape& shape) {
CHECK_EQ(storage_type(), kDefaultStorage);
CHECK(!is_none());
shape_ = shape;
ptr_->CheckAndAlloc(shape.Size() * mshadow::mshadow_sizeof(dtype_));
}
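// Minimal sketch (illustrative only) of ReshapeAndAlloc(): growing a
// delay-allocated dense array to its final shape before use.
//
//   mxnet::NDArray buf(mxnet::TShape({1}), mxnet::Context::CPU(),
//                      /*delay_alloc=*/true);
//   buf.ReshapeAndAlloc(mxnet::TShape({16, 16}));  // allocates 16*16 elements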
/*!
* \brief Alloc memory for non-default storage
* aux_shape is only known at run time
*/
inline void CheckAndAlloc(const std::vector<TShape> &aux_shapes) const {
CHECK_NE(storage_type(), kDefaultStorage)
<< "CheckAndAlloc(aux_shapes) is not intended for kDefaultStorage";
ptr_->CheckAndAlloc(shape_, aux_shapes, dtype_);
}
inline void CheckAndAllocData(const TShape &storage_shape) const {
CHECK_NE(storage_type(), kDefaultStorage)
<< "CheckAndAllocData is not intended for kDefaultStorage";
ptr_->CheckAndAllocData(storage_shape, dtype_);
}
inline void CheckAndAllocAuxData(size_t i, const TShape &aux_shape) const {
CHECK_NE(storage_type(), kDefaultStorage)
<< "CheckAndAllocAuxData is not intended for kDefaultStorage";
ptr_->CheckAndAllocAuxData(i, aux_shape);
}
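// Hedged sketch (illustrative only) of allocating a CSR array whose nnz is only
// known at run time: construct with delayed allocation, then size the
// indptr/indices and value storage with CheckAndAlloc(aux_shapes).
//
//   const mxnet::index_t rows = 4, nnz = 5;
//   mxnet::NDArray csr_arr(mxnet::kCSRStorage, mxnet::TShape({rows, 6}),
//                          mxnet::Context::CPU());
//   csr_arr.CheckAndAlloc({mxnet::TShape({rows + 1}),   // csr::kIndPtr
//                          mxnet::TShape({nnz})});      // csr::kIdx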
#if MXNET_USE_MKLDNN == 1
/*
* Test if the data is stored in one of the special MKLDNN formats.
*/
bool IsMKLDNNData() const {
return ptr_->IsMKLDNN();
}
/*
* Test if the data is stored in one of the default MXNet formats.
*/
bool IsDefaultData() const {
return ptr_->IsDefault();
}
/*
* All functions below return a raw pointer to mkldnn memory. Internally, a
* shared pointer holds the memory, either in the NDArray or in the MKLDNN
* stream. As long as we call these functions inside an operator, the returned
* memory is always valid.
*/
/*
* This function returns mkldnn::memory with the default primitive_desc.
*/
const mkldnn::memory *GetMKLDNNData() const;
/*
* This function returns mkldnn::memory with the given primitive_desc
* as long as the array size meets the required size in the given primitive_desc.
*/
const mkldnn::memory *GetMKLDNNData(
const mkldnn::memory::primitive_desc &desc) const;
/*
* This function returns mkldnn::memory with the given primitive_desc.
* The returned mkldnn::memory will have the same physical layout as
* the given primitive_desc.
*/
const mkldnn::memory *GetMKLDNNDataReorder(
const mkldnn::memory::primitive_desc &desc) const;
/*
* This function copies data from mkldnn memory.
*/
void CopyFrom(const mkldnn::memory &mem);
/*
* This function allocates memory for array and creates mkldnn memory
* with the specified format.
*/
mkldnn::memory *CreateMKLDNNData(
const mkldnn::memory::primitive_desc &desc);
/*
* These are the async versions of the methods above.
* They change the layout of this NDArray, but only after all pending accesses to
* the array are complete.
*/
void Reorder2DefaultAsync();
void MKLDNNDataReorderAsync(const mkldnn::memory::primitive_desc &desc);
/*
* This creates a new NDArray with the reordered data.
* It doesn't affect the data of the original NDArray.
*/
NDArray Reorder2Default() const;
void InvalidateMKLDNNData() {
// Removing mkl_mem_ means the NDArray will store data in the default format.
ptr_->mkl_mem_ = nullptr;
}
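// Conservative sketch (illustrative only) of handling MKLDNN layouts when raw
// data access is needed. Given an NDArray `out` that may hold MKLDNN-formatted
// data (e.g. the output of an MKLDNN operator), reorder it to the default layout
// before reading it directly:
//
//   mxnet::NDArray plain = out.IsMKLDNNData() ? out.Reorder2Default() : out;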
/*
* This function is used inside operators to reshape an array.
* It doesn't change the layout of the original array, and it allocates memory
* from the temporary buffer. The returned array is only valid inside the current
* invocation of this operator.
* This is different from Reshape: Reshape converts the data in the array to the
* default layout and allocates memory directly with malloc, which can be
* expensive.
* It's used by FullyConnected right now.
*/
NDArray MKLDNNDataReshape(const TShape &shape) const;
#endif
/*!
* \brief Save a list of ndarrays into the stream.
* \param fo The stream of output.
* \param data the NDArrays to be saved.
* \param names the names of the NDArrays; optional, can be zero length.
*/
static void Save(dmlc::Stream* fo,
const std::vector<NDArray>& data,
const std::vector<std::string>& names);
/*!
* \brief Load a list of ndarrays from the stream.
* \param fi The stream of the input file.
* \param data the NDArrays to be loaded.
* \param keys the names of the NDArrays, if saved in the file.
*/
static void Load(dmlc::Stream* fi,
std::vector<NDArray>* data,
std::vector<std::string>* keys);
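// Small sketch (illustrative only) of saving and loading a list of named
// ndarrays through dmlc::Stream; "weights.params" is just an illustrative
// file name.
//
//   mxnet::NDArray arr(mxnet::TShape({2, 2}), mxnet::Context::CPU());
//   arr = 1.0f;
//   {
//     std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create("weights.params", "w"));
//     mxnet::NDArray::Save(fo.get(), {arr}, {"arg:weight"});
//   }
//   std::vector<mxnet::NDArray> data;
//   std::vector<std::string> keys;
//   std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create("weights.params", "r"));
//   mxnet::NDArray::Load(fi.get(), &data, &keys);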
private:
friend class Imperative;
/*! \brief the real data chunk that backs NDArray */
// shandle is used to store the actual values in the NDArray
// aux_handles store the aux data (such as indices) if it's needed by non-default storage.
struct Chunk {
/*! \brief storage handle from storage engine.
for non-default storage, shandle stores the data(value) array.
*/
Storage::Handle shandle;
/*! \brief storage handles for aux data (e.g index)
for row_sparse, aux_handles[0] = indices
for csr, aux_handles[0] = indptr, aux_handles[1] = indices
*/
std::vector<Storage::Handle> aux_handles;
#if MXNET_USE_MKLDNN == 1
/*! This is created when data is stored in MKLDNN format.
*/
std::shared_ptr<MKLDNNMemory> mkl_mem_;
#endif
/*! \brief variable from engine */
Engine::VarHandle var;
/*!
* \brief if this is true, the data does not come from Storage and does not
* need to be freed (i.e. the chunk was constructed from static data)
*/
bool static_data;
/*! \brief whether data allocation is delayed. This doesn't indicate whether aux data
allocation is delayed. */
bool delay_alloc;
// the type of the storage. The storage_type is never kUndefinedStorage once the chunk
// is constructed.
NDArrayStorageType storage_type = kDefaultStorage;
/*! \brief type of aux */
std::vector<int> aux_types;
// context of data
Context ctx;
// The shape of the chunk data.
// This might not be the same shape as the NDArray, since the storage may be sparse.
// The default value for storage_shape is {0} when an empty non-default NDArray is created.
TShape storage_shape;
// The shape of aux data. The default value for the shape depends on the type of storage.
// If aux_shapes[i].Size() is zero, aux data i is empty.
std::vector<TShape> aux_shapes;
/*! \brief default constructor */
Chunk() : static_data(true), delay_alloc(false) {}
/*! \brief construct a new chunk */
Chunk(TShape shape, Context ctx_, bool delay_alloc_, int dtype)
: static_data(false), delay_alloc(true), ctx(ctx_) {
auto size = shape.Size();
storage_shape = shape;
var = Engine::Get()->NewVariable();
shandle.size = size * mshadow::mshadow_sizeof(dtype);
shandle.ctx = ctx_;
if (!delay_alloc_) this->CheckAndAlloc();
}
Chunk(const TBlob &data, int dev_id)
: static_data(true), delay_alloc(false) {
CHECK(storage_type == kDefaultStorage);
var = Engine::Get()->NewVariable();
if (data.dev_mask() == cpu::kDevMask) {
ctx = Context::CPU();
} else {
CHECK_EQ(data.dev_mask(), gpu::kDevMask);
ctx = Context::GPU(dev_id);
}
// init shandle
shandle.ctx = ctx;
shandle.dptr = data.dptr_;
shandle.size = data.shape_.Size() * mshadow::mshadow_sizeof(data.type_flag_);
storage_shape = data.shape_;
}
Chunk(int shared_pid, int shared_id, const TShape& shape, int dtype)
: static_data(false), delay_alloc(false) {
var = Engine::Get()->NewVariable();
ctx = Context::CPUShared(0);
shandle.size = shape.Size() * mshadow::mshadow_sizeof(dtype);
shandle.ctx = ctx;
shandle.shared_pid = shared_pid;
shandle.shared_id = shared_id;
Storage::Get()->Alloc(&shandle);
storage_shape = shape;
}
// Constructor for a non-default storage chunk
Chunk(NDArrayStorageType storage_type_, const TShape &storage_shape_, Context ctx_,
bool delay_alloc_, int dtype, const std::vector<int> &aux_types_,
const std::vector<TShape> &aux_shapes_)
: static_data(false), delay_alloc(delay_alloc_), storage_type(storage_type_),
aux_types(aux_types_), ctx(ctx_), storage_shape(storage_shape_),
aux_shapes(aux_shapes_) {
shandle.ctx = ctx;
var = Engine::Get()->NewVariable();
// aux_handles always reflect the correct number of aux data
for (size_t i = 0; i < aux_shapes.size(); i++) {
CheckAndAllocAuxData(i, aux_shapes[i]);
// this line is needed because, when aux_shapes[i].Size() == 0,
// aux_handles[i] is not updated and would otherwise keep only its default value.
aux_handles[i].ctx = ctx;
}
if (!delay_alloc) {
CheckAndAllocData(storage_shape, dtype);
}
}
Chunk(const NDArrayStorageType storage_type_, const TBlob &data,
const std::vector<TBlob> &aux_data, int dev_id)
: static_data(true), delay_alloc(false), storage_type(storage_type_) {
using namespace mshadow;
CHECK_NE(storage_type, kDefaultStorage);
// init var
var = Engine::Get()->NewVariable();
// init ctx
if (data.dev_mask() == cpu::kDevMask) {
ctx = Context::CPU();
} else {
CHECK_EQ(data.dev_mask(), gpu::kDevMask);
ctx = Context::GPU(dev_id);
}
// init shandle
shandle.ctx = ctx;
shandle.dptr = data.dptr_;
shandle.size = data.shape_.Size() * mshadow_sizeof(data.type_flag_);
storage_shape = data.shape_;
// init aux handles
for (const auto &aux : aux_data) {
Storage::Handle aux_handle;
aux_handle.ctx = ctx;
aux_handle.dptr = aux.dptr_;
aux_handle.size = aux.shape_.Size() * mshadow_sizeof(aux.type_flag_);
aux_handles.push_back(aux_handle);
aux_types.emplace_back(aux.type_flag_);
aux_shapes.emplace_back(aux.shape_);
}
}
/*! \brief set the shape for ith aux data, and update storage shape if necessary */
inline void set_aux_shape(const size_t i, const TShape& shape) {
aux_shapes[i] = shape;
if (storage_shape.ndim() > 0) {
if (storage_type == kRowSparseStorage && i == rowsparse::kIdx) {
storage_shape[0] = shape[0];
} else if (storage_type == kCSRStorage && i == csr::kIdx) {
storage_shape[0] = shape[0];
}
}
}
/*! \brief check if delay alloc is on, do alloc if not yet done */
inline void CheckAndAlloc(void) {
if (delay_alloc) {
shandle = Storage::Get()->Alloc(shandle.size, shandle.ctx);
#if MXNET_USE_MKLDNN == 1
mkl_mem_ = nullptr;
#endif
delay_alloc = false;
}
}
/*! \brief Check and alloc memory for a dense ndarray */
// size is the number of bytes
void CheckAndAlloc(uint64_t dbytes) {
CHECK_EQ(kDefaultStorage, storage_type)
<< "CheckAndAlloc(dbytes) is only intended for kDefaultStorage";
dbytes = std::max(dbytes, static_cast<uint64_t>(shandle.size));
if (delay_alloc) {
shandle = Storage::Get()->Alloc(dbytes, shandle.ctx);
#if MXNET_USE_MKLDNN == 1
mkl_mem_ = nullptr;
#endif
delay_alloc = false;
} else if (shandle.size < dbytes) {
// free storage if necessary and alloc again
if (shandle.size > 0) Storage::Get()->Free(shandle);
// init storage
shandle = Storage::Get()->Alloc(dbytes, shandle.ctx);
#if MXNET_USE_MKLDNN == 1
mkl_mem_ = nullptr;
#endif
}
}
inline void CheckAndAlloc(const TShape &shape, const std::vector<TShape> &aux_shapes,
int dtype) {
// calculate size, perform allocation
if (kRowSparseStorage == storage_type) {
// For row sparse, aux_shape indicates the number of rows to allocate
auto aux_shape = aux_shapes[rowsparse::kIdx];
CheckAndAllocAuxData(rowsparse::kIdx, aux_shape);
TShape storage_shape(shape);
storage_shape[0] = aux_shape[0];
CheckAndAllocData(storage_shape, dtype);
} else if (kCSRStorage == storage_type) {
CheckAndAllocAuxData(csr::kIndPtr, aux_shapes[csr::kIndPtr]);
CheckAndAllocAuxData(csr::kIdx, aux_shapes[csr::kIdx]);
CheckAndAllocData(aux_shapes[csr::kIdx], dtype);
} else {
LOG(FATAL) << "Storage type " << storage_type << " not implemented for CheckAndAlloc";
}
}
// create storage handle for data based on shape and dtype, assuming ctx is set
// storage shape is also updated
// if data is already allocated, try to reuse the storage. Otherwise, free the current one
// and allocate new storage
void CheckAndAllocData(const TShape &shape, int dtype);
#if MXNET_USE_MKLDNN == 1
// Have MKL memory reference to the data in the default storage
// or create memory for MKLDNN.
void SetMKLMem(const TShape &shape, int dtype);
// If the data is stored in MKLDNN layout, we reorder data in mkl_mem_ and
// save the result in shandle.
void Reorder2Default();
// Reorder data to a specified layout.
void MKLDNNDataReorder(const mkldnn::memory::primitive_desc &desc);
bool IsMKLDNN() const;
bool IsDefault() const;
#endif
// create storage handle for aux data based on shape
// this function assumes ctx, aux shapes and aux types are set
// aux shape is also updated
// if aux data is already allocated, try to reuse the storage. Otherwise, free the current one
// and allocate new storage
inline void CheckAndAllocAuxData(size_t i, const TShape &shape) {
CHECK_EQ(shape.ndim(), 1) << "shape must be 1D in CheckAndAllocAuxData";
CHECK_NE(storage_type, kUndefinedStorage)
<< "storage type cannot be kUndefinedStorage in CheckAndAllocAuxData";
CHECK_NE(storage_type, kDefaultStorage)
<< "storage type cannot be kDefaultStorage in CheckAndAllocAuxData";
if (aux_handles.size() <= i) {
aux_handles.resize(i + 1);
}
size_t aux_bytes = shape.Size() * mshadow::mshadow_sizeof(aux_types[i]);
if (aux_handles[i].size < aux_bytes) {
// free storage if necessary and alloc again
if (aux_handles[i].size > 0) Storage::Get()->Free(aux_handles[i]);
// init aux storage
aux_handles[i] = Storage::Get()->Alloc(aux_bytes, ctx);
}
// init shape
set_aux_shape(i, shape);
}
/*! \brief destructor */
~Chunk();
}; // struct Chunk
void SetTBlob() const;
/*! \brief internal data of NDArray */
std::shared_ptr<Chunk> ptr_{nullptr};
/*! \brief shape of current NDArray */
TShape shape_;
/*! \brief byte offset in chunk */
size_t byte_offset_ = 0;
/*! \brief type of data */
int dtype_ = -1;
/*! \brief whether the NDArray uses memory of another NDArray. */
bool reuse_ = false;
/*! \brief storage type of data */
NDArrayStorageType storage_type_ = kUndefinedStorage;
/*! \brief node entry for autograd */
nnvm::NodeEntry entry_;
/*!
* \brief internal TBlob
* \note When user access tblob_ by some const methods like
* NDArray::data(), the dptr in tblob_ still need to be updated
* in case that allocation happens. So we make it mutable for
* this situation.
*/
mutable TBlob tblob_;
}; // class NDArray
/*!
* \return the number of aux data used for given storage type
*/
size_t num_aux_data(NDArrayStorageType stype);
/*!
* \brief issue a copy operation from one NDArray to another
* the two ndarrays can sit on different devices