NCAS-CMS · davidhassell · Apr 4, 2022 · Mar 28, 2022 · Apr 1, 2022 · Apr 1, 2022
diff --git a/cf/data/__init__.py b/cf/data/__init__.py
@@ -1,3 +1,5 @@
+from .abstract import FileArray
+
 from .cachedarray import CachedArray
 from .netcdfarray import NetCDFArray
 from .umarray import UMArray

diff --git a/cf/data/data.py b/cf/data/data.py
@@ -47,6 +47,7 @@
 from ..mixin_container import Container
 from ..units import Units
 from . import (  # GatheredSubarray,; RaggedContiguousSubarray,; RaggedIndexedContiguousSubarray,; RaggedIndexedSubarray,
+    FileArray,
     NetCDFArray,
     UMArray,
 )
@@ -9414,37 +9415,43 @@ def insert_dimension(self, position=0, inplace=False):
 
         return d
 
+    @daskified(_DASKIFIED_VERBOSE)
     def get_filenames(self):
         """Return the names of files containing parts of the data array.
 
         :Returns:
 
             `set`
-                The file names in normalized, absolute form. If the data
-                is are memory then an empty `set` is returned.
+                The file names in normalized, absolute form. If the
+                data is in memory then an empty `set` is returned.
 
-        **Examples:**
+        **Examples**
+
+        >>> f = cf.NetCDFArray(TODODASK)
+        >>> d = cf.Data(f)
+        >>> d.get_filenames()
+        {TODODASK}
 
-        >>> f = cf.read('../file[123]')[0]
-        >>> f.get_filenames()
-        {'/data/user/file1',
-         '/data/user/file2',
-         '/data/user/file3'}
-        >>> a = f.array
-        >>> f.get_filenames()
+        >>> d = cf.Data([1, 2, 3])
+        >>> d.get_filenames()
         set()
 
         """
-        print("TODODASK - is this still possible?")
-        out = set(
-            [
-                abspath(p.subarray.get_filename())
-                for p in self.partitions.matrix.flat
-                if p.in_file
-            ]
-        )
-        out.discard(None)
+        out = set()
 
+        dx = self._get_dask()
+        hlg = dx.dask
+        dsk = hlg.to_dict()
+        for key, value in hlg.get_all_dependencies().items():
+            if value:
+                continue
+            # A key with no dependencies is treated as raw data; of these,
+            # only FileArray instances contribute a file name to the result.
+            a = dsk[key]
+            if isinstance(a, FileArray):
+                out.add(abspath(a.get_filename()))
+        # NOTE(review): presumably abspath can return None here - confirm.
+        out.discard(None)
         return out
 
     @daskified(_DASKIFIED_VERBOSE)

diff --git a/cf/test/test_Data.py b/cf/test/test_Data.py
@@ -3922,6 +3922,10 @@ def test_Data_set_units(self):
         with self.assertRaises(ValueError):
             d.set_units("km")
 
+    @unittest.skipIf(TEST_DASKIFIED_ONLY, "Needs updated NetCDFArray to test")
+    def test_Data_get_filenames(self):
+        pass  # Placeholder: no assertions yet; see the skip reason above.
+
     def test_Data_tolist(self):
         for x in (1, [1, 2], [[1, 2], [3, 4]]):
             d = cf.Data(x)