Multi-threaded VisDrone and VOC downloads (ultralytics#7108)
* Multi-threaded VOC download

* Update VOC.yaml

* Update

* Update general.py

* Update general.py
glenn-jocher authored Mar 23, 2022
1 parent 7bfb30f commit e08005c
Showing 7 changed files with 13 additions and 6 deletions.
data/GlobalWheat2020.yaml: 1 addition, 0 deletions
@@ -34,6 +34,7 @@ names: ['wheat_head'] # class names
download: |
from utils.general import download, Path
# Download
dir = Path(yaml['path']) # dataset root dir
urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip',
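Each of these data/*.yaml files stores its download logic as a Python snippet under the download: key; check_dataset() in utils/general.py (the last file in this diff) executes that snippet with the parsed yaml dict in scope. A minimal sketch of that hand-off, assuming standard PyYAML parsing and a path relative to the repo root:

import yaml

with open('data/GlobalWheat2020.yaml', errors='ignore') as f:
    data = yaml.safe_load(f)  # dict with 'path', 'names', 'download', ...

s = data.get('download')  # the multi-line Python script stored in the yaml
if s and not s.startswith('http'):  # python-script case, not a plain URL
    exec(s, {'yaml': data})  # same exec call as the python-script branch of check_dataset(); the script reads yaml['path'] etc.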
data/Objects365.yaml: 1 addition, 0 deletions
@@ -65,6 +65,7 @@ download: |
from utils.general import Path, download, np, xyxy2xywhn
# Make Directories
dir = Path(yaml['path']) # dataset root dir
for p in 'images', 'labels':
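The Objects365 script presumably continues by creating the images/ and labels/ directory tree before downloading; a hedged sketch of that setup loop, with the root path and the train/val split names assumed rather than taken from this truncated hunk:

from pathlib import Path

dir = Path('../datasets/Objects365')  # assumed root; Objects365.yaml's 'path' field defines the real one
for p in 'images', 'labels':
    for q in 'train', 'val':  # split names assumed
        (dir / p / q).mkdir(parents=True, exist_ok=True)  # e.g. images/train, labels/val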
data/SKU-110K.yaml: 1 addition, 0 deletions
@@ -24,6 +24,7 @@ download: |
from tqdm import tqdm
from utils.general import np, pd, Path, download, xyxy2xywh
# Download
dir = Path(yaml['path']) # dataset root dir
parent = Path(dir.parent) # download dir
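The xyxy2xywh import hints at the conversion step that follows the download: SKU-110K's corner-format annotations are rewritten as normalized YOLO center-format labels. A small illustrative sketch with made-up box and image sizes:

import numpy as np
from utils.general import xyxy2xywh

w, h = 640, 480  # image width/height (example values)
boxes = np.array([[10., 20., 110., 220.]])  # x1, y1, x2, y2 in pixels (example values)
labels = xyxy2xywh(boxes) / np.array([w, h, w, h])  # normalized center-x, center-y, width, height
print(labels)  # -> approx. [[0.094, 0.25, 0.156, 0.417]]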
data/VOC.yaml: 1 addition, 1 deletion
@@ -62,7 +62,7 @@ download: |
urls = [url + 'VOCtrainval_06-Nov-2007.zip', # 446MB, 5012 images
url + 'VOCtest_06-Nov-2007.zip', # 438MB, 4953 images
url + 'VOCtrainval_11-May-2012.zip'] # 1.95GB, 17126 images
- download(urls, dir=dir / 'images', delete=False)
+ download(urls, dir=dir / 'images', delete=False, threads=3)
# Convert
path = dir / f'images/VOCdevkit'
data/VisDrone.yaml: 1 addition, 1 deletion
@@ -54,7 +54,7 @@ download: |
'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip',
'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-dev.zip',
'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-challenge.zip']
- download(urls, dir=dir)
+ download(urls, dir=dir, threads=4)
# Convert
for d in 'VisDrone2019-DET-train', 'VisDrone2019-DET-val', 'VisDrone2019-DET-test-dev':
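As in VOC.yaml above, the only functional change is the threads argument: four archives, four worker threads, one zip per thread. A hedged sketch of the same call made directly, using the URLs from this hunk plus the train archive, which follows the same v1.0 release naming (the dataset root below is a placeholder; VisDrone.yaml's 'path' field defines the real one):

from pathlib import Path
from utils.general import download  # threads= parameter as used in this commit

dir = Path('../datasets/VisDrone')  # assumed dataset root
urls = ['https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-train.zip',
        'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip',
        'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-dev.zip',
        'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-challenge.zip']
download(urls, dir=dir, threads=4)  # download and unzip all four archives in parallel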
data/coco.yaml: 1 addition, 0 deletions
@@ -30,6 +30,7 @@ names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 't
download: |
from utils.general import download, Path
# Download labels
segments = False # segment or box labels
dir = Path(yaml['path']) # dataset root dir
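The segments flag picks between box-only and instance-segmentation label archives; the rest of the script (cut off here) builds the download URL from that flag. A hedged sketch of that branch, with the archive names, release mirror, and destination assumed from the usual coco2017labels naming rather than taken from this excerpt:

from pathlib import Path
from utils.general import download

segments = False  # True: segmentation labels, False: box labels
dir = Path('../datasets/coco')  # assumed root; coco.yaml's 'path' field defines the real one
url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'  # assumed release mirror
urls = [url + ('coco2017labels-segments.zip' if segments else 'coco2017labels.zip')]  # label archive
download(urls, dir=dir.parent)  # assumed destination: the zip unpacks a coco/ folder next to the root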
utils/general.py: 7 additions, 4 deletions
@@ -449,8 +449,9 @@ def check_dataset(data, autodownload=True):
if val:
val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path
if not all(x.exists() for x in val):
- LOGGER.info('\nDataset not found, missing paths: %s' % [str(x) for x in val if not x.exists()])
+ LOGGER.info(emojis('\nDataset not found ⚠️, missing paths %s' % [str(x) for x in val if not x.exists()]))
if s and autodownload: # download script
+ t = time.time()
root = path.parent if 'path' in data else '..' # unzip directory i.e. '../'
if s.startswith('http') and s.endswith('.zip'): # URL
f = Path(s).name # filename
@@ -465,9 +466,11 @@ def check_dataset(data, autodownload=True):
r = os.system(s)
else: # python script
r = exec(s, {'yaml': data}) # return None
- LOGGER.info(f"Dataset autodownload {f'success, saved to {root}' if r in (0, None) else 'failure'}\n")
+ dt = f'({round(time.time() - t, 1)}s)'
+ s = f"success ✅ {dt}, saved to {colorstr('bold', root)}" if r in (0, None) else f"failure {dt} ❌"
+ LOGGER.info(emojis(f"Dataset download {s}"))
else:
- raise Exception('Dataset not found.')
+ raise Exception(emojis('Dataset not found ❌'))

return data # dictionary

@@ -491,7 +494,7 @@ def download_one(url, dir):
if curl:
os.system(f"curl -L '{url}' -o '{f}' --retry 9 -C -") # curl download, retry and resume on fail
else:
- torch.hub.download_url_to_file(url, f, progress=True) # torch download
+ torch.hub.download_url_to_file(url, f, progress=threads == 1) # torch download
if unzip and f.suffix in ('.zip', '.gz'):
LOGGER.info(f'Unzipping {f}...')
if f.suffix == '.zip':
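Taken together, the utils/general.py changes time the dataset autodownload, report success or failure with emojis, and suppress per-file progress bars whenever download() runs with more than one thread (progress=threads == 1). Below is a minimal sketch of how the threads argument plausibly wires download_one() into a thread pool; the real utils.general.download() may differ in its unzip/delete handling and logging:

from multiprocessing.pool import ThreadPool
from pathlib import Path

import torch


def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1):
    # Hedged reconstruction of the multi-threaded download helper referenced in this diff
    def download_one(url, dir):
        f = dir / Path(url).name  # local filename
        print(f'Downloading {url} to {f}...')
        torch.hub.download_url_to_file(url, f, progress=threads == 1)  # hide the bar when threads > 1
        # unzip/delete steps omitted in this sketch

    dir = Path(dir)
    dir.mkdir(parents=True, exist_ok=True)  # make directory
    urls = [url] if isinstance(url, (str, Path)) else list(url)
    if threads > 1:
        pool = ThreadPool(threads)  # download_one() runs concurrently, one URL per worker
        pool.starmap(download_one, zip(urls, [dir] * len(urls)))
        pool.close()
        pool.join()
    else:
        for u in urls:
            download_one(u, dir)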
