zhiqwang · zhiqwang · Feb 5, 2022 · Feb 5, 2022 · Feb 5, 2022 · Feb 5, 2022
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -55,7 +55,7 @@
 # so a file named "default.css" will overwrite the builtin "default.css".
 html_static_path = ["_static"]
 html_favicon = "_static/favicon.svg"
-html_logo = "_static/yolort_logo.png"
+html_logo = "_static/yolort_logo_icon.png"
 
 mathjax_path = "mathjax/tex-chtml.js"
 

diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -81,6 +81,7 @@ And we support loading the trained weights from YOLOv5:
    :caption: Tutorials
 
    notebooks/inference-pytorch-export-libtorch
+   notebooks/comparison-between-yolort-vs-yolov5
    notebooks/how-to-align-with-ultralytics-yolov5
    notebooks/anchor-label-assignment-visualization
    notebooks/model-graph-visualization

diff --git a/docs/source/notebooks/comparison-between-yolort-vs-yolov5.ipynb b/docs/source/notebooks/comparison-between-yolort-vs-yolov5.ipynb
@@ -0,0 +1 @@
+../../../notebooks/comparison-between-yolort-vs-yolov5.ipynb
diff --git a/notebooks/comparison-between-yolort-vs-yolov5.ipynb b/notebooks/comparison-between-yolort-vs-yolov5.ipynb
diff --git a/test/test_utils.py b/test/test_utils.py
@@ -109,7 +109,7 @@ def test_read_image_to_tensor():
 
 
 def test_get_image_from_url():
-    url = "https://raw.githubusercontent.com/zhiqwang/yolov5-rt-stack/master/test/assets/zidane.jpg"
+    url = "https://huggingface.co/spaces/zhiqwang/assets/resolve/main/zidane.jpg"
     img = get_image_from_url(url)
     assert isinstance(img, np.ndarray)
     assert tuple(img.shape) == (720, 1280, 3)

diff --git a/yolort/models/transform.py b/yolort/models/transform.py
@@ -89,13 +89,15 @@ def _resize_image_and_masks(
 class YOLOTransform(nn.Module):
     """
     Performs input / target transformation before feeding the data to a YOLO model. It plays
-    the same role of `LetterBox`, and YOLOv5 adopt (0, 1, RGB) as the default mean, std and
-    channel mode. We do not normalize below, the inputs need to be scaled down to float [0-1]
-    from int[0-255] and transpose the image channel to RGB before being fed to this transformation.
+    the same role as `LetterBox` in YOLOv5. YOLOv5 uses (0, 1, RGB) as the default mean, std and
+    color channel mode. We do not normalize below; the inputs need to be scaled down to float
+    in [0-1] from uint8 in [0-255] and have the color channel transposed to RGB before being fed
+    to this transformation. We use the `torch.nn.functional.interpolate` and `torch.nn.functional.pad`
+    ops to implement the `LetterBox` to make it jit traceable and scriptable.
 
     The transformations it perform are:
-        - input / target resizing to get a rectangle within shape `(height, width)` that
-            can be divided by `size_divisible`
+        - resizing the input / target within shape `(height, width)` while maintaining the aspect ratio
+        - letterbox padding the input / target so that its shape can be divided by `size_divisible`
 
     It returns a `NestedTensor` for the inputs, and a List[Dict[Tensor]] for the targets.
 

diff --git a/yolort/models/yolo_module.py b/yolort/models/yolo_module.py
@@ -21,8 +21,7 @@
 
 class YOLOv5(LightningModule):
     """
-    Wrapping the pre-processing into YOLO models, we use the `torch.nn.functional.interpolate`
-    and `torch.nn.functional.pad` ops to implement the letterbox to make it scriptable.
+    Wrapping the pre-processing (`LetterBox`) into the YOLO models.
 
     Args:
         lr (float): The initial learning rate
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		../../../notebooks/comparison-between-yolort-vs-yolov5.ipynb