From f41c26697545d12d31ec3a4d2ea48aaa821ed8d4 Mon Sep 17 00:00:00 2001 From: SigureMo Date: Tue, 23 Jul 2024 15:15:26 +0800 Subject: [PATCH 1/3] Use tensor in `window_reverse` to avoid precision issue --- paddlemix/models/audioldm2/clap_module/htsat_model.py | 2 +- paddlemix/models/groundingdino/backbone/swin_transformer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/paddlemix/models/audioldm2/clap_module/htsat_model.py b/paddlemix/models/audioldm2/clap_module/htsat_model.py index cb654ccdf..b23ba2a8a 100644 --- a/paddlemix/models/audioldm2/clap_module/htsat_model.py +++ b/paddlemix/models/audioldm2/clap_module/htsat_model.py @@ -214,7 +214,7 @@ def window_reverse(windows, window_size, H, W): Returns: x: (B, H, W, C) """ - B = int(windows.shape[0] / (H * W / window_size / window_size)) + B = int(windows.shape[0] / (H * W / paddle.to_tensor(window_size / window_size))) x = windows.reshape([B, H // window_size, W // window_size, window_size, window_size, -1]) x = x.transpose([0, 1, 3, 2, 4, 5]).reshape([B, H, W, -1]) return x diff --git a/paddlemix/models/groundingdino/backbone/swin_transformer.py b/paddlemix/models/groundingdino/backbone/swin_transformer.py index 76ef99777..63b99d8c6 100644 --- a/paddlemix/models/groundingdino/backbone/swin_transformer.py +++ b/paddlemix/models/groundingdino/backbone/swin_transformer.py @@ -164,7 +164,7 @@ def window_reverse(windows, window_size, H, W): Returns: x: (B, H, W, C) """ - B = int(windows.shape[0] / (H * W / window_size / window_size)) + B = int(windows.shape[0] / (H * W / paddle.to_tensor(window_size / window_size))) x = windows.reshape([B, H // window_size, W // window_size, window_size, window_size, -1]) x = x.transpose([0, 1, 3, 2, 4, 5]).reshape([B, H, W, -1]) return x From 63b213d778211366801cd23d22df415c7f6416d4 Mon Sep 17 00:00:00 2001 From: SigureMo Date: Tue, 23 Jul 2024 15:16:51 +0800 Subject: [PATCH 2/3] use `*` --- paddlemix/models/audioldm2/clap_module/htsat_model.py | 2 +- paddlemix/models/groundingdino/backbone/swin_transformer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/paddlemix/models/audioldm2/clap_module/htsat_model.py b/paddlemix/models/audioldm2/clap_module/htsat_model.py index b23ba2a8a..5005878d0 100644 --- a/paddlemix/models/audioldm2/clap_module/htsat_model.py +++ b/paddlemix/models/audioldm2/clap_module/htsat_model.py @@ -214,7 +214,7 @@ def window_reverse(windows, window_size, H, W): Returns: x: (B, H, W, C) """ - B = int(windows.shape[0] / (H * W / paddle.to_tensor(window_size / window_size))) + B = int(windows.shape[0] / (H * W / paddle.to_tensor(window_size * window_size))) x = windows.reshape([B, H // window_size, W // window_size, window_size, window_size, -1]) x = x.transpose([0, 1, 3, 2, 4, 5]).reshape([B, H, W, -1]) return x diff --git a/paddlemix/models/groundingdino/backbone/swin_transformer.py b/paddlemix/models/groundingdino/backbone/swin_transformer.py index 63b99d8c6..7e8cac3cd 100644 --- a/paddlemix/models/groundingdino/backbone/swin_transformer.py +++ b/paddlemix/models/groundingdino/backbone/swin_transformer.py @@ -164,7 +164,7 @@ def window_reverse(windows, window_size, H, W): Returns: x: (B, H, W, C) """ - B = int(windows.shape[0] / (H * W / paddle.to_tensor(window_size / window_size))) + B = int(windows.shape[0] / (H * W / paddle.to_tensor(window_size * window_size))) x = windows.reshape([B, H // window_size, W // window_size, window_size, window_size, -1]) x = x.transpose([0, 1, 3, 2, 4, 5]).reshape([B, H, W, -1]) return x From fe34891ac48e68ad24b8661728764b3879371df4 Mon Sep 17 00:00:00 2001 From: SigureMo Date: Tue, 23 Jul 2024 15:40:45 +0800 Subject: [PATCH 3/3] manual cast in predict script --- deploy/groundingdino/predict.py | 2 +- paddlemix/examples/groundingdino/run_predict.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/deploy/groundingdino/predict.py b/deploy/groundingdino/predict.py index b2d11ff94..b49a41e0e 100644 --- a/deploy/groundingdino/predict.py +++ b/deploy/groundingdino/predict.py @@ -123,7 +123,7 @@ def plot_boxes_to_image(image_pil, tgt): # draw boxes and masks for box, label in zip(boxes, labels): # from 0..1 to 0..W, 0..H - box = box * paddle.to_tensor([W, H, W, H]) + box = box * paddle.to_tensor([W, H, W, H]).astype(paddle.float32) # from xywh to xyxy box[:2] -= box[2:] / 2 box[2:] += box[:2] diff --git a/paddlemix/examples/groundingdino/run_predict.py b/paddlemix/examples/groundingdino/run_predict.py index dc61f0324..8641fe3df 100644 --- a/paddlemix/examples/groundingdino/run_predict.py +++ b/paddlemix/examples/groundingdino/run_predict.py @@ -40,7 +40,7 @@ def plot_boxes_to_image(image_pil, tgt): # draw boxes and masks for box, label in zip(boxes, labels): # from 0..1 to 0..W, 0..H - box = box * paddle.to_tensor([W, H, W, H]) + box = box * paddle.to_tensor([W, H, W, H]).astype(paddle.float32) # from xywh to xyxy box[:2] -= box[2:] / 2 box[2:] += box[:2]