Labelbox · sfendell-labelbox · Mar 28, 2024 · Mar 27, 2024
@@ -744,20 +744,18 @@ def wait_for_data_row_processing():
     DataRow be fully processed with media_attributes
     """
 
-    def func(client, data_row, compare_with_prev_media_attrs=False):
+    def func(client, data_row, custom_check=None):
         """
         custom_check is an optional extra predicate on the refreshed data_row,
         needed because when a data_row is updated from, say, an image to a
         pdf, it already has media_attributes, so without it the loop would
         not wait for the pdf to be processed
         """
-        prev_media_attrs = data_row.media_attributes if compare_with_prev_media_attrs else None
         data_row_id = data_row.uid
         timeout_seconds = 60
         while True:
             data_row = client.get_data_row(data_row_id)
-            if data_row.media_attributes and (prev_media_attrs is None or
-                                              prev_media_attrs
-                                              != data_row.media_attributes):
+            passed_custom_check = not custom_check or custom_check(data_row)
+            if data_row.media_attributes and passed_custom_check:
                 return data_row
             timeout_seconds -= 2
             if timeout_seconds <= 0:

@@ -593,9 +593,10 @@ def test_data_row_update(client, dataset, rand_gen, image_url,
     pdf_url = "https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf"
     tileLayerUrl = "https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483-lb-textlayer.json"
     data_row.update(row_data={'pdfUrl': pdf_url, "tileLayerUrl": tileLayerUrl})
+    custom_check = lambda data_row: data_row.row_data and 'pdfUrl' not in data_row.row_data
     data_row = wait_for_data_row_processing(client,
                                             data_row,
-                                            compare_with_prev_media_attrs=True)
+                                            custom_check=custom_check)
     assert data_row.row_data == pdf_url