diff --git a/.flake8 b/.flake8 index acb1a9e..b36791b 100644 --- a/.flake8 +++ b/.flake8 @@ -49,6 +49,10 @@ extend-ignore = # Google Python style is not RST until after processed by Napoleon # See https://github.com/peterjc/flake8-rst-docstrings/issues/17 RST201,RST203,RST301, + # It happens too often + C416, C419, + # This is new and cba to change the repo + S113 extend-select = # Should raise AssertionError instead of assert False B011, @@ -69,8 +73,6 @@ extend-select = # Within an except clause, raise exceptions with `raise ... from err` or `raise ... # from None` to distinguish them from errors in exception handling B904, - # Alternative to E501 regarding line length - B950, # Counterpart to W503, enforce having the operator at the start of a new line. W504, diff --git a/src/emma_policy/api/clients/feature_extractor.py b/src/emma_policy/api/clients/feature_extractor.py index 0f9c693..c41deb0 100644 --- a/src/emma_policy/api/clients/feature_extractor.py +++ b/src/emma_policy/api/clients/feature_extractor.py @@ -70,7 +70,7 @@ def extract_single_image(self, image: Union[Image.Image, ArrayLike]) -> FeatureR try: response.raise_for_status() except requests.exceptions.HTTPError as err: - raise SystemExit(err) + raise SystemExit(err) from err data = response.json() feature_response = FeatureResponse( @@ -103,7 +103,7 @@ def extract_batch_images( try: response.raise_for_status() except requests.exceptions.HTTPError as err: - raise SystemExit(err) + raise SystemExit(err) from err data = response.json() diff --git a/src/emma_policy/datamodules/coco_captioning_dataset.py b/src/emma_policy/datamodules/coco_captioning_dataset.py index 9da3dd4..be3a6e5 100644 --- a/src/emma_policy/datamodules/coco_captioning_dataset.py +++ b/src/emma_policy/datamodules/coco_captioning_dataset.py @@ -29,7 +29,6 @@ def __init__( merged_annotations: bool = True, is_train: bool = True, ) -> None: - if not merged_annotations: raise NotImplementedError( "Expecting dbs where every instance is an image associated with all of its captions." diff --git a/src/emma_policy/datamodules/nlvr2_dataset.py b/src/emma_policy/datamodules/nlvr2_dataset.py index 968b0f9..7b733bb 100644 --- a/src/emma_policy/datamodules/nlvr2_dataset.py +++ b/src/emma_policy/datamodules/nlvr2_dataset.py @@ -27,7 +27,6 @@ def __init__( max_frames: int = 0, use_task_prefix: bool = False, ) -> None: - super().__init__( dataset_db_path=dataset_db_path, tokenizer=tokenizer, max_frames=max_frames ) diff --git a/src/emma_policy/datamodules/pretrain_dataset.py b/src/emma_policy/datamodules/pretrain_dataset.py index ceff55d..3083d98 100644 --- a/src/emma_policy/datamodules/pretrain_dataset.py +++ b/src/emma_policy/datamodules/pretrain_dataset.py @@ -833,7 +833,6 @@ def _region_mapping( width: int, height: int, ) -> tuple[torch.Tensor, torch.Tensor]: - gt_bbox = [] for region in regions: gt_bbox_coord = BoxMode.convert( @@ -894,7 +893,6 @@ def _convert_trajectory_to_text( trajectory_text.extend(split_action_name(action.api_action.action)) # Match the object to a predicted bounding box if "bbox" in action.discrete_action.args: - bbox_coord = action.discrete_action.args["bbox"] # noqa: WPS529 gt_bbox = torch.tensor( [ diff --git a/src/emma_policy/datamodules/refcoco_dataset.py b/src/emma_policy/datamodules/refcoco_dataset.py index a65cd41..2f67c83 100644 --- a/src/emma_policy/datamodules/refcoco_dataset.py +++ b/src/emma_policy/datamodules/refcoco_dataset.py @@ -35,7 +35,6 @@ def __init__( shuffle_objects: bool = False, train_with_golden_bbox_prob: float = 1.0, ) -> None: - super().__init__( dataset_db_path=dataset_db_path, tokenizer=tokenizer, diff --git a/src/emma_policy/datamodules/vqa_v2_dataset.py b/src/emma_policy/datamodules/vqa_v2_dataset.py index 34bf2a0..59040dc 100644 --- a/src/emma_policy/datamodules/vqa_v2_dataset.py +++ b/src/emma_policy/datamodules/vqa_v2_dataset.py @@ -27,7 +27,6 @@ def __init__( tokenizer: PreTrainedTokenizer, max_frames: int = 0, ) -> None: - super().__init__( dataset_db_path=dataset_db_path, tokenizer=tokenizer, diff --git a/src/emma_policy/models/decoder_emma.py b/src/emma_policy/models/decoder_emma.py index 56e40f8..b209e2e 100644 --- a/src/emma_policy/models/decoder_emma.py +++ b/src/emma_policy/models/decoder_emma.py @@ -50,7 +50,6 @@ def decoder_layer_outputs( ) -> tuple[torch.FloatTensor, ...]: """Get output from a single decoder layer.""" if self.gradient_checkpointing and self.training: - if use_cache: logger.warning( "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." diff --git a/src/emma_policy/models/loss_utils.py b/src/emma_policy/models/loss_utils.py index ce61817..2694b62 100644 --- a/src/emma_policy/models/loss_utils.py +++ b/src/emma_policy/models/loss_utils.py @@ -9,7 +9,8 @@ def tiny_value_of_dtype(dtype: torch.dtype) -> float: This is used to avoid numerical issues such as division by zero. This is different from `info_value_of_dtype(dtype).tiny` because it causes some NaN bugs. Only supports floating point - dtypes. Implementation from AllenNLP: https://github.com/allenai/allennlp/blob/39c40fe38cd2fd36b3465b0b3c031f54ec824160/allennlp/nn/util.py#L2010-L2024 + dtypes. Implementation from AllenNLP: + https://github.com/allenai/allennlp/blob/39c40fe38cd2fd36b3465b0b3c031f54ec824160/allennlp/nn/util.py#L2010-L2024 """ if not dtype.is_floating_point: raise TypeError("Only supports floating point dtypes.") diff --git a/src/emma_policy/models/model_output_emma.py b/src/emma_policy/models/model_output_emma.py index 6d4ae23..24f1545 100644 --- a/src/emma_policy/models/model_output_emma.py +++ b/src/emma_policy/models/model_output_emma.py @@ -7,7 +7,9 @@ @dataclass class EmmaSeq2SeqModelOutput(ModelOutput): - """Base class for model encoder's outputs that also contains : pre-computed hidden states that can speed up sequential decoding. + """Base class for encoder outputs. + + Also contains pre-computed hidden states that can speed up sequential decoding. Args: last_hidden_state (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`): diff --git a/src/emma_policy/models/nlvr2_emma_policy.py b/src/emma_policy/models/nlvr2_emma_policy.py index 99750f8..3cc4319 100644 --- a/src/emma_policy/models/nlvr2_emma_policy.py +++ b/src/emma_policy/models/nlvr2_emma_policy.py @@ -35,7 +35,6 @@ def __init__( nlvr2_metrics: Optional[list[str]] = None, **kwargs: Any, ) -> None: - self._tokenizer = AutoTokenizer.from_pretrained(model_name) self._pred_gt: dict[str, list[str]] = { "predictions": [], diff --git a/src/emma_policy/utils/boxes.py b/src/emma_policy/utils/boxes.py index 371cf30..7b8fb71 100644 --- a/src/emma_policy/utils/boxes.py +++ b/src/emma_policy/utils/boxes.py @@ -100,10 +100,10 @@ def _convert(self, box: torch.Tensor, from_mode: BoxMode, to_mode: BoxMode) -> t try: converted_box = convert_functions[from_mode][to_mode](box) - except KeyError: + except KeyError as err: raise NotImplementedError( f"Conversion from BoxMode {from_mode} to {to_mode} is not supported." - ) + ) from err return converted_box diff --git a/src/emma_policy/utils/masks.py b/src/emma_policy/utils/masks.py index 5423ca2..18ef5c1 100644 --- a/src/emma_policy/utils/masks.py +++ b/src/emma_policy/utils/masks.py @@ -13,7 +13,8 @@ def decompress_simbot_mask( ) -> Union[torch.Tensor, typing.NDArray[np.float64]]: """Decompress a compressed mask array. - Adopted from https://us-east-1.console.aws.amazon.com/codesuite/codecommit/repositories/AlexaSimbotMLToolbox/browse/refs/heads/main/--/AlexaSimbotToolbox/arena_wrapper/util/__init__.py?region=us-east-1 + Adopted from + https://us-east-1.console.aws.amazon.com/codesuite/codecommit/repositories/AlexaSimbotMLToolbox/browse/refs/heads/main/--/AlexaSimbotToolbox/arena_wrapper/util/__init__.py?region=us-east-1 """ mask = np.zeros((image_width, image_height)) for start_idx, run_len in compressed_mask: @@ -29,7 +30,8 @@ def compress_simbot_mask( ) -> list[list[int]]: """Compress a binary 2D array mask for the simbot arena. - Adopted from https://us-east-1.console.aws.amazon.com/codesuite/codecommit/repositories/AlexaSimbotMLToolbox/browse/refs/heads/main/--/AlexaSimbotToolbox/arena_wrapper/util/__init__.py?region=us-east-1 + Adopted from + https://us-east-1.console.aws.amazon.com/codesuite/codecommit/repositories/AlexaSimbotMLToolbox/browse/refs/heads/main/--/AlexaSimbotToolbox/arena_wrapper/util/__init__.py?region=us-east-1 """ # list of lists of run lengths for 1s, which are assumed to be less frequent. run_len_compressed: list[list[int]] = [] diff --git a/src/emma_policy/utils/samplers/distributed_weighted_sampler.py b/src/emma_policy/utils/samplers/distributed_weighted_sampler.py index 5397de0..ac6573f 100644 --- a/src/emma_policy/utils/samplers/distributed_weighted_sampler.py +++ b/src/emma_policy/utils/samplers/distributed_weighted_sampler.py @@ -17,7 +17,6 @@ def __init__( rank: Optional[int] = None, replacement: bool = True, ) -> None: - if num_replicas is None: if not torch.distributed.is_available(): raise RuntimeError("Requires distributed package to be available") diff --git a/tests/datamodules/test_datamodule_helpers.py b/tests/datamodules/test_datamodule_helpers.py index 431c96a..af2de3d 100644 --- a/tests/datamodules/test_datamodule_helpers.py +++ b/tests/datamodules/test_datamodule_helpers.py @@ -48,7 +48,6 @@ def test_simbot_target_tokens( action_text: str, emma_tokenizer: EmmaTokenizer, ) -> None: - target_encoding = emma_tokenizer.encode_plus(target_text, return_tensors="pt", truncation=True) full_target_token_ids = target_encoding.input_ids.squeeze(0) target_token_ids = mask_past_target_actions(