diff --git a/nerfstudio/data/dataparsers/colmap_dataparser.py b/nerfstudio/data/dataparsers/colmap_dataparser.py index a709361807..e808250c32 100644 --- a/nerfstudio/data/dataparsers/colmap_dataparser.py +++ b/nerfstudio/data/dataparsers/colmap_dataparser.py @@ -39,6 +39,7 @@ get_train_eval_split_interval, ) from nerfstudio.process_data.colmap_utils import parse_colmap_camera_params +from nerfstudio.utils.misc import set_pil_image_size_limit from nerfstudio.utils.rich_utils import CONSOLE, status from nerfstudio.utils.scripts import run_command @@ -481,7 +482,8 @@ def calculate_scaled_size(original_width, original_height, downscale_factor, mod # Using %05d ffmpeg commands appears to be unreliable (skips images). for path in paths: # Compute image-wise rescaled width/height. - img = Image.open(path) + with set_pil_image_size_limit(None): + img = Image.open(path) w, h = img.size w_scaled, h_scaled = calculate_scaled_size(w, h, downscale_factor, downscale_rounding_mode) # Downscale images using ffmpeg. 
@@ -514,7 +516,8 @@ def get_fname(parent: Path, filepath: Path) -> Path: filepath = next(iter(image_filenames)) if self._downscale_factor is None: if self.config.downscale_factor is None: - test_img = Image.open(filepath) + with set_pil_image_size_limit(None): + test_img = Image.open(filepath) w, h = test_img.size max_res = max(h, w) df = 0 diff --git a/nerfstudio/data/dataparsers/nerfstudio_dataparser.py b/nerfstudio/data/dataparsers/nerfstudio_dataparser.py index e11902c094..32d88f42f3 100644 --- a/nerfstudio/data/dataparsers/nerfstudio_dataparser.py +++ b/nerfstudio/data/dataparsers/nerfstudio_dataparser.py @@ -34,6 +34,7 @@ get_train_eval_split_interval, ) from nerfstudio.utils.io import load_from_json +from nerfstudio.utils.misc import set_pil_image_size_limit from nerfstudio.utils.rich_utils import CONSOLE MAX_AUTO_RESOLUTION = 1600 @@ -469,7 +470,8 @@ def _get_fname(self, filepath: Path, data_dir: Path, downsample_folder_prefix="i if self.downscale_factor is None: if self.config.downscale_factor is None: - test_img = Image.open(data_dir / filepath) + with set_pil_image_size_limit(None): + test_img = Image.open(data_dir / filepath) h, w = test_img.size max_res = max(h, w) df = 0 diff --git a/nerfstudio/data/datasets/base_dataset.py b/nerfstudio/data/datasets/base_dataset.py index 449720bd36..87a51ca638 100644 --- a/nerfstudio/data/datasets/base_dataset.py +++ b/nerfstudio/data/datasets/base_dataset.py @@ -33,6 +33,7 @@ from nerfstudio.cameras.cameras import Cameras from nerfstudio.data.dataparsers.base_dataparser import DataparserOutputs from nerfstudio.data.utils.data_utils import get_image_mask_tensor_from_path +from nerfstudio.utils.misc import set_pil_image_size_limit class InputDataset(Dataset): @@ -66,7 +67,8 @@ def get_numpy_image(self, image_idx: int) -> npt.NDArray[np.uint8]: image_idx: The image index in the dataset. 
""" image_filename = self._dataparser_outputs.image_filenames[image_idx] - pil_image = Image.open(image_filename) + with set_pil_image_size_limit(None): + pil_image = Image.open(image_filename) if self.scale_factor != 1.0: width, height = pil_image.size newsize = (int(width * self.scale_factor), int(height * self.scale_factor)) diff --git a/nerfstudio/data/datasets/depth_dataset.py b/nerfstudio/data/datasets/depth_dataset.py index f2f1ab86ad..763dabd1e3 100644 --- a/nerfstudio/data/datasets/depth_dataset.py +++ b/nerfstudio/data/datasets/depth_dataset.py @@ -29,7 +29,7 @@ from nerfstudio.data.datasets.base_dataset import InputDataset from nerfstudio.data.utils.data_utils import get_depth_image_from_path from nerfstudio.model_components import losses -from nerfstudio.utils.misc import torch_compile +from nerfstudio.utils.misc import set_pil_image_size_limit, torch_compile from nerfstudio.utils.rich_utils import CONSOLE @@ -79,7 +79,8 @@ def __init__(self, dataparser_outputs: DataparserOutputs, scale_factor: float = for i in track(range(len(filenames)), description="Generating depth images"): image_filename = filenames[i] - pil_image = Image.open(image_filename) + with set_pil_image_size_limit(None): + pil_image = Image.open(image_filename) image = np.array(pil_image, dtype="uint8") # shape is (h, w) or (h, w, 3 or 4) if len(image.shape) == 2: image = image[:, :, None].repeat(3, axis=2) diff --git a/nerfstudio/data/utils/data_utils.py b/nerfstudio/data/utils/data_utils.py index 11ce74d9da..f2ee6eb70f 100644 --- a/nerfstudio/data/utils/data_utils.py +++ b/nerfstudio/data/utils/data_utils.py @@ -22,12 +22,15 @@ import torch from PIL import Image +from nerfstudio.utils.misc import set_pil_image_size_limit + def get_image_mask_tensor_from_path(filepath: Path, scale_factor: float = 1.0) -> torch.Tensor: """ Utility function to read a mask image from the given path and return a boolean tensor """ - pil_mask = Image.open(filepath) + with set_pil_image_size_limit(None): + 
pil_mask = Image.open(filepath) if scale_factor != 1.0: width, height = pil_mask.size newsize = (int(width * scale_factor), int(height * scale_factor)) @@ -47,7 +50,8 @@ def get_semantics_and_mask_tensors_from_path( """ if isinstance(mask_indices, List): mask_indices = torch.tensor(mask_indices, dtype=torch.int64).view(1, 1, -1) - pil_image = Image.open(filepath) + with set_pil_image_size_limit(None): + pil_image = Image.open(filepath) if scale_factor != 1.0: width, height = pil_image.size newsize = (int(width * scale_factor), int(height * scale_factor)) diff --git a/nerfstudio/generative/deepfloyd.py b/nerfstudio/generative/deepfloyd.py index a2faa9b75b..caae85243d 100644 --- a/nerfstudio/generative/deepfloyd.py +++ b/nerfstudio/generative/deepfloyd.py @@ -206,8 +206,7 @@ def prompt_to_image( Returns: The generated image. """ - - from diffusers import DiffusionPipeline, IFPipeline as IFOrig + from diffusers import DiffusionPipeline, IFPipeline from diffusers.pipelines.deepfloyd_if import IFPipelineOutput as IFOutputOrig prompts = [prompts] if isinstance(prompts, str) else prompts @@ -215,7 +214,7 @@ def prompt_to_image( assert isinstance(self.pipe, DiffusionPipeline) prompt_embeds, negative_embeds = self.pipe.encode_prompt(prompts, negative_prompt=negative_prompts) - assert isinstance(self.pipe, IFOrig) + assert isinstance(self.pipe, IFPipeline) model_output = self.pipe( prompt_embeds=prompt_embeds, negative_prompt_embeds=negative_embeds, generator=generator ) diff --git a/nerfstudio/process_data/realitycapture_utils.py b/nerfstudio/process_data/realitycapture_utils.py index 7cf2696ae4..ea83b3ae70 100644 --- a/nerfstudio/process_data/realitycapture_utils.py +++ b/nerfstudio/process_data/realitycapture_utils.py @@ -24,6 +24,7 @@ from PIL import Image from nerfstudio.process_data.process_data_utils import CAMERA_MODELS +from nerfstudio.utils.misc import set_pil_image_size_limit from nerfstudio.utils.rich_utils import CONSOLE @@ -71,7 +72,8 @@ def 
@contextlib.contextmanager
def set_pil_image_size_limit(max_pixels: Optional[int]):
    """Temporarily override PIL's ``Image.MAX_IMAGE_PIXELS`` inside a ``with`` block.

    By default PIL limits the max image size, preventing processing or training with
    high resolution images. Use this context manager to disable the limit or set a
    custom one for the duration of the block. The previous value is always restored
    on exit, including when the body raises (e.g. ``Image.open`` failing on a missing
    or corrupt file).

    Note that this rebinds a module-level attribute of ``PIL.Image``, so the override
    is visible to all code in the process while the block is active.

    :param max_pixels: Max number of pixels for image processing in PIL, or ``None``
        to disable the limit.
    :type max_pixels: Optional[int]
    """
    orig = Image.MAX_IMAGE_PIXELS
    Image.MAX_IMAGE_PIXELS = max_pixels
    try:
        yield
    finally:
        # Restore unconditionally: without ``finally`` an exception raised inside the
        # ``with`` body would skip this line and leave the override in place.
        Image.MAX_IMAGE_PIXELS = orig