forked from kylesargent/ZeroNVS
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlaunch_noSDS.py
77 lines (62 loc) · 2.85 KB
/
launch_noSDS.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# Script that generates a novel view with the Zero123 guidance module; it is
# modelled on threestudio/models/guidance/zero123_guidance.py.
# cond_camera and target_camera below use the OpenGL coordinate convention.
from ldm.models.diffusion import options
# The flag is set before zero123_guidance is imported on the next line.
# NOTE(review): presumably the guidance/LDM code reads this flag at import or
# model-construction time, so keep this ordering -- confirm.
options.LDM_DISTILLATION_ONLY = True
from threestudio.models.guidance import zero123_guidance
from omegaconf import OmegaConf
# OmegaConf is a library for parsing configuration from YAML files or
# dictionaries; it supports loading, merging and overriding values.
# Reference: launch_inference.sh.
# zeronvs_config.yaml is the pretrained-model config and zero123_scene.yaml is
# the main config; launch_inference.sh overrides some values in
# zero123_scene.yaml.
# Conditioning image for this run. Original note for this file: 384x384,
# "Expected size 32 but got size 48".
image_path = "data/image_test/000000_rgb.png"
# Alternative input kept for reference (original note: 400x300,
# "Expected size 32 but got size 37"):
# image_path = "rs_dtu_4/DTU/scan6/image/000000.png"

# Settings handed to Zero123Guidance; keys and values are passed through
# OmegaConf unchanged.
guidance_cfg = {
    "pretrained_model_name_or_path": "zeronvs.ckpt",
    "pretrained_config": "zeronvs_config.yaml",
    "guidance_scale": 7.5,
    "cond_image_path": image_path,
    "min_step_percent": [0, .75, .02, 1000],
    "max_step_percent": [1000, 0.98, 0.025, 2500],
    "vram_O": False,  # whether to optimize RAM use
}
guidance_conf = OmegaConf.create(guidance_cfg)
guidance = zero123_guidance.Zero123Guidance(guidance_conf)
from PIL import Image
import numpy as np
import torch
# Read the conditioning image, force three RGB channels and bring it to
# 256x256 before it is embedded.  # XXX (resize kept from the original)
cond_image_pil = Image.open(image_path).convert("RGB").resize((256, 256))
# Move the pixels to the GPU and scale them from 0..255 down to 0..1; the
# layout at this point is (H, W, C).
cond_image = torch.from_numpy(np.array(cond_image_pil)).cuda() / 255.
# The embedder receives a batched, channels-first tensor:
# (H, W, C) -> (C, H, W) -> (1, C, H, W).
cond_image_bchw = cond_image.permute(2, 0, 1).unsqueeze(0)
c_crossattn, c_concat = guidance.get_img_embeds(cond_image_bchw)
# XXX HERE: conditioning camera and target camera.
# The conditioning camera is the identity pose; the target camera is the same
# pose with its translation column set to (.125, .125, .125).
cond_pose = np.eye(4)
target_pose = cond_pose.copy()
target_pose[:3, 3] = [.125, .125, .125]

# Add a leading batch axis -> (1, 4, 4), move to the GPU, cast to float32.
cond_camera = torch.from_numpy(cond_pose[None]).cuda().float()
target_camera = torch.from_numpy(target_pose[None]).cuda().float()

# NOTE(review): "fov_deg" is presumably the camera field of view in degrees
# (the original author was unsure) -- confirm against
# get_cond_from_known_camera.
fov_deg = torch.tensor([45.0], dtype=torch.float32).cuda()

camera_batch = {
    "target_cam2world": target_camera,  # needs a pose matrix
    "cond_cam2world": cond_camera,
    "fov_deg": fov_deg,
}
# precomputed_scale is supplied through the guidance config; the equivalent
# keyword argument (precomputed_scale=.7) was left disabled in the call.
guidance.cfg.precomputed_scale = .7
cond = guidance.get_cond_from_known_camera(
    camera_batch, c_crossattn=c_crossattn, c_concat=c_concat
)

# Debug output: shapes of the camera poses and of the image embeddings.
print("------camerabatch--------")
for pose_key in ("cond_cam2world", "target_cam2world"):
    print(camera_batch[pose_key].shape)  # (1,4,4)
print("------crossattn----------")
print(c_crossattn.shape)  # (1,1,768)
print("--------concat------------")
# Original note: (1,4,48,48) gave "Expected size 32 but got size 48 for
# tensor number 1 in the list."
print(c_concat.shape)
import os

# Create the output directory before the generation step: Image.save does not
# create missing folders, and failing after generation would discard the result.
output_dir = "data/image_output"
os.makedirs(output_dir, exist_ok=True)

# Generate the target view from the conditioning computed above.
novel_view = guidance.gen_from_cond(cond)
# Take the first item of the batch, rescale to 0..255 and convert to 8-bit.
# NOTE(review): assumes novel_view[0] is an (H, W, C) array with values in
# [0, 1] -- confirm against gen_from_cond.
novel_view_pil = Image.fromarray(
    np.clip(novel_view[0] * 255, 0, 255).astype(np.uint8)
)
# In a notebook the images can be previewed instead of saved:
# display(cond_image_pil)
# display(novel_view_pil)
cond_image_pil.save(f"{output_dir}/cond.png")
novel_view_pil.save(f"{output_dir}/novel.png")