-
Notifications
You must be signed in to change notification settings - Fork 2
/
compositions.py
224 lines (194 loc) · 11.8 KB
/
compositions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
import exrex
import numpy as np
# Action phrases that only apply to living subjects (humans/animals).
# Every entry is a small regex; one concrete phrase is generated from it
# at sampling time.
animal_action_regexs = [
    "lifting a (rock|box|barbell|cat|dog)",
    "doing (makeup|housekeeping|gardening|exercise)",
    "carrying a (bag|backpack|luggage|laptop|book|briefcase|purse|suitcase|bouquet|baby|cat|dog|teddy bear)",
    "holding a (mobile phone|book|cup of water|piece of paper|flower|bouquet|pen|sign|cat|dog|teddy bear|baby|rock|leaf|mushroom|stick|fruit)",
    "(sitting|sleeping) (on a table|on a chair|on a bench|on a tank|in a wheelchair|on the ground|on flying cloud)",
    "swimming (in a pool|underwater|in the ocean|in a lake|in a river)( among tropical fishes)?",
    "pushing a (door|table|car|wheelchair|stroller|shopping cart|bicycle|motorcycle|scooter)",
    "running (in a forest|in a park|at the beach|over forest leaves|on a trail|under the moon|on a treadmill)",
    "walking (in a forest|in a park|at the beach|over forest leaves|on a trail|under the moon|on a treadmill)",
    "throwing (a ball|a rock|water|a dart|a frisbee|a knife|a javelin)",
    "catching (a ball|an arrow|a butterfly|a fish|a leaf)",
    "kicking a (ball|bottle|tree|rock|punching bag|pole|box)",
    "playing (a card game|a video game|a piano|a violin|basketball|tennis)",
    "riding a (bike|motorcycle|scooter|horse|car|bus|train|boat)",
    "(kissing|hugging|holding) a (boy|girl|baby|lady|man|cat)",
    "dancing with a (boy|girl|lady|man|villager)",
    "standing (besides a friend|besides a tree|besides a car|in a river|on a table|on a stair|on a board|on a box)",
    "opening a (door|window|book|bottle|jar|box|envelope|bag|pouch|wallet|suitcase)",
    "pointing at (the sky|the sun|the beach|the mountains|the forest)",
    "looking at (a book|a mobile phone|the screen|the sky|the sun|the beach|a UFO|a painting|a clock|a mirror)",
    "drinking (a bottle of water|a cup of wine|beer|milk|a glass of juice|a cup of tea)",
    "eating (a sandwich|an ice cream|a pizza|a burger|pasta|cake|sushi|soup|tacos)",
]
# Clothing / costume phrases. They describe a static appearance, but only make
# sense for humans/animals, so they live with the animal-only compositions.
animal_dresses = [
    "wearing a (tshirt|stormtrooper costume|superman costume|ironman armor|ski outfit|astronaut outfit|suit|baseball cap)",
    "wearing (a red hat|a santa hat|a rainbow scarf|a black top hat and a monocle|pink glasses|a yellow shirt|aikido uniform|green robe)",
    # The article is supplied by the "in a " prefix, so the alternatives must
    # not carry their own (otherwise the phrase reads "in a a purple ...").
    "in a (chef outfit|firefighter outfit|police outfit|purple wizard outfit|dress|suit|stormtrooper costume|superman costume)",
]
# Static compositions: usable for humans/animals and for inanimate objects.
static_action_regexs = [
    "leaning (against a wall|against a tree|against a table|on a chair|on top of a car)",
    "flying (in the sky|under the sunset|in the outer space|over water|over a building)",
    # Regexes are sampled uniformly, so a regex with many alternatives would
    # under-represent each of them. Long "on"/"in" lists are therefore split
    # into two entries.
    "on (an airplane|a bus|a busy street|a grass|a roof|an escalator|a train)",
    "on (a boat|a bike|a roller coaster|a scooter)",
    "in (a car|a meeting|a class|a wedding|a dinner|a concert|a gym|a library|a park)",
    "in (a mall|a movie theater|a hotel room|Hong Kong|Tokyo|New York)",
    "at (a beach|a table|a park|a concert|a gym|a library|a mall|a movie theater|a hotel room|a theme park)",
    "next to (a friend|a tree|a car|a river|a lake|a mountain|an ocean|a playground|a statue|a panda)",
    "made of (metal|stainless steel|fractal flame|marble|rubber|bronze|ice)",
    # The remaining prompts come from the DreamBooth evaluation dataset:
    # https://github.com/google/dreambooth/blob/main/dataset/prompts_and_classes.txt
    "(in the jungle|in the snow|on a cobblestone street|floating on top of water|floating in an ocean of milk)",
    "on top of (pink fabric|a wooden floor|green grass with sunflowers around it|a mirror|the sidewalk in a crowded street|a dirt road|a white rug|a purple rug in a forest)",
]

static_appearances = [
    # Phrased as "that is <attribute>" rather than "a red/purple z" to avoid
    # misalignment issues.
    "that is (red|purple|shiny|cube|wet)",
]
# Derived pools. Humans/animals may use every regex; inanimate objects are
# limited to the static actions and static appearances.
all_action_regexs = [*static_action_regexs, *animal_action_regexs]
all_dress_regexs = [*static_appearances, *animal_dresses]
all_composition_regexs = [*all_action_regexs, *all_dress_regexs]
static_composition_regexs = [*static_action_regexs, *static_appearances]
# Location phrases used to place a second, common animal/human in the image,
# e.g. "a z at the left, a dog in the center".
all_locations = [
    "at the left",
    "at the right",
    "at the top",
    "at the bottom",
    "in the center",
    "in the middle",
    "at the upper left",
    "at the upper right",
    "at the lower left",
    "at the lower right",
    "in the background",
]

# Candidates for the second object. Only humans/animals are listed:
# non-animals tend not to mix features with subjects, so they are left out.
coexist_objects = [
    "person", "man", "woman", "girl", "boy", "baby", "crowd", "villager",
    "cat", "dog", "bird", "panda", "monkey", "chimpanzee", "gorilla", "bear",
    "horse", "sheep", "elephant", "lion",
    # Excluded non-animals, kept for reference:
    # "stone", "tree", "flower", "rock", "mountain", "grass", "cloud", "sun",
    # "moon", "stars", "fire", "lake", "ocean", "river", "beach", "village",
    # "house", "car", "bus", "train", "boat", "bike", "building", "tower"
]
# Style names. Some entries already carry a trailing "style"/"art" word.
all_styles = [
    "cartoon style",
    "animation",
    "anime art",
    "comic book art",
    "steampunk art",
    "oil on canvas",
    "oil painting",
    "sci-fi movie",
    "sculpture",
    "bronze sculpture",
    "abyss art",
    "blade runner style",
    "cyberpunk art",
    "synthwave",
    "pencil sketch",
    "pastel colors",
    "childrens book's illustration",
    "pixar movie",
    "as a crochet figure",
    "as a 3d model",
    "closeup shot",
    "close view",
    "D&D sci-fi",
    "pop art",
    "portrait art",
    "watercolour painting",
    "chalk art",
    "concepture art",
    "bauhaus style",
    "photorealistic painting",
    "surrealism painting",
    "impressionism",
    "expressionism",
    "abstract art",
    "minimalism",
    "low poly",
    "cubism style",
    "funko pop",
    "concept art",
    "realistic painting",
    "character design",
    "anime sketch",
    "trending in artstation",
    "vivid colors",
    "clear face",
    "detailed face",
    "semirealism",
    "octane render",
    "unreal 5",
    "digital painting",
    "illustration",
    "volumetric lighting",
    "dreamy",
    "cinematic",
    "surreal",
    "pixelate",
    "macabre",
]
# Time-period themes.
all_time = [
    "futuristic",
    "modern",
    "ancient",
    "antique",
    "retro",
    "old-fashioned",
    "youthful",
]

# Lighting phrases.
# NOTE: "natural light" appears twice, which doubles its weight when the list
# is sampled uniformly.
all_light = [
    "daylight",
    "moonlight",
    "night sky",
    "natural light",
    "front light",
    "backlight",
    "soft light",
    "hard light",
    "moody light",
    "dramatic light",
    "dynamic light",
    "natural light",
    "at night",
]

# Artist / studio names used for "art by ..." modifiers.
all_art_by = [
    "miho hirano",
    "makoto shinkai",
    "artgerm",
    "greg rutkowski",
    "magali villeneuve",
    "mark ryden",
    "hayao miyazaki",
    "agnes Lawrence",
    "disney animation studio",
]
# Background scenes; a sampled entry is wrapped as "with <entry> as background".
all_backgrounds = [
    "a beach",
    "a table",
    "a park",
    "a concert",
    "a gym",
    "a library",
    "a mall",
    "a movie theater",
    "a hotel room",
    "a theme park",
    "a city",
    "a mountain",
    "a blue house",
    "a wheat field",
    "a tree and autumn leaves",
    "the Eiffel Tower",
    "a jungle",
    "the snow",
    "a cobblestone street",
    "underwater",
    "an ocean of milk",
    "pink fabric",
    "a wooden floor",
    "green grass with sunflowers around it",
    "a mirror",
    "the sidewalk in a crowded street",
    "a dirt road",
    "a white rug",
    "a purple rug in a forest",
    "a red cube",
    "a purple cube",
    "a building",
]
# Debug switch: when True, sample_compositions() draws only from
# PRESET_DEBUG_PROMPTS and returns empty modifiers.
Debug_Prompts = False
PRESET_DEBUG_PROMPTS = [
    "with a city in the background",
    # "on a cobblestone street", "on top of a wooden floor"
]
def sample_compositions(N, subj_type):
    """Sample N random composition prompts together with modifier strings.

    Args:
        N: Number of (composition prompt, modifier) pairs to generate.
        subj_type: 'animal' to draw from every composition regex, or
            'object' to draw from the static compositions only.

    Returns:
        A tuple ``(compos_prompts, modifiers)`` of two lists of length N.
        ``compos_prompts[i]`` is the expanded composition, optionally followed
        by a background phrase; ``modifiers[i]`` is the comma-separated
        time / style / light / "art by" phrases (empty parts are skipped).
        When ``Debug_Prompts`` is set, prompts come from
        ``PRESET_DEBUG_PROMPTS`` and every modifier is "".

    Raises:
        ValueError: If ``subj_type`` is neither 'animal' nor 'object'
            (not checked in debug mode).
    """
    compos_prompts = []
    modifiers = []

    if Debug_Prompts:
        K = len(PRESET_DEBUG_PROMPTS)
        for _ in range(N):
            idx = np.random.choice(K)
            compos_prompts.append(PRESET_DEBUG_PROMPTS[idx])
            modifiers.append("")
        return compos_prompts, modifiers

    if subj_type == 'animal':
        composition_regexs = all_composition_regexs
    elif subj_type == 'object':
        composition_regexs = static_composition_regexs
    else:
        raise ValueError(f"Unknown subject type: {subj_type}")

    K = len(composition_regexs)
    # Lower variations during training, to focus on the main semantics.
    # 0.75: option 0 (without certain components),
    # 0.25: option 1 (with certain components).
    option_probs = [0.75, 0.25]
    # 0.4: option 0 (without background),
    # 0.6: option 1 (with background).
    background_probs = [0.4, 0.6]
    # 0.3: no style, 0.2: "photorealistic", 0.5: one or two sampled styles.
    style_probs = [0.3, 0.2, 0.5]

    for _ in range(N):
        # Regexes are picked uniformly, then expanded into one concrete phrase.
        idx = np.random.choice(K)
        composition = exrex.getone(composition_regexs[idx])

        # A second object is disabled for every subject type. For non-animal
        # subjects this keeps the other object from stealing the spotlight;
        # for animals it could be re-enabled by sampling this flag, e.g.
        # np.random.choice([0, 1], p=[0.7, 0.3]).
        has_another_obj = False
        if has_another_obj:
            object2 = np.random.choice(coexist_objects)
            location2 = np.random.choice(all_locations)
            obj_loc2 = "a " + object2 + " " + location2
        else:
            obj_loc2 = ""

        style_kind = np.random.choice([0, 1, 2], p=style_probs)
        if style_kind == 2:
            num_styles = np.random.choice([1, 2])
            styles = np.random.choice(all_styles, size=num_styles, replace=False)
            style = "in " + " and ".join(styles)
            # Several style names already end in "style" (e.g. "cartoon
            # style"); only add the suffix when it is missing, so the phrase
            # never ends in "style style".
            if not style.endswith(" style"):
                style += " style"
        elif style_kind == 1:
            style = "photorealistic"
        else:
            style = ""

        if np.random.choice([0, 1], p=option_probs):
            num_art_by = np.random.choice([1, 2, 3])
            art_bys = np.random.choice(all_art_by, size=num_art_by, replace=False)
            art_by = "art by " + " and ".join(art_bys)
        else:
            art_by = ""

        if np.random.choice([0, 1], p=background_probs):
            background = "with " + np.random.choice(all_backgrounds) + " as background"
        else:
            background = ""

        if np.random.choice([0, 1], p=option_probs):
            time_theme = np.random.choice(all_time)
        else:
            time_theme = ""

        # NOTE(review): a lighting phrase is always emitted. The coin flip is
        # still drawn (and discarded) so that seeded runs consume the RNG
        # stream exactly as they did when the flag was sampled and then forced
        # to 1.
        np.random.choice([0, 1], p=option_probs)
        light = np.random.choice(all_light)
        # "at night" already carries its own preposition; "with at night" is
        # not a valid phrase.
        if not light.startswith("at "):
            light = "with " + light

        modifier = ", ".join(part for part in (time_theme, style, light, art_by) if part)
        compos_prompt = ", ".join(part for part in (composition, obj_loc2, background) if part)
        modifiers.append(modifier)
        compos_prompts.append(compos_prompt)

    return compos_prompts, modifiers
if __name__ == "__main__":
    # Manual smoke test: print 20 sampled animal prompts, each preceded by
    # its modifier line.
    compos_prompts, modifiers = sample_compositions(20, 'animal')
    for rank, (prompt, modifier) in enumerate(zip(compos_prompts, modifiers), start=1):
        print(f"{rank}:\t{modifier}")
        print(f" \t{prompt}")
        print()