-
Notifications
You must be signed in to change notification settings - Fork 0
/
solution.py
172 lines (138 loc) · 7.08 KB
/
solution.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
import numpy as np
from tqdm import tqdm
# Column indices inside a whitespace-split photo description line.
POSITION_ROW = 0  # orientation token; the code compares it against "H"
N_TAGS_ROW = 1    # declared number of tags; the tags themselves follow it

# Input files to process, resolved under ./inputs/ by the driver loop.
# Previously listed inputs, currently disabled: a_example.txt,
# b_lovely_landscapes.txt, d_pet_pictures.txt, e_shiny_selfies.txt
fileNames = [
    "c_memorable_moments.txt",
]
def importer(file_path):
    """Parse a photo description file and index every photo it contains.

    The first line holds the number of photos. Every following non-blank line
    is split on whitespace into: orientation, declared tag count, then tags.

    Besides building the returned dictionary, this function fills the
    module-level indexes that the rest of the script reads:

    * ``pendingImages`` - ids of photos not yet placed on a slide
    * ``positionMap``   - orientation -> ids (raises ``KeyError`` for an
      orientation that has no entry in the map)
    * ``tagsMap``       - tag -> ids of all photos carrying it
    * ``vTagsMap``      - tag -> ids of the non-"H" photos carrying it

    Args:
        file_path: path of the input file to read.

    Returns:
        A ``(numberOfImages, dataSet)`` tuple, where ``numberOfImages`` is the
        count announced on the first line and ``dataSet`` maps each photo id
        to a dict with the keys ``"id"``, ``"position"``, ``"tags"`` (a set)
        and ``"nTags"``.
    """
    dataSet = {}
    with open(file_path, 'r') as file:
        numberOfImages = int(file.readline())
        for line in file:
            tokenizedLine = line.split()
            if not tokenizedLine:
                # A blank line (typically a trailing newline at the end of the
                # file) describes no photo; skipping it avoids an IndexError
                # and keeps the ids contiguous.
                continue

            # Ids are assigned in file order, starting at 0.
            imageId = len(dataSet)
            position = tokenizedLine[POSITION_ROW]
            nTags = int(tokenizedLine[N_TAGS_ROW])

            pendingImages.add(imageId)
            positionMap[position].add(imageId)

            # Tags start right after the two header columns; at most the
            # declared number of tags is read.
            tags = set(tokenizedLine[2:nTags + 2])
            for tag in tags:
                tagsMap.setdefault(tag, set()).add(imageId)
                if position != "H":
                    vTagsMap.setdefault(tag, set()).add(imageId)

            dataSet[imageId] = {
                "id": imageId,
                "position": position,
                "tags": tags,
                "nTags": nTags,
            }
    return (numberOfImages, dataSet)
def calculateImagesDistance(slide1, slide2):
    """Score the transition between two slides.

    Each slide is an iterable of image ids; a slide's tag set is the union of
    the tags of its images, looked up in the module-level ``dataset``.

    Returns:
        The smallest of three counts: tags shared by both slides, tags only
        in ``slide2``, and tags only in ``slide1``.
    """
    firstTags = set()
    for imageId in slide1:
        firstTags.update(dataset[imageId]["tags"])

    secondTags = set()
    for imageId in slide2:
        secondTags.update(dataset[imageId]["tags"])

    shared = len(firstTags & secondTags)
    # |A - B| == |A| - |A & B|, so the two differences need no extra sets.
    onlyInSecond = len(secondTags) - shared
    onlyInFirst = len(firstTags) - shared
    return min(shared, onlyInSecond, onlyInFirst)
def writeOutputFile(fileName):
    """Write the module-level ``slides`` list to ``./ouputs/<fileName>``.

    The first line is the number of slides; each following line holds the
    space-separated image ids of one slide, in slideshow order.

    Args:
        fileName: name of the file to create inside the output directory.
    """
    lines = [str(len(slides))]
    lines.extend(' '.join(str(image) for image in slide) for slide in slides)
    # NOTE(review): the directory is spelled "ouputs" - confirm this matches
    # the directory on disk before renaming it.
    # The context manager guarantees the file is flushed and closed, which the
    # bare open() call previously left to the garbage collector.
    with open("./ouputs/" + fileName, "w") as f:
        f.write("\n".join(lines) + "\n")
def _detachImage(imageId):
    """Remove an image from its position pool and from every tag index."""
    image = dataset[imageId]
    pool = "H" if image["position"] == "H" else "V"
    positionMap[pool].discard(imageId)
    for tag in image["tags"]:
        if tag in tagsMap:
            tagsMap[tag].discard(imageId)
        if tag in vTagsMap:
            vTagsMap[tag].discard(imageId)


def _restoreImage(imageId):
    """Put a drawn but unused image back into its pool and the tag indexes."""
    image = dataset[imageId]
    isVertical = image["position"] != "H"
    for tag in image["tags"]:
        tagsMap[tag].add(imageId)
        if isVertical:
            vTagsMap[tag].add(imageId)
    positionMap["V" if isVertical else "H"].add(imageId)


def _drawAnyCandidate():
    """Draw a slide ignoring tags: one "H" image, else up to two "V" images."""
    if positionMap["H"]:
        candidate = [positionMap["H"].pop()]
    else:
        verticals = positionMap["V"]
        candidate = [verticals.pop() for _ in range(min(2, len(verticals)))]
    for imageId in candidate:
        _detachImage(imageId)
    return candidate


def _drawCandidateForTag(tag, previousTags):
    """Draw a candidate slide whose first image carries ``tag`` if possible."""
    if not tagsMap[tag]:
        # No remaining image carries this tag: take whatever is left.
        return _drawAnyCandidate()

    firstImage = tagsMap[tag].pop()
    _detachImage(firstImage)
    candidate = [firstImage]
    if dataset[firstImage]["position"] == "H":
        return candidate

    # A vertical image needs a partner: prefer one that shares a tag with the
    # previous slide, otherwise take any remaining vertical image.
    for partnerTag in previousTags:
        if vTagsMap.get(partnerTag):
            partner = vTagsMap[partnerTag].pop()
            break
    else:
        if not positionMap["V"]:
            # NOTE(review): this yields a slide with a single vertical image -
            # confirm the expected output format accepts that.
            return candidate
        partner = positionMap["V"].pop()
    _detachImage(partner)
    candidate.append(partner)
    return candidate


def findBestCandidate(prevSlide):
    """Pick the next slide to show after ``prevSlide``.

    For every tag of the previous slide one candidate slide is drawn from the
    module-level pools (``tagsMap``, ``vTagsMap``, ``positionMap``). The
    candidates are scored with ``calculateImagesDistance`` and the first one
    with the highest score wins. The images of all other candidates are put
    back into the pools; the winner's images are moved from ``pendingImages``
    to ``usedImages``.

    Args:
        prevSlide: list of image ids forming the previously chosen slide.

    Returns:
        The list of image ids (one or two entries) of the chosen slide.

    Raises:
        ValueError: if no image is left in the pools to build a slide from.
    """
    previousTags = {tag for imageId in prevSlide for tag in dataset[imageId]["tags"]}

    candidates = []
    for tag in previousTags:
        candidate = _drawCandidateForTag(tag, previousTags)
        if candidate:
            candidates.append(candidate)

    if not candidates:
        # The previous slide has no tags at all, so the loop above produced
        # nothing; max() on an empty list would raise. Fall back to any
        # remaining image instead.
        fallback = _drawAnyCandidate()
        if not fallback:
            raise ValueError("findBestCandidate: no images left to build a slide from")
        candidates.append(fallback)

    print(candidates)
    punctuations = [calculateImagesDistance(prevSlide, candidate) for candidate in candidates]
    bestIndex = punctuations.index(max(punctuations))
    bestCandidate = candidates.pop(bestIndex)

    # Everything that was drawn but not chosen goes back into the pools.
    for candidate in candidates:
        for imageId in candidate:
            _restoreImage(imageId)

    usedImages.update(bestCandidate)
    pendingImages.difference_update(bestCandidate)
    return bestCandidate
# Driver: build and write one slideshow per input file.
for fileName in fileNames:
    print(fileName)

    # Per-file state. These names must stay at module level: importer,
    # calculateImagesDistance, findBestCandidate and writeOutputFile read and
    # mutate them as globals.
    tagsMap = {}
    positionMap = {
        "H": set(),
        "V": set()
    }
    vTagsMap = {}
    slides = []
    usedImages = set()
    pendingImages = set()
    numberOfImages, dataset = importer('./inputs/'+fileName)

    # Choose the first slide: one horizontal image if any exists, otherwise up
    # to two vertical ones. Popping only what is available avoids a KeyError
    # on inputs with no horizontal image and fewer than two vertical ones.
    if positionMap["H"]:
        initialSlide = [positionMap["H"].pop()]
    else:
        initialSlide = []
        while positionMap["V"] and len(initialSlide) < 2:
            initialSlide.append(positionMap["V"].pop())

    # Take the chosen images out of every index so they cannot be drawn again.
    for imageId in initialSlide:
        usedImages.add(imageId)
        pendingImages.remove(imageId)
        for tagToRemove in dataset[imageId]["tags"]:
            tagsMap[tagToRemove].discard(imageId)
            if tagToRemove in vTagsMap:
                vTagsMap[tagToRemove].discard(imageId)

    if initialSlide:
        # An input without images yields an empty slideshow, not an empty slide.
        slides.append(initialSlide)

    # Chain slides, each one chosen relative to the slide just placed.
    previousSlide = initialSlide
    while pendingImages:
        print(len(pendingImages))
        bestCandidate = findBestCandidate(previousSlide)
        slides.append(bestCandidate)
        previousSlide = bestCandidate

    writeOutputFile(fileName)