Skip to content

Commit

Permalink
Possibility of spaces in label (ICDAR) (cvat-ai#182)
Browse files (browse the repository at this point in the history)
* Allow spaces in label

* Add exception on multiple characters in segmentation line

Co-authored-by: Maxim Zhiltsov <[email protected]>
  • Loading branch information
yasakova-anastasia and Maxim Zhiltsov authored Mar 24, 2021
1 parent 6135403 commit 7a66b5e
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 15 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- The ability to work with file names containing Cyrillic and spaces (<https://github.com/openvinotoolkit/datumaro/pull/148>)
- Image reading and saving in ICDAR formats (<https://github.com/openvinotoolkit/datumaro/pull/174>)
- Unnecessary image loading on dataset saving (<https://github.com/openvinotoolkit/datumaro/pull/176>)
- Allowed spaces in ICDAR captions (<https://github.com/openvinotoolkit/datumaro/pull/182>)

### Security
-
Expand Down
41 changes: 29 additions & 12 deletions datumaro/plugins/icdar_format/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,16 @@ def _load_recognition_items(self):
objects = line.split(', ')
if len(objects) == 2:
image = objects[0]
captions = objects[1].split()
objects = objects[1].split('\"')
if 1 < len(objects):
if len(objects) % 2:
captions = [objects[2 * i + 1]
for i in range(int(len(objects) / 2))]
else:
raise Exception("Line %s: unexpected number "
"of quotes in filename" % line)
else:
captions = objects[0].split()
else:
image = objects[0][:-1]
captions = []
Expand All @@ -71,8 +80,6 @@ def _load_recognition_items(self):

annotations = items[item_id].annotations
for caption in captions:
if caption[0] == '\"' and caption[-1] == '\"':
caption = caption[1:-1]
annotations.append(Caption(caption))

return items
Expand Down Expand Up @@ -101,18 +108,27 @@ def _load_localization_items(self):
with open(path, encoding='utf-8') as f:
for line in f:
line = line.strip()
objects = line.split()
objects = line.split('\"')
if 1 < len(objects):
if len(objects) == 3:
text = objects[1]
else:
raise Exception("Line %s: unexpected number "
"of quotes in filename" % line)
else:
text = ''
objects = objects[0].split()
if len(objects) == 1:
objects = line.split(',')
objects = objects[0].split(',')

if 8 <= len(objects):
points = [float(p) for p in objects[:8]]

attributes = {}
if len(objects) == 9:
if 0 < len(text):
attributes['text'] = text
elif len(objects) == 9:
text = objects[8]
if text[0] == '\"' and text[-1] == '\"':
text = text[1:-1]
attributes['text'] = text

annotations.append(
Expand All @@ -124,10 +140,10 @@ def _load_localization_items(self):
h = float(objects[3]) - y

attributes = {}
if len(objects) == 5:
if 0 < len(text):
attributes['text'] = text
elif len(objects) == 5:
text = objects[4]
if text[0] == '\"' and text[-1] == '\"':
text = text[1:-1]
attributes['text'] = text

annotations.append(
Expand Down Expand Up @@ -178,7 +194,8 @@ def _load_segmentation_items(self):
objects[9] = '\" \"'
objects.pop()
if len(objects) != 10:
continue
raise Exception("Line %s contains the wrong number "
"of arguments, e.g. '241 73 144 1 4 0 3 1 4 \"h\"" % line)

centers.append(objects[3] + ' ' + objects[4])
groups.append(group)
Expand Down
6 changes: 3 additions & 3 deletions tests/test_icdar_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def test_can_save_and_load_captions(self):
expected_dataset = Dataset.from_iterable([
DatasetItem(id='a/b/1', subset='train',
image=np.ones((10, 15, 3)), annotations=[
Caption('caption_0'),
Caption('caption 0'),
]),
DatasetItem(id=2, subset='train',
image=np.ones((10, 15, 3)), annotations=[
Expand All @@ -138,12 +138,12 @@ def test_can_save_and_load_bboxes(self):
DatasetItem(id='a/b/1', subset='train',
image=np.ones((10, 15, 3)), annotations=[
Bbox(1, 3, 6, 10),
Bbox(0, 1, 3, 5, attributes={'text': 'word_0'}),
Bbox(0, 1, 3, 5, attributes={'text': 'word 0'}),
]),
DatasetItem(id=2, subset='train',
image=np.ones((10, 15, 3)), annotations=[
Polygon([0, 0, 3, 0, 4, 7, 1, 8],
attributes={'text': 'word_1'}),
attributes={'text': 'word 1'}),
Polygon([1, 2, 5, 3, 6, 8, 0, 7]),
]),
DatasetItem(id=3, subset='train',
Expand Down

0 comments on commit 7a66b5e

Please sign in to comment.