Skip to content

Commit

Permalink
Remove unused field types (#2250)
Browse files · Browse the repository at this point in the history
  • Loading branch information
lferran authored Jun 18, 2024
1 parent d8cf1bf commit 17d54bb
Show file tree
Hide file tree
Showing 69 changed files with 330 additions and 2,151 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ and inference. We do all the hard work for you.
- Role based security system with upstream proxy authentication validation
- Resources with multiple fields and metadata
- Text/HTML/Markdown plain fields support
- Field types: text, file, link, conversation, layout
- Field types: text, file, link, conversation
- Storage layer support: TiKV, Redis and PostgreSQL
- Blob support with S3-compatible API, GCS and PG drivers
- Replication of index storage
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
4.0.4
5.0.0
2 changes: 1 addition & 1 deletion charts/nucliadb_reader/templates/reader.vs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ spec:
- method:
regex: "GET|OPTIONS"
uri:
regex: '^/api/v\d+/kb/[^/]+/(resource|slug)/[^/]+/(text|file|link|layout|conversation|keywordset|datetime).*'
regex: '^/api/v\d+/kb/[^/]+/(resource|slug)/[^/]+/(text|file|link|conversation).*'
- uri:
regex: '^/api/v\d+/kb/[^/]+/resources'
method:
Expand Down
1 change: 0 additions & 1 deletion docs/internal/KV.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ The prefix for storing mutable data on nucliadb are:
- `/metadata`
- `/classifications`
- `/relations`
- `/f/l/{field}` PB Field Layout
- `/f/t/{field}` PB Field Text
- `/f/u/{field}` PB Field Link
- `/f/f/{field}` PB Field File
Expand Down
8 changes: 0 additions & 8 deletions nucliadb/src/nucliadb/export_import/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,6 @@
"files",
"texts",
"conversations",
"layouts",
"keywordsets",
"datetimes",
]


Expand Down Expand Up @@ -191,11 +188,6 @@ def get_cloud_files(bm: writer_pb2.BrokerMessage) -> list[resources_pb2.CloudFil
for attachment in message.content.attachments:
_clone_collect_cf(binaries, attachment)

for layout in bm.layouts.values():
for block in layout.body.blocks.values():
if block.HasField("file"):
_clone_collect_cf(binaries, block.file)

for field_extracted_data in bm.file_extracted_data:
if field_extracted_data.HasField("file_thumbnail"):
_clone_collect_cf(binaries, field_extracted_data.file_thumbnail)
Expand Down
2 changes: 1 addition & 1 deletion nucliadb/src/nucliadb/ingest/fields/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
from nucliadb_protos.writer_pb2 import Error
from nucliadb_utils.storages.storage import Storage, StorageField

SUBFIELDFIELDS = ["l", "c"]
SUBFIELDFIELDS = ("c",)


class FieldTypes(str, enum.Enum):
Expand Down
33 changes: 0 additions & 33 deletions nucliadb/src/nucliadb/ingest/fields/date.py

This file was deleted.

33 changes: 0 additions & 33 deletions nucliadb/src/nucliadb/ingest/fields/keywordset.py

This file was deleted.

55 changes: 0 additions & 55 deletions nucliadb/src/nucliadb/ingest/fields/layout.py

This file was deleted.

8 changes: 0 additions & 8 deletions nucliadb/src/nucliadb/ingest/orm/brain.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
Basic,
ExtractedText,
FieldComputedMetadata,
FieldKeywordset,
FieldMetadata,
Metadata,
Origin,
Expand Down Expand Up @@ -532,13 +531,6 @@ def process_field_metadata(
)
self.brain.relations.append(rel)

def process_keywordset_fields(self, field_key: str, field: FieldKeywordset):
# all field keywords
if field:
for keyword in field.keywords:
self.labels["f"].append(f"{field_key}/{keyword.value}")
self.labels["fg"].append(keyword.value)

def apply_field_labels(
self,
field_key: str,
Expand Down
9 changes: 0 additions & 9 deletions nucliadb/src/nucliadb/ingest/orm/broker_message.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,15 +117,6 @@ async def generate_field(
field = cast(Conversation, field)
value = await field.get_full_conversation()
self.bm.conversations[field_id].CopyFrom(value)
elif type_id == FieldType.KEYWORDSET:
value = await field.get_value()
self.bm.keywordsets[field_id].CopyFrom(value)
elif type_id == FieldType.DATETIME:
value = await field.get_value()
self.bm.datetimes[field_id].CopyFrom(value)
elif type_id == FieldType.LAYOUT:
value = await field.get_value()
self.bm.layouts[field_id].CopyFrom(value)

async def generate_extracted_text(
self,
Expand Down
15 changes: 0 additions & 15 deletions nucliadb/src/nucliadb/ingest/orm/processor/auditing.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,26 +91,11 @@ def iterate_auditable_fields(
yield key
yielded.add(key)

for field_id in message.layouts.keys():
key = (field_id, writer_pb2.FieldType.LAYOUT)
yield key
yielded.add(key)

for field_id in message.texts.keys():
key = (field_id, writer_pb2.FieldType.TEXT)
yield key
yielded.add(key)

for field_id in message.keywordsets.keys():
key = (field_id, writer_pb2.FieldType.KEYWORDSET)
yield key
yielded.add(key)

for field_id in message.datetimes.keys():
key = (field_id, writer_pb2.FieldType.DATETIME)
yield key
yielded.add(key)

for field_id in message.links.keys():
key = (field_id, writer_pb2.FieldType.LINK)
yield key
Expand Down
26 changes: 0 additions & 26 deletions nucliadb/src/nucliadb/ingest/orm/resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,8 @@
from nucliadb.common.maindb.driver import Transaction
from nucliadb.ingest.fields.base import Field
from nucliadb.ingest.fields.conversation import Conversation
from nucliadb.ingest.fields.date import Datetime
from nucliadb.ingest.fields.file import File
from nucliadb.ingest.fields.generic import VALID_GENERIC_FIELDS, Generic
from nucliadb.ingest.fields.keywordset import Keywordset
from nucliadb.ingest.fields.layout import Layout
from nucliadb.ingest.fields.link import Link
from nucliadb.ingest.fields.text import Text
from nucliadb.ingest.orm.brain import FilePagePositions, ResourceBrain
Expand Down Expand Up @@ -87,23 +84,17 @@
logger = logging.getLogger(__name__)

KB_FIELDS: dict[int, Type] = {
FieldType.LAYOUT: Layout,
FieldType.TEXT: Text,
FieldType.FILE: File,
FieldType.LINK: Link,
FieldType.DATETIME: Datetime,
FieldType.KEYWORDSET: Keywordset,
FieldType.GENERIC: Generic,
FieldType.CONVERSATION: Conversation,
}

KB_REVERSE: dict[str, FieldType.ValueType] = {
"l": FieldType.LAYOUT,
"t": FieldType.TEXT,
"f": FieldType.FILE,
"u": FieldType.LINK,
"d": FieldType.DATETIME,
"k": FieldType.KEYWORDSET,
"a": FieldType.GENERIC,
"c": FieldType.CONVERSATION,
}
Expand Down Expand Up @@ -541,26 +532,12 @@ async def update_all_field_ids(
@processor_observer.wrap({"type": "apply_fields"})
async def apply_fields(self, message: BrokerMessage):
message_updated_fields = []
for field, layout in message.layouts.items():
fid = FieldID(field_type=FieldType.LAYOUT, field=field)
await self.set_field(fid.field_type, fid.field, layout)
message_updated_fields.append(fid)

for field, text in message.texts.items():
fid = FieldID(field_type=FieldType.TEXT, field=field)
await self.set_field(fid.field_type, fid.field, text)
message_updated_fields.append(fid)

for field, keywordset in message.keywordsets.items():
fid = FieldID(field_type=FieldType.KEYWORDSET, field=field)
await self.set_field(fid.field_type, fid.field, keywordset)
message_updated_fields.append(fid)

for field, datetimeobj in message.datetimes.items():
fid = FieldID(field_type=FieldType.DATETIME, field=field)
await self.set_field(fid.field_type, fid.field, datetimeobj)
message_updated_fields.append(fid)

for field, link in message.links.items():
fid = FieldID(field_type=FieldType.LINK, field=field)
await self.set_field(fid.field_type, fid.field, link)
Expand Down Expand Up @@ -873,9 +850,6 @@ async def compute_global_tags(self, brain: ResourceBrain):
basic.usermetadata,
valid_user_field_metadata,
)
if type == FieldType.KEYWORDSET:
field_data = await fieldobj.db_get_value()
brain.process_keywordset_fields(fieldkey, field_data)

@processor_observer.wrap({"type": "compute_global_text"})
async def compute_global_text(self):
Expand Down
3 changes: 0 additions & 3 deletions nucliadb/src/nucliadb/ingest/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,6 @@ class PushPayload(BaseModel):
# Diff on Text Field
textfield: dict[str, models.Text] = {}

# Diff on a Layout Field
layoutfield: dict[str, models.LayoutDiff] = {}

# New conversations to process
conversationfield: dict[str, models.PushConversation] = {}

Expand Down
Loading

3 comments on commit 17d54bb

@github-actions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark

Benchmark suite Current: 17d54bb Previous: 0d03d9f Ratio
tests/search/unit/search/test_fetch.py::test_highligh_error 2816.6890091681485 iter/sec (stddev: 0.000008281467252280512) 2841.0684406726436 iter/sec (stddev: 0.000004954958228416619) 1.01

This comment was automatically generated by a workflow using github-action-benchmark.

@github-actions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark

Benchmark suite Current: 17d54bb Previous: 0d03d9f Ratio
tests/search/unit/search/test_fetch.py::test_highligh_error 3040.999034259149 iter/sec (stddev: 0.0000017655456403347856) 2841.0684406726436 iter/sec (stddev: 0.000004954958228416619) 0.93

This comment was automatically generated by a workflow using github-action-benchmark.

@github-actions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark

Benchmark suite Current: 17d54bb Previous: 0d03d9f Ratio
tests/search/unit/search/test_fetch.py::test_highligh_error 2919.1734312946396 iter/sec (stddev: 0.0000015410485384222589) 2841.0684406726436 iter/sec (stddev: 0.000004954958228416619) 0.97

This comment was automatically generated by a workflow using github-action-benchmark.

Please sign in to comment.