Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(Metadata): add generic metadata and link with dataset, version, files #800

Merged
merged 29 commits into from
Oct 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
87127e3
feat(Metadata): implements metadata module, extends Dataset, DatasetV…
nazarfil Jun 18, 2024
17e6bf1
fix: migration
nazarfil Sep 20, 2024
d7bc0c7
fix: tests
nazarfil Sep 20, 2024
73a0907
fix: metadata generation
nazarfil Sep 20, 2024
e0c52b2
fix: removed linked object
nazarfil Sep 24, 2024
a2b13f8
chore: rename attributes
nazarfil Sep 24, 2024
f812bad
fix: removed has permission from model
nazarfil Sep 25, 2024
ef47f04
fix: renamed to opaqueId
nazarfil Sep 25, 2024
81bd55e
try generic relation
nazarfil Sep 25, 2024
fb5e5aa
fix: adds indexing migration
nazarfil Sep 26, 2024
b7dbfa6
fix: adds indexing migration
nazarfil Sep 26, 2024
3cd3424
chore: adds todos
nazarfil Sep 26, 2024
45a6e8b
fix: moves permissions to model
nazarfil Sep 30, 2024
d9f53ec
fix: implements opaqueId graphql resolver
nazarfil Sep 30, 2024
875c5fe
fix: removes target
nazarfil Sep 30, 2024
53e0db0
fix: fixes tests
nazarfil Oct 1, 2024
fa1f7c8
fix: tests dataset
nazarfil Oct 1, 2024
a63b309
chore: merged migrations
nazarfil Oct 2, 2024
1ebeeb9
chore: improve the process
qgerome Oct 3, 2024
534b23e
fix tests dataset
nazarfil Oct 4, 2024
fc8be42
fix tests metadata
nazarfil Oct 4, 2024
6b66b75
fix: fixes opaqueId to targetId
nazarfil Oct 8, 2024
8a55dbb
fix: encodes key in base64
nazarfil Oct 8, 2024
39a28ca
test: tests add system attribute
nazarfil Oct 9, 2024
7348ffa
adds properties to the file
nazarfil Oct 9, 2024
541ad11
modifies profiling test with properties
nazarfil Oct 9, 2024
314dff4
fix: value comparison for profiling
nazarfil Oct 9, 2024
17dcb53
fix: added test to validate attributes have been copied
nazarfil Oct 9, 2024
7db2aa0
Merge branch 'main' into feat-metadata_custom-nf
qgerome Oct 9, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions config/graphql/schema.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@ scalar JSON
scalar UUID
scalar Generic
scalar BigInt
scalar OpaqueID
type Query
type Mutation
30 changes: 30 additions & 0 deletions config/schema.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import base64
import pathlib
from uuid import UUID

Expand All @@ -7,12 +8,14 @@
make_executable_schema,
snake_case_fallback_resolvers,
)
from django.contrib.contenttypes.models import ContentType

from hexa.core.schema import config_bindables, config_type_defs
from hexa.countries.schema import countries_bindables, countries_type_defs
from hexa.databases.schema import databases_bindables, databases_types_def
from hexa.datasets.schema import datasets_bindables, datasets_type_defs
from hexa.files.schema import files_bindables, files_type_def
from hexa.metadata.schema import metadata_bindables, metadata_type_def
from hexa.notebooks.schema import notebooks_bindables, notebooks_type_defs
from hexa.pipelines.schema import pipelines_bindables, pipelines_type_defs
from hexa.plugins.connector_accessmod.schema import (
Expand All @@ -31,6 +34,7 @@
from hexa.workspaces.schema import workspaces_bindables, workspaces_type_def

uuid_scalar = ScalarType("UUID")
opaque_id_scalar = ScalarType("OpaqueID")


@uuid_scalar.value_parser
Expand All @@ -42,6 +46,29 @@ def parse_uuid_value(value):
raise ValueError(f'"{value}" is not a valid uuid')


@opaque_id_scalar.value_parser
def parse_opaque_id_value(value):
    """Decode a base64 opaque ID and return the model instance it designates.

    The decoded payload is expected to look like
    ``"<instance id>:<content type id>"``, which is the format produced by
    ``serialize_opaque_id``.

    Raises
    ------
    ValueError: If the value is not valid base64 / UTF-8, or the decoded
        payload is not exactly two ``:``-separated parts
    ObjectDoesNotExist: If the object instance or the content type does not exist
    """
    try:
        decoded_id = base64.b64decode(value).decode("utf-8")
        instance_id, content_type_id = decoded_id.split(":")
    except ValueError as exc:
        # binascii.Error, UnicodeDecodeError and the tuple-unpacking failure
        # are all ValueError subclasses; re-raise them with one clear message,
        # mirroring what the UUID scalar parser does for bad input.
        raise ValueError(f'"{value}" is not a valid opaque id') from exc

    content_type = ContentType.objects.get_for_id(content_type_id)
    model_class = content_type.model_class()
    if model_class is None:
        # model_class() returns None when the content type row outlived its
        # model (stale content type); report it as a missing content type
        # instead of failing with AttributeError on `None.objects`.
        raise ContentType.DoesNotExist(
            f"Content type {content_type_id} has no model class"
        )

    # NOTE(review): no access check happens here; callers presumably have to
    # verify permissions on the returned instance themselves. Confirm.
    return model_class.objects.get(id=instance_id)


@opaque_id_scalar.serializer
def serialize_opaque_id(value):
    """Return the base64 text of ``"<instance id>:<content type id>"`` for a model instance."""
    content_type = ContentType.objects.get_for_model(value)
    # Build the raw key as bytes first, then base64-encode it back to text.
    raw_key = f"{value.id}:{content_type.id}".encode("utf-8")
    return base64.b64encode(raw_key).decode("utf-8")


type_defs = load_schema_from_path(
f"{pathlib.Path(__file__).parent.resolve()}/graphql/schema.graphql"
)
Expand All @@ -58,13 +85,15 @@ def parse_uuid_value(value):
notebooks_type_defs,
pipelines_type_defs,
workspaces_type_def,
metadata_type_def,
databases_types_def,
files_type_def,
config_type_defs,
datasets_type_defs,
],
[
uuid_scalar,
opaque_id_scalar,
*pipelines_bindables,
*identity_bindables,
*tags_bindables,
Expand All @@ -75,6 +104,7 @@ def parse_uuid_value(value):
*countries_bindables,
*notebooks_bindables,
*workspaces_bindables,
*metadata_bindables,
*databases_bindables,
*files_bindables,
*datasets_bindables,
Expand Down
1 change: 1 addition & 0 deletions config/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@
"hexa.plugins.connector_accessmod",
"hexa.workspaces",
"hexa.databases",
"hexa.metadata",
"hexa.files",
"hexa.datasets",
"django_otp",
Expand Down
2 changes: 1 addition & 1 deletion docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ services:
- db

# This service is only used for the connector_accessmod app.
dataworker:
accessmod_dataworker:
<<: *common
command: "manage validate_fileset_worker"
restart: unless-stopped
Expand Down
29 changes: 18 additions & 11 deletions hexa/datasets/graphql/schema.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"""
Dataset is a collection of files that are related to each other and are versioned.
"""
type Dataset {
type Dataset implements MetadataObject{
id: ID!
slug: String!
name: String!
Expand All @@ -16,6 +16,8 @@ type Dataset {
version(id: ID!): DatasetVersion
latestVersion: DatasetVersion
links(page: Int = 1, perPage: Int = 15): DatasetLinkPage!
attributes: [MetadataAttribute!]!
targetId: OpaqueID!
}

"""
Expand All @@ -34,7 +36,7 @@ type DatasetPermissions {
A version of a dataset. A version is a snapshot of the dataset at a point in time.

"""
type DatasetVersion {
type DatasetVersion implements MetadataObject{
id: ID!
name: String!
description: String
Expand All @@ -44,6 +46,8 @@ type DatasetVersion {
permissions: DatasetVersionPermissions!
fileByName(name: String!): DatasetVersionFile
files(page: Int = 1, perPage: Int = 15): DatasetVersionFilePage!
attributes: [MetadataAttribute!]!
targetId: OpaqueID!
}

"""
Expand Down Expand Up @@ -92,33 +96,37 @@ type DatasetVersionPermissions {
}

"""
Statuses that can occur when generating file metadata
Statuses that can occur when generating file sample
"""
enum FileMetadataStatus{
enum FileSampleStatus{
PROCESSING,
FAILED,
FINISHED
}

"""
Metadata for dataset file
File sample for dataset file
"""
type DatasetFileMetadata {
sample: JSON!
status: FileMetadataStatus!
type DatasetFileSample {
sample: JSON
status: FileSampleStatus!
statusReason: String
}

"""
A file in a dataset version.
"""
type DatasetVersionFile {
type DatasetVersionFile implements MetadataObject{
id: ID!
uri: String!
filename: String!
createdAt: DateTime!
createdBy: User
contentType: String!
fileMetadata: DatasetFileMetadata
fileSample: DatasetFileSample
properties: JSON
attributes: [MetadataAttribute!]!
targetId: OpaqueID!
}

"""
Expand Down Expand Up @@ -161,7 +169,6 @@ extend type WorkspacePermissions {
createDataset: Boolean!
}


"""
Input for creating a dataset.
"""
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Generated by Django 4.2.16 on 2024-09-20 11:33

import uuid

import django.db.models.deletion
from django.db import migrations, models


# Creates the DatasetFileSample model and deletes the DatasetFileMetadata model.
class Migration(migrations.Migration):
    dependencies = [
        ("datasets", "0008_alter_datasetfilemetadata_options"),
    ]

    operations = [
        migrations.CreateModel(
            name="DatasetFileSample",
            fields=[
                (
                    "id",
                    # UUID primary key, generated with uuid4 and not editable.
                    models.UUIDField(
                        default=uuid.uuid4,
                        editable=False,
                        primary_key=True,
                        serialize=False,
                    ),
                ),
                # Timestamps set on creation and refreshed on every save.
                ("created_at", models.DateTimeField(auto_now_add=True)),
                ("updated_at", models.DateTimeField(auto_now=True)),
                # The sample payload; may be null/blank and defaults to an empty list.
                ("sample", models.JSONField(blank=True, default=list, null=True)),
                (
                    "status",
                    # max_length=10 matches the longest choice value, "PROCESSING".
                    models.CharField(
                        choices=[
                            ("PROCESSING", "Processing"),
                            ("FAILED", "Failed"),
                            ("FINISHED", "Finished"),
                        ],
                        default="PROCESSING",
                        max_length=10,
                    ),
                ),
                # Optional free-text explanation accompanying the status.
                ("status_reason", models.TextField(blank=True, null=True)),
                (
                    "dataset_version_file",
                    # Samples are removed together with their file (CASCADE) and
                    # are reachable from the file through `samples`.
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="samples",
                        to="datasets.datasetversionfile",
                    ),
                ),
            ],
            options={
                # Newest samples first.
                "ordering": ["-created_at"],
            },
        ),
        # NOTE(review): the old model is deleted rather than renamed and no data
        # copy is visible in this migration, so existing DatasetFileMetadata rows
        # are presumably not carried over — confirm this is intended.
        migrations.DeleteModel(
            name="DatasetFileMetadata",
        ),
    ]
17 changes: 17 additions & 0 deletions hexa/datasets/migrations/0010_datasetversionfile_properties.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Generated by Django 4.2.16 on 2024-10-09 07:45

from django.db import migrations, models


# Adds a `properties` JSON field to the DatasetVersionFile model.
class Migration(migrations.Migration):
    dependencies = [
        ("datasets", "0009_datasetfilesample_delete_datasetfilemetadata"),
    ]

    operations = [
        migrations.AddField(
            model_name="datasetversionfile",
            name="properties",
            # The field is non-nullable; its default is an empty dict.
            field=models.JSONField(default=dict),
        ),
    ]
Loading
Loading