-
Notifications
You must be signed in to change notification settings - Fork 81
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Refactor data structures #913
Labels
enhancement
New feature or request
Comments
WIP
classDiagram
class Project {
name: String
releases: Release[]
}
Project --> Release
class Release {
name: String
label: String
summary: String
description: String
keywords: String[]
version: String
commit_sha: String
created_at: Date
image: String
license: String
references: Reference[]
links_homepage: String
links_documentation: String
links_repository: String
links_issue_tracker: String
authors: Author[]
info: Map<String, String>
component_types: ComponentType[]
artifact_types: ArtifactType[]
components: Component[]
artifacts: Artifact[]
}
class Reference {
doi: String?
bibtex: String
}
class Author {
person: Person
roles: String[]
}
Author --> Person
class Person {
name: String
affiliation: String[]?
email: String?
github: String?
orcid: String?
}
class ComponentType {
name: String
label: String
summary: String?
description: String?
}
Release --> ComponentType
class ComponentArtifactLink {
name: String
componentType: ComponentType
artifactType: ArtifactType
direction: String = "input" | "output"
required: Boolean
}
ComponentType --> ComponentArtifactLink
class ArtifactType {
name: String
label: String
summary: String?
description: String?
format: ArtifactFormat
}
ComponentArtifactLink --> ArtifactType
class ArtifactFormat {
type: String = "h5ad" | "parquet" | "json" | "yaml" | ...
schema: Json
}
ArtifactType --> ArtifactFormat
class Component {
type: ComponentType
name: String
label: String
summary: String?
description: String?
keywords: String[]?
references: Reference[]?
links_homepage: String?
links_documentation: String?
links_repository: String?
links_issue_tracker: String?
authors: Author[]?
info: Map<String, String>
arguments: Argument[]
}
Component --> ComponentType
class Argument {
type: String = "string" | "integer" | "float" | "boolean" | "file"
name: String
label: String
summary: String?
description: String?
default: String?
required: Boolean
choices: String[]?
min: Integer?
max: Integer?
example: String?
info: Map<String, String>
}
Component --> Argument
class Artifact {
location: String
type: ArtifactType
name: String?
label: String?
summary: String?
description: String?
keywords: String[]?
references: Reference[]?
links_homepage: String?
links_documentation: String?
links_repository: String?
links_issue_tracker: String?
authors: Author[]?
info: Map<String, String>
}
Artifact --> ArtifactType
class Execution {
component: Component
artifacts: ExecutionArtifact[]
arguments: ExecutionArgument[]
start_time: Date
end_time: Date
exit_code: Integer
duration_sec: Integer
cpu_pct: Double
peak_memory_mb: Double
disk_read_mb: Double
disk_write_mb: Double
log: String
}
Execution --> Component
class ExecutionArtifact {
name: String
execution: Execution
artifact: Artifact
direction: String = "input" | "output"
}
Execution --> ExecutionArtifact
ExecutionArtifact --> Artifact
class ExecutionArgument {
name: String
execution: Execution
argument: Argument
value: Json
}
Execution --> ExecutionArgument
ExecutionArgument --> Argument
TODO:
|
Required improvements:
classDiagram
class TaskInfo{
task_id: String
commit_sha: String?
task_name: String
task_summary: String
task_description: String
repo: String
authors: Author[]
}
class MethodInfo{
task_id: String
method_id: String
method_name: String
method_summary: String
method_description: String
is_baseline: Boolean
paper_reference: String[]?
code_url: String[]
implementation_url: String?
code_version: String?
commit_sha: String?
}
class MetricInfo{
task_id: String
metric_id: String
metric_name: String
metric_summary: String
metric_description: String
paper_reference: String[]?
implementation_url: String?
code_version: String?
commit_sha: String?
maximize: Boolean
}
class DatasetInfo {
task_id: String
dataset_id: String
dataset_name: String
dataset_summary: String
dataset_description: String
data_reference: String[]?
data_url: String?
date_created: Date
file_size: Long
}
class Results {
task_id: String
dataset_id: String
method_id: String
normalization_id: String?
metric_values: Map<String, Double | NA>
scaled_scores: Map<String, Double>
mean_score: Double
steps: Execution[]
}
class Execution {
comp_id: String
arguments: Map<string, any>
exit_code: Integer
log: String
duration_sec: Integer
cpu_pct: Double
peak_memory_mb: Double
disk_read_mb: Double
disk_write_mb: Double
}
class Author {
name: String
roles: String[]
info: AuthorInfo
}
class AuthorInfo {
email: String?
github: String?
orcid: String?
}
class QualityControl {
task_id: String
category: String
name: String
value: Double
severity: Integer
severity_value: Double
code: String
message: String
}
TaskInfo --> Author
Author --> AuthorInfo
Results --> Execution
Results --> MetricInfo
Results --> MethodInfo
Results --> DatasetInfo
|
Another attempt, which will probably end up being trashed:
classDiagram
class Task {
name: String
releases: Release[]
}
Task --> Release
class Release {
task: Task
created_at: String
branch_or_tag: String
commit_sha: String
----
component_types: ComponentType[]
file_types: FileType[]
datasets: Dataset[]
methods: Method[]
metrics: Metric[]
executions: Execution[]
runs: Run[]
scores: Score[]
----
label: String
summary: String
description: String
keywords: String[]
license: String
image: Image
references: Reference[]
links_homepage: String
links_documentation: String
links_repository: String
links_issue_tracker: String
authors: Author[]
}
Release --> ComponentType
Release --> FileType
Release --> Reference
Release --> Author
class Reference {
doi: String?
bibtex: String
}
class Author {
person: Person
task: Task
roles: String[]
}
Author --> Person
class Person {
name: String
affiliation: String[]?
email: String?
github: String?
orcid: String?
}
class ComponentType {
release: Release
fileTypes: ComponentTypeFileType[]
otherArguments: JSON
----
name: String
label: String
summary: String?
description: String?
}
ComponentType --> ComponentTypeFileType
class ComponentTypeFileType {
componentType: ComponentType
name: String
direction: String
required: Boolean
fileType: FileType
}
class FileType {
release: Release
----
formatType: "h5ad" | "parquet" | "json" | "yaml"
formatSchema: JSON
----
name: String
label: String
summary: String?
description: String?
}
FileType --> ComponentTypeFileType
class Component {
type?: ComponentType
----
name: String
label: String
summary: String?
description: String?
keywords: String[]
references: Reference[]
links_homepage: String
links_documentation: String
links_repository: String
links_issue_tracker: String
authors: Author[]
----
info: JSON
viashConfig: JSON
}
Component --> ComponentType
Release --> Component
class Dataset {
release: Release
file: File
----
name: String
label: String
summary: String?
description: String?
keywords: String[]
references: Reference[]
links_homepage: String
links_documentation: String
links_repository: String
links_issue_tracker: String
authors: Author[]
}
Release --> Dataset
Dataset --> File
class Method {
release: Release
components: Component[]
----
name: String
label: String
summary: String?
description: String?
keywords: String[]
references: Reference[]
links_homepage: String
links_documentation: String
links_repository: String
links_issue_tracker: String
authors: Author[]
}
Release --> Method
Method --> Component
class Metric {
release: Release
components: Component[]
----
min: Double
max: Double
maximize: Boolean
----
name: String
label: String
summary: String?
description: String?
keywords: String[]
references: Reference[]
links_homepage: String
links_documentation: String
links_repository: String
links_issue_tracker: String
authors: Author[]
}
Release --> Metric
Metric --> Component
class File {
fileType?: FileType
path: String
storage: Storage
}
File --> FileType
File --> Storage
class Storage {
type: "s3"
bucket: String
}
class Execution {
component: Component
arguments: JSON
inputs: File[]
outputs: File[]
----
exit_code: Integer
log: String
duration_sec: Integer
cpu_pct: Double
peak_memory_mb: Double
disk_read_mb: Double
disk_write_mb: Double
}
Execution --> Component
Execution --> File
class Run {
release: Release
dataset: Dataset
method: Method
arguments: JSON
steps: Execution[]
scores: Score[]
}
Release --> Run
Run --> Dataset
Run --> Method
Run --> Execution
class Score {
run: Run
metric: Metric
value?: Double
steps: Execution[]
}
Run --> Score
Score --> Metric
Score --> Execution
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The data format of the results as published on the website hasn't changed significantly in comparison to OpenProblems v1. Each benchmark's outputs are eventually translated into JSON files named `task_info.json`, `method_info.json`, `metric_info.json`, `results.json`.
However, there are several issues with this format:
metric_id
but also maximize
The text was updated successfully, but these errors were encountered: