Implement GraphQL endpoint for ListArtifacts API (mlflow#12602)

xq-yin committed on Jul 15, 2024
commit 2008fc6
1 parent 99933f8
Show file tree

Hide file tree

Showing 7 changed files with 150 additions and 47 deletions.
diff --git a/mlflow/java/client/src/main/java/org/mlflow/api/proto/Service.java b/mlflow/java/client/src/main/java/org/mlflow/api/proto/Service.java
diff --git a/mlflow/protos/service.proto b/mlflow/protos/service.proto
@@ -341,6 +341,7 @@ service MlflowService {
       visibility: PUBLIC,
       rpc_doc_title: "List Artifacts",
     };
+    option (graphql) = {};
   }
 
   // Get a list of all values for the specified metric for a given run.

diff --git a/mlflow/protos/service_pb2.py b/mlflow/protos/service_pb2.py
diff --git a/mlflow/server/graphql/autogenerated_graphql_schema.py b/mlflow/server/graphql/autogenerated_graphql_schema.py
@@ -65,6 +65,18 @@ class MlflowGetMetricHistoryBulkIntervalResponse(graphene.ObjectType):
     metrics = graphene.List(graphene.NonNull(MlflowMetricWithRunId))
 
 
+class MlflowFileInfo(graphene.ObjectType):
+    path = graphene.String()
+    is_dir = graphene.Boolean()
+    file_size = LongString()
+
+
+class MlflowListArtifactsResponse(graphene.ObjectType):
+    root_uri = graphene.String()
+    files = graphene.List(graphene.NonNull(MlflowFileInfo))
+    next_page_token = graphene.String()
+
+
 class MlflowDataset(graphene.ObjectType):
     name = graphene.String()
     digest = graphene.String()
@@ -173,6 +185,13 @@ class MlflowGetMetricHistoryBulkIntervalInput(graphene.InputObjectType):
     max_results = graphene.Int()
 
 
+class MlflowListArtifactsInput(graphene.InputObjectType):
+    run_id = graphene.String()
+    run_uuid = graphene.String()
+    path = graphene.String()
+    page_token = graphene.String()
+
+
 class MlflowSearchRunsInput(graphene.InputObjectType):
     experiment_ids = graphene.List(graphene.String)
     filter = graphene.String()
@@ -195,6 +214,7 @@ class QueryType(graphene.ObjectType):
     mlflow_get_experiment = graphene.Field(MlflowGetExperimentResponse, input=MlflowGetExperimentInput())
     mlflow_get_metric_history_bulk_interval = graphene.Field(MlflowGetMetricHistoryBulkIntervalResponse, input=MlflowGetMetricHistoryBulkIntervalInput())
     mlflow_get_run = graphene.Field(MlflowGetRunResponse, input=MlflowGetRunInput())
+    mlflow_list_artifacts = graphene.Field(MlflowListArtifactsResponse, input=MlflowListArtifactsInput())
     mlflow_search_model_versions = graphene.Field(MlflowSearchModelVersionsResponse, input=MlflowSearchModelVersionsInput())
 
     def resolve_mlflow_get_experiment(self, info, input):
@@ -215,6 +235,12 @@ def resolve_mlflow_get_run(self, info, input):
         parse_dict(input_dict, request_message)
         return mlflow.server.handlers.get_run_impl(request_message)
 
+    def resolve_mlflow_list_artifacts(self, info, input):
+        input_dict = vars(input)
+        request_message = mlflow.protos.service_pb2.ListArtifacts()
+        parse_dict(input_dict, request_message)
+        return mlflow.server.handlers.list_artifacts_impl(request_message)
+
     def resolve_mlflow_search_model_versions(self, info, input):
         input_dict = vars(input)
         request_message = mlflow.protos.model_registry_pb2.SearchModelVersions()

diff --git a/mlflow/server/handlers.py b/mlflow/server/handlers.py
@@ -1025,6 +1025,13 @@ def _list_artifacts():
             "page_token": [_assert_string],
         },
     )
+    response_message = list_artifacts_impl(request_message)
+    response = Response(mimetype="application/json")
+    response.set_data(message_to_json(response_message))
+    return response
+
+
+def list_artifacts_impl(request_message):
     response_message = ListArtifacts.Response()
     if request_message.HasField("path"):
         path = request_message.path
@@ -1044,9 +1051,7 @@ def _list_artifacts():
 
     response_message.files.extend([a.to_proto() for a in artifact_entities])
     response_message.root_uri = run.info.artifact_uri
-    response = Response(mimetype="application/json")
-    response.set_data(message_to_json(response_message))
-    return response
+    return response_message
 
 
 @catch_mlflow_exception

diff --git a/mlflow/server/js/src/graphql/autogenerated_schema.gql b/mlflow/server/js/src/graphql/autogenerated_schema.gql
@@ -7,6 +7,7 @@ type Query {
   mlflowGetExperiment(input: MlflowGetExperimentInput): MlflowGetExperimentResponse
   mlflowGetMetricHistoryBulkInterval(input: MlflowGetMetricHistoryBulkIntervalInput): MlflowGetMetricHistoryBulkIntervalResponse
   mlflowGetRun(input: MlflowGetRunInput): MlflowGetRunResponse
+  mlflowListArtifacts(input: MlflowListArtifactsInput): MlflowListArtifactsResponse
   mlflowSearchModelVersions(input: MlflowSearchModelVersionsInput): MlflowSearchModelVersionsResponse
 
   """Simple echoing field"""
@@ -173,6 +174,25 @@ input MlflowGetRunInput {
   runUuid: String
 }
 
+type MlflowListArtifactsResponse {
+  rootUri: String
+  files: [MlflowFileInfo!]
+  nextPageToken: String
+}
+
+type MlflowFileInfo {
+  path: String
+  isDir: Boolean
+  fileSize: LongString
+}
+
+input MlflowListArtifactsInput {
+  runId: String
+  runUuid: String
+  path: String
+  pageToken: String
+}
+
 type MlflowSearchModelVersionsResponse {
   modelVersions: [MlflowModelVersion!]
   nextPageToken: String

diff --git a/tests/tracking/test_rest_tracking.py b/tests/tracking/test_rest_tracking.py
@@ -2294,3 +2294,54 @@ def test_search_runs_graphql(mlflow_client):
         {"info": {"runId": created_run_1.info.run_id}},
     ]
     assert json["data"]["mlflowSearchRuns"]["runs"] == expected
+
+
+def test_list_artifacts_graphql(mlflow_client, tmp_path):
+    name = "GraphqlTest"
+    experiment_id = mlflow_client.create_experiment(name)
+    created_run_id = mlflow_client.create_run(experiment_id).info.run_id
+    file_path = tmp_path / "test.txt"
+    file_path.write_text("hello world")
+    mlflow_client.log_artifact(created_run_id, file_path.absolute().as_posix())
+    mlflow_client.log_artifact(created_run_id, file_path.absolute().as_posix(), "testDir")
+
+    response = requests.post(
+        f"{mlflow_client.tracking_uri}/graphql",
+        json={
+            "query": f"""
+                fragment FilesFragment on MlflowListArtifactsResponse {{
+                    files {{
+                        path
+                        isDir
+                        fileSize
+                    }}
+                }}
+
+                query testQuery {{
+                    file: mlflowListArtifacts(input: {{ runId: "{created_run_id}" }}) {{
+                        ...FilesFragment
+                    }}
+                    subdir: mlflowListArtifacts(input: {{
+                        runId: "{created_run_id}",
+                        path: "testDir",
+                    }}) {{
+                        ...FilesFragment
+                    }}
+                }}
+            """,
+            "operationName": "testQuery",
+        },
+        headers={"content-type": "application/json; charset=utf-8"},
+    )
+
+    assert response.status_code == 200
+    json = response.json()
+    file_expected = [
+        {"path": "test.txt", "isDir": False, "fileSize": "11"},
+        {"path": "testDir", "isDir": True, "fileSize": "0"},
+    ]
+    assert json["data"]["file"]["files"] == file_expected
+    subdir_expected = [
+        {"path": "testDir/test.txt", "isDir": False, "fileSize": "11"},
+    ]
+    assert json["data"]["subdir"]["files"] == subdir_expected