Enhanced existing API generator to use OpenSearch OpenAPI spec

Signed-off-by: saimedhi <[email protected]>
opensearch-project · Jun 21, 2023 · f3ddc99 · f3ddc99
1 parent f1b5706
commit f3ddc99
Show file tree

Hide file tree

Showing 4 changed files with 186 additions and 60 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -18,7 +18,8 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 ### Fixed
 - Fixed import cycle when importing async helpers ([#311](https://github.com/opensearch-project/opensearch-py/pull/311))
 - Fixed userguide for async client ([#340](https://github.com/opensearch-project/opensearch-py/pull/340))
-- Include parsed error info in TransportError in async connections (fixes #225) ([#226](https://github.com/opensearch-project/opensearch-py/pull/226)
+- Include parsed error info in TransportError in async connections (fixes #225) ([#226](https://github.com/opensearch-project/opensearch-py/pull/226))
+- Enhanced existing API generator to use OpenSearch OpenAPI spec ([#412](https://github.com/opensearch-project/opensearch-py/pull/412))
 ### Security
 - Fixed CVE-2022-23491 reported in opensearch-dsl-py ([#295](https://github.com/opensearch-project/opensearch-py/pull/295))
 - Update ci workflows ([#318](https://github.com/opensearch-project/opensearch-py/pull/318))

diff --git a/DEVELOPER_GUIDE.md b/DEVELOPER_GUIDE.md
@@ -3,6 +3,7 @@
   - [Running Tests](#running-tests)
   - [Linter](#linter)
   - [Documentation](#documentation)
+  - [Running Python Client Generator](#running-python-client-generator)
 
 # Developer Guide
 
@@ -72,3 +73,12 @@ make html
 ```
 
 Open `opensearch-py/docs/build/html/index.html` to see results.
+
+## Running Python Client Generator
+
+The following commands run the Python client generator, which updates the client from the [OpenAPI specification](https://github.com/opensearch-project/opensearch-api-specification/blob/main/OpenSearch.openapi.json) found in the "opensearch-api-specification" repository. This allows the client code to be generated automatically and kept in sync with the latest API specification.
+
+```
+cd opensearch-py
+python utils/generate-api.py
+```
diff --git a/utils/generate-api.py b/utils/generate-api.py
@@ -25,21 +25,15 @@
 #  specific language governing permissions and limitations
 #  under the License.
 
-
-import contextlib
-import io
-import json
 import os
 import re
-import shutil
-import sys
-import tempfile
-import zipfile
 from functools import lru_cache
-from itertools import chain
+from itertools import chain, groupby
+from operator import itemgetter
 from pathlib import Path
 
 import black
+import requests
 import unasync
 import urllib3
 from click.testing import CliRunner
@@ -51,6 +45,8 @@
 SEPARATOR = "    # AUTO-GENERATED-API-DEFINITIONS #"
 # global substitutions for python keywords
 SUBSTITUTIONS = {"type": "doc_type", "from": "from_"}
+
+
 # api path(s)
 BRANCH_NAME = "7.x"
 CODE_ROOT = Path(__file__).absolute().parent.parent
@@ -333,67 +329,187 @@ def to_python(self):
         )
 
 
-@contextlib.contextmanager
-def download_artifact(version):
-    # Download the list of all artifacts for a version
-    # and find the latest build URL for 'rest-resources-zip-*.zip'
-    resp = http.request(
-        "GET", f"https://artifacts-api.elastic.co/v1/versions/{version}"
+def read_modules():
+    modules = {}
+
+    # Load the OpenAPI specification file
+    response = requests.get(
+        "https://raw.githubusercontent.com/opensearch-project/opensearch-api-specification/main/OpenSearch.openapi.json"
     )
-    packages = json.loads(resp.data)["version"]["builds"][0]["projects"][
-        "opensearchpy"
-    ]["packages"]
-    for package in packages:
-        if re.match(r"^rest-resources-zip-.*\.zip$", package):
-            zip_url = packages[package]["url"]
-            break
-    else:
-        raise RuntimeError(
-            "Could not find the package 'rest-resources-zip-*.zip' in build"
-        )
+    data = response.json()
+
+    list_of_dicts = []
+
+    for path in data["paths"]:
+        for x in data["paths"][path]:
+            data["paths"][path][x].update({"path": path, "method": x})
+            list_of_dicts.append(data["paths"][path][x])
+
+    # Update parameters in each endpoint
+    for p in list_of_dicts:
+        if "parameters" in p:
+            params = []
+            parts = []
+
+            # Iterate over the list of parameters and update them
+            for x in p["parameters"]:
+                if "schema" in x and "$ref" in x["schema"]:
+                    schema_path_ref = x["schema"]["$ref"].split("/")[-1]
+                    x["schema"] = data["components"]["schemas"][schema_path_ref]
+                    params.append(x)
+                else:
+                    params.append(x)
 
-    # Download the .jar file and unzip only the API
-    # .json files into a temporary directory
-    resp = http.request("GET", zip_url)
+            # Iterate over the list of updated parameters to separate "parts" from "params"
+            k = params.copy()
+            for q in k:
+                if q["in"] == "path":
+                    parts.append(q)
+                    params.remove(q)
 
-    tmp = Path(tempfile.mkdtemp())
-    zip = zipfile.ZipFile(io.BytesIO(resp.data))
-    for name in zip.namelist():
-        if not name.endswith(".json") or name == "schema.json":
-            continue
-        with (tmp / name.replace("rest-api-spec/api/", "")).open("wb") as f:
-            f.write(zip.read(name))
+            # Convert "params" and "parts" into the structure required by the generator.
+            params_new = {}
+            parts_new = {}
 
-    yield tmp
-    shutil.rmtree(tmp)
+            for m in params:
+                A = dict(type=m["schema"]["type"], description=m["description"])
+                if "enum" in m["schema"]:
+                    A.update({"type": "enum"})
+                    A.update({"options": m["schema"]["enum"]})
 
+                if "deprecated" in m:
+                    A.update({"deprecated": m["deprecated"]})
+                params_new.update({m["name"]: A})
 
-def read_modules(version):
-    modules = {}
+            # Removing the deprecated "type"
+            if "type" in params_new:
+                params_new.pop("type")
 
-    with download_artifact(version) as path:
-        for f in sorted(os.listdir(path)):
-            name, ext = f.rsplit(".", 1)
+            if bool(params_new):
+                p.update({"params": params_new})
 
-            if ext != "json" or name == "_common":
-                continue
+            p.pop("parameters")
+
+            for n in parts:
+                B = dict(type=n["schema"]["type"])
+
+                if "description" in n:
+                    B.update({"description": n["description"]})
+
+                deprecated_new = {}
+                if "deprecated" in n:
+                    B.update({"deprecated": n["deprecated"]})
+
+                    if "x-deprecation-version" in n:
+                        deprecated_new.update({"version": n["x-deprecation-version"]})
 
-            with open(path / f) as api_def:
-                api = json.load(api_def)[name]
+                    if "x-deprecation-description" in n:
+                        deprecated_new.update(
+                            {"description": n["x-deprecation-description"]}
+                        )
 
+                parts_new.update({n["name"]: B})
+
+            if bool(parts_new):
+                p.update({"parts": parts_new})
+
+    # Sort the input list by the value of the "x-operation-group" key
+    list_of_dicts = sorted(list_of_dicts, key=itemgetter("x-operation-group"))
+
+    # Group the input list by the value of the "x-operation-group" key
+    for key, value in groupby(list_of_dicts, key=itemgetter("x-operation-group")):
+        api = {}
+
+        # Extract the namespace and name from the 'x-operation-group'
+        if "." in key:
+            namespace, name = key.rsplit(".", 1)
+        else:
             namespace = "__init__"
-            if "." in name:
-                namespace, name = name.rsplit(".", 1)
+            name = key
+
+        # Group the data in the current group by the "path" key
+        paths = []
+        for key2, value2 in groupby(value, key=itemgetter("path")):
+            # Extract the HTTP methods from the data in the current subgroup
+            methods = []
+            parts_final = {}
+            for z in value2:
+                methods.append(z["method"].upper())
+
+                # Update 'api' dictionary
+                if "documentation" not in api:
+                    documentation = {"description": z["description"]}
+                    api.update({"documentation": documentation})
+
+                if "params" not in api and "params" in z:
+                    api.update({"params": z["params"]})
+
+                if "body" not in api and "requestBody" in z:
+                    body = {"required": False}
+                    if "required" in z["requestBody"]:
+                        body.update({"required": z["requestBody"]["required"]})
+
+                    if "description" in z["requestBody"]:
+                        body.update({"description": z["requestBody"]["description"]})
+
+                    q = z["requestBody"]["content"]["application/json"]["schema"][
+                        "$ref"
+                    ].split("/")[-1]
+                    if "x-serialize" in data["components"]["schemas"][q]:
+                        body.update(
+                            {
+                                "serialize": data["components"]["schemas"][q][
+                                    "x-serialize"
+                                ]
+                            }
+                        )
+
+                    api.update({"body": body})
+
+                if "parts" in z:
+                    parts_final.update(z["parts"])
+
+            if "POST" in methods or "PUT" in methods:
+                api.update(
+                    {
+                        "stability": "stable",
+                        "visibility": "public",
+                        "headers": {
+                            "accept": ["application/json"],
+                            "content_type": ["application/json"],
+                        },
+                    }
+                )
+            else:
+                api.update(
+                    {
+                        "stability": "stable",
+                        "visibility": "public",
+                        "headers": {"accept": ["application/json"]},
+                    }
+                )
 
-            # The data_frame API has been changed to transform.
-            if namespace == "data_frame_transform_deprecated":
-                continue
+            if bool(deprecated_new) and bool(parts_final):
+                paths.append(
+                    {
+                        "path": key2,
+                        "methods": methods,
+                        "parts": parts_final,
+                        "deprecated": deprecated_new,
+                    }
+                )
+            elif bool(parts_final):
+                paths.append({"path": key2, "methods": methods, "parts": parts_final})
+            else:
+                paths.append({"path": key2, "methods": methods})
+
+        api.update({"url": {"paths": paths}})
 
-            if namespace not in modules:
-                modules[namespace] = Module(namespace)
+        if namespace not in modules:
+            modules[namespace] = Module(namespace)
 
-            modules[namespace].add(API(namespace, name, api))
-            modules[namespace].pyi.add(API(namespace, name, api, is_pyi=True))
+        modules[namespace].add(API(namespace, name, api))
+        modules[namespace].pyi.add(API(namespace, name, api, is_pyi=True))
 
     return modules
 
@@ -432,5 +548,4 @@ def dump_modules(modules):
 
 
 if __name__ == "__main__":
-    version = sys.argv[1]
-    dump_modules(read_modules(version))
+    dump_modules(read_modules())
diff --git a/utils/templates/base b/utils/templates/base
@@ -5,6 +5,7 @@
         {% if api.description %}
         {{ api.description|replace("\n", " ")|wordwrap(wrapstring="\n        ") }}
         {% endif %}
+
         {% if api.doc_url %}
 
         `<{{ api.doc_url }}>`_
@@ -34,4 +35,3 @@
         {% block request %}
         return await self.transport.perform_request("{{ api.method }}", {% include "url" %}, params=params, headers=headers{% if api.body %}, body=body{% endif %})
         {% endblock %}
-