Skip to content

Commit

Permalink
Enhanced existing API generator to use OpenSearch OpenAPI spec
Browse files Browse the repository at this point in the history
Signed-off-by: saimedhi <[email protected]>
  • Loading branch information
saimedhi committed Jun 21, 2023
1 parent f1b5706 commit f3ddc99
Show file tree
Hide file tree
Showing 4 changed files with 186 additions and 60 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
### Fixed
- Fixed import cycle when importing async helpers ([#311](https://github.com/opensearch-project/opensearch-py/pull/311))
- Fixed userguide for async client ([#340](https://github.com/opensearch-project/opensearch-py/pull/340))
- Include parsed error info in TransportError in async connections (fixes #225) ([#226](https://github.com/opensearch-project/opensearch-py/pull/226)
- Include parsed error info in TransportError in async connections (fixes #225) ([#226](https://github.com/opensearch-project/opensearch-py/pull/226))
- Enhanced existing API generator to use OpenSearch OpenAPI spec ([#412](https://github.com/opensearch-project/opensearch-py/pull/412))
### Security
- Fixed CVE-2022-23491 reported in opensearch-dsl-py ([#295](https://github.com/opensearch-project/opensearch-py/pull/295))
- Update ci workflows ([#318](https://github.com/opensearch-project/opensearch-py/pull/318))
Expand Down
10 changes: 10 additions & 0 deletions DEVELOPER_GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
- [Running Tests](#running-tests)
- [Linter](#linter)
- [Documentation](#documentation)
- [Running Python Client Generator](#running-python-client-generator)

# Developer Guide

Expand Down Expand Up @@ -72,3 +73,12 @@ make html
```

Open `opensearch-py/docs/build/html/index.html` to see results.

## Running Python Client Generator

The following commands run the Python client generator, which updates the client from the [OpenAPI specification](https://github.com/opensearch-project/opensearch-api-specification/blob/main/OpenSearch.openapi.json) published in the `opensearch-api-specification` repository. The generator downloads the specification at run time, so network access is required. Running it regenerates the client code and keeps it in sync with the latest API specification.

```
cd opensearch-py
python utils/generate-api.py
```
231 changes: 173 additions & 58 deletions utils/generate-api.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,21 +25,15 @@
# specific language governing permissions and limitations
# under the License.


import contextlib
import io
import json
import os
import re
import shutil
import sys
import tempfile
import zipfile
from functools import lru_cache
from itertools import chain
from itertools import chain, groupby
from operator import itemgetter
from pathlib import Path

import black
import requests
import unasync
import urllib3
from click.testing import CliRunner
Expand All @@ -51,6 +45,8 @@
SEPARATOR = " # AUTO-GENERATED-API-DEFINITIONS #"
# global substitutions for python keywords
SUBSTITUTIONS = {"type": "doc_type", "from": "from_"}


# api path(s)
BRANCH_NAME = "7.x"
CODE_ROOT = Path(__file__).absolute().parent.parent
Expand Down Expand Up @@ -333,67 +329,187 @@ def to_python(self):
)


@contextlib.contextmanager
def download_artifact(version):
# Download the list of all artifacts for a version
# and find the latest build URL for 'rest-resources-zip-*.zip'
resp = http.request(
"GET", f"https://artifacts-api.elastic.co/v1/versions/{version}"
def read_modules():
modules = {}

# Load the OpenAPI specification file
response = requests.get(
"https://raw.githubusercontent.com/opensearch-project/opensearch-api-specification/main/OpenSearch.openapi.json"
)
packages = json.loads(resp.data)["version"]["builds"][0]["projects"][
"opensearchpy"
]["packages"]
for package in packages:
if re.match(r"^rest-resources-zip-.*\.zip$", package):
zip_url = packages[package]["url"]
break
else:
raise RuntimeError(
"Could not find the package 'rest-resources-zip-*.zip' in build"
)
data = response.json()

list_of_dicts = []

for path in data["paths"]:
for x in data["paths"][path]:
data["paths"][path][x].update({"path": path, "method": x})
list_of_dicts.append(data["paths"][path][x])

# Update parameters in each endpoint
for p in list_of_dicts:
if "parameters" in p:
params = []
parts = []

# Iterate over the list of parameters and update them
for x in p["parameters"]:
if "schema" in x and "$ref" in x["schema"]:
schema_path_ref = x["schema"]["$ref"].split("/")[-1]
x["schema"] = data["components"]["schemas"][schema_path_ref]
params.append(x)
else:
params.append(x)

# Download the .jar file and unzip only the API
# .json files into a temporary directory
resp = http.request("GET", zip_url)
# Iterate over the list of updated parameters to separate "parts" from "params"
k = params.copy()
for q in k:
if q["in"] == "path":
parts.append(q)
params.remove(q)

tmp = Path(tempfile.mkdtemp())
zip = zipfile.ZipFile(io.BytesIO(resp.data))
for name in zip.namelist():
if not name.endswith(".json") or name == "schema.json":
continue
with (tmp / name.replace("rest-api-spec/api/", "")).open("wb") as f:
f.write(zip.read(name))
# Convert "params" and "parts" into the structure required by the generator.
params_new = {}
parts_new = {}

yield tmp
shutil.rmtree(tmp)
for m in params:
A = dict(type=m["schema"]["type"], description=m["description"])
if "enum" in m["schema"]:
A.update({"type": "enum"})
A.update({"options": m["schema"]["enum"]})

if "deprecated" in m:
A.update({"deprecated": m["deprecated"]})
params_new.update({m["name"]: A})

def read_modules(version):
modules = {}
# Removing the deprecated "type"
if "type" in params_new:
params_new.pop("type")

with download_artifact(version) as path:
for f in sorted(os.listdir(path)):
name, ext = f.rsplit(".", 1)
if bool(params_new):
p.update({"params": params_new})

if ext != "json" or name == "_common":
continue
p.pop("parameters")

for n in parts:
B = dict(type=n["schema"]["type"])

if "description" in n:
B.update({"description": n["description"]})

deprecated_new = {}
if "deprecated" in n:
B.update({"deprecated": n["deprecated"]})

if "x-deprecation-version" in n:
deprecated_new.update({"version": n["x-deprecation-version"]})

with open(path / f) as api_def:
api = json.load(api_def)[name]
if "x-deprecation-description" in n:
deprecated_new.update(
{"description": n["x-deprecation-description"]}
)

parts_new.update({n["name"]: B})

if bool(parts_new):
p.update({"parts": parts_new})

# Sort the input list by the value of the "x-operation-group" key
list_of_dicts = sorted(list_of_dicts, key=itemgetter("x-operation-group"))

# Group the input list by the value of the "x-operation-group" key
for key, value in groupby(list_of_dicts, key=itemgetter("x-operation-group")):
api = {}

# Extract the namespace and name from the 'x-operation-group'
if "." in key:
namespace, name = key.rsplit(".", 1)
else:
namespace = "__init__"
if "." in name:
namespace, name = name.rsplit(".", 1)
name = key

# Group the data in the current group by the "path" key
paths = []
for key2, value2 in groupby(value, key=itemgetter("path")):
# Extract the HTTP methods from the data in the current subgroup
methods = []
parts_final = {}
for z in value2:
methods.append(z["method"].upper())

# Update 'api' dictionary
if "documentation" not in api:
documentation = {"description": z["description"]}
api.update({"documentation": documentation})

if "params" not in api and "params" in z:
api.update({"params": z["params"]})

if "body" not in api and "requestBody" in z:
body = {"required": False}
if "required" in z["requestBody"]:
body.update({"required": z["requestBody"]["required"]})

if "description" in z["requestBody"]:
body.update({"description": z["requestBody"]["description"]})

q = z["requestBody"]["content"]["application/json"]["schema"][
"$ref"
].split("/")[-1]
if "x-serialize" in data["components"]["schemas"][q]:
body.update(
{
"serialize": data["components"]["schemas"][q][
"x-serialize"
]
}
)

api.update({"body": body})

if "parts" in z:
parts_final.update(z["parts"])

if "POST" in methods or "PUT" in methods:
api.update(
{
"stability": "stable",
"visibility": "public",
"headers": {
"accept": ["application/json"],
"content_type": ["application/json"],
},
}
)
else:
api.update(
{
"stability": "stable",
"visibility": "public",
"headers": {"accept": ["application/json"]},
}
)

# The data_frame API has been changed to transform.
if namespace == "data_frame_transform_deprecated":
continue
if bool(deprecated_new) and bool(parts_final):
paths.append(
{
"path": key2,
"methods": methods,
"parts": parts_final,
"deprecated": deprecated_new,
}
)
elif bool(parts_final):
paths.append({"path": key2, "methods": methods, "parts": parts_final})
else:
paths.append({"path": key2, "methods": methods})

api.update({"url": {"paths": paths}})

if namespace not in modules:
modules[namespace] = Module(namespace)
if namespace not in modules:
modules[namespace] = Module(namespace)

modules[namespace].add(API(namespace, name, api))
modules[namespace].pyi.add(API(namespace, name, api, is_pyi=True))
modules[namespace].add(API(namespace, name, api))
modules[namespace].pyi.add(API(namespace, name, api, is_pyi=True))

return modules

Expand Down Expand Up @@ -432,5 +548,4 @@ def dump_modules(modules):


if __name__ == "__main__":
version = sys.argv[1]
dump_modules(read_modules(version))
dump_modules(read_modules())
2 changes: 1 addition & 1 deletion utils/templates/base
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
{% if api.description %}
{{ api.description|replace("\n", " ")|wordwrap(wrapstring="\n ") }}
{% endif %}

{% if api.doc_url %}

`<{{ api.doc_url }}>`_
Expand Down Expand Up @@ -34,4 +35,3 @@
{% block request %}
return await self.transport.perform_request("{{ api.method }}", {% include "url" %}, params=params, headers=headers{% if api.body %}, body=body{% endif %})
{% endblock %}

0 comments on commit f3ddc99

Please sign in to comment.