limit deps of cmd line aici; fixes #32
mmoskal committed Jan 19, 2024
1 parent e60a2f2 commit d57dec8
Showing 8 changed files with 71 additions and 63 deletions.
5 changes: 3 additions & 2 deletions harness/run_hf.py
@@ -8,6 +8,7 @@
 import torch
 import time
 import pyaici
+import pyaici.comms
 
 from transformers import (
     AutoTokenizer,
@@ -25,7 +26,7 @@
 
 class AsyncLogitProcessor(LogitsProcessor, BaseStreamer):
     def __init__(
-        self, runner: pyaici.AiciRunner, module_id: str, module_arg: str
+        self, runner: pyaici.comms.AiciRunner, module_id: str, module_arg: str
     ) -> None:
         super().__init__()
         self.runner = runner
@@ -72,7 +73,7 @@ def main(args):
     )
     model = cast(PreTrainedModel, model)
 
-    runner = pyaici.AiciRunner.from_cli(args)
+    runner = pyaici.runner_from_cli(args)
 
     arg = ""
     if args.aici_module_arg:
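For context, a minimal sketch of how a harness might wire AsyncLogitProcessor into HF generate(); this wiring is not part of the commit, and the model, tokenizer, args, and module id are assumed from the harness's surrounding setup:

# Hypothetical usage: AsyncLogitProcessor subclasses both LogitsProcessor and
# BaseStreamer, so it is passed to generate() in both roles.
from transformers import LogitsProcessorList

runner = pyaici.runner_from_cli(args)
proc = AsyncLogitProcessor(runner, "<module-id>", "")
inputs = tokenizer("Hello", return_tensors="pt")
model.generate(
    **inputs,
    max_new_tokens=32,
    logits_processor=LogitsProcessorList([proc]),
    streamer=proc,
)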
2 changes: 1 addition & 1 deletion harness/run_vllm.py
@@ -12,7 +12,7 @@ def main(args: argparse.Namespace):
     engine_args = EngineArgs.from_cli_args(args)
 
     # build it first, so it fails fast
-    aici = pyaici.AiciRunner.from_cli(args)
+    aici = pyaici.runner_from_cli(args)
 
     engine = LLMEngine.from_engine_args(engine_args)
     pyaici.vllm.install(aici)
2 changes: 1 addition & 1 deletion harness/vllm_server.py
@@ -455,7 +455,7 @@ async def fake_stream_generator() -> AsyncGenerator[str, None]:
     served_model = args.model
 
     # build it first, so it fails fast
-    aici = pyaici.AiciRunner.from_cli(args)
+    aici = pyaici.runner_from_cli(args)
 
     engine_args = AsyncEngineArgs.from_cli_args(args)
     engine = AsyncLLMEngine.from_engine_args(engine_args)
1 change: 1 addition & 0 deletions llama-cpp-low/llama.cpp
Submodule llama.cpp added at 381ee1
58 changes: 53 additions & 5 deletions pyaici/__init__.py
@@ -1,6 +1,54 @@
-from pyaici.comms import AiciRunner, add_cli_args
+import argparse
 
-__all__ = [
-    "AiciRunner",
-    "add_cli_args",
-]
+
+def runner_from_cli(args):
+    from pyaici.comms import AiciRunner
+
+    aici = AiciRunner(
+        rtpath=args.aici_rt,
+        tokenizer=args.aici_tokenizer,
+        trace_file=args.aici_trace,
+        rtargs=args.aici_rtarg,
+    )
+    return aici
+
+
+def add_cli_args(parser: argparse.ArgumentParser, single=False):
+    parser.add_argument(
+        "--aici-rt",
+        type=str,
+        required=True,
+        help="path to aicirt",
+    )
+    parser.add_argument(
+        "--aici-tokenizer",
+        type=str,
+        default="llama",
+        help="tokenizer to use; llama, gpt4, ...",
+    )
+    parser.add_argument(
+        "--aici-trace",
+        type=str,
+        help="save trace of aicirt interaction to a JSONL file",
+    )
+    parser.add_argument(
+        "--aici-rtarg",
+        "-A",
+        type=str,
+        default=[],
+        action="append",
+        help="pass argument to aicirt process",
+    )
+
+    if single:
+        parser.add_argument(
+            "--aici-module",
+            type=str,
+            required=True,
+            help="id of the module to run",
+        )
+        parser.add_argument(
+            "--aici-module-arg",
+            type=str,
+            default="",
+            help="arg passed to module (filename)",
+        )
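This is the core of the dependency cut: runner_from_cli defers the pyaici.comms import into the function body, so a bare `import pyaici` no longer pulls in the runtime's dependencies. A minimal sketch of the intended wiring (the aicirt path and module id are placeholders):

import argparse
import pyaici

parser = argparse.ArgumentParser()
pyaici.add_cli_args(parser, single=True)
# --aici-rt and --aici-module are required; repeated -A flags accumulate
# into a list via action="append".
args = parser.parse_args(["--aici-rt", "./aicirt", "--aici-module", "<module-id>"])
runner = pyaici.runner_from_cli(args)  # pyaici.comms is imported only here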
10 changes: 5 additions & 5 deletions pyaici/cli.py
@@ -1,11 +1,11 @@
 import subprocess
-import ujson
+import json
 import sys
 import os
 import argparse
 
 from . import rest, jssrc
-from . import add_cli_args, AiciRunner
+from . import add_cli_args, runner_from_cli
 
 
 def cli_error(msg: str):
@@ -31,7 +31,7 @@ def build_rust(folder: str):
         stdout=-1,
         check=True,
     )
-    info = ujson.decode(r.stdout)
+    info = json.loads(r.stdout)
     if len(info["workspace_default_members"]) != 1:
         cli_error("please run from project, not workspace, folder")
     pkg_id = info["workspace_default_members"][0]
@@ -90,7 +90,7 @@ def ask_completion(cmd_args, *args, **kwargs):
     os.makedirs("tmp", exist_ok=True)
     path = "tmp/response.json"
     with open(path, "w") as f:
-        ujson.dump(res, f, indent=1)
+        json.dump(res, f, indent=1)
     print(f"response saved to {path}")
     print("Usage:", res["usage"])
     print("Storage:", res["storage"])
@@ -239,7 +239,7 @@ def main_inner():
         sys.exit(0)
 
     if args.subcommand == "benchrt":
-        AiciRunner.from_cli(args).bench()
+        runner_from_cli(args).bench()
        sys.exit(0)
 
     if args.subcommand == "tags":
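The workspace_default_members field consumed in build_rust comes from cargo's JSON metadata; a sketch of the kind of invocation that plausibly produces r.stdout (the exact cargo flags are an assumption, not shown in this hunk):

import json
import subprocess

# Assumed invocation; note stdout=-1 in the diff is the same constant
# as subprocess.PIPE.
r = subprocess.run(
    ["cargo", "metadata", "--format-version=1", "--no-deps"],
    stdout=subprocess.PIPE,
    check=True,
)
info = json.loads(r.stdout)
pkg_id = info["workspace_default_members"][0]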
42 changes: 0 additions & 42 deletions pyaici/comms.py
@@ -615,45 +615,3 @@ def response_by_seq_id(self, seq_id: int) -> Dict[str, Any]:
         Get the response for a given batch entry ID.
         """
         return self.last_mid_response.get(str(seq_id), None)
-
-
-def add_cli_args(parser: argparse.ArgumentParser, single=False):
-    parser.add_argument(
-        "--aici-rt",
-        type=str,
-        required=True,
-        help="path to aicirt",
-    )
-    parser.add_argument(
-        "--aici-tokenizer",
-        type=str,
-        default="llama",
-        help="tokenizer to use; llama, gpt4, ...",
-    )
-    parser.add_argument(
-        "--aici-trace",
-        type=str,
-        help="save trace of aicirt interaction to a JSONL file",
-    )
-    parser.add_argument(
-        "--aici-rtarg",
-        "-A",
-        type=str,
-        default=[],
-        action="append",
-        help="pass argument to aicirt process",
-    )
-
-    if single:
-        parser.add_argument(
-            "--aici-module",
-            type=str,
-            required=True,
-            help="id of the module to run",
-        )
-        parser.add_argument(
-            "--aici-module-arg",
-            type=str,
-            default="",
-            help="arg passed to module (filename)",
-        )
14 changes: 7 additions & 7 deletions pyaici/rest.py
@@ -1,5 +1,5 @@
 import requests
-import ujson
+import json
 import os
 import urllib.parse
 import sys
@@ -47,7 +47,7 @@ def _mk_url(path: str) -> str:
 def response_error(kind: str, resp: requests.Response):
     text = resp.text
     try:
-        d = ujson.decode(text)
+        d = json.loads(text)
         if "message" in d:
             text = d["message"]
     except:
@@ -63,7 +63,7 @@ def req(tp: str, url: str, **kwargs):
     if log_level >= 4:
         print(f"{tp.upper()} {url} headers={headers}")
     if "json" in kwargs:
-        print(ujson.dumps(kwargs["json"]))
+        print(json.dumps(kwargs["json"]))
     return requests.request(tp, url, headers=headers, **kwargs)
@@ -126,7 +126,7 @@ def completion(
 ):
     if ignore_eos is None:
         ignore_eos = not not ast_module
-    json = {
+    data = {
         "model": "",
         "prompt": prompt,
         "max_tokens": max_tokens,
@@ -137,15 +137,15 @@
         "aici_arg": aici_arg,
         "ignore_eos": ignore_eos,
     }
-    resp = req("post", "completions", json=json, stream=True)
+    resp = req("post", "completions", json=data, stream=True)
     if resp.status_code != 200:
         raise response_error("completions", resp)
     texts = [""] * n
     logs = [""] * n
     full_resp = []
    storage = {}
     res = {
-        "request": json,
+        "request": data,
         "response": full_resp,
         "text": texts,
         "logs": logs,
@@ -161,7 +161,7 @@
             continue
         decoded_line: str = line.decode("utf-8")
         if decoded_line.startswith("data: {"):
-            d = ujson.decode(decoded_line[6:])
+            d = json.loads(decoded_line[6:])
             full_resp.append(d)
             if "usage" in d:
                 res["usage"] = d["usage"]
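The json-to-data rename above is not cosmetic: the local dict was named json, which would shadow the newly imported json module inside completion(), breaking the json.loads call later in the same function. A small illustration of the fixed pattern:

import json

data = {"model": "", "prompt": "Hello"}  # renamed from `json` to avoid shadowing
payload = json.dumps(data)               # the stdlib module still resolves here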
