Merge pull request #122 from lean-dojo/external
Support external models
Peiyang-Song authored Sep 1, 2024
2 parents 5e35567 + 30a70c0 commit 8c2e4f8
Showing 10 changed files with 522 additions and 4 deletions.
23 changes: 23 additions & 0 deletions LeanCopilotTests/ModelAPIs.lean
@@ -99,4 +99,27 @@ def reproverExternalEncoder : ExternalEncoder := {
-- Go to ./python and run `uvicorn server:app --port 23337`
#eval encode reproverExternalEncoder "n : ℕ\n⊢ gcd n n = n"
/--
General-purpose LLM APIs: OpenAI, Claude, etc.
-/
def gpt4 : ExternalGenerator := {
  name := "gpt4"
  host := "localhost"
  port := 23337
}
#eval generate gpt4 "n : ℕ\n⊢ gcd n n = n"
/--
Math LLMs: InternLM, DeepSeekMath, etc.
-/
def internLM : ExternalGenerator := {
  name := "InternLM"
  host := "localhost"
  port := 23337
}
#eval generate internLM "n : ℕ\n⊢ gcd n n = n"
-/
17 changes: 14 additions & 3 deletions python/README.md
@@ -1,18 +1,29 @@
Python Server for External Models
=================================

This folder contains code for running some of the leading general-purpose and math-specific models as external models for Lean Copilot. It is also fairly easy to adapt the existing code to run other external models of your choice.

## Requirements

The setup is simple: the script below installs everything needed to run all external models currently supported in this folder. If you only want to run a subset of them, you may not need every package in the final `pip install` step.

```bash
conda create --name lean-copilot python=3.10 python numpy
conda activate lean-copilot
-pip install torch --index-url https://download.pytorch.org/whl/cu121 # Depending on whether you have CUDA and the CUDA version; see https://pytorch.org/.
-pip install fastapi uvicorn loguru transformers openai
+pip install torch --index-url https://download.pytorch.org/whl/cu121 # Depending on whether you have CUDA and, if so, your CUDA version; see https://pytorch.org/.
+pip install fastapi uvicorn loguru transformers openai anthropic google.generativeai vllm
```


## Running the Server

```bash
uvicorn server:app --port 23337
```

After the server is up and running, you can go to `LeanCopilotTests/ModelAPIs.lean` to try your external models out!
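
If you plan to use the API-backed runners, export the corresponding API keys before starting the server. `ANTHROPIC_KEY` and `GOOGLE_API_KEY` are the environment variables read by the Claude and Gemini runners in this folder; the OpenAI variable below is the SDK's usual default and is an assumption here, so check `oai_runner.py` for the exact name.

```bash
export ANTHROPIC_KEY="sk-ant-..."   # read by ClaudeRunner
export GOOGLE_API_KEY="..."         # read by GeminiRunner
export OPENAI_API_KEY="sk-..."      # assumed default for the OpenAI runner; verify in oai_runner.py
```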

## Contributions

We welcome contributions. If you think it would be beneficial to add other external models, or if you would like to contribute to Lean Copilot's external model support in other ways, please feel free to open a PR. The main entry points are this `python` folder and the `ModelAPIs.lean` file under `LeanCopilotTests`.

We use [`black`](https://pypi.org/project/black/) to format code in this folder.
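
To illustrate the contribution path described in the Contributions section above, here is a minimal, hypothetical sketch of a new runner built on the `Generator` interface from `python/external_models/external_parser.py`. The class name, prompt handling, and placeholder output are assumptions for illustration only; how a runner is exposed through `server.py` is not shown in this diff, so follow the existing runners for the real wiring.

```python
# Hypothetical sketch of a new external model runner (illustration only, not part of this PR).
from typing import List, Tuple

from .external_parser import Generator, choices_dedup


class MyModelRunner(Generator):
    """Skeleton for bringing your own external model; replace the body of `generate`."""

    def __init__(self, **args):
        self.name = args["model"]  # identifier for the model/backend you are wrapping

    def generate(self, input: str, target_prefix: str = "") -> List[Tuple[str, float]]:
        prompt = input + target_prefix
        # Call your model or API here and collect (tactic, score) candidates.
        candidates = [("simp", 0.0)]  # placeholder output
        return choices_dedup(candidates)
```

A new runner would also be exported from `python/external_models/__init__.py`, as the existing runners are, and hooked into the FastAPI server so it can be reached from `ModelAPIs.lean`.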
5 changes: 5 additions & 0 deletions python/external_models/__init__.py
@@ -0,0 +1,5 @@
from .oai_runner import OpenAIRunner
from .hf_runner import HFTacticGenerator
from .vllm_runner import VLLMTacticGenerator
from .claude_runner import ClaudeRunner
from .gemini_runner import GeminiRunner
47 changes: 47 additions & 0 deletions python/external_models/claude_runner.py
@@ -0,0 +1,47 @@
from typing import List, Tuple
import os

try:
    from anthropic import Anthropic
except ImportError as e:
    pass
from .external_parser import *


class ClaudeRunner(Generator, Transformer):
    client = Anthropic(api_key=os.getenv("ANTHROPIC_KEY"))

    def __init__(self, **args):
        self.client_kwargs: dict[str, str | float | int] = {
            "model": args["model"],
            "temperature": args["temperature"],
            "max_tokens": args["max_tokens"],
            "top_p": args["top_p"],
        }
        self.name = self.client_kwargs["model"]

    def generate(self, input: str, target_prefix: str = "") -> List[Tuple[str, float]]:
        prompt = pre_process_input(self.name, input + target_prefix)
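        # NOTE (assumption, not part of this PR): the call below uses Anthropic's legacy
        # Completions API; Claude 3 models are typically served via the Messages API, so it
        # may need adjusting depending on the installed `anthropic` SDK version.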

        response = self.client.completions.create(
            prompt=prompt,
            **self.client_kwargs,
        )
        content = response.completion

        results = [
            (post_process_output(self.name, content), 1.0)
        ]  # Currently Claude only supports one output.
        return choices_dedup(results)


if __name__ == "__main__":
    generation_kwargs = {
        "model": "claude-3-opus",
        "temperature": 0.9,
        "max_tokens": 1024,
        "top_p": 0.9,
    }

    model = ClaudeRunner(**generation_kwargs)
    print(model.generate("n : ℕ\n⊢ gcd n n = n"))
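
The `generate` call in `ClaudeRunner` relies on Anthropic's legacy Completions endpoint. As a hedged alternative (an assumption about your SDK version and model access, not part of this PR), a self-contained Messages-API variant of the same call might look like this; the model ID shown is illustrative.

```python
# Sketch only (assumption, not part of this PR): a Messages-API variant of the Claude call.
import os

from anthropic import Anthropic


def claude_messages_generate(
    prompt: str,
    model: str = "claude-3-opus-20240229",  # illustrative model ID; use whichever you have access to
    max_tokens: int = 1024,
    temperature: float = 0.9,
    top_p: float = 0.9,
) -> str:
    client = Anthropic(api_key=os.getenv("ANTHROPIC_KEY"))
    response = client.messages.create(
        model=model,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        messages=[{"role": "user", "content": prompt}],
    )
    # Messages responses carry a list of content blocks; take the text of the first one.
    return response.content[0].text
```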
83 changes: 83 additions & 0 deletions python/external_models/external_parser.py
@@ -0,0 +1,83 @@
import torch
import numpy as np
from typing import List, Tuple
from abc import ABC, abstractmethod


def get_cuda_if_available():
    return torch.device("cuda" if torch.cuda.is_available() else "cpu")


def pre_process_input(model_name, input):
    if model_name == "internlm/internlm2-math-plus-1_8b":
        prompt = (
            "My LEAN 4 state is:\n```lean\n"
            + input
            + "```\nPlease predict a possible tactic to help me prove the theorem."
        )
        prompt = f"""<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"""
    elif model_name == "gpt-3.5-turbo" or model_name == "gpt-4-turbo-preview":
        prompt = (
            "Here is a theorem you need to prove in Lean:\n"
            + input
            + "\nNow you should suggest one line tactic in lean code:"
        )
    elif "gemini" in model_name or "claude" in model_name:
        prompt = (
            "Here is a theorem you need to prove in Lean:\n"
            + input
            + "\nNow you should suggest one line tactic in lean code:"
        )
    else:
        raise NotImplementedError(f"External model '{model_name}' not supported")
    return prompt


def post_process_output(model_name, output):
    if model_name == "internlm/internlm2-math-plus-1_8b":
        result = (
            output.split("assistant")[-1]
            .split("lean")[-1]
            .split("```")[0]
            .split("\n")[1]
        )
    elif model_name == "gpt-3.5-turbo" or model_name == "gpt-4-turbo-preview":
        result = output.split("lean")[-1].split("```")[0].split("\n")[1]
    elif "gemini" in model_name or "claude" in model_name:
        result = output.split("lean")[-1].split("```")[0].split("\n")[1]
    else:
        raise NotImplementedError(f"External model '{model_name}' not supported")
    return result


def choices_dedup(output_list: List[tuple[str, float]]) -> List[tuple[str, float]]:
    # Keep the highest score for each distinct suggestion, then sort by score descending.
    unique_data = {}
    for item in output_list:
        if item[0] not in unique_data or item[1] > unique_data[item[0]]:
            unique_data[item[0]] = item[1]
    sorted_data = sorted(unique_data.items(), key=lambda x: x[1], reverse=True)
    return sorted_data


class Generator(ABC):
    @abstractmethod
    def generate(self, input: str, target_prefix: str = "") -> List[Tuple[str, float]]:
        pass


class Encoder(ABC):
    @abstractmethod
    def encode(self, input: str) -> np.ndarray:
        pass


class Transformer:
    def cuda(self) -> None:
        self.model.cuda()

    def cpu(self) -> None:
        self.model.cpu()

    @property
    def device(self) -> torch.device:
        return self.model.device
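
As a small illustration (not part of the diff) of how the helpers above behave: `choices_dedup` keeps the best score per distinct suggestion and sorts by score in descending order. The import path below assumes the snippet is run from the `python` folder.

```python
# Illustration of choices_dedup from external_parser.py (assumes running from the python folder).
from external_models.external_parser import choices_dedup

candidates = [
    ("simp", -0.7),
    ("rfl", -1.2),
    ("simp", -0.3),  # duplicate suggestion with a higher score; this one is kept
]
print(choices_dedup(candidates))
# Expected output: [('simp', -0.3), ('rfl', -1.2)]
```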
73 changes: 73 additions & 0 deletions python/external_models/gemini_runner.py
@@ -0,0 +1,73 @@
from typing import List, Tuple
import os

try:
    import google.generativeai as genai
    from google.generativeai import GenerationConfig
except ImportError as e:
    pass
from .external_parser import *


class GeminiRunner(Generator, Transformer):
    client = genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
    safety_settings = [
        {
            "category": "HARM_CATEGORY_HARASSMENT",
            "threshold": "BLOCK_NONE",
        },
        {
            "category": "HARM_CATEGORY_HATE_SPEECH",
            "threshold": "BLOCK_NONE",
        },
        {
            "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
            "threshold": "BLOCK_NONE",
        },
        {
            "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
            "threshold": "BLOCK_NONE",
        },
    ]

    def __init__(self, **args):
        self.client_kwargs: dict[str, str | float | int] = {
            "model": args["model"],
            "temperature": args["temperature"],
            "max_tokens": args["max_tokens"],
            "top_p": args["top_p"],
        }
        self.name = self.client_kwargs["model"]
        self.client = genai.GenerativeModel(args["model"])
        self.generation_config = GenerationConfig(
            candidate_count=1,
            max_output_tokens=args["max_tokens"],
            temperature=args["temperature"],
            top_p=args["top_p"],
        )

    def generate(self, input: str, target_prefix: str = "") -> List[Tuple[str, float]]:
        prompt = pre_process_input(self.name, input + target_prefix)

        response = self.client.generate_content(
            prompt,
            generation_config=self.generation_config,
            safety_settings=GeminiRunner.safety_settings,
        )

        results = [
            (post_process_output(self.name, response.text), 1.0)
        ]  # Currently Gemini only supports one output.
        return choices_dedup(results)


if __name__ == "__main__":
    generation_kwargs = {
        "model": "gemini-1.0-pro",
        "temperature": 0.9,
        "max_tokens": 1024,
        "top_p": 0.9,
    }

    model = GeminiRunner(**generation_kwargs)
    print(model.generate("n : ℕ\n⊢ gcd n n = n"))
88 changes: 88 additions & 0 deletions python/external_models/hf_runner.py
@@ -0,0 +1,88 @@
import torch
from loguru import logger
from typing import List, Tuple
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
)
from .external_parser import *


class HFTacticGenerator(Generator, Transformer):
    def __init__(self, **args) -> None:
        self.name = args["model"]
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.name, trust_remote_code=True
        )
        device = args["device"]
        if device == "auto":
            device = get_cuda_if_available()
        else:
            device = torch.device(device)
        logger.info(f"Loading {self.name} on {device}")
        self.model = AutoModelForCausalLM.from_pretrained(
            self.name, trust_remote_code=True
        ).to(device)

        self.generation_args: dict[str, str | float | int | bool] = {
            "do_sample": args["do_sample"],
            "temperature": args["temperature"],  # chat default is 0.8.
            "max_new_tokens": args["max_new_tokens"],
            "top_p": args["top_p"],  # chat default is 0.8.
            "num_return_sequences": args["num_return_sequences"],
            # "num_beams": self.num_return_sequences,  # With beam search, LLM outputs tend to lack diversity (few distinct tactics).
            "output_scores": args["output_scores"],
            "output_logits": args["output_logits"],
            "return_dict_in_generate": args["return_dict_in_generate"],
        }

    def generate(self, input: str, target_prefix: str = "") -> List[Tuple[str, float]]:
        prompt = input + target_prefix
        '''prompt= 'Here is a theorem you need to prove in Lean:\n'+prompt+'\nNow you should suggest one line tactic in lean code:'
        prompt = f"""<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"""
        '''
        prompt = pre_process_input(self.name, prompt)

        self.model = self.model.eval()

        tokenized_input = self.tokenizer(prompt, return_tensors="pt")
        eos_token_id = [
            self.tokenizer.eos_token_id,
            self.tokenizer.convert_tokens_to_ids(["<|im_end|>"])[0],
        ]
        outputs = self.model.generate(
            tokenized_input.input_ids.to(self.device),
            eos_token_id=eos_token_id,
            **self.generation_args,
        )
        response = self.tokenizer.batch_decode(
            outputs["sequences"], skip_special_tokens=True
        )

        # Pair each decoded sequence with a heuristic confidence derived from the generation scores.
        result = []
        index = 0
        for out, score in zip(response, outputs.scores):
            out = post_process_output(self.name, out)
            result.append((out, score[index].exp().sum().log().cpu().item()))
            index += 1
        result = choices_dedup(result)
        return result


if __name__ == "__main__":
    generation_kwargs = {
        "model": "internlm/internlm2-math-plus-1_8b",
        "temperature": 0.6,
        "max_new_tokens": 1024,
        "top_p": 0.9,
        "length_penalty": 0,
        "num_return_sequences": 64,
        "do_sample": True,
        "output_scores": True,
        "output_logits": False,
        "return_dict_in_generate": True,
        "device": "auto",
    }
    model = HFTacticGenerator(**generation_kwargs)
    model.cuda()
    print(model.generate("n : ℕ\n⊢ gcd n n = n"))
(Diffs for the remaining changed files are not expanded in this view.)
