Merge dev into main #52

Merged
merged 2 commits into main from dev on Jun 19, 2024
56 changes: 26 additions & 30 deletions Knowledge_Plugin/DOKE/call_openai.py
@@ -17,6 +17,8 @@
import queue
from concurrent.futures import ThreadPoolExecutor
import multiprocessing
import openai
from openai import OpenAI

api_keys = [
"your openai keys"
@@ -44,55 +46,49 @@ def generate_Davinci(api_key, text):

def generate_chatgpt(api_key, prompt, version):
# Initialize OpenAI GPT-3 with your API key
openai.api_key = api_key
openai.api_base = "https://api.openai.com/v1"
client = OpenAI(api_key=api_key)
text = [{'role': 'user', 'content': prompt}]
if version == "0301":
model = "gpt-3.5-turbo-0301"
else:
model = "gpt-3.5-turbo"

for i in range(MAX_RETRIES):
try:
# Call the GPT-3 chat model API with a timeout
response = openai.ChatCompletion.create(
response = client.chat.completions.create(
model=model,
messages=text,
temperature=0.0,
request_timeout=30,
max_tokens=2048,
frequency_penalty=0.0,
presence_penalty=0.0
)
content = response['choices'][0]['message']['content']
content = response.choices[0].message.content.strip()
return content
except Exception as e:
print(f"{api_key}\nError occurred: {e}. Retrying...")
time.sleep(INTERVAL) # sleep between retries
time.sleep(INTERVAL)
print(f"Failed to get response for prompt: {prompt} after {MAX_RETRIES} retries.")
return "None"

def generate_gpt4(prompt):
available_configs = [
{"api_key": "your apikey", "url": "deployment url"},
]
message = [{"role": "user", "content": prompt}]
data = {
"messages": message,
"max_tokens": 2048,
"temperature": 0.,
'n': 1,
}

def generate_gpt4(api_key, prompt):
client = OpenAI(api_key=api_key)
text = [{'role': 'user', 'content': prompt}]
for _ in range(MAX_RETRIES):
try:
config = random.choice(available_configs)
headers = {'Content-Type': 'application/json', 'api-key': config["api_key"]}
response = requests.post(config["url"], json=data, headers=headers)
# print(response)
if (response.status_code == 200):
answer = response.json()["choices"][0]["message"]['content'].strip()
return answer
response = client.chat.completions.create(
model="gpt-4",
messages=text,
temperature=0.0,
max_tokens=2048,
frequency_penalty=0.0,
presence_penalty=0.0
)
content = response.choices[0].message.content.strip()
return content
except Exception as e:
print(f"Error occurred: {e}. Retrying...")
time.sleep(30) # sleep between retries

print(f"{api_key}\nError occurred: {e}. Retrying...")
time.sleep(INTERVAL)

print("out of max_retry_times")
return "Error"
@@ -154,7 +150,7 @@ def worker(i, model, version):
index, prompt = prompts_queue.get()
api_key = api_keys[i % len(api_keys)]
if model == "GPT4":
result = generate_gpt4(prompt)
result = generate_gpt4(api_key, prompt)
if model == "ChatGPT":
result = generate_chatgpt(api_key, prompt, version)
elif model == "Davinci":
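The diff above migrates `call_openai.py` from the pre-1.0 `openai` module-level API (`openai.api_key`, `openai.ChatCompletion.create`) to the 1.x client interface, and sends GPT-4 requests through the same client instead of raw `requests.post` calls against a deployment URL. A minimal sketch of the new call pattern with the same retry loop (`MAX_RETRIES`, `INTERVAL`, and the model names come from the diff; the helper name and constant values are illustrative):

```python
import time
from openai import OpenAI

MAX_RETRIES = 3   # illustrative values; the repo defines its own constants
INTERVAL = 10

def call_chat_model(api_key, prompt, model="gpt-3.5-turbo"):
    # SDK >= 1.0: build a client per key instead of setting openai.api_key globally.
    # The old request_timeout argument is gone; a timeout can be passed to OpenAI(...) instead.
    client = OpenAI(api_key=api_key)
    messages = [{"role": "user", "content": prompt}]
    for _ in range(MAX_RETRIES):
        try:
            response = client.chat.completions.create(
                model=model,
                messages=messages,
                temperature=0.0,
                max_tokens=2048,
            )
            # Responses are typed objects now, not dicts.
            return response.choices[0].message.content.strip()
        except Exception as e:
            print(f"Error occurred: {e}. Retrying...")
            time.sleep(INTERVAL)
    return "None"
```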
6 changes: 3 additions & 3 deletions Knowledge_Plugin/DOKE/config/ml1m/popneg_empty.json
@@ -1,7 +1,7 @@
{
"sequential_data_path": "../../data/ml1m/sequential_data.txt",
"candidate_data_path": "../../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../../data/ml1m/metadata.json",
"sequential_data_path": "../data/ml1m/sequential_data.txt",
"candidate_data_path": "../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../data/ml1m/metadata.json",
"topk": 200,
"max_his_len": 50,
"template": [
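The remaining config changes all follow the same pattern: data paths that pointed at `../../data/...` or `data/...` are normalized to `../data/...`, so every reference resolves to `Knowledge_Plugin/data/` when the DOKE scripts are run from `Knowledge_Plugin/DOKE/`. A rough sketch of how such a config might be read and sanity-checked, using only the standard library (`load_config` is a hypothetical helper, not code from this repo):

```python
import json
import os

def load_config(path):
    # Hypothetical helper: read a DOKE config and warn about missing data files.
    with open(path, "r", encoding="utf-8") as f:
        config = json.load(f)
    for key, value in config.items():
        if key.endswith("_path") and not os.path.exists(value):
            # Paths like "../data/ml1m/metadata.json" are relative to the
            # working directory, assumed here to be Knowledge_Plugin/DOKE/.
            print(f"warning: {key} -> {value} not found from {os.getcwd()}")
    return config

config = load_config("config/ml1m/popneg_empty.json")
print(config["topk"], config["max_his_len"])  # 200, 50
```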
6 changes: 3 additions & 3 deletions Knowledge_Plugin/DOKE/config/ml1m/popneg_feature.json
@@ -1,7 +1,7 @@
{
"sequential_data_path": "../../data/ml1m/sequential_data.txt",
"candidate_data_path": "../../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../../data/ml1m/metadata.json",
"sequential_data_path": "../data/ml1m/sequential_data.txt",
"candidate_data_path": "../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../data/ml1m/metadata.json",
"topk": 200,
"max_his_len": 50,
"template": [
8 changes: 4 additions & 4 deletions Knowledge_Plugin/DOKE/config/ml1m/popneg_global_I2I.json
@@ -1,8 +1,8 @@
{
"sequential_data_path": "../../data/ml1m/sequential_data.txt",
"candidate_data_path": "../../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../../data/ml1m/metadata.json",
"global_cf_data_path": "data/ml1m/global_CF.json",
"sequential_data_path": "../data/ml1m/sequential_data.txt",
"candidate_data_path": "../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../data/ml1m/metadata.json",
"global_cf_data_path": "../data/ml1m/global_CF.json",
"topk": 200,
"max_his_len": 50,
"template": [
@@ -2,7 +2,7 @@
"sequential_data_path": "../data/ml1m/sequential_data.txt",
"candidate_data_path": "../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../data/ml1m/metadata.json",
"cf_data_path": "data/ml1m/MF_CF_candidate_pop.json",
"cf_data_path": "../data/ml1m/MF_CF_candidate_pop.json",
"topk": 200,
"max_his_len": 50,
"template": [
8 changes: 4 additions & 4 deletions Knowledge_Plugin/DOKE/config/ml1m/popneg_his-can_I2I.json
@@ -1,8 +1,8 @@
{
"sequential_data_path": "../../data/ml1m/sequential_data.txt",
"candidate_data_path": "../../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../../data/ml1m/metadata.json",
"cf_data_path": "data/ml1m/MF_CF_candidate_pop.json",
"sequential_data_path": "../data/ml1m/sequential_data.txt",
"candidate_data_path": "../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../data/ml1m/metadata.json",
"cf_data_path": "../data/ml1m/MF_CF_candidate_pop.json",
"topk": 200,
"max_his_len": 50,
"template": [
@@ -2,7 +2,7 @@
"sequential_data_path": "../data/ml1m/sequential_data.txt",
"candidate_data_path": "../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../data/ml1m/metadata.json",
"cf_data_path": "data/ml1m/MF_CF_candidate_pop.json",
"cf_data_path": "../data/ml1m/MF_CF_candidate_pop.json",
"topk": 200,
"max_his_len": 50,
"template": [
@@ -2,7 +2,7 @@
"sequential_data_path": "../data/ml1m/sequential_data.txt",
"candidate_data_path": "../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../data/ml1m/metadata.json",
"cf_data_path": "data/ml1m/MF_CF_candidate_pop.json",
"cf_data_path": "../data/ml1m/MF_CF_candidate_pop.json",
"topk": 200,
"max_his_len": 50,
"template": [
@@ -2,7 +2,7 @@
"sequential_data_path": "../data/ml1m/sequential_data.txt",
"candidate_data_path": "../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../data/ml1m/metadata.json",
"u2i_data_path": "data/ml1m/SASRec_U2I_candidate_pop.json",
"u2i_data_path": "../data/ml1m/SASRec_U2I_candidate_pop.json",
"topk": 200,
"max_his_len": 50,
"template": [
8 changes: 4 additions & 4 deletions Knowledge_Plugin/DOKE/config/ml1m/popneg_his-can_U2I.json
@@ -1,8 +1,8 @@
{
"sequential_data_path": "../../data/ml1m/sequential_data.txt",
"candidate_data_path": "../../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../../data/ml1m/metadata.json",
"u2i_data_path": "data/ml1m/SASRec_U2I_candidate_pop.json",
"sequential_data_path": "../data/ml1m/sequential_data.txt",
"candidate_data_path": "../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../data/ml1m/metadata.json",
"u2i_data_path": "../data/ml1m/SASRec_U2I_candidate_pop.json",
"topk": 200,
"max_his_len": 50,
"template": [
@@ -2,7 +2,7 @@
"sequential_data_path": "../data/ml1m/sequential_data.txt",
"candidate_data_path": "../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../data/ml1m/metadata.json",
"u2i_data_path": "data/ml1m/SASRec_U2I_candidate_pop.json",
"u2i_data_path": "../data/ml1m/SASRec_U2I_candidate_pop.json",
"topk": 200,
"max_his_len": 50,
"template": [
@@ -2,7 +2,7 @@
"sequential_data_path": "../data/ml1m/sequential_data.txt",
"candidate_data_path": "../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../data/ml1m/metadata.json",
"u2i_data_path": "data/ml1m/SASRec_U2I_candidate_pop.json",
"u2i_data_path": "../data/ml1m/SASRec_U2I_candidate_pop.json",
"topk": 200,
"max_his_len": 50,
"template": [
@@ -2,7 +2,7 @@
"sequential_data_path": "../data/ml1m/sequential_data.txt",
"candidate_data_path": "../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../data/ml1m/metadata.json",
"u2i_data_path": "data/ml1m/SASRec_U2I_candidate_pop.json",
"u2i_data_path": "../data/ml1m/SASRec_U2I_candidate_pop.json",
"topk": 200,
"max_his_len": 50,
"template": [
2 changes: 1 addition & 1 deletion Knowledge_Plugin/DOKE/config/ml1m/popneg_his_I2I.json
@@ -2,7 +2,7 @@
"sequential_data_path": "../data/ml1m/sequential_data.txt",
"candidate_data_path": "../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../data/ml1m/metadata.json",
"cf_data_path": "data/ml1m/normalized_CF.json",
"cf_data_path": "../data/ml1m/normalized_CF.json",
"topk": 200,
"max_his_len": 50,
"template": [
4 changes: 2 additions & 2 deletions Knowledge_Plugin/DOKE/config/ml1m/popneg_his_I2I_path-I.json
@@ -2,8 +2,8 @@
"sequential_data_path": "../data/ml1m/sequential_data.txt",
"candidate_data_path": "../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../data/ml1m/metadata.json",
"cf_data_path": "data/ml1m/normalized_CF.json",
"reasoning_path_data_path": "data/ml1m/path_text_dict-I.json",
"cf_data_path": "../data/ml1m/normalized_CF.json",
"reasoning_path_data_path": "../data/ml1m/path_text_dict-I.json",
"topk": 200,
"max_his_len": 50,
"template": [
4 changes: 2 additions & 2 deletions Knowledge_Plugin/DOKE/config/ml1m/popneg_his_I2I_path-II.json
@@ -2,8 +2,8 @@
"sequential_data_path": "../data/ml1m/sequential_data.txt",
"candidate_data_path": "../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../data/ml1m/metadata.json",
"cf_data_path": "data/ml1m/normalized_CF.json",
"reasoning_path_data_path": "data/ml1m/path_text_dict-II.json",
"cf_data_path": "../data/ml1m/normalized_CF.json",
"reasoning_path_data_path": "../data/ml1m/path_text_dict-II.json",
"topk": 200,
"max_his_len": 50,
"template": [
@@ -2,8 +2,8 @@
"sequential_data_path": "../data/ml1m/sequential_data.txt",
"candidate_data_path": "../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../data/ml1m/metadata.json",
"cf_data_path": "data/ml1m/normalized_CF.json",
"reasoning_path_data_path": "data/ml1m/path_text_dict-III.json",
"cf_data_path": "../data/ml1m/normalized_CF.json",
"reasoning_path_data_path": "../data/ml1m/path_text_dict-III.json",
"topk": 200,
"max_his_len": 50,
"template": [
4 changes: 2 additions & 2 deletions Knowledge_Plugin/DOKE/config/ml1m/popneg_his_I2I_path.json
@@ -2,8 +2,8 @@
"sequential_data_path": "../data/ml1m/sequential_data.txt",
"candidate_data_path": "../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../data/ml1m/metadata.json",
"cf_data_path": "data/ml1m/normalized_CF.json",
"reasoning_path_data_path": "data/ml1m/path_text_dict.json",
"cf_data_path": "../data/ml1m/normalized_CF.json",
"reasoning_path_data_path": "../data/ml1m/path_text_dict.json",
"topk": 200,
"max_his_len": 50,
"template": [
8 changes: 4 additions & 4 deletions Knowledge_Plugin/DOKE/config/ml1m/popneg_his_U2I.json
@@ -1,8 +1,8 @@
{
"sequential_data_path": "../../data/ml1m/sequential_data.txt",
"candidate_data_path": "../../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../../data/ml1m/metadata.json",
"u2i_data_path": "data/ml1m/SASRec_U2I_pop.json",
"sequential_data_path": "../data/ml1m/sequential_data.txt",
"candidate_data_path": "../data/ml1m/negative_samples_pop.txt",
"meta_data_path": "../data/ml1m/metadata.json",
"u2i_data_path": "../data/ml1m/SASRec_U2I_pop.json",
"topk": 200,
"max_his_len": 50,
"template": [
1 change: 1 addition & 0 deletions Knowledge_Plugin/Knowledge_Extraction/extract_U2I.py
@@ -69,6 +69,7 @@ def prepare_U2I_dict(embedding, sequential_data, candidate_data):
candidate_scores = [(item2_id, score) for item2_id, score in enumerate(user_item_score[idx]) if item2_id in candidates]
U2I_candidate_dict[user] = sorted(candidate_scores, key=lambda x:-x[1])[:20]
return U2I_dict, U2I_candidate_dict

if __name__ == "__main__":
parser = argparse.ArgumentParser(description='extract CF information')
parser.add_argument('--dataset', type=str, default='steam', help='dataset')
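The change to extract_U2I.py only adds a blank line before the `__main__` guard, but the surrounding context shows how the U2I candidate knowledge is produced: for each user, scores over the candidate items are ranked and the top 20 are kept. A simplified sketch of that ranking step (`user_item_score` and `candidates` stand in for the arrays the repo derives from trained embeddings):

```python
import numpy as np

def top_candidates(user_item_score, candidates, k=20):
    # Keep only items in the candidate set, then rank by score, descending.
    candidate_scores = [
        (item_id, float(score))
        for item_id, score in enumerate(user_item_score)
        if item_id in candidates
    ]
    return sorted(candidate_scores, key=lambda x: -x[1])[:k]

# Toy usage: 10 items, 4 of which are candidates for this user.
scores = np.random.rand(10)
print(top_candidates(scores, candidates={1, 3, 5, 7}))
```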
8 changes: 4 additions & 4 deletions Knowledge_Plugin/README.md
@@ -25,10 +25,10 @@ For example:
```bash
cd Knowledge_Extraction
python extract_I2I.py \
--dataset beauty \
--dataset ml1m \
--negative_type pop
python extract_U2I.py \
--dataset beauty \
--dataset ml1m \
--negative_type pop
```

@@ -46,8 +46,8 @@ python generate_prompt.py \
--config config/ml1m/popneg_his_I2I.json \
--dataset ml1m
python call_openai.py \
--prompt out/prompts/ml1m/popneg_his_I2I_path.json \
--prompt out/prompts/ml1m/popneg_his_I2I.json \
--model ChatGPT \
--dataset ml1m
bash metric.bash out/result/ml1m/ChatGPT_popneg_his_I2I_path ml1m
bash metric.bash out/result/ml1m/ChatGPT_popneg_his_I2I ml1m
```
@@ -5,6 +5,7 @@ We need to process the following data sets separately:
+ Online Retail (https://www.kaggle.com/carrie1/ecommerce-data)

# Download
Create the raw data directory under Knowledge_Plugin/:
```bash
mkdir data/raw_data
cd data/raw_data
@@ -22,6 +23,9 @@

Run each notebook according to the dataset.

+ data_preprocess_amazon.ipynb
+ data_preprocess_ml1m.ipynb
+ data_preprocess_onlineretail.ipynb

# Result
