Skip to content

Commit

Permalink
add function for saving in csv and json format
Browse files Browse the repository at this point in the history
  • Loading branch information
VinciGit00 committed Feb 13, 2024
1 parent 3a77023 commit ba2af0b
Show file tree
Hide file tree
Showing 12 changed files with 155 additions and 46 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,10 @@ The output format is a dict and it's the following:
}
```

# Credits
Thanks to:
- [nicolapiazzalunga](https://github.com/nicolapiazzalunga): for inspiring yosoai/convert_to_csv.py and yosoai/convert_to_json.py functions

# Developed by

<p align="center">
Expand Down
2 changes: 2 additions & 0 deletions tests/result.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
0,1,2
1,2,3
1 change: 1 addition & 0 deletions tests/result.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"trial": [1, 2, 3]}
18 changes: 18 additions & 0 deletions tests/test_convert_to_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
"""Module for testing convert_to_json inside the folder yosoai/convert_to_json.py"""
import unittest
from yosoai.convert_to_csv import convert_to_csv


class TestConvertToCsvFunction(unittest.TestCase):
    """
    class for testing convert_to_csv inside the folder yosoai/convert_to_csv.py
    """

    def test_get_json(self):
        """
        function for testing convert_to_csv inside the folder yosoai/convert_to_csv.py

        Saves a small dictionary as CSV and checks the content of the
        generated file.
        """
        # NOTE(review): the method name is kept for compatibility even though
        # it exercises convert_to_csv, not a JSON helper.
        # Imported locally so the module's top-level imports stay untouched.
        import os
        import tempfile

        example = {"trial": [1, 2, 3]}
        filename = "result"
        # Write into a throwaway directory instead of a path relative to the
        # current working directory, so the test neither depends on where it
        # is launched from nor leaves files behind in the repository.
        with tempfile.TemporaryDirectory() as path:
            convert_to_csv(example, filename, path)
            with open(os.path.join(path, f"{filename}.csv"),
                      encoding="utf-8") as result:
                # One row per dictionary key; the header holds the positional
                # column labels and the key itself is not written.
                self.assertEqual(result.read().splitlines(),
                                 ["0,1,2", "1,2,3"])
20 changes: 20 additions & 0 deletions tests/test_convert_to_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""
Module for testing convert_to_json inside the folder yosoai/convert_to_json.py
"""
import unittest
from yosoai.convert_to_json import convert_to_json


class TestConvertToCsvFunction(unittest.TestCase):
    """
    class for testing convert_to_json inside the folder yosoai/convert_to_json.py
    """
    # NOTE(review): the class name looks copy-pasted from the CSV test module;
    # it is kept unchanged here so existing test selections keep working.

    def test_get_json(self):
        """
        function for testing convert_to_json inside the folder yosoai/convert_to_json.py

        Saves a small dictionary as JSON and checks that reading the
        generated file back yields the same dictionary.
        """
        # Imported locally so the module's top-level imports stay untouched.
        import json
        import os
        import tempfile

        example = {"trial": [1, 2, 3]}
        filename = "result"
        # Write into a throwaway directory instead of a path relative to the
        # current working directory, so the test neither depends on where it
        # is launched from nor leaves files behind in the repository.
        with tempfile.TemporaryDirectory() as path:
            convert_to_json(example, filename, path)
            with open(os.path.join(path, f"{filename}.json"),
                      encoding="utf-8") as result:
                self.assertEqual(json.load(result), example)
Empty file removed tests/unit_tests/__init__.py
Empty file.
23 changes: 0 additions & 23 deletions tests/unit_tests/test_yosoai.py

This file was deleted.

4 changes: 2 additions & 2 deletions yosoai/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@
"""
from .class_creator import create_class
from .class_generator import Generator
from .getter import _get_function
from .token_calculator import truncate_text_tokens
from .getter import _get_function
from .request import send_request
from .convert_to_csv import convert_to_csv
36 changes: 36 additions & 0 deletions yosoai/convert_to_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""
Module that given a filename and a position saves the file in the csv format
"""
import os
import pandas as pd


def convert_to_csv(data: dict, filename: str, position: str):
    """
    Convert data to CSV format and save it to a file.

    The dictionary is loaded into a DataFrame with one row per key
    (``orient='index'``) and written without the index column, so the keys
    themselves do not appear in the output; the header row holds the
    positional column labels (0, 1, 2, ...).

    Args:
        data (dict): Data to save.
        filename (str): Name of the file to save without .csv extension.
        position (str): Directory where the file should be saved. It is
            created (including missing parents) if it does not exist yet.
    Raises:
        ValueError: If filename contains '.csv'.
        FileNotFoundError: If the target path cannot be found while saving.
        PermissionError: If the program does not have permission to write to the directory.
    """
    if ".csv" in filename:
        raise ValueError("The filename should not contain '.csv'")

    try:
        os.makedirs(position, exist_ok=True)
        pd.DataFrame.from_dict(data, orient='index').to_csv(
            os.path.join(position, f"{filename}.csv"), index=False)
    except FileNotFoundError as fnfe:
        raise FileNotFoundError(
            f"The specified directory '{position}' does not exist.") from fnfe
    except PermissionError as pe:
        raise PermissionError(
            f"You don't have permission to write to '{position}'.") from pe
    # Any other exception propagates unchanged; a catch-all that only
    # re-raises would add nothing.
36 changes: 36 additions & 0 deletions yosoai/convert_to_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""
Module that given a filename and a position saves the file in the json format
"""
import json
import os


def convert_to_json(data: dict, filename: str, position: str):
    """
    Convert data to JSON format and save it to a file.

    Args:
        data (dict): Data to save.
        filename (str): Name of the file to save without .json extension.
        position (str): Directory where the file should be saved. It is
            created (including missing parents) if it does not exist yet.
    Raises:
        ValueError: If filename contains '.json'.
        FileNotFoundError: If the target path cannot be found while saving.
        PermissionError: If the program does not have permission to write to the directory.
    """
    if ".json" in filename:
        raise ValueError("The filename should not contain '.json'")

    try:
        os.makedirs(position, exist_ok=True)
        with open(os.path.join(position, f"{filename}.json"), "w", encoding="utf-8") as f:
            # Serialize straight into the file handle instead of building the
            # whole JSON string in memory first; the bytes written are the same.
            json.dump(data, f)
    except FileNotFoundError as fnfe:
        raise FileNotFoundError(
            f"The specified directory '{position}' does not exist.") from fnfe
    except PermissionError as pe:
        raise PermissionError(
            f"You don't have permission to write to '{position}'.") from pe
    # Any other exception (e.g. a TypeError for non-serializable data)
    # propagates unchanged; a catch-all that only re-raises would add nothing.
4 changes: 2 additions & 2 deletions yosoai/json_getter.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ def get_json(key: str, link: str, model_name: str,
prompt = PromptTemplate(
template=("You are a website scraper and you want to extract information "
"in a schema like the example provided. Write a dictionary where "
"the key is the section and the value is the type.\n{format_instructions}\n{query}\n"
". Example: {example}"),
"the key is the section and the value is the type"
".\n{format_instructions}\n{query}\n. Example: {example}"),
input_variables=["query"],
partial_variables={
"format_instructions": parser.get_format_instructions(),
Expand Down
53 changes: 34 additions & 19 deletions yosoai/token_calculator.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,42 @@
"""
Module for calculating token truncation for text
"""
import tiktoken
from typing import List
from tiktoken import tokenizer

# Token limit for each supported model, keyed by model name.
# truncate_text_tokens looks the model up here to size its chunks, so a model
# missing from this table cannot be used with that function.
# NOTE(review): the values look like the providers' published context-window
# sizes — confirm against the current model documentation before editing.
models_tokens = {
"gpt-3.5-turbo-0125": 16385,
"gpt-3.5-turbo": 4096,
"gpt-3.5-turbo-1106": 16385,
"gpt-3.5-turbo-instruct": 4096,
"gpt-4-0125-preview": 128000,
"gpt-4-turbo-preview": 128000,
"gpt-4-1106-preview": 128000,
"gpt-4-vision-preview": 128000,
"gpt-4": 8192,
"gpt-4-0613": 8192,
"gpt-4-32k": 32768,
"gpt-4-32k-0613": 32768,
}


def truncate_text_tokens(text: str, model: str, encoding_name: str) -> List[str]:
    """
    It creates a list of strings to create max dimension tokenizable elements

    The text is encoded into tokens, the token sequence is cut into
    consecutive slices of at most ``models_tokens[model] - 500`` tokens, and
    each slice is decoded back into a string.

    Args:
        text (str): The input text to be truncated into tokenizable elements.
        model (str): The name of the language model to be used; it must be a
            key of models_tokens.
        encoding_name (str): The name of the tiktoken encoding to be used.
    Returns:
        List[str]: A list of tokenizable elements created from the input text.
            An empty text yields an empty list.
    Raises:
        KeyError: If model is not a key of models_tokens.
    """
    encoding = tiktoken.get_encoding(encoding_name)
    # Stay 500 tokens below the model's limit.
    # NOTE(review): presumably headroom for the prompt/response — confirm.
    max_tokens = models_tokens[model] - 500
    encoded_text = encoding.encode(text)

    # Split on token boundaries (not characters) so every chunk is guaranteed
    # to fit within max_tokens once re-encoded piece by piece.
    chunks = [encoded_text[i:i + max_tokens]
              for i in range(0, len(encoded_text), max_tokens)]

    return [encoding.decode(chunk) for chunk in chunks]

0 comments on commit ba2af0b

Please sign in to comment.