forked from msoedov/validex
-
Notifications
You must be signed in to change notification settings - Fork 0
/
example.py
119 lines (94 loc) · 3.12 KB
/
example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import re
from pydantic import BaseModel
import validex
# Example usage
class Superhero(BaseModel):
    """Pydantic schema describing one superhero record.

    Defines two methods next to the fields: ``fix`` normalizes generated
    values in place and ``check_hallucinations`` rejects implausible ones.
    NOTE(review): presumably validex calls these as hooks — confirm.
    """

    name: str
    age: int
    power: str
    enemies: list[str]

    def fix(self):
        """Auto-fix generated data in place: a negative age is reset to 0."""
        age_is_negative = self.age < 0
        if age_is_negative:
            self.age = 0

    def check_hallucinations(self):
        """Raise ``ValueError`` for the first field that looks implausible.

        Checks run in a fixed order: name, age, power, then enemies.

        Raises:
            ValueError: if the name or any enemy name contains characters
                other than ASCII letters, whitespace or hyphens; if the age
                is below 0 or above 1000; if the power text is longer than
                50 characters; or if more than 10 enemies are listed.
        """
        # One pattern serves both the hero name and every enemy name.
        plausible_name = re.compile(r"^[A-Za-z\s-]+$")

        # Check name
        if plausible_name.match(self.name) is None:
            raise ValueError(f"Name '{self.name}' contains unusual characters")

        # Check age
        age = self.age
        if age < 0 or age > 1000:
            raise ValueError(f"Age {self.age} seems unrealistic")

        # Check power
        power_length = len(self.power)
        if power_length > 50:
            raise ValueError("Power description is unusually long")

        # Check enemies: the count first, then each name in list order
        enemy_count = len(self.enemies)
        if enemy_count > 10:
            raise ValueError("Unusually high number of enemies")
        for enemy in self.enemies:
            if plausible_name.match(enemy) is None:
                raise ValueError(f"Enemy name '{enemy}' contains unusual characters")
class Superhero2(BaseModel):
    """Second superhero schema: the ``Superhero`` fields plus ``character``.

    Declares fields only; it defines no ``fix`` or ``check_hallucinations``
    methods. ``main`` passes it to ``app.multi_extract`` together with
    ``Superhero``.
    """

    name: str
    age: int
    power: str
    # NOTE(review): intended content of ``character`` is not shown in this
    # file — confirm with the schema author.
    character: str
    enemies: list[str]
def main(inference=True):
    """Run the validex demo from source registration to inference.

    Registers several sources on a ``validex.App``, extracts ``Superhero``
    (and ``Superhero2``) records, prints the results together with cost and
    usage figures, exports ``fine_tune.jsonl`` and displays stats. When
    ``inference`` is true it then fits the app, saves ``state.validex`` and
    prints the structure inferred from a sample text.

    Args:
        inference: When false, return right after the stats are displayed,
            skipping ``fit``, ``save`` and ``infer_extract``.
    """
    app = validex.App()
    app.add("https://www.britannica.com/topic/list-of-superheroes-2024795")
    app.add("*.txt")
    # app.add("*.py")
    app.add("https://www.britannica.com/robots.txt")
    # The same URL is registered ten more times on purpose.
    # NOTE(review): presumably this exercises duplicate handling — confirm.
    for _ in range(10):
        app.add("https://www.britannica.com/robots.txt")
    app.add("*.pdf")
    # app.add("*.md")

    # These calls are placeholders and won't work without implementing the
    # extraction logic
    superheroes = app.extract(Superhero)
    print(f"Extracted superheroes: {list(superheroes)}")
    # Illustrative result shape only (nothing is built at runtime).
    # NOTE(review): presumably (model, source metadata) pairs — confirm:
    #   [
    #       (
    #           Superhero(name="Superman", age=35, power="Flight",
    #                     enemies=["Lex Luthor", "Doomsday"]),
    #           {"url": "https://www.britannica.com/topic/list-of-superheroes-2024795"},
    #       ),
    #       (
    #           Superhero(name="Wonder Woman", age=30, power="Super Strength",
    #                     enemies=["Ares", "Cheetah"]),
    #           {"url": "https://www.britannica.com/topic/list-of-superheroes-2024795"},
    #       ),
    #   ]

    multi_results = app.multi_extract(Superhero, Superhero2)
    print(f"Multi-extraction results: {multi_results}")

    first_hero = app.extract_first(Superhero)
    print(f"First extracted hero: {first_hero}")

    print(f"Total cost: ${app.cost()}")
    print(f"Total usage: {app.usage}")

    app.export_jsonl("fine_tune.jsonl")
    app.display_stats()

    # Everything below is the optional fit / save / infer stage.
    if not inference:
        return

    app.fit()
    app.save("state.validex")

    struct = app.infer_extract(
        """
Superhero Name: Quantum Spark
Real Name: Dr. Amelia Quark
Origin: Dr. Amelia Quark was a brilliant physicist working on
cutting-edge quantum mechanics research.
During an experiment gone awry, she was bathed in exotic particles,
fundamentally altering her molecular structure.
"""
    )
    print(f"Inferred structure: {struct}")
if __name__ == "__main__":
    # Script entry point. The completion message is kept inside the guard so
    # that importing this module runs nothing and prints nothing.
    main()
    print("Program execution completed")