main.py
import re
import requests
from bs4 import BeautifulSoup
import marvin
from pydantic import BaseModel, Field
# Provide an OpenAI API key for Marvin
# ...
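# (Marvin reads the OpenAI key from its settings/environment; setting the
# OPENAI_API_KEY environment variable before running the script is one option,
# but check the Marvin docs for the exact configuration it expects.)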
# Provide a GovInfo API key (free!)
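# (A free key can be requested via the GovInfo API signup page at api.data.gov.)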
GOVINFO_API_KEY = "PASTE_YOUR_KEY_HERE"
# Provide a date to get Congressional records from
YEAR = "2024"
MONTH = "01"
DAY = "29"
# Marvin class that defines a bill
class Bill(BaseModel):
    title: str = Field(description='Official title of the bill')
    number: str = Field(description='H.R. or S. + bill number')
    summary: str = Field(description='A cohesive, fluent, TL;DR style summary of the bill.')
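# (Marvin passes the Pydantic schema, including these Field descriptions, to the
# LLM, so the descriptions should act as extraction instructions.)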
# Get Congressional documents released on the provided date
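# (Congressional Record packages are named CREC-YYYY-MM-DD; granuleClass=DAILYDIGEST
# limits the results to the Daily Digest sections of that day's record.)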
crec = requests.get(f"https://api.govinfo.gov/packages/CREC-{YEAR}-{MONTH}-{DAY}/granules?pageSize=1000&granuleClass=DAILYDIGEST&api_key={GOVINFO_API_KEY}&offsetMark=*").json()
# Exit if no results are found
if crec["count"] == 0:
    print("No results found.")
    exit()
# Get info about the Daily Digests from the House and Senate
daily_digest_house = next((item for item in crec["granules"] if item["title"] == "Daily Digest/House of Representatives"), None)
daily_digest_senate = next((item for item in crec["granules"] if item["title"] == "Daily Digest/Senate"), None)
# Exit if no results are found
if daily_digest_house is None or daily_digest_senate is None:
    print("Daily Digest not found.")
    exit()
# Get links for the Daily Digests from the House and Senate
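# (Each granule-detail response includes a 'download' map with links to the
# available formats, including the plain-text txtLink used below.)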
daily_digest_house_rec = requests.get(f"{daily_digest_house['granuleLink']}?api_key={GOVINFO_API_KEY}").json()
daily_digest_senate_rec = requests.get(f"{daily_digest_senate['granuleLink']}?api_key={GOVINFO_API_KEY}").json()
# Get the text of the Daily Digests from the House and Senate
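# (The txtLink serves the text wrapped in simple HTML, so the content is pulled
# out of the <pre> tag below.)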
daily_digest_house_soup = BeautifulSoup(requests.get(f"{daily_digest_house_rec['download']['txtLink']}?api_key={GOVINFO_API_KEY}").content, 'html.parser')
daily_digest_senate_soup = BeautifulSoup(requests.get(f"{daily_digest_senate_rec['download']['txtLink']}?api_key={GOVINFO_API_KEY}").content, 'html.parser')
# Replace multiple spaces with a single space to reduce # of tokens and save on API costs ;)
daily_digest_house_text = re.sub(' +', ' ', daily_digest_house_soup.pre.text)
daily_digest_senate_text = re.sub(' +', ' ', daily_digest_senate_soup.pre.text)
# The magic: extract bills from the Daily Digests using Marvin!
bills = marvin.extract(daily_digest_house_text + "\n" + daily_digest_senate_text, Bill)
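# (marvin.extract should return a list of Bill instances populated from the combined
# digest text; very long digests may approach the model's context limit, in which
# case the text could be chunked and extracted in pieces.)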
# Print the bills!
print("Bills:\n")
for bill in bills:
    print(f"{bill.number}: {bill.title}\n{bill.summary}\n\n")