# app.py
import math
import streamlit as st
import praw
import ollama
from typing import List, Dict
from operator import itemgetter

## Set to your production values
APP_ID = "GBFYIgcOttkM61vPhlztqw"
APP_SECRET = "bODxWCRaXKxX71yey5JrOzlIBZrxeg"
USER_AGENT = "testingLangchain"


def get_available_models() -> List[str]:
    """Fetch available models from Ollama."""
    try:
        models = ollama.list()
        return [model["name"] for model in models["models"]]
    except Exception as e:
        st.error(f"Error fetching Ollama models: {str(e)}")
        return []
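
# Note (environment assumption, not from the original code): newer releases of
# the ollama Python client (0.4+) return typed response objects where each
# entry exposes the model name under a "model" field rather than a "name" key,
# so the dict access above may need adjusting for your installed version.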


def search_reddit_posts(reddit: praw.Reddit, topic: str, num_posts: int) -> List[Dict]:
    """Search Reddit for posts on a given topic and sort by score."""
    post_analysis = []
    try:
        results = reddit.subreddit("all").search(topic, limit=num_posts)
        for post in results:
            analysis = {
                "title": post.title,
                "score": post.score,
                "url": post.url,
                "subreddit": post.subreddit.display_name,
            }
            post_analysis.append(analysis)
        # Sort posts by score in descending order
        return sorted(post_analysis, key=itemgetter("score"), reverse=True)
    except Exception as e:
        st.error(f"Error searching Reddit: {str(e)}")
        return []
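
# Design note: subreddit("all").search() returns results in Reddit's default
# "relevance" order, which is why the posts are re-sorted by score locally
# before being weighted.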


def calculate_weight(score: int) -> float:
    """Calculate weight for a post based on its score using a logarithmic scale."""
    # Using log scale to prevent extremely high-scoring posts from completely dominating
    # Adding 1 to avoid log(0), and using log base 10 for more intuitive scaling
    return math.log10(score + 1) + 1
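
# Rough feel for the scale (worked values, not part of the original source):
#   score 0   -> log10(1) + 1    = 1.0
#   score 9   -> log10(10) + 1   = 2.0
#   score 999 -> log10(1000) + 1 = 4.0
# so very large score gaps translate into only modest weight gaps.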


def generate_analysis_prompt(topic: str, post_analysis: List[Dict]) -> str:
    """Create the analysis prompt for the LLM with weighted emphasis on higher-scoring posts."""
    # Calculate total weight for percentage calculations
    total_weight = sum(calculate_weight(post["score"]) for post in post_analysis)

    # Generate post descriptions with their relative importance
    post_descriptions = []
    for post in post_analysis:
        weight = calculate_weight(post["score"])
        weight_percentage = (weight / total_weight) * 100
        post_desc = (
            f"Title: {post['title']}\n"
            f"Subreddit: {post['subreddit']}\n"
            f"Score: {post['score']} (Relative importance: {weight_percentage:.1f}%)\n"
            f"URL: {post['url']}\n"
        )
        post_descriptions.append(post_desc)

    analysis_text = "\n".join(post_descriptions)
    return (
        f"Analyze the following Reddit posts about '{topic}'. "
        "These posts are sorted by score (highest to lowest), and each post has been assigned "
        "a relative importance percentage based on its score. Please weight your analysis "
        "accordingly, giving more consideration to higher-scoring posts as they likely "
        "represent more community-validated content.\n\n"
        f"{analysis_text}\n\n"
        "Please provide:\n"
        "1. A weighted summary of the main insights, emphasizing trends from higher-scoring posts\n"
        "2. Any notable outliers or unique perspectives from lower-scoring posts\n"
        "3. An overall analysis of the community's perspective on this topic"
    )
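
# Example of the weighting in practice (hypothetical scores, not real data):
# for two posts scoring 900 and 9, the weights are ~3.95 and 2.0, so the prompt
# labels them with relative importances of roughly 66% and 34%.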


def format_markdown_output(
    topic: str, post_analysis: List[Dict], summary_text: str
) -> str:
    """Format the analysis results in Markdown with clear score-based organization."""
    markdown_output = f"# Analysis of Reddit Topic: '{topic}'\n\n"

    # Add score distribution summary
    total_score = sum(post["score"] for post in post_analysis)
    markdown_output += "## Score Distribution\n"
    markdown_output += f"- Total Combined Score: {total_score}\n"
    markdown_output += f"- Highest Score: {post_analysis[0]['score']}\n"
    markdown_output += f"- Lowest Score: {post_analysis[-1]['score']}\n\n"

    markdown_output += "## Posts by Popularity\n"
    total_weight = sum(calculate_weight(p["score"]) for p in post_analysis)
    for post in post_analysis:
        markdown_output += f"### [{post['title']}]({post['url']})\n"
        markdown_output += f"- **Score**: {post['score']}\n"
        markdown_output += f"- **Subreddit**: {post['subreddit']}\n"
        markdown_output += f"- **Relative Weight**: {(calculate_weight(post['score']) / total_weight * 100):.1f}%\n\n"

    markdown_output += "## AI Analysis\n"
    markdown_output += f"{summary_text}\n"
    return markdown_output


def main():
    st.title("Reddit Topic Analysis")
    st.write(
        "Analyze Reddit posts on any topic using AI-powered insights, weighted by community engagement"
    )

    # Sidebar for configurations
    with st.sidebar:
        st.header("Settings")

        # Reddit API credentials
        st.subheader("Reddit Settings")
        app_id = APP_ID
        app_secret = APP_SECRET
        user_agent = USER_AGENT

        # Number of posts to analyze
        num_posts = st.slider(
            "Number of posts to analyze", min_value=1, max_value=20, value=5
        )

        st.subheader("Model Settings")
        # Get available Ollama models
        available_models = get_available_models()
        if not available_models:
            st.error(
                "No Ollama models available. Please ensure Ollama is running and has models installed."
            )
            st.stop()

        # Model selection
        selected_model = st.selectbox("Select Ollama Model", available_models)
    # Main content area
    st.header("Topic Analysis")
    topic = st.text_input("Enter a topic to analyze")

    if st.button("Analyze Topic"):
        if not topic:
            st.warning("Please enter a topic to analyze")
            return

        with st.spinner("Analyzing Reddit posts..."):
            try:
                # Initialize Reddit client
                reddit = praw.Reddit(
                    client_id=app_id, client_secret=app_secret, user_agent=user_agent
                )

                # Search Reddit and get sorted results
                post_analysis = search_reddit_posts(reddit, topic, num_posts)
                if not post_analysis:
                    st.error("No posts found for the given topic")
                    return

                # Generate and get AI analysis
                prompt = generate_analysis_prompt(topic, post_analysis)
                response = ollama.generate(model=selected_model, prompt=prompt)
                summary_text = response["response"]

                # Format and display results
                markdown_output = format_markdown_output(
                    topic, post_analysis, summary_text
                )
                st.markdown(markdown_output)

                # Add download button for the analysis
                st.download_button(
                    label="Download Analysis",
                    data=markdown_output,
                    file_name=f"reddit_analysis_{topic}.md",
                    mime="text/markdown",
                )
            except Exception as e:
                st.error(f"An error occurred: {str(e)}")
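
# Launch the app with: streamlit run app.py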

if __name__ == "__main__":
    main()