-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcontent.js
136 lines (114 loc) · 4.12 KB
/
content.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
// Dispatches messages sent to this content script:
//  - "extractContent": extracts the page text and sends it out as a separate
//    "extractedContent" runtime message (sendResponse is not used for it).
//  - "ping": answers through sendResponse with { status: "ok" }.
// Any other message type is logged and otherwise ignored.
chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
  console.log("Content.js received message:", request);

  switch (request.type) {
    case "extractContent": {
      const extractedText = extractContent();
      chrome.runtime.sendMessage({ type: "extractedContent", content: extractedText });
      break;
    }
    case "ping":
      // Liveness check: reply straight away.
      sendResponse({ status: "ok" });
      break;
    default:
      break;
  }
});
/**
 * Extracts the main readable text of the current page.
 *
 * Strategy, in order:
 *  1. Try a list of common article selectors and take the first match whose
 *     normalized text is longer than 500 characters.
 *  2. Otherwise score every visible element holding more than 200 characters
 *     of text and take the highest-scoring one (the first one wins a tie).
 *  3. Otherwise return the raw `innerText` of `<body>` (or "" without a body).
 *
 * Non-content descendants (scripts, styles, iframes, nav, header, footer,
 * complementary regions) are excluded from the text of the chosen element.
 * They are detached only while the text is read and then put back, so the
 * page itself is left as it was found.
 *
 * @returns {string} The extracted text; whitespace-normalized for cases 1 and 2.
 */
function extractContent() {
  // Selectors that commonly wrap the main article, tried in this order.
  const articleSelectors = [
    "article",
    '[role="main"]',
    '[role="article"]',
    "main",
    "#main-content",
    ".post-content",
    ".article-content",
    ".article-body",
    ".entry-content",
    ".content-body",
    ".story-body"
  ];
  // Tags that are never considered as candidate content blocks.
  const nonContentTags = ["script", "style", "nav", "header", "footer"];
  // Class-name fragments that disqualify an element as a candidate block.
  const boilerplateClass = /(comment|meta|footer|header|menu|nav|sidebar|widget)/;
  // Class/id fragments that cost a candidate block 10 points.
  const suspiciousTerms = /(share|social|comment|advertisement|sidebar)/i;
  // Descendants excluded from the chosen block before its text is read.
  const strippedSelector = 'script, style, iframe, nav, header, footer, [role="complementary"]';

  // Returns the node's text, trimmed, with every run of two or more
  // whitespace characters collapsed into one space. Single whitespace
  // characters (including a lone newline) are kept as they are.
  function getInnerText(node) {
    const text = node.innerText || node.textContent || "";
    return text.trim().replace(/\s{2,}/g, " ");
  }

  // Returns the element's class attribute as a string. SVG elements expose
  // `className` as an SVGAnimatedString object rather than a string, so
  // calling string methods on it directly would throw.
  function getClassName(element) {
    const value = element.className;
    if (typeof value === "string") {
      return value;
    }
    if (value && typeof value.baseVal === "string") {
      return value.baseVal;
    }
    return "";
  }

  // Fraction of the element's text that is NOT inside <a> elements.
  // `textLength` is the already-computed length of the element's text.
  function getTextDensity(element, textLength) {
    if (textLength === 0) {
      return 0;
    }
    const linkLength = Array.from(element.getElementsByTagName("a")).reduce(
      (total, link) => total + getInnerText(link).length,
      0
    );
    return (textLength - linkLength) / textLength;
  }

  // Step 1: first selector match with more than 500 characters of text.
  function findBySelector() {
    for (const selector of articleSelectors) {
      const element = document.querySelector(selector);
      if (element && getInnerText(element).length > 500) {
        return element;
      }
    }
    return null;
  }

  // Score of a candidate block: more text, fewer links and more paragraphs
  // raise it; a suspicious class or id lowers it.
  function scoreBlock(block, textLength) {
    let score = textLength / 100;
    score += getTextDensity(block, textLength) * 10;
    score += block.getElementsByTagName("p").length * 3;
    if (suspiciousTerms.test(`${getClassName(block)} ${block.id}`)) {
      score -= 10;
    }
    return score;
  }

  // Step 2: best-scoring visible element with more than 200 characters.
  // NOTE(review): text length and paragraph count both include descendants,
  // so outer containers tend to outscore the blocks nested inside them.
  function findBestBlock() {
    let best = null;
    let bestScore = -Infinity;
    for (const node of Array.from(document.getElementsByTagName("*"))) {
      if (nonContentTags.includes(node.tagName.toLowerCase())) {
        continue;
      }
      const style = window.getComputedStyle(node);
      if (style.display === "none" || style.visibility === "hidden") {
        continue;
      }
      if (boilerplateClass.test(getClassName(node).toLowerCase())) {
        continue;
      }
      const textLength = getInnerText(node).length;
      if (textLength <= 200) {
        continue;
      }
      const score = scoreBlock(node, textLength);
      // Strict comparison keeps the earliest element when scores are equal.
      if (score > bestScore) {
        best = node;
        bestScore = score;
      }
    }
    return best;
  }

  // Reads the text of `root` without its non-content descendants. Those are
  // detached for the duration of the read and re-attached afterwards, even
  // if reading throws, so the live page is not permanently altered.
  // NOTE(review): re-attaching an <iframe> is expected to reload it.
  function getCleanText(root) {
    const detached = [];
    root.querySelectorAll(strippedSelector).forEach((el) => {
      detached.push({ el, parent: el.parentNode, next: el.nextSibling });
      el.remove();
    });
    try {
      return getInnerText(root);
    } finally {
      // Restore in reverse so each recorded next-sibling is back in place
      // before the element that has to be inserted in front of it.
      for (let i = detached.length - 1; i >= 0; i -= 1) {
        const { el, parent, next } = detached[i];
        if (parent) {
          parent.insertBefore(el, next);
        }
      }
    }
  }

  const articleContent = findBySelector() || findBestBlock();
  if (articleContent) {
    return getCleanText(articleContent);
  }

  // Step 3: nothing better found, fall back to the body text as-is.
  return document.body ? document.body.innerText : "";
}
// Receives the finished summary; every other message type is ignored here.
chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
  if (request.type !== "summarizationResult") {
    return;
  }

  const { summary } = request;
  console.log("Received summary:", summary);
  // Placeholder: the summary is only logged for now; display it or act on it here.
});