-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathupload.js
92 lines (77 loc) · 3.07 KB
/
upload.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
const { MongoClient, ServerApiVersion } = require("mongodb");
const fs = require("fs");
const path = require("path");
// MongoDB connection string. Set the MONGODB_URI environment variable to supply
// it without committing credentials; the literal below is kept only as a
// backward-compatible fallback for environments that do not set the variable.
// NOTE(review): the fallback embeds a username/password in source control —
// rotate that credential and remove the literal once MONGODB_URI is configured.
const uri = process.env.MONGODB_URI || "mongodb+srv://tinkerquestproject:[email protected]/?retryWrites=true&w=majority&appName=Cluster0";
// Shared client, pinned to Stable API v1 in strict mode with deprecation errors enabled.
const client = new MongoClient(uri, {
  serverApi: {
    version: ServerApiVersion.v1,
    strict: true,
    deprecationErrors: true,
  },
});
// Directory containing the JSON files
const dataDir = path.join(__dirname, "crawler_output");
/**
 * Uploads the documents stored in every file under `dataDir` into the `data`
 * collection of the `crawler` database.
 *
 * Each file is repaired in memory (closing bracket, trailing commas), parsed as
 * JSON, and its documents are inserted in one batch. A file that cannot be
 * parsed or inserted is reported and skipped; the remaining files are still
 * processed. Connection-level and file-system errors are logged, and the
 * client is always closed.
 *
 * @returns {Promise<void>} Resolves once all files were attempted and the
 *   connection has been closed.
 */
async function uploadData() {
  try {
    await client.connect();
    console.log("Connected to MongoDB");
    const db = client.db("crawler");
    const collection = db.collection("data");
    const files = fs.readdirSync(dataDir);
    let failedFiles = 0;

    for (const file of files) {
      const filePath = path.join(dataDir, file);

      // A sub-directory cannot be read as text; reading it would throw and
      // abort the whole run, so skip anything that is not a regular file.
      if (!fs.statSync(filePath).isFile()) {
        console.error(`Skipping non-file entry ${file}`);
        continue;
      }

      let fileContents = fs.readFileSync(filePath, "utf8");
      // Add closing bracket if necessary
      fileContents = finalizeJsonContent(fileContents, filePath);
      // Clean up the content by removing extra commas or malformed content
      fileContents = removeTrailingCommas(fileContents);
      fileContents = sanitizeJSON(fileContents);

      if (!fileContents) {
        console.error(`Skipping invalid content in ${file}`);
        failedFiles += 1;
        continue;
      }

      // Parse separately from the insert so that a database failure is not
      // misreported as a JSON syntax problem.
      let jsonData;
      try {
        jsonData = JSON.parse(fileContents);
      } catch (parseError) {
        console.error(`Error parsing JSON in ${file}: ${parseError.message}`);
        failedFiles += 1;
        continue;
      }

      // Accept a single top-level document as well as an array of documents.
      const documents = Array.isArray(jsonData) ? jsonData : [jsonData];
      if (documents.length === 0) {
        // insertMany rejects an empty batch, and there is nothing to upload anyway.
        console.log(`No documents to upload in ${file}`);
        continue;
      }

      try {
        // One batched insert per file instead of one round trip per document.
        const result = await collection.insertMany(documents);
        console.log(`Uploaded ${result.insertedCount} document(s) from ${file}`);
      } catch (insertError) {
        console.error(`Error uploading data from ${file}: ${insertError.message}`);
        failedFiles += 1;
      }
    }

    if (failedFiles === 0) {
      console.log("All data uploaded!");
    } else {
      console.error(`Upload finished; ${failedFiles} file(s) were skipped or failed.`);
    }
  } catch (err) {
    console.error("Error uploading data:", err);
  } finally {
    await client.close();
    console.log("MongoDB connection closed.");
  }
}
/**
 * Appends the closing "]" to a JSON array file that was left unterminated and
 * writes the repaired text back to disk.
 *
 * Only content whose first non-whitespace character is "[" is repaired; any
 * other content (for example a complete top-level object) is returned
 * untouched and the file is not rewritten, because appending "]" to it would
 * corrupt it.
 *
 * @param {string} content - Raw text read from the file.
 * @param {string} filePath - Path of the file, overwritten when a fix is applied.
 * @returns {string} The repaired text, or `content` unchanged when no fix applies.
 */
function finalizeJsonContent(content, filePath) {
  if (typeof content !== "string") {
    return content;
  }

  const trimmed = content.trim();
  const isOpenArray = trimmed.startsWith("[") && !trimmed.endsWith("]");
  if (!isOpenArray) {
    return content;
  }

  console.log(`Adding closing bracket to ${filePath}`);
  const repaired = `${trimmed}\n]`; // Append closing bracket
  fs.writeFileSync(filePath, repaired, "utf8"); // Overwrite file with fixed content
  return repaired;
}
/**
 * Removes a comma that directly precedes (ignoring whitespace) a closing "}"
 * or "]", which strict JSON does not allow.
 *
 * The text is scanned character by character so that commas inside string
 * literals are never touched; a plain regex replace would also rewrite string
 * values that happen to contain ", ]" or ", }".
 *
 * @param {string} data - JSON-like text.
 * @returns {string} The text without trailing commas; non-string input is
 *   returned unchanged.
 */
function removeTrailingCommas(data) {
  if (typeof data !== "string") {
    return data;
  }

  const whitespace = /\s/;
  const kept = [];
  let sliceStart = 0;
  let inString = false;
  let escaped = false;

  for (let i = 0; i < data.length; i += 1) {
    const ch = data[i];

    if (inString) {
      if (escaped) {
        escaped = false; // the character after a backslash never ends the string
      } else if (ch === "\\") {
        escaped = true;
      } else if (ch === '"') {
        inString = false;
      }
      continue;
    }

    if (ch === '"') {
      inString = true;
      continue;
    }

    if (ch !== ",") {
      continue;
    }

    // Look past whitespace to see what the comma is followed by.
    let next = i + 1;
    while (next < data.length && whitespace.test(data[next])) {
      next += 1;
    }
    if (data[next] === "}" || data[next] === "]") {
      // Drop this comma: keep everything up to it and resume right after it.
      kept.push(data.slice(sliceStart, i));
      sliceStart = i + 1;
    }
  }

  kept.push(data.slice(sliceStart));
  return kept.join("");
}
/**
 * Rejects content that cannot be JSON: the literal text "undefined" or a
 * string that is empty once surrounding whitespace is stripped.
 *
 * @param {string} data - Text about to be parsed as JSON.
 * @returns {string|null} `data` itself when it has content, otherwise `null`
 *   (after logging an error).
 */
function sanitizeJSON(data) {
  const hasContent = data !== "undefined" && Boolean(data.trim());
  if (hasContent) {
    return data;
  }

  console.error("Empty or undefined JSON content found.");
  return null;
}
// Run the upload. uploadData() logs its own errors, but closing the client in
// its `finally` block can still reject, so handle that here rather than
// leaving the promise floating.
uploadData().catch((err) => {
  console.error("Unexpected error during upload:", err);
  process.exitCode = 1;
});