-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add scripts that fetch external content
- Loading branch information
1 parent
6fbfa62
commit fa74d47
Showing
24 changed files
with
1,455 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Order in which to run the scripts: | ||
|
||
1. fetchExternalContentMetaData.js | ||
2. fetchExternalContent.js | ||
3. addHTMLstructureToExternalContent.js |
132 changes: 132 additions & 0 deletions
132
fetchExternalContent/fetchAnnotatedCopies/addHTMLstructureToExternalContent.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
#!/usr/bin/env node | ||
|
||
/* | ||
Author: Kor Dwarshuis | ||
Created: 2023 | ||
Updated: - | ||
Description: | ||
Markdown to Bootstrap Accordion Converter | ||
This script automates the conversion of Markdown files in the directoryPath directory into Bootstrap accordion format. | ||
It imports a JSON file named 'externalContentMetaData.json' to create a mapping of anchor tags to 'Level' attributes, which | ||
are then used as data attributes in the generated Bootstrap accordions. | ||
Features: | ||
1. Reads all Markdown (.md) files in the specified directory. | ||
2. Imports 'Level' attributes from an external JSON file. | ||
3. Converts all headings in the Markdown files to H2. | ||
4. Wraps sections under H2 headings in Bootstrap accordion divs, utilizing the imported 'Level' as a data attribute. | ||
5. Writes the updated content back into each Markdown file. | ||
Dependencies: | ||
- Node.js built-in modules: 'fs' for file system operations, 'path' for path manipulations. | ||
Logging: | ||
Outputs a log message for each successfully updated file. | ||
*/ | ||
|
||
|
||
|
||
|
||
const fs = require('fs'); | ||
const path = require('path'); | ||
require('dotenv').config(); | ||
|
||
// Directory that holds the Markdown files to convert
const directoryPath = process.env.ANNOTATED_COPIES_OUTPUT_DIR;

// Import external JSON object.
// ANNOTATED_COPIES_INPUT_DIR is resolved against the directory two levels above this script.
// That is the same location the former string concatenation ('../.' + './…') produced for
// values starting with './', but path.resolve() also handles values without the leading './'
// and absolute paths.
const externalContentMetaData = require(
  path.resolve(__dirname, '../..', process.env.ANNOTATED_COPIES_INPUT_DIR)
);
|
||
|
||
/**
 * Builds the lookup from normalised anchor to the sheet's 'Level' value.
 *
 * @param {Array<Array<*>>} values - Rows of the imported sheet; the first row is the header
 *   and is skipped. Column 5 holds the anchor (optionally a URL with a "#fragment"),
 *   column 11 holds the 'Level'.
 * @returns {Object<string, *>} Prototype-less object mapping anchor -> level.
 */
function buildLevelMap(values) {
  // No prototype: a heading such as "constructor" or "toString" must not resolve to a member
  // inherited from Object.prototype when it is looked up later.
  const levelByAnchor = Object.create(null);

  if (!Array.isArray(values)) {
    console.warn('externalContentMetaData has no "values" array; no Level attributes were imported.');
    return levelByAnchor;
  }

  values.slice(1).forEach((row) => {
    const rawAnchor = row[5];
    if (!rawAnchor) {
      return; // rows without an anchor cannot be matched to a heading
    }
    // Remove everything before the last "#", lower-case, and turn whitespace into "-"
    const anchor = String(rawAnchor).split('#').pop().toLowerCase().replace(/\s/g, '-');
    levelByAnchor[anchor] = row[11]; // using 'Level' as the data attribute
  });

  return levelByAnchor;
}

// Create mapping from the imported JSON
const dataAttributeMap = buildLevelMap(externalContentMetaData && externalContentMetaData.values);
|
||
|
||
/**
 * Turns a heading into the anchor used in element ids and as the key into the level map.
 * The text is lower-cased and whitespace plus the listed punctuation characters are each
 * replaced by "-".
 *
 * @param {string} heading - Heading text (or any fallback identifier).
 * @returns {string} The anchor.
 */
function slugifyHeading(heading) {
  // One character class replaces the former chain of single-character replace() calls;
  // the result is the same because "-" is never itself a replaced character.
  return heading.toLowerCase().replace(/[\s&\/\\<>()'`,.;:?!"]/g, '-');
}

/**
 * Converts Markdown into the accordion markup: every heading becomes an H2 and each H2
 * section is wrapped in a Bootstrap accordion item carrying a data-level attribute.
 *
 * @param {string} markdown - Original file content.
 * @param {Object<string, *>} levelByAnchor - Anchor -> level lookup; sections whose anchor
 *   is not an own key (or has an empty value) get level '1'.
 * @returns {string} The converted content, wrapped in the outer accordion div.
 */
function wrapSectionsInAccordion(markdown, levelByAnchor) {
  // Replace all headings with H2
  const normalised = markdown.replace(/^(#{1,6}) (.*$)/gm, '## $2');

  // Wrap H2 sections in divs with data-attributes
  const items = normalised.split(/\n(?=## )/g).map((section) => {
    const match = section.match(/## (.*)$/m);
    const heading = match ? match[1] : null;
    // Sections without a heading get a random numeric anchor instead
    const anchor = slugifyHeading(
      heading ? heading : Math.floor(Math.random() * 10000000000000).toString()
    );
    // Own-property check so inherited names (e.g. "constructor") never count as a level
    const hasLevel = Object.prototype.hasOwnProperty.call(levelByAnchor, anchor);
    const dataAttribute = (hasLevel && levelByAnchor[anchor]) || '1';

    // Creating Bootstrap Accordion
    // the “\n\n” must be added or the code will fail
    return [
      '',
      `\n\n<div className="accordion-item" data-level="${dataAttribute}">`,
      `\n\n<h2 className="accordion-header" id="header${anchor}">`,
      `\n\n<button className="accordion-button collapsed" type="button" data-bs-toggle="collapse" data-bs-target="#accordeon-${anchor}" aria-expanded="false" aria-controls="accordeon-${anchor}">`,
      `${anchor}, level ${dataAttribute}`,
      '\n\n</button>',
      '\n\n</h2>',
      `\n\n<div id="accordeon-${anchor}" className="accordion-collapse collapse">`,
      '\n\n<div className="accordion-body">',
      `\n\n${section}`,
      '\n\n</div>',
      '\n\n</div>',
      '\n\n</div>',
      '',
    ].join('\n');
  });

  // Wrap all content in a div with the accordion className
  return `<div className="accordion accordion-flush" id="annotated-copies">` + items.join('\n') + `</div>`;
}

if (!directoryPath) {
  // Without this guard fs.readdir() throws a TypeError for an undefined path
  console.error('Unable to scan directory: ANNOTATED_COPIES_OUTPUT_DIR is not set');
} else {
  fs.readdir(directoryPath, (err, files) => {
    if (err) {
      console.error(`Unable to scan directory: ${err}`);
      return;
    }

    // Process all .md files
    files
      .filter((file) => path.extname(file) === '.md')
      .forEach((file) => {
        const markdownFilePath = path.join(directoryPath, file);

        fs.readFile(markdownFilePath, 'utf8', (readErr, data) => {
          if (readErr) {
            console.error(`Failed to read file ${file}:`, readErr);
            return;
          }

          const updatedData = wrapSectionsInAccordion(data, dataAttributeMap);

          // Write to the file (the original Markdown file is overwritten in place)
          fs.writeFile(markdownFilePath, updatedData, (writeErr) => {
            if (writeErr) {
              console.error(`Failed to write to file ${file}:`, writeErr);
              return;
            }

            console.log(`Successfully updated markdown file: ${file}`);
          });
        });
      });
  });
}
144 changes: 144 additions & 0 deletions
144
fetchExternalContent/fetchAnnotatedCopies/fetchExternalContent.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
#!/usr/bin/env node | ||
|
||
/* | ||
Author: Kor Dwarshuis | ||
Created: 2023 | ||
Updated: - | ||
Description: | ||
This script consumes the data produced by the 'fetchExternalContentMetaData.js' script. | ||
This script performs the following tasks: | ||
1. Reads the 'externalContentMetaData.json' file located in the './static/json/' directory to obtain a list of URLs. | ||
2. Downloads Markdown files (.md) from the URLs and stores them in the outputFileLocation directory. | ||
3. Cleans up the downloaded Markdown files by: | ||
- Replacing Markdown links that have an empty URL (`[text]()`) with their plain link text. | ||
- Removing the first line if it consists only of "---". | ||
Configuration: | ||
- `inputFileLocation`: Directory and filename where the JSON file containing URLs is located. | ||
- `outputFileLocation`: Directory where the downloaded files will be stored. | ||
The code utilizes Node.js and its 'fs', 'path', and 'https' modules to read files, manage directories, and download content. | ||
Promises are used for asynchronous operations. | ||
*/ | ||
|
||
const fs = require('fs'); | ||
const https = require('https'); | ||
require('dotenv').config(); | ||
|
||
// Config
const inputFileLocation = process.env.ANNOTATED_COPIES_INPUT_DIR; // JSON file containing the URLs
const outputFileLocation = process.env.ANNOTATED_COPIES_OUTPUT_DIR; // Where to copy the files to
// End Config

// Create the output directory if it doesn't exist.
// With { recursive: true } mkdirSync() does nothing for an existing directory, so a separate
// existsSync() check (which can race with other processes) is not needed.
fs.mkdirSync(outputFileLocation, { recursive: true });
|
||
/**
 * Reads a UTF-8 text file and parses its content as JSON.
 *
 * @param {string} filePath - Path of the JSON file.
 * @returns {Promise<*>} Resolves with the parsed value. Rejects with the file-system error
 *   when the file cannot be read, or with a SyntaxError when the content is not valid JSON.
 */
function readFileAsync(filePath) {
  // fs.promises replaces the hand-written callback-to-Promise wrapper
  return fs.promises.readFile(filePath, 'utf8').then((raw) => JSON.parse(raw));
}
|
||
/**
 * Starts a download for every Markdown URL found in the second column of the sheet data.
 *
 * @param {{values: Array<Array<*>>}} json - Parsed metadata; each row's element 1 is the
 *   source URL. The header row (value 'Source'), empty cells, non-string cells and URLs that
 *   do not end in ".md" are skipped.
 * @returns {void} Downloads run in the background; failures are logged per URL.
 */
function processJSON(json) {
  // Used for naming the downloaded file: strip the protocol and the raw.githubusercontent.com
  // host, then replace slashes so the remainder is a single flat file name.
  function removeProtocol(inputString) {
    return inputString
      .replace(/^https?:\/\//, '')
      .replace('raw.githubusercontent.com/', '')
      .replace(/\//g, '-');
  }

  json.values.forEach((item) => {
    const sourceUrl = item[1];

    // Skip rows when there is no usable URL (missing, empty, or not a string)
    if (typeof sourceUrl !== 'string' || sourceUrl === '') return;
    // First row is the header
    if (sourceUrl === 'Source') return;
    // only copy markdown files
    if (!sourceUrl.endsWith('.md')) return;

    // The promise must not be left floating: an unhandled rejection would abort the whole run
    downloadFile(sourceUrl, outputFileLocation + removeProtocol(sourceUrl)).catch((err) => {
      console.error(`Failed to download ${sourceUrl}:`, err);
    });
  });
}
|
||
/**
 * Downloads `url` over HTTPS into `destination` and then runs cleanUpFile() on the result.
 *
 * @param {string} url - HTTPS URL to fetch.
 * @param {string} destination - Path of the file to write.
 * @returns {Promise<void>} Resolves once the file has been written and closed. Rejects on a
 *   network error, a write error, or any HTTP status other than 200 (redirects are not
 *   followed), so callers must handle the rejection.
 */
function downloadFile(url, destination) {
  return new Promise((resolve, reject) => {
    const request = https.get(url, (response) => {
      if (response.statusCode !== 200) {
        // Do not store an error page as if it were the Markdown file
        response.resume(); // discard the body so the connection can be released
        reject(new Error(`Failed to download ${url}: HTTP status ${response.statusCode}`));
        return;
      }

      // Only create the file once there is a successful response to put into it
      const file = fs.createWriteStream(destination);

      // Shared failure path: stop both streams and remove the partial file
      const fail = (error) => {
        response.destroy();
        file.destroy();
        fs.unlink(destination, () => {
          reject(error);
        });
      };

      response.on('error', fail);
      file.on('error', fail);
      file.on('finish', () => {
        // Wait until the descriptor is closed before the file is read again for clean-up
        file.close(() => {
          cleanUpFile(destination);
          resolve();
        });
      });

      response.pipe(file);
    });

    request.on('error', (error) => {
      fs.unlink(destination, () => {
        reject(error);
      });
    });
  });
}
|
||
/**
 * Cleans up a downloaded Markdown file in place:
 *  - Markdown links with an empty URL (`[text]()`) are replaced by their text;
 *  - a first line consisting only of "---" is removed (LF and CRLF line endings).
 * The file is rewritten only when its content actually changed.
 *
 * @param {string} filePath - Path of the file to clean.
 * @returns {Promise<void>} Resolves when processing has finished. Read and write errors are
 *   logged rather than rejected, so callers that ignore the return value behave as before.
 */
function cleanUpFile(filePath) {
  return new Promise((resolve) => {
    fs.readFile(filePath, 'utf8', (readErr, data) => {
      if (readErr) {
        console.error('Error reading file:', readErr);
        resolve();
        return;
      }

      const updatedContent = data
        // Check and replace Markdown links without URLs
        .replace(/\[([^\]]+)\]\(\)/g, '$1')
        // Check and remove first line if it's "---"; the optional \r covers CRLF files,
        // where a plain split on "\n" would leave "---\r" and miss the line
        .replace(/^---\r?(?:\n|$)/, '');

      if (data === updatedContent) {
        console.log('No changes required. File remains unchanged.');
        resolve();
        return;
      }

      fs.writeFile(filePath, updatedContent, 'utf8', (writeErr) => {
        if (writeErr) {
          console.error('Error saving file:', writeErr);
        } else {
          console.log('File updated successfully.');
        }
        resolve();
      });
    });
  });
}
|
||
// Entry point: parse the input JSON and hand it to processJSON(); any error raised along
// the way is reported on stderr.
const reportFailure = (err) => {
  console.error('Error reading file:', err);
};

readFileAsync(inputFileLocation).then(processJSON).catch(reportFailure);
|
82 changes: 82 additions & 0 deletions
82
fetchExternalContent/fetchAnnotatedCopies/fetchExternalContentMetaData.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
#!/usr/bin/env node | ||
|
||
/* | ||
Author: Kor Dwarshuis | ||
Created: 2023 | ||
Updated: - | ||
Description: | ||
This script creates the data for the fetchExternalContent.js script. | ||
This Node.js script performs the following tasks: | ||
1. Sends an HTTP GET request to a Google Sheets API endpoint (“WOT-terms” Google Sheet, tab “LabelContent”) to fetch JSON-formatted data (see https://sheets.googleapis.com/v4/spreadsheets/18IUa-1NSJ_8Tz_2D-VSuSQa_yf3ES1s_hovitm3Clvc/values/LabelContent?alt=json&key=<API_KEY>). | ||
- The URL of the Google Sheet API endpoint is read from the ANNOTATED_COPIES_JSON_ENDPOINT environment variable (loaded via dotenv). | ||
2. Receives and accumulates the JSON data in chunks as it is streamed from the Google Sheet API. | ||
3. Once all data is received, it writes the JSON data to a file named 'externalContentMetaData.json' in the './static/json/' directory. | ||
Configuration: | ||
- `outputDirJSON`: Directory where the output JSON file will be stored. | ||
- `outputFileNameJSON`: Name of the output JSON file. | ||
Note: | ||
- The script should be run from the root of the project. | ||
- For information on how to create a JSON endpoint from a Google Sheet, refer to https://stackoverflow.com/a/68854199 | ||
The code uses the Node.js 'fs', 'path', and 'https' modules to manage directories, write files, and perform HTTPS GET requests. | ||
*/ | ||
|
||
const fs = require('fs'); | ||
const path = require('path'); | ||
const https = require('https'); | ||
require('dotenv').config(); | ||
|
||
// Config
const outputDirJSON = './static/json/'; //TODO: find a better place for this file
const outputFileNameJSON = 'externalContentMetaData.json';
// End Config

// How to create JSON endpoint from Google Sheet: https://stackoverflow.com/a/68854199
// The endpoint URL is read from the environment rather than being hard-coded here.
const url = process.env.ANNOTATED_COPIES_JSON_ENDPOINT;

if (!url) {
  // https.get() would throw on an undefined URL; report the actual cause instead
  console.error('Error: ANNOTATED_COPIES_JSON_ENDPOINT is not set.');
  process.exitCode = 1;
} else {
  https
    .get(url, (resp) => {
      if (resp.statusCode !== 200) {
        // Do not overwrite the JSON file with an error response
        resp.resume(); // discard the body so the connection can be released
        console.error(`Error: request failed with HTTP status ${resp.statusCode}`);
        process.exitCode = 1;
        return;
      }

      // Decode as UTF-8 at stream level so a multi-byte character that is split across two
      // chunks is not corrupted by per-chunk string conversion
      resp.setEncoding('utf8');
      let data = '';

      // A chunk of data has been received.
      resp.on('data', (chunk) => {
        data += chunk;
      });

      // The whole response has been received. Write it to disk.
      resp.on('end', () => {
        writeJSONFile(data);
      });
    })
    .on('error', (err) => {
      console.error('Error: ' + err.message);
      process.exitCode = 1;
    });
}
|
||
/**
 * Writes the fetched JSON text to outputDirJSON/outputFileNameJSON, creating the directory
 * when necessary.
 *
 * @param {string} content - JSON text to store. Content that does not parse as JSON is
 *   rejected and nothing is written, so the file never ends up holding a non-JSON response.
 * @returns {void} The write is asynchronous; the outcome is reported on the console.
 */
function writeJSONFile(content) {
  try {
    JSON.parse(content);
  } catch (parseErr) {
    console.error('Received content is not valid JSON; file not written.', parseErr);
    return;
  }

  // Create the output directory if it doesn't exist ({ recursive: true } makes this a
  // no-op for an existing directory, so no existsSync() pre-check is needed)
  fs.mkdirSync(outputDirJSON, { recursive: true });

  // Path to the output file
  const filePath = path.join(outputDirJSON, outputFileNameJSON);

  fs.writeFile(filePath, content, (err) => {
    if (err) {
      console.error(err);
      return;
    }
    console.log('JSON file has been written successfully.');
  });
} // End writeJSONFile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
#!/bin/bash
#
# Runs the three annotated-copies scripts in the order in which they depend on each other.
# The script paths are relative, so start this from the project root.

# Stop at the first failing step so later steps never run after an earlier one has failed.
set -euo pipefail

# Run the first script
node fetchExternalContent/fetchAnnotatedCopies/fetchExternalContentMetaData.js

# Run the second script
node fetchExternalContent/fetchAnnotatedCopies/fetchExternalContent.js

# Run the third script
node fetchExternalContent/fetchAnnotatedCopies/addHTMLstructureToExternalContent.js
Oops, something went wrong.