Skip to content

Commit

Permalink
Add scripts that fetch external content
Browse files Browse the repository at this point in the history
  • Loading branch information
kordwarshuis committed Jul 12, 2024
1 parent 6fbfa62 commit fa74d47
Show file tree
Hide file tree
Showing 24 changed files with 1,455 additions and 0 deletions.
5 changes: 5 additions & 0 deletions fetchExternalContent/fetchAnnotatedCopies/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Order in which to run the scripts:

1. fetchExternalContentMetaData.js
2. fetchExternalContent.js
3. addHTMLstructureToExternalContent.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
#!/usr/bin/env node

/*
Author: Kor Dwarshuis
Created: 2023
Updated: -
Description:
Markdown to Bootstrap Accordion Converter
This script automates the conversion of Markdown files in the directoryPath directory into Bootstrap accordion format.
It imports a JSON file named 'externalContentMetaData.json' to create a mapping of anchor tags to 'Level' attributes, which
are then used as data attributes in the generated Bootstrap accordions.
Features:
1. Reads all Markdown (.md) files in the specified directory.
2. Imports 'Level' attributes from an external JSON file.
3. Converts all headings in the Markdown files to H2.
4. Wraps sections under H2 headings in Bootstrap accordion divs, utilizing the imported 'Level' as a data attribute.
5. Writes the updated content back into each Markdown file.
Dependencies:
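- Node.js built-in modules: 'fs' for file system operations, 'path' for path manipulations.
- 'dotenv' for loading ANNOTATED_COPIES_INPUT_DIR and ANNOTATED_COPIES_OUTPUT_DIR from the .env file.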
Logging:
Outputs a log message for each successfully updated file.
*/




const fs = require('fs');
const path = require('path');
require('dotenv').config();

// Directory that holds the Markdown files to convert (taken from .env).
const directoryPath = process.env.ANNOTATED_COPIES_OUTPUT_DIR;

// Import the external JSON object.
// ANNOTATED_COPIES_INPUT_DIR is resolved against the directory two levels
// above this script. For values starting with "./" this is the same file the
// former `'../.' + value` concatenation pointed at; values without the "./"
// prefix and absolute paths now work as well.
const externalContentMetaData = require(
  path.resolve(__dirname, '..', '..', process.env.ANNOTATED_COPIES_INPUT_DIR)
);

// Column positions inside a row of `externalContentMetaData.values`.
const ANCHOR_COLUMN = 5; // link whose "#fragment" identifies the heading
const LEVEL_COLUMN = 11; // 'Level', used as the data attribute

// Create the mapping from heading anchor to 'Level'.
// The map has no prototype, so a heading that slugs to e.g. "constructor" or
// "tostring" cannot pick up an inherited Object.prototype member by accident.
const dataAttributeMap = Object.create(null);

// The first row is skipped (it is treated as the header row).
for (const row of externalContentMetaData.values.slice(1)) {
  const link = row[ANCHOR_COLUMN];
  if (!link) {
    continue;
  }

  // Keep only what follows the last "#", lower-cased, whitespace turned into "-"
  const anchor = link.split('#').pop().toLowerCase().replace(/\s/g, '-');
  dataAttributeMap[anchor] = row[LEVEL_COLUMN];
}


/**
 * Builds the slug used in the generated element ids and as lookup key in
 * `dataAttributeMap`: the heading is lower-cased and every whitespace
 * character and each of & / \ < > ( ) ' ` , . ; : ? ! " becomes "-".
 *
 * @param {string} heading - Heading text without the leading "## ".
 * @returns {string} The slug.
 */
function slugifyHeading(heading) {
  return heading.toLowerCase().replace(/[\s&\/\\<>()'`,.;:?!"]/g, '-');
}

/**
 * Wraps one section (an H2 heading plus the text below it) in the markup of a
 * Bootstrap accordion item.
 *
 * @param {string} section - Markdown of a single section.
 * @returns {string} The section surrounded by the accordion item markup.
 */
function wrapSectionInAccordion(section) {
  const match = section.match(/## (.*)$/m);
  const heading = match ? match[1] : null;

  // A section without heading text gets a random number as its anchor.
  const anchor = heading
    ? slugifyHeading(heading)
    : Math.floor(Math.random() * 10000000000000).toString();

  // Only own keys count, so slugs such as "constructor" are not resolved to
  // members inherited from Object.prototype. Unknown anchors get level '1'.
  const hasLevel = Object.prototype.hasOwnProperty.call(dataAttributeMap, anchor);
  const dataAttribute = (hasLevel && dataAttributeMap[anchor]) || '1';

  // Creating Bootstrap Accordion
  // the “\n\n” must be added or the code will fail
  return `
\n\n<div className="accordion-item" data-level="${dataAttribute}">
\n\n<h2 className="accordion-header" id="header${anchor}">
\n\n<button className="accordion-button collapsed" type="button" data-bs-toggle="collapse" data-bs-target="#accordeon-${anchor}" aria-expanded="false" aria-controls="accordeon-${anchor}">
${anchor}, level ${dataAttribute}
\n\n</button>
\n\n</h2>
\n\n<div id="accordeon-${anchor}" className="accordion-collapse collapse">
\n\n<div className="accordion-body">
\n\n${section}
\n\n</div>
\n\n</div>
\n\n</div>
`;
}

/**
 * Converts the content of one Markdown file into the accordion structure.
 *
 * @param {string} data - Original Markdown content.
 * @returns {string} Content with every heading turned into an H2 and every
 *   section wrapped in an accordion item, all inside one accordion container.
 */
function convertMarkdownToAccordion(data) {
  // Replace all headings (levels 1-6) with H2
  const withH2Headings = data.replace(/^(#{1,6}) (.*$)/gm, '## $2');

  // Split right before each H2 so that every section starts with its heading
  const accordionItems = withH2Headings
    .split(/\n(?=## )/g)
    .map((section) => wrapSectionInAccordion(section))
    .join('\n');

  // Wrap all content in a div with the accordion className
  return `<div className="accordion accordion-flush" id="annotated-copies">` + accordionItems + `</div>`;
}

// Convert every .md file in directoryPath in place.
fs.readdir(directoryPath, (err, files) => {
  if (err) {
    console.error('Unable to scan directory: ' + err);
    return;
  }

  // Process all .md files
  files
    .filter((file) => path.extname(file) === '.md')
    .forEach((file) => {
      const markdownFilePath = path.join(directoryPath, file);

      fs.readFile(markdownFilePath, 'utf8', (readError, data) => {
        if (readError) {
          console.error(`Failed to read file ${file}:`, readError);
          return;
        }

        const updatedData = convertMarkdownToAccordion(data);

        // Write the result back to the same file
        fs.writeFile(markdownFilePath, updatedData, (writeError) => {
          if (writeError) {
            console.error(`Failed to write to file ${file}:`, writeError);
            return;
          }

          console.log(`Successfully updated markdown file: ${file}`);
        });
      });
    });
});
144 changes: 144 additions & 0 deletions fetchExternalContent/fetchAnnotatedCopies/fetchExternalContent.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
#!/usr/bin/env node

/*
Author: Kor Dwarshuis
Created: 2023
Updated: -
Description:
This script consumes the data produced by the 'fetchExternalContentMetaData.js' script.
This script performs the following tasks:
1. Reads the 'externalContentMetaData.json' file located in the './static/json/' directory to obtain a list of URLs.
2. Downloads Markdown files (.md) from the URLs and stores them in the outputFileLocation directory.
3. Cleans up the downloaded Markdown files by:
- Replacing Markdown links without URLs.
- Removing the first line if it contains "---".
Configuration:
- `inputFileLocation`: Directory and filename where the JSON file containing URLs is located.
- `outputFileLocation`: Directory where the downloaded files will be stored.
The code uses the Node.js 'fs' and 'https' modules to read files, manage directories, and download content; 'dotenv' loads the configuration from the .env file.
Promises are used for asynchronous operations.
*/

const fs = require('fs');
const https = require('https');
require('dotenv').config();

// Config
// JSON file (path + filename) that lists the URLs to download
const inputFileLocation = process.env.ANNOTATED_COPIES_INPUT_DIR;
// Prefix that is put in front of every downloaded file name
const outputFileLocation = process.env.ANNOTATED_COPIES_OUTPUT_DIR;
// End Config

// Make sure the output directory is there before anything is downloaded
const outputDirectoryExists = fs.existsSync(outputFileLocation);
if (!outputDirectoryExists) {
  fs.mkdirSync(outputFileLocation, { recursive: true });
}

/**
 * Reads a UTF-8 file and parses its content as JSON.
 *
 * @param {string} filePath - Location of the JSON file.
 * @returns {Promise<*>} Resolves with the parsed JSON value; rejects with the
 *   read error or with the error thrown by JSON.parse.
 */
function readFileAsync(filePath) {
  return fs.promises
    .readFile(filePath, 'utf8')
    .then((contents) => JSON.parse(contents));
}

/**
 * Starts a download for every Markdown URL found in the second column of the
 * metadata rows.
 *
 * Rows are skipped when the second column is missing, empty, not a string,
 * equal to the header text 'Source', or does not end in ".md".
 * A failed download is logged and does not stop the other downloads.
 *
 * @param {{values: Array<Array<*>>}} json - Parsed metadata JSON.
 * @returns {Promise<Array<undefined>>} Settles once every started download
 *   has either finished or failed; contains one entry per started download.
 */
function processJSON(json) {
  // Used for naming the downloaded file: remove the protocol and the
  // raw.githubusercontent.com host and turn slashes into dashes, so that the
  // result can be used as a single file name.
  function removeProtocol(inputString) {
    let withoutProtocol = inputString;
    if (withoutProtocol.startsWith('https://')) {
      withoutProtocol = withoutProtocol.substring(8);
    } else if (withoutProtocol.startsWith('http://')) {
      withoutProtocol = withoutProtocol.substring(7);
    }
    return withoutProtocol
      .replace('raw.githubusercontent.com/', '')
      .replace(/\//g, '-');
  }

  const downloads = [];

  json.values.forEach((item) => {
    const url = item[1];

    if (typeof url !== 'string' || url === '') return; // Skip rows when there is no URL
    if (url === 'Source') return; // First row is the header
    if (!url.endsWith('.md')) return; // only copy markdown files

    // outputFileLocation is used as a plain prefix, so it has to end with a
    // path separator for the file to end up inside that directory.
    const destination = outputFileLocation + removeProtocol(url);

    downloads.push(
      downloadFile(url, destination).catch((error) => {
        // Handle the rejection here: one unreachable URL must not become an
        // unhandled rejection that aborts the remaining downloads.
        console.error(`Failed to download ${url}:`, error);
      })
    );
  });

  return Promise.all(downloads);
}

/**
 * Downloads `url` over HTTPS into `destination` and runs cleanUpFile on the
 * result.
 *
 * The destination file is only created after a 2xx response has been
 * received, so an HTTP error page is never stored as if it were Markdown.
 * Redirects are not followed; a 3xx response is treated as a failure.
 *
 * @param {string} url - HTTPS URL of the file.
 * @param {string} destination - Path of the file to write.
 * @returns {Promise<void>} Resolves when the file has been written and closed
 *   (cleanUpFile is started, not awaited); rejects on a request error, a
 *   non-2xx status, or a read/write stream error.
 */
function downloadFile(url, destination) {
  return new Promise((resolve, reject) => {
    // Remove whatever may have been written, then report the failure.
    const discardAndReject = (error) => {
      fs.unlink(destination, () => {
        reject(error);
      });
    };

    https
      .get(url, (response) => {
        const { statusCode } = response;
        if (statusCode < 200 || statusCode >= 300) {
          response.resume(); // drain the body so the connection is released
          reject(new Error(`Request for ${url} failed with HTTP status ${statusCode}`));
          return;
        }

        const file = fs.createWriteStream(destination);
        const abort = (error) => {
          file.destroy();
          discardAndReject(error);
        };

        response.on('error', abort);
        file.on('error', abort);
        file.on('finish', () => {
          file.close((closeError) => {
            if (closeError) {
              reject(closeError);
              return;
            }
            cleanUpFile(destination);
            resolve();
          });
        });

        response.pipe(file);
      })
      .on('error', discardAndReject);
  });
}

/**
 * Tidies a downloaded Markdown file in place:
 * - Markdown links with an empty target, "[text]()", are reduced to "text";
 * - a first line that is exactly "---" is dropped.
 * The file is only rewritten when this changes the content. The outcome is
 * reported on the console; nothing is returned.
 *
 * @param {string} filePath - File to clean up.
 */
function cleanUpFile(filePath) {
  const emptyLinkPattern = /\[([^\]]+)\]\(\)/g;

  fs.readFile(filePath, 'utf8', (readError, original) => {
    if (readError) {
      console.error('Error reading file:', readError);
      return;
    }

    // Links without URL -> plain link text
    const withoutEmptyLinks = original.replace(emptyLinkPattern, '$1');

    // Drop the first line when it is "---"
    const [firstLine, ...remainingLines] = withoutEmptyLinks.split('\n');
    const cleaned = firstLine === '---' ? remainingLines.join('\n') : withoutEmptyLinks;

    if (cleaned === original) {
      console.log('No changes required. File remains unchanged.');
      return;
    }

    fs.writeFile(filePath, cleaned, 'utf8', (writeError) => {
      if (writeError) {
        console.error('Error saving file:', writeError);
        return;
      }
      console.log('File updated successfully.');
    });
  });
}

// Entry point: load the metadata JSON and hand it to processJSON.
// An error from reading/parsing the file, or one thrown synchronously by
// processJSON, is reported on the console.
(async () => {
  try {
    const input = await readFileAsync(inputFileLocation);
    processJSON(input);
  } catch (err) {
    console.error('Error reading file:', err);
  }
})();

Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
#!/usr/bin/env node

/*
Author: Kor Dwarshuis
Created: 2023
Updated: -
Description:
This script creates the data for the fetchExternalContent.js script.
This Node.js script performs the following tasks:
1. Sends an HTTP GET request to a Google Sheets API endpoint (“WOT-terms” Google Sheet, tab “LabelContent”) to fetch JSON-formatted data (see https://sheets.googleapis.com/v4/spreadsheets/18IUa-1NSJ_8Tz_2D-VSuSQa_yf3ES1s_hovitm3Clvc/values/LabelContent?alt=json&key=<API_KEY>).
- The URL of the Google Sheet API endpoint, including the API key, is read from the ANNOTATED_COPIES_JSON_ENDPOINT environment variable (loaded from .env via dotenv).
2. Receives and accumulates the JSON data in chunks as it is streamed from the Google Sheet API.
3. Once all data is received, it writes the JSON data to a file named 'externalContentMetaData.json' in the './static/json/' directory.
Configuration:
- `outputDirJSON`: Directory where the output JSON file will be stored.
- `outputFileNameJSON`: Name of the output JSON file.
Note:
- The script should be run from the root of the project.
- For information on how to create a JSON endpoint from a Google Sheet, refer to https://stackoverflow.com/a/68854199
The code uses the Node.js 'fs', 'path', and 'https' modules to manage directories, write files, and perform HTTPS GET requests.
*/

const fs = require('fs');
const path = require('path');
const https = require('https');
require('dotenv').config();

// Config
const outputDirJSON = './static/json/'; //TODO: find a better place for this file
const outputFileNameJSON = 'externalContentMetaData.json';
// End Config


// How to create JSON endpoint from Google Sheet: https://stackoverflow.com/a/68854199
// The endpoint, including its API key, is read from the environment (.env)
// instead of being written out here. Expected shape:
// https://sheets.googleapis.com/v4/spreadsheets/<SPREADSHEET_ID>/values/<TAB_NAME>?alt=json&key=<API_KEY>
const url = process.env.ANNOTATED_COPIES_JSON_ENDPOINT;

if (!url) {
  // Without this check https.get would be called without a target and fail
  // with an unrelated connection error.
  console.error('Error: ANNOTATED_COPIES_JSON_ENDPOINT is not set.');
  process.exitCode = 1;
} else {
  https
    .get(url, (resp) => {
      const { statusCode } = resp;
      if (statusCode < 200 || statusCode >= 300) {
        // Do not store an API error response as if it were the metadata.
        resp.resume(); // drain the body so the connection is released
        console.error(`Error: request failed with HTTP status ${statusCode}`);
        process.exitCode = 1;
        return;
      }

      // Decode as UTF-8 at stream level; concatenating raw Buffer chunks would
      // corrupt a multi-byte character that is split across two chunks.
      resp.setEncoding('utf8');

      let data = '';

      // A chunk of data has been received.
      resp.on('data', (chunk) => {
        data += chunk;
      });

      // The whole response has been received. Write it to the JSON file.
      resp.on('end', () => {
        writeJSONFile(data);
      });
    })
    .on('error', (err) => {
      console.error('Error: ' + err.message);
      process.exitCode = 1;
    });
}

/**
 * Stores `content` as outputFileNameJSON inside outputDirJSON, creating the
 * directory first when it does not exist. The result of the write is
 * reported on the console.
 *
 * @param {string} content - Text to write to the file.
 */
function writeJSONFile(content) {
  // Create the output directory if it doesn't exist
  const directoryMissing = !fs.existsSync(outputDirJSON);
  if (directoryMissing) {
    fs.mkdirSync(outputDirJSON, { recursive: true });
  }

  // Path to the output file
  const targetFile = path.join(outputDirJSON, outputFileNameJSON);

  const reportResult = (writeError) => {
    if (writeError) {
      console.log(writeError);
      return;
    }
    console.log('JSON file has been written successfully.');
  };

  fs.writeFile(targetFile, content, reportResult);
} // End writeJSONFile
10 changes: 10 additions & 0 deletions fetchExternalContent/fetchAnnotatedCopies/main.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash
#
# Runs the three annotated-copies scripts in the required order.
# The paths are relative, so this has to be started from the project root.

# Stop at the first failing step: each script consumes the output of the one
# before it, so continuing would process stale or missing data.
set -euo pipefail

scripts_dir="fetchExternalContent/fetchAnnotatedCopies"

# Run the first script
node "${scripts_dir}/fetchExternalContentMetaData.js"

# Run the second script
node "${scripts_dir}/fetchExternalContent.js"

# Run the third script
node "${scripts_dir}/addHTMLstructureToExternalContent.js"
Loading

0 comments on commit fa74d47

Please sign in to comment.