diff --git a/benchmarks/gabe-fs-markdown-images/.gitignore b/benchmarks/gabe-fs-markdown-images/.gitignore new file mode 100644 index 0000000000000..1063e5588c8e6 --- /dev/null +++ b/benchmarks/gabe-fs-markdown-images/.gitignore @@ -0,0 +1,74 @@ +# Logs +logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* + +# Runtime data +pids +*.pid +*.seed +*.pid.lock + +# Directory for instrumented libs generated by jscoverage/JSCover +lib-cov + +# Coverage directory used by tools like istanbul +coverage + +# nyc test coverage +.nyc_output + +# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) +.grunt + +# Bower dependency directory (https://bower.io/) +bower_components + +# node-waf configuration +.lock-wscript + +# Compiled binary addons (http://nodejs.org/api/addons.html) +build/Release + +# Dependency directories +node_modules/ +jspm_packages/ + +# Typescript v1 declaration files +typings/ + +# Optional npm cache directory +.npm + +# Optional eslint cache +.eslintcache + +# Optional REPL history +.node_repl_history + +# Output of 'npm pack' +*.tgz + +# dotenv environment variable files +.env* + +# gatsby files +.cache/ +public + +# Mac files +.DS_Store + +# Yarn +yarn-error.log +.pnp/ +.pnp.js +# Yarn Integrity file +.yarn-integrity +yarn.lock + +generated_articles +generated_images +generated_image_pools diff --git a/benchmarks/gabe-fs-markdown-images/LICENSE b/benchmarks/gabe-fs-markdown-images/LICENSE new file mode 100644 index 0000000000000..1180a1cf3bdb6 --- /dev/null +++ b/benchmarks/gabe-fs-markdown-images/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2015 Gatsbyjs + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to 
permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/benchmarks/gabe-fs-markdown-images/README.md b/benchmarks/gabe-fs-markdown-images/README.md new file mode 100644 index 0000000000000..59ea307439f46 --- /dev/null +++ b/benchmarks/gabe-fs-markdown-images/README.md @@ -0,0 +1,63 @@ +# Baseline Gatsby Benchmark: fs + markdown + images + +This is a baseline benchmark site in the Gabe project. + +This site in particular tracks Markdown performance for individual files per page that also have an image (not part of the markdown). + +The site can generate an arbitrary amount of super simple pages. Each page has a small header, a quote, and two small paragraphs of random text. Each page also gets one generated JPG image, which is referenced from the frontmatter rather than embedded in the Markdown body. + +The results of this benchmark can be compared to the results of the `gabe-fs-markdown` benchmark, to see a tentative impact of using images in markdown. + +## Install + +Run `yarn` or `npm install` + +## Usage + +Unlike most other gabe benchmarks, the generation part is a little more complex because it will generate image file pools first and then copy images from those pools into their destination. + +### Image generation + +Image generation is rather expensive. Generating 128k images at the default size can take 2 hours single threaded. For that reason, the image generation can use workers instead. 
+ +Recommended way for larger pages is to first generate all the images up to the amount you're going to use. These pools will persist across benchmarks so it's a one time cost: + +For example; to generate 128k 100x100 images using 8 worker threads: + +``` +C=8 W=100 H=100 N=128000 node gen.js +``` + +This will require an up to date node because workers aren't available in node 10.13, you'll get a warning if that's the case. + +The files will be generated in `generated_image_pools/jpg/wxh`. If `C` is not set then it will only add images and assume the existing images are already properly incrementally numbered, without gaps. + +If `C` is set (and used) then it will regenerate all images regardless and use that many workers to divide the work. + +### Image usage + +When you run the benchmark, or generate the random content files, it will first check whether the pools have a sufficient amount of images. If they don't then the image pool is amended (see above). + +Once the pool contains enough images for a given type/dimension, the random `.md` files are generated and for each file an image is copied from the pool as well. The copying of images is a lot faster. + +It's important to note that the pool will persist between benchmark runs, while the randomly generated content does not. + +### Running the benchmark + +Either way, you can start a benchmark run using the following. If the pool doesn't exist or does not have enough images, images will be generated: + +```shell +W=100 H=200 N=1000 M=2 yarn bench +``` + +- `N=1000`: instructs the run to build a site of 1000 pages +- `M=2`: instructs nodejs to use up to 2gb of memory for its long term storage +- `W=100`: use images that are 100px wide +- `H=200`: use images that are 200px high +- `C=8`: (optional) force regenerate the image pool for given size and use 8 worker threads while doing so. Only need to do this once per image type+dimension. 
+- Deletes generated files from previous run +- Generates `N` pages with pseudo-random content, copies one image from pool per page generated +- Runs `gatsby clean` +- Runs `gatsby build` + +The default `yarn bench` will build 512 pages with 2gb memory. diff --git a/benchmarks/gabe-fs-markdown-images/gatsby-config.js b/benchmarks/gabe-fs-markdown-images/gatsby-config.js new file mode 100644 index 0000000000000..151493fcd4640 --- /dev/null +++ b/benchmarks/gabe-fs-markdown-images/gatsby-config.js @@ -0,0 +1,27 @@ +module.exports = { + siteMetadata: { + title: `Gatsby FS Markdown Benchmark for Gabe`, + description: "A blog like no other blog", + author: "Bob the Blogger", + }, + plugins: [ + `gatsby-transformer-remark`, + 'gatsby-plugin-image', + { + resolve: `gatsby-source-filesystem`, + options: { + name: `blog`, + path: `${__dirname}/generated_articles`, + }, + }, + { + resolve: `gatsby-source-filesystem`, + options: { + name: `img`, + path: `${__dirname}/generated_images`, + }, + }, + 'gatsby-plugin-sharp', + 'gatsby-transformer-sharp', + ], +} diff --git a/benchmarks/gabe-fs-markdown-images/gatsby-node.js b/benchmarks/gabe-fs-markdown-images/gatsby-node.js new file mode 100644 index 0000000000000..79fbc7f5133b4 --- /dev/null +++ b/benchmarks/gabe-fs-markdown-images/gatsby-node.js @@ -0,0 +1,43 @@ +const path = require(`path`) + +const blogPost = path.resolve(`./src/templates/blog-post.js`) + +exports.createPages = async ({ graphql, actions }) => { + const { createPage } = actions + + const result = await graphql(` + query { + allMarkdownRemark { + nodes { + id + frontmatter { + slug + title # used in prev/next + } + } + } + } + `) + + if (result.errors) { + throw result.errors + } + + const posts = result.data.allMarkdownRemark.nodes + + posts.forEach(({ id, frontmatter: { slug } }, index) => { + const previous = index === posts.length - 1 ? null : posts[index + 1] + const next = index === 0 ? 
null : posts[index - 1] + + createPage({ + path: slug, + component: blogPost, + context: { + id, + slug, + previous, + next, + }, + }) + }) +} diff --git a/benchmarks/gabe-fs-markdown-images/gen.js b/benchmarks/gabe-fs-markdown-images/gen.js new file mode 100644 index 0000000000000..c2a90a0357405 --- /dev/null +++ b/benchmarks/gabe-fs-markdown-images/gen.js @@ -0,0 +1,306 @@ +const fs = require("fs") +const path = require("path") +const faker = require(`faker`) +const genJpg = require("js-image-generator") +const rimraf = require("rimraf") +const ProgressBar = require("progress") + +const C = parseInt(process.env.C, 10) || 0 // Worker count. If non-zero, shards the image generation and generates N images regardless. +const N = parseInt(process.env.N, 10) || 100 // Article count +const W = parseInt(process.env.W, 10) || 640 // Image width +const H = parseInt(process.env.H, 10) || 326 // Image height + +let Worker, isMainThread, parentPort, workerData +try { + // worker_threads is node 10.15 ... + ;({ + Worker, + isMainThread, + parentPort, + workerData, + } = require("worker_threads")) +} catch (e) { + if (C > 0) { + console.log('') + console.warn( + "!! Worker threads are supported by nodejs from node 10.15 onwards. Proceeding in single thread mode. Consider upgrading nodejs. !!" + ) + console.log('') + } +} + +if (typeof Worker !== "undefined" && !isMainThread) { + const { offset, count, width, height } = workerData + const imgDir = "./generated_image_pools/jpg/" + width + "x" + height + console.log( + "Worker; generating", + count, + "images of", + width, + "x", + height, + ". From", + offset + ".jpg", + "to", + offset + count - 1 + ".jpg", + "into", + imgDir + ) + let i = 0 + function again() { + if (i >= count) { + // The end. 
+ return + } + + genJpg.generateImage(width, height, 80, function (err, image) { + fs.writeFileSync(path.join(imgDir, offset + i + ".jpg"), image.data) + parentPort.postMessage(1) + }) + + ++i + setImmediate(again) + } + + // Need to do this async otherwise any postMessage after the first will be blocked + setImmediate(again) + + // This is valid in toplevel in nodejs. + return +} + +console.log("Start of gen") +console.time("End of gen") + +const imgDir = "./generated_image_pools/jpg/" + W + "x" + H + +if (!fs.existsSync("./generated_image_pools")) { + fs.mkdirSync("./generated_image_pools", { recursive: true }) +} +if (!fs.existsSync(imgDir)) { + fs.mkdirSync(imgDir, { recursive: true }) +} + +generateImagePool() + .then(generateArticles) + .then(() => { + console.timeEnd("End of gen") + console.log() + }) + .catch(e => { + throw new Error(e.stack) + }) + +function generateImagePool() { + // Image generation is quite expensive so rather than regenerate the images per run, we generate + // a static pool of images and copy from that to the correct position when needed. Takes up more + // space (not a concern in this context) but is a lot faster. + // It literally takes 10 minutes to generate single thread generate 1000 images of default dimensions, 2 hours for 128k of them. + + console.log( + "Making sure there are enough", + W, + "x", + H, + "jpg images in the pool" + ) + + if (C > 0 && typeof Worker !== "undefined") { + // Ignore existing images of this size and regenerate all of them + return forceRegenerateAllWithWorkers() + } else { + if (C > 0) { + console.log("") + console.log("RUNNING SINGLE CORE !! Ignoring `C` option because it requires a newer nodejs !! RUNNING SINGLE CORE") + console.log("") + } + // Assume existing images cover entire range 0 to count-1. 
Only generate count to N-1, single threaded + return incrementallyRegenerateNoWorkers() + } +} + +function forceRegenerateAllWithWorkers() { + rimraf.sync(imgDir) + fs.mkdirSync(imgDir, { recursive: true }) + + let step = Math.floor(N / C) + let lastStep = N - step * (C - 1) + + console.log( + "Sharing image generation across", + C, + "processes. Each process will generate", + step, + "images in", + imgDir + ) + + function worker(offset, count) { + return new Promise((resolve, reject) => { + const worker = new Worker(__filename, { + workerData: { + offset, + count, + width: W, + height: H, + }, + }) + worker.on("message", () => bar.tick()) + worker.on("error", reject) + worker.on("exit", code => { + if (code === 0) { + resolve() + } else { + reject(new Error(`Worker stopped with exit code ${code}`)) + } + }) + }) + } + + const workers = [] + for (let i = 0; i < C; ++i) { + workers.push(worker(i * step, i === C - 1 ? lastStep : step)) + } + + const bar = new ProgressBar( + `[:bar] :current/${N} | :percent | :elapsed sec | :rate /s | :eta secs remaining`, + { + total: N, + width: 30, + renderThrottle: 50, + } + ) + + return Promise.all(workers) +} + +function incrementallyRegenerateNoWorkers() { + const count = fs.readdirSync(imgDir).length + if (count === 0 && N > 1000) { + if (C === 0) { + console.log( + "Going to use one core for image generation. Consider using `C=4 W=" + + W + + " H=" + + H + + " N=" + + N + + " node gen.js` to spread the work over 4 workers (or whatever)." + ) + if (typeof Worker === "undefined") { + console.log( + "This also requires a newer verseion of nodejs (one that supports `worker_threads`)" + ) + } + } else { + console.log( + "This is going to be expensive. Consider using a different node version to generate the pool first." + ) + } + } else if (N - count > 1000) { + if (C === 0) { + console.log( + "Going to incrementally fill the pool by " + + (N - count) + + " images single threaded." 
+ ) + console.log( + "Consider using `C=4 W=" + + W + + " H=" + + H + + " N=" + + N + + " node gen.js` to spread the work over 4 workers (or whatever)." + ) + } else { + console.log( + "This is going to be expensive. Consider using a different node version to regenerate the whole pool first." + ) + } + } + + const bar = new ProgressBar( + `[:bar] :current/${N} | :percent | :elapsed sec | :rate /s | :eta secs remaining`, + { + total: N, + width: 30, + renderThrottle: 100, + } + ) + + bar.tick(Math.min(N, count)) + + // This is a controlled environment. Assume that all existing files represent that many images. + // If N is larger than that, add that many images to the pool first. + for (let i = count; i < N; ++i) { + genJpg.generateImage(W, H, 80, function (err, image) { + fs.writeFileSync(path.join(imgDir, i + ".jpg"), image.data) + }) + bar.tick() + } + bar.terminate() + + // At this point the image dir should contain sufficient images to cover the articles + // Each image has random noise and should be named "i.jpg", with i from 0 through N-1 + + return Promise.resolve() +} + +function generateArticles() { + // Assuming there now exists a pool of images of given dimensions, generate an article and copy + // an image per article and give it the same name. 
+ + console.log("Generating", N, "articles with one", W, "x", H, "jpg image") + + const bar = new ProgressBar( + `[:bar] :current/${N} | :percent | :elapsed sec | :rate /s | :eta secs remaining`, + { + total: N, + width: 30, + renderThrottle: 50, + } + ) + + rimraf.sync("./generated_articles") + rimraf.sync("./generated_images") + fs.mkdirSync("./generated_articles", { recursive: true }) + fs.mkdirSync("./generated_images", { recursive: true }) + + for (let i = 0; i < N; ++i) { + const sentence = faker.lorem.sentence() + const slug = faker.helpers.slugify(sentence).toLowerCase() + fs.writeFileSync( + path.join("./generated_articles", slug + ".md"), + createArticle(i, sentence, slug) + ) + fs.copyFileSync( + path.join(imgDir, i + ".jpg"), + path.join("./generated_images", slug + ".jpg") + ) + bar.tick() + } + bar.terminate() + + console.log("Finished preparing " + N + " articles") +} + +function createArticle(n, sentence, slug) { + const desc = faker.lorem.sentence() + + return `--- +articleNumber: ${n} +title: "${sentence.replace(/"/g, '\\"')}" +description: "${desc.replace(/"/g, '\\"')}" +slug: '${slug}' +date: ${faker.date.recent(1000).toISOString().slice(0, 10)} +rngImg: ../generated_images/${slug}.jpg +--- + +# ${sentence} + +> ${desc} + +${faker.lorem.paragraphs(2)} +` +} diff --git a/benchmarks/gabe-fs-markdown-images/package.json b/benchmarks/gabe-fs-markdown-images/package.json new file mode 100644 index 0000000000000..cb34279ef5e06 --- /dev/null +++ b/benchmarks/gabe-fs-markdown-images/package.json @@ -0,0 +1,44 @@ +{ + "name": "gabe-fs-markdown", + "private": true, + "description": "Benchmark site for testing baseline markdown perf with individual files per page", + "author": "Peter van der Zee ", + "version": "0.1.0", + "license": "MIT", + "scripts": { + "bench": "rm -rf generated_articles generated_images; gatsby clean; N=${N:-512} node gen.js; CI=1 node --max_old_space_size=${M:-2}000 node_modules/.bin/gatsby build", + "build": "gatsby build", + 
"clean": "gatsby clean", + "develop": "gatsby develop", + "format": "prettier --write \"**/*.{js,jsx,json,md}\"" + }, + "devDependencies": { + "prettier": "2.0.4" + }, + "repository": { + "type": "git", + "url": "https://github.com/gatsbyjs/gatsby/tree/master/benchmarks/" + }, + "bugs": { + "url": "https://github.com/gatsbyjs/gatsby/issues" + }, + "keywords": [ + "gatsby", + "benchmark", + "markdown" + ], + "dependencies": { + "faker": "^4.1.0", + "gatsby": "2.31.0-next.0-dev-1610018045350", + "gatsby-plugin-image": "*", + "gatsby-plugin-sharp": "2.13.0-next.0-dev-1609845921133", + "gatsby-source-filesystem": "^2", + "gatsby-transformer-remark": "^2", + "gatsby-transformer-sharp": "2.11.0-next.0-dev-1609845921133", + "js-image-generator": "*", + "progress": "*", + "react": "^16.12.0", + "react-dom": "^16.12.0", + "rimraf": "*" + } +} diff --git a/benchmarks/gabe-fs-markdown-images/src/components/bio.js b/benchmarks/gabe-fs-markdown-images/src/components/bio.js new file mode 100644 index 0000000000000..867acfe818a21 --- /dev/null +++ b/benchmarks/gabe-fs-markdown-images/src/components/bio.js @@ -0,0 +1,30 @@ +/** + * Bio component that queries for data + * with Gatsby's useStaticQuery component + * + * See: https://www.gatsbyjs.org/docs/use-static-query/ + */ + +import React from "react" + +const Bio = () => { + return ( +
+

+ Written by Bob who lives and works in Fan + Srancisco building useful things. + {` `} + + You should follow him on Twitter + +

+
+ ) +} + +export default Bio diff --git a/benchmarks/gabe-fs-markdown-images/src/components/layout.js b/benchmarks/gabe-fs-markdown-images/src/components/layout.js new file mode 100644 index 0000000000000..0fb3df2756467 --- /dev/null +++ b/benchmarks/gabe-fs-markdown-images/src/components/layout.js @@ -0,0 +1,72 @@ +import React from "react" +import { Link } from "gatsby" + +class Layout extends React.Component { + render() { + const { location, title, children } = this.props + const rootPath = `${__PATH_PREFIX__}/` + let header + + if (location.pathname === rootPath) { + header = ( +

+ + {title} + +

+ ) + } else { + header = ( +

+ + {title} + +

+ ) + } + return ( +
+
{header}
+
{children}
+
+ © {new Date().getFullYear()}, Built with + {` `} + Gatsby +
+
+ ) + } +} + +export default Layout diff --git a/benchmarks/gabe-fs-markdown-images/src/pages/404.js b/benchmarks/gabe-fs-markdown-images/src/pages/404.js new file mode 100644 index 0000000000000..6380810248b0a --- /dev/null +++ b/benchmarks/gabe-fs-markdown-images/src/pages/404.js @@ -0,0 +1,30 @@ +import React from "react" +import { graphql } from "gatsby" + +import Layout from "../components/layout" + +class NotFoundPage extends React.Component { + render() { + const { data } = this.props + const siteTitle = data.site.siteMetadata.title + + return ( + +

Not Found

+

You just hit a route that doesn't exist... the sadness.

+
+ ) + } +} + +export default NotFoundPage + +export const pageQuery = graphql` + query { + site { + siteMetadata { + title + } + } + } +` diff --git a/benchmarks/gabe-fs-markdown-images/src/pages/index.js b/benchmarks/gabe-fs-markdown-images/src/pages/index.js new file mode 100644 index 0000000000000..5352a2b1d798a --- /dev/null +++ b/benchmarks/gabe-fs-markdown-images/src/pages/index.js @@ -0,0 +1,69 @@ +import React from "react" +import { Link, graphql } from "gatsby" + +import Bio from "../components/bio" +import Layout from "../components/layout" + +class BlogIndex extends React.Component { + render() { + const { data } = this.props + const siteTitle = data.site.siteMetadata.title + const posts = data.allMarkdownRemark.nodes + + return ( + + + {posts.map(({ frontmatter: { title, slug, date, description } }) => { + return ( +
+
+

+ + {title} + +

+ {date} +
+
+

+

+
+ ) + })} +
+ ) + } +} + +export default BlogIndex + +export const pageQuery = graphql` + query { + site { + siteMetadata { + title + } + } + allMarkdownRemark( + limit: 100 + sort: { fields: frontmatter___date, order: DESC } + ) { + nodes { + frontmatter { + title + slug + date(formatString: "MMMM DD, YYYY") + description + } + } + } + } +` diff --git a/benchmarks/gabe-fs-markdown-images/src/templates/blog-post.js b/benchmarks/gabe-fs-markdown-images/src/templates/blog-post.js new file mode 100644 index 0000000000000..e9c59d8d413b1 --- /dev/null +++ b/benchmarks/gabe-fs-markdown-images/src/templates/blog-post.js @@ -0,0 +1,92 @@ +import React from "react" +import { Link, graphql } from "gatsby" + +import Bio from "../components/bio" +import Layout from "../components/layout" +import { GatsbyImage } from "gatsby-plugin-image" + +class BlogPostTemplate extends React.Component { + render() { + const { + html, + frontmatter: { + title, + date, + rngImg: { + childImageSharp: { gatsbyImageData }, + }, + }, + } = this.props.data.markdownRemark + const siteTitle = this.props.data.site.siteMetadata.title + const { previous, next } = this.props.pageContext + + return ( + +
+
+

{title}

+

{date}

+
+
+ +
+
+ +
+
+ + +
+ ) + } +} + +export default BlogPostTemplate + +export const pageQuery = graphql` + query BlogPostById($id: String!) { + site { + siteMetadata { + title + } + } + markdownRemark(id: { eq: $id }) { + html + frontmatter { + title + rngImg { + childImageSharp { + gatsbyImageData + # gatsbyImageData(layout: FIXED, width: 125, height: 125) + } + } + date(formatString: "MMMM DD, YYYY") + } + } + } +` diff --git a/benchmarks/gabe-fs-markdown-images/static/favicon.ico b/benchmarks/gabe-fs-markdown-images/static/favicon.ico new file mode 100644 index 0000000000000..85a4d9fac03ba Binary files /dev/null and b/benchmarks/gabe-fs-markdown-images/static/favicon.ico differ diff --git a/benchmarks/gabe-fs-markdown-images/static/robots.txt b/benchmarks/gabe-fs-markdown-images/static/robots.txt new file mode 100644 index 0000000000000..eb0536286f308 --- /dev/null +++ b/benchmarks/gabe-fs-markdown-images/static/robots.txt @@ -0,0 +1,2 @@ +User-agent: * +Disallow: