diff --git a/benchmarks/query/README.md b/benchmarks/query/README.md new file mode 100644 index 0000000000000..a31dc632eb1a9 --- /dev/null +++ b/benchmarks/query/README.md @@ -0,0 +1,5 @@ +# Query benchmark + +Stress tests creating lots of queries. + +Defaults to building a site with 5k pages of a single type. Set the `NUM_PAGES` environment variable to change the number of pages, and `NUM_TYPES` to change the number of types they're split over. E.g. to create a site with 5 types, each with 200 pages, run `NUM_TYPES=5 NUM_PAGES=1000 gatsby build` diff --git a/benchmarks/query/bin/runQueryTiming.sh b/benchmarks/query/bin/runQueryTiming.sh new file mode 100755 index 0000000000000..021f3239acfb3 --- /dev/null +++ b/benchmarks/query/bin/runQueryTiming.sh @@ -0,0 +1,8 @@ +# Run the build (after purging .cache) and output the amount of time +# taken by the query execution phase +# +# run with `bin/runQueryTiming.sh` + +output=$(rm -rf .cache && gatsby build | grep "run graphql queries") +echo $output | cut -d' ' -f 6 + diff --git a/benchmarks/query/gatsby-node.js b/benchmarks/query/gatsby-node.js new file mode 100644 index 0000000000000..02fc7207abcc0 --- /dev/null +++ b/benchmarks/query/gatsby-node.js @@ -0,0 +1,117 @@ +const _ = require(`lodash`) +const faker = require(`faker`) +const fs = require(`fs`) + +let NUM_PAGES = 5000 +if (process.env.NUM_PAGES) { + NUM_PAGES = parseInt(process.env.NUM_PAGES, 10) +} + +let NUM_TYPES = 1 +if (process.env.NUM_TYPES) { + NUM_TYPES = parseInt(process.env.NUM_TYPES, 10) +} + +function newTypeName() { + return _.capitalize(_.camelCase(faker.lorem.word())) +} + +let types = [] + +// Create NUM_PAGES nodes, split over NUM_TYPES types. Each node has +// the bare minimum of content +exports.sourceNodes = ({ actions: { createNode } }) => { + for (var i = 0; i < NUM_TYPES; i++) { + types.push(newTypeName()) + } + // Create the nodes for each type + const pagesPerType = NUM_PAGES / NUM_TYPES + + let step = 0 + + _.forEach(types, typeName => { + for (var i = 0; i < pagesPerType; i++) { + step++ + const id = `${typeName}${step.toString()}` + createNode({ + id, + parent: null, + children: [], + internal: { + type: typeName, + nestedId: id, + content: faker.lorem.word(), + contentDigest: step.toString(), + }, + }) + } + }) +} + +// Total hack. It would be nice if we could programmatically generate +// GraphQL per component. But in the meantime, we just generate the +// actual component js file with the graphql query inlined +function createPageTemplateJs(typeName) { + const lowerTypeName = _.lowerFirst(typeName) + return ` +import React from "react" +import { graphql } from "gatsby" + +export default ({ data }) => { + const node = data["${lowerTypeName}"] + return (
+    <div> +      {node.id}. Not much ey +    </div>
+  ) +} + +export const query = graphql\` + query($id: String!) { + ${lowerTypeName}(internal: { nestedId: { eq: $id } }) { + id + } + } +\` +` +} + +function allTypeQuery(typeName) { + return ` +{ + all${typeName}(sort: { fields: [id] }) { + edges { + node { + id + } + } + } +} +` +} + +// Create a page for each node, and write out a new component js for +// each different type to .cache/${typeName}Template.js +async function createTypePages({ graphql, actions }, typeName) { + const templateSrc = createPageTemplateJs(typeName) + const templateFilename = `./.cache/${typeName}Template.js` + fs.writeFileSync(templateFilename, templateSrc) + let result = await graphql(allTypeQuery(typeName)) + _.forEach(result.data[`all${typeName}`].edges, edge => { + const { node } = edge + actions.createPage({ + path: `/${typeName}/${node.id}/`, + component: require.resolve(templateFilename), + context: { + id: node.id, + useQueryIndex: true, + }, + }) + }) +} + +// createTypePages is async, so wait for every type to finish before +// resolving createPages +exports.createPages = async args => { + await Promise.all(types.map(typeName => createTypePages(args, typeName))) +} diff --git a/benchmarks/query/package.json b/benchmarks/query/package.json new file mode 100644 index 0000000000000..107941577294b --- /dev/null +++ b/benchmarks/query/package.json @@ -0,0 +1,17 @@ +{ + "name": "query-benchmark", + "description": "Benchmarks for Gatsby query performance", + "license": "MIT", + "scripts": { + "develop": "gatsby develop", + "build": "gatsby build", + "serve": "gatsby serve" + }, + "dependencies": { + "faker": "^4.1.0", + "gatsby": "next", + "lodash": "^4.17.11", + "react": "^16.3.2", + "react-dom": "^16.3.2" + } +} diff --git a/benchmarks/query/recording.md b/benchmarks/query/recording.md new file mode 100644 index 0000000000000..44f90dc9a8d99 --- /dev/null +++ b/benchmarks/query/recording.md @@ -0,0 +1,88 @@ +## Summary + +Key findings: + +- loki without indexes is overall slightly faster than master, except when there are many types +- loki with indexes is about 2x faster on sites with 10k pages, and 5x faster with 20k pages, but it is ever so slightly slower when those pages are split across 100 types + +Overall, loki is a big win for sites with lots of pages of the same type. For smaller sites, the difference is negligible. + +## Benchmarks + +Performed on a 2018 13" MBP: 4-core 2.7 GHz Intel Core i7,
16 GB 2133 MHz LPDDR3 + +### Gatsby master + +- Gatsby: master + +``` +query $ NUM_TYPES=1 NUM_PAGES=10000 bin/runQueryTiming.sh +21.135 +``` + +``` +query $ NUM_TYPES=100 NUM_PAGES=10000 bin/runQueryTiming.sh +13.112 +``` + +``` +query $ NUM_TYPES=1 NUM_PAGES=20000 bin/runQueryTiming.sh +67.812 +``` + +``` +query $ NUM_TYPES=100 NUM_PAGES=20000 bin/runQueryTiming.sh +24.656 +``` + +### Gatsby loki without index + +- Gatsby:loki +- Index = false +- loki nested index patch + +``` +query $ NUM_TYPES=1 NUM_PAGES=10000 bin/runQueryTiming.sh +14.834 +``` + +``` +query $ NUM_TYPES=100 NUM_PAGES=10000 bin/runQueryTiming.sh +14.676 +``` + +``` +query $ NUM_TYPES=1 NUM_PAGES=20000 bin/runQueryTiming.sh +58.377 +``` + +``` +query $ NUM_TYPES=100 NUM_PAGES=20000 bin/runQueryTiming.sh +27.486 +``` + +### Gatsby loki with index + +- Gatsby:loki +- Index = true +- loki nested index patch + +``` +query $ NUM_TYPES=1 NUM_PAGES=10000 bin/runQueryTiming.sh +8.126 +``` + +``` +query $ NUM_TYPES=100 NUM_PAGES=10000 bin/runQueryTiming.sh +15.050 +``` + +``` +query $ NUM_TYPES=1 NUM_PAGES=20000 bin/runQueryTiming.sh +12.797 +``` + +``` +query $ NUM_TYPES=100 NUM_PAGES=20000 bin/runQueryTiming.sh +27.020 +``` diff --git a/benchmarks/query/src/pages/index.js b/benchmarks/query/src/pages/index.js new file mode 100644 index 0000000000000..92b25c10168f8 --- /dev/null +++ b/benchmarks/query/src/pages/index.js @@ -0,0 +1,3 @@ +import React from "react" + +export default () =>
<div>Hello world!</div>
diff --git a/packages/gatsby-source-contentful/src/gatsby-node.js b/packages/gatsby-source-contentful/src/gatsby-node.js index 9f922498dc0c4..aa1f5afe019ed 100644 --- a/packages/gatsby-source-contentful/src/gatsby-node.js +++ b/packages/gatsby-source-contentful/src/gatsby-node.js @@ -216,7 +216,7 @@ exports.sourceNodes = async ( // Check if there are any ContentfulAsset nodes and if gatsby-image is installed. If so, // add fragments for ContentfulAsset and gatsby-image. The fragment will cause an error // if there's not ContentfulAsset nodes and without gatsby-image, the fragment is useless. -exports.onPreExtractQueries = async ({ store, getNodes }) => { +exports.onPreExtractQueries = async ({ store, getNodesByType }) => { const program = store.getState().program const CACHE_DIR = path.resolve( @@ -224,9 +224,8 @@ exports.onPreExtractQueries = async ({ store, getNodes }) => { ) await fs.ensureDir(CACHE_DIR) - const nodes = getNodes() - - if (!nodes.some(n => n.internal.type === `ContentfulAsset`)) { + const nodes = getNodesByType(`ContentfulAsset`) + if (!nodes || !nodes.length || nodes.length === 0) { return } diff --git a/packages/gatsby-transformer-remark/src/extend-node-type.js b/packages/gatsby-transformer-remark/src/extend-node-type.js index fe95b3fd7bc48..07827d42b1458 100644 --- a/packages/gatsby-transformer-remark/src/extend-node-type.js +++ b/packages/gatsby-transformer-remark/src/extend-node-type.js @@ -62,7 +62,7 @@ const withPathPrefix = (url, pathPrefix) => const ASTPromiseMap = new Map() module.exports = ( - { type, store, pathPrefix, getNode, getNodes, cache, reporter }, + { type, store, pathPrefix, getNode, getNodesByType, cache, reporter }, pluginOptions ) => { if (type.name !== `MarkdownRemark`) { @@ -74,7 +74,13 @@ module.exports = ( return new Promise((resolve, reject) => { // Setup Remark. - const { commonmark = true, footnotes = true, pedantic = true, gfm = true, blocks } = pluginOptions + const { + commonmark = true, + footnotes = true, + pedantic = true, + gfm = true, + blocks, + } = pluginOptions const remarkOptions = { gfm, commonmark, @@ -113,7 +119,7 @@ module.exports = ( } else { const ASTGenerationPromise = new Promise(async resolve => { if (process.env.NODE_ENV !== `production` || !fileNodes) { - fileNodes = getNodes().filter(n => n.internal.type === `File`) + fileNodes = getNodesByType(`File`) } const ast = await new Promise((resolve, reject) => { // Use Bluebird's Promise function "each" to run remark plugins serially. @@ -180,7 +186,7 @@ module.exports = ( // typegen plugins just modify the auto-generated types to add derived fields // as well as computationally expensive fields. if (process.env.NODE_ENV !== `production` || !fileNodes) { - fileNodes = getNodes().filter(n => n.internal.type === `File`) + fileNodes = getNodesByType(`File`) } // Use Bluebird's Promise function "each" to run remark plugins serially. Promise.each(pluginOptions.plugins, plugin => { @@ -249,10 +255,16 @@ module.exports = ( const addSlugToUrl = function(node) { if (node.url) { if (_.get(markdownNode, pathToSlugField) === undefined) { - console.warn(`Skipping TableOfContents. Field '${pathToSlugField}' missing from markdown node`) + console.warn( + `Skipping TableOfContents. 
Field '${pathToSlugField}' missing from markdown node` + ) return null } - node.url = [pathPrefix, _.get(markdownNode, pathToSlugField), node.url] + node.url = [ + pathPrefix, + _.get(markdownNode, pathToSlugField), + node.url, + ] .join(`/`) .replace(/\/\//g, `/`) } diff --git a/packages/gatsby-transformer-screenshot/src/gatsby-node.js b/packages/gatsby-transformer-screenshot/src/gatsby-node.js index 8dfbf597d2c58..7461380add999 100644 --- a/packages/gatsby-transformer-screenshot/src/gatsby-node.js +++ b/packages/gatsby-transformer-screenshot/src/gatsby-node.js @@ -16,13 +16,11 @@ const screenshotQueue = new Queue( ) exports.onPreBootstrap = ( - { store, cache, actions, createNodeId, getNodes, createContentDigest }, + { store, cache, actions, createNodeId, getNodesByType, createContentDigest }, pluginOptions ) => { const { createNode, touchNode } = actions - const screenshotNodes = getNodes().filter( - n => n.internal.type === `Screenshot` - ) + const screenshotNodes = getNodesByType(`Screenshot`) if (screenshotNodes.length === 0) { return null diff --git a/packages/gatsby-transformer-sharp/src/gatsby-node.js b/packages/gatsby-transformer-sharp/src/gatsby-node.js index 2bb6f5ac6ce6d..89a3c409a7487 100644 --- a/packages/gatsby-transformer-sharp/src/gatsby-node.js +++ b/packages/gatsby-transformer-sharp/src/gatsby-node.js @@ -3,14 +3,13 @@ const fs = require(`fs-extra`) exports.onCreateNode = require(`./on-node-create`) exports.setFieldsOnGraphQLNodeType = require(`./extend-node-type`) -exports.onPreExtractQueries = async ({ store, getNodes }) => { +exports.onPreExtractQueries = async ({ store, getNodesByType }) => { const program = store.getState().program // Check if there are any ImageSharp nodes. If so add fragments for ImageSharp. // The fragment will cause an error if there are no ImageSharp nodes. - const nodes = getNodes() - - if (!nodes.some(n => n.internal.type === `ImageSharp`)) { + const nodes = getNodesByType(`ImageSharp`) + if (!nodes || !nodes.length || nodes.length === 0) { return } diff --git a/packages/gatsby/package.json b/packages/gatsby/package.json index bacc5354d3026..49099178c0a3d 100644 --- a/packages/gatsby/package.json +++ b/packages/gatsby/package.json @@ -83,6 +83,7 @@ "json-stringify-safe": "^5.0.1", "kebab-hash": "^0.1.2", "lodash": "^4.17.10", + "lokijs": "^1.5.5", "md5": "^2.2.1", "md5-file": "^3.1.1", "mime": "^2.2.0", diff --git a/packages/gatsby/src/bootstrap/index.js b/packages/gatsby/src/bootstrap/index.js index 261007bd6d7a5..776439ca2821d 100644 --- a/packages/gatsby/src/bootstrap/index.js +++ b/packages/gatsby/src/bootstrap/index.js @@ -19,6 +19,8 @@ const report = require(`gatsby-cli/lib/reporter`) const getConfigFile = require(`./get-config-file`) const tracer = require(`opentracing`).globalTracer() const preferDefault = require(`./prefer-default`) +const db = require(`../db`) +const nodeTracking = require(`../schema/node-tracking`) // Show stack trace on unhandled promises. process.on(`unhandledRejection`, (reason, p) => { @@ -211,6 +213,26 @@ module.exports = async (args: BootstrapArgs) => { activity.end() + // Start the nodes database (in memory loki js with interval disk + // saves). 
If data was saved from a previous build, it will be + // loaded here + activity = report.activityTimer(`start nodes db`, { + parentSpan: bootstrapSpan, + }) + activity.start() + const dbSaveFile = `${program.directory}/.cache/loki/loki.db` + try { + await db.start({ + saveFile: dbSaveFile, + }) + nodeTracking.trackDbNodes() + } catch (e) { + report.error( + `Error starting DB. Perhaps try deleting ${path.dirname(dbSaveFile)}` + ) + } + activity.end() + // Copy our site files to the root of the site. activity = report.activityTimer(`copy gatsby files`, { parentSpan: bootstrapSpan, diff --git a/packages/gatsby/src/bootstrap/load-plugins/load.js b/packages/gatsby/src/bootstrap/load-plugins/load.js index 89e745e532c04..ef79cb5e00a4a 100644 --- a/packages/gatsby/src/bootstrap/load-plugins/load.js +++ b/packages/gatsby/src/bootstrap/load-plugins/load.js @@ -6,6 +6,7 @@ const crypto = require(`crypto`) const glob = require(`glob`) const { store } = require(`../../redux`) const existsSync = require(`fs-exists-cached`).sync +const createNodeId = require(`../../utils/create-node-id`) function createFileContentHash(root, globPattern) { const hash = crypto.createHash(`md5`) @@ -48,7 +49,7 @@ function resolvePlugin(pluginName) { return { resolve: resolvedPath, name: packageJSON.name || pluginName, - id: `Plugin ${packageJSON.name || pluginName}`, + id: createNodeId(packageJSON.name, `Plugin`), version: packageJSON.version || createFileContentHash(resolvedPath, `**`), } @@ -72,7 +73,7 @@ function resolvePlugin(pluginName) { return { resolve: resolvedPath, - id: `Plugin ${packageJSON.name}`, + id: createNodeId(packageJSON.name, `Plugin`), name: packageJSON.name, version: packageJSON.version, } @@ -126,6 +127,15 @@ module.exports = (config = {}) => { return { ...info, + // Make sure the id is unique to the plugin options. E.g. there could + // be multiple source-filesystem plugins, with different names + // (docs, blogs). + id: createNodeId( + plugin.options + ? plugin.name + JSON.stringify(plugin.options) + : plugin.name, + `Plugin` + ), pluginOptions: _.merge({ plugins: [] }, plugin.options), } } @@ -154,7 +164,7 @@ module.exports = (config = {}) => { // Add the site's default "plugin" i.e. gatsby-x files in root of site.
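+ // Its id now comes from createNodeId too, matching the resolved plugins above.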
plugins.push({ resolve: slash(process.cwd()), - id: `Plugin default-site-plugin`, + id: createNodeId(`default-site-plugin`, `Plugin`), name: `default-site-plugin`, version: createFileContentHash(process.cwd(), `gatsby-*`), pluginOptions: { diff --git a/packages/gatsby/src/commands/repl.js b/packages/gatsby/src/commands/repl.js index 00ce79e987e31..073dde3ece131 100644 --- a/packages/gatsby/src/commands/repl.js +++ b/packages/gatsby/src/commands/repl.js @@ -1,7 +1,8 @@ const repl = require(`repl`) const { graphql } = require(`graphql`) const bootstrap = require(`../bootstrap`) -const { store, loadNodeContent, getNodes, getNode } = require(`../redux`) +const { store, loadNodeContent } = require(`../redux`) +const { getNodes, getNodesByType, getNode } = require(`../db`) module.exports = async program => { // run bootstrap @@ -16,9 +17,10 @@ module.exports = async program => { pages, components, staticQueryComponents, - nodes, } = store.getState() + const nodes = getNodes() + const query = async query => { const result = await graphql(schema, query, {}, {}, {}) console.log(`query result: ${JSON.stringify(result)}`) @@ -35,6 +37,7 @@ module.exports = async program => { _.context.dataPaths = jsonDataPaths _.context.getNode = getNode _.context.getNodes = getNodes + _.context.getNodesByType = getNodesByType _.context.loadNodeContent = loadNodeContent _.context.nodes = [...nodes.entries()] _.context.pages = [...pages.entries()] diff --git a/packages/gatsby/src/db/index.js b/packages/gatsby/src/db/index.js new file mode 100644 index 0000000000000..f7891b05ed82d --- /dev/null +++ b/packages/gatsby/src/db/index.js @@ -0,0 +1,276 @@ +const _ = require(`lodash`) +const fs = require(`fs-extra`) +const path = require(`path`) +const invariant = require(`invariant`) +const loki = require(`lokijs`) +const lokiFsStructuredAdapter = require(`lokijs/src/loki-fs-structured-adapter`) + +///////////////////////////////////////////////////////////////////// +// DB Initialization +///////////////////////////////////////////////////////////////////// + +// Must be set using `start()` +let db + +function startDb(saveFile) { + return new Promise((resolve, reject) => { + const dbOptions = { + autoload: true, + autoloadCallback: err => { + if (err) { + reject(err) + } else { + resolve() + } + }, + autosave: true, + autosaveInterval: 1000, + } + db = new loki(saveFile, dbOptions) + }) +} + +/** + * Starts a loki database. If the file already exists, it will be + * loaded as the database state. If not, a new database will be + * created. + * + * @param {string} saveFile the on-disk file that the database will be + * saved and loaded from. + * @returns {Promise} promise that is resolved once the database and + * (optionally) the existing state has been loaded + */ +async function start({ saveFile }) { + if (!_.isString(saveFile)) { + throw new Error(`saveFile must be a path`) + } + const saveDir = path.dirname(saveFile) + await fs.ensureDir(saveDir) + await startDb(saveFile) } + +/** + * Returns a reference to the database. If undefined, the db has not been + * initialized yet. Call `start()` first. + * + * @returns {Object} database, or undefined + */ +function getDb() { + return db } + +/** + * Deletes all collections.
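+ * Called from the nodes reducer when the cache is deleted (the `DELETE_CACHE` action).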
+ */ +function deleteAllCollections() { + if (db) { + _.forEach(db.listCollections(), collInfo => { + const coll = db.getCollection(collInfo.name) + coll.clear({ removeIndices: true }) + db.removeCollection(collInfo.name) + }) + } +} + +/** + * Deletes all collections that are empty + */ +function deleteEmptyCollections() { + if (db) { + _.forEach(db.listCollections(), collInfo => { + const coll = db.getCollection(collInfo.name) + if (coll.count() === 0) { + db.removeCollection(collInfo.name) + } + }) + } +} + +///////////////////////////////////////////////////////////////////// + // Insertions/Updates/Deletions +///////////////////////////////////////////////////////////////////// + +/** + * Creates a node in the DB. Will create a collection for the node + * type if one hasn't been created yet + * + * @param {Object} node The node to add. Must have an `id` and + * `internal.type` + */ +function createNode(node) { + invariant(node.internal, `node has no "internal" field`) + invariant(node.internal.type, `node has no "internal.type" field`) + invariant(node.id, `node has no "id" field`) + + const type = node.internal.type + + let coll = db.getCollection(type) + if (!coll) { + coll = db.addCollection(type, { unique: [`id`], indices: [`id`] }) + } + + return coll.insert(node) +} + +/** + * Updates a node in the DB + * + * @param {Object} node The new node information. Will be merged over + * the old one (deep merge via lodash merge) + * @param {Object} oldNode The old node to merge the new node + * over. Optional. If not supplied, the old node is found by querying + * by node.id + */ +function updateNode(node, oldNode) { + invariant(node.internal, `node has no "internal" field`) + invariant(node.internal.type, `node has no "internal.type" field`) + invariant(node.id, `node has no "id" field`) + + const type = node.internal.type + + let coll = db.getCollection(type) + if (!coll) { + invariant(coll, `${type} collection doesn't exist when trying to update`) + } + + if (!oldNode) { + oldNode = getNode(node.id) + } + const mergedNode = _.merge(oldNode, node) + + coll.update(mergedNode) +} + +/** + * Deletes a node from its type collection. + * + * @param {Object} node the node to delete. Must have an `id` + */ +function deleteNode(node) { + invariant(node.internal, `node has no "internal" field`) + invariant(node.internal.type, `node has no "internal.type" field`) + invariant(node.id, `node has no "id" field`) + + const type = node.internal.type + + let coll = db.getCollection(type) + if (!coll) { + invariant(coll, `${type} collection doesn't exist when trying to delete`) + } + + if (coll.by(`id`, node.id)) { + coll.remove(node) + } else { + console.log( + `WARN: deletion of node failed because it wasn't in coll. Node = [${node}]` + ) + } +} + +///////////////////////////////////////////////////////////////////// +// Queries +///////////////////////////////////////////////////////////////////// + +/** + * Returns the node with `id` == id, or undefined if not found + */ +function getNode(id) { + invariant(id, `id is null`) + + // We store a collection per node type. So to look up the node by ID, + // we first need to find which collection that node is in, which is + // accomplished by iterating through all of them until one is found. + // + // This is obviously slow. We should consider creating a lookup + // collection mapping IDs to their collections.
This would require + // an additional operation per insert, but would result in faster + // lookups + const collInfo = _.find(db.listCollections(), collInfo => { + const coll = db.getCollection(collInfo.name) + return coll.by(`id`, id) + }) + if (collInfo) { + const coll = db.getCollection(collInfo.name) + return coll.by(`id`, id) + } else { + return undefined + } +} + +/** + * Returns all nodes of a type (where typeName == node.internal.type) + */ +function getNodesByType(typeName) { + const coll = db.getCollection(typeName) + if (!coll) return null + return coll.data +} + +/** + * Returns the collection of all nodes. This should be deprecated + */ +function getNodes() { + return _.flatMap(db.listCollections(), collInfo => + getNodesByType(collInfo.name) + ) +} + +/** + * Returns the list of node typeNames + */ +function getNodeTypes() { + return _.map(db.listCollections(), collInfo => collInfo.name) +} + +/** + * Looks up the node by id, records a dependency between the node and + * the path, and then returns the path + * + * @param {string} id node id to lookup + * @param {string} path the page path to record a node dependency + * against + * @returns {Object} node or undefined if not found + */ +function getNodeAndSavePathDependency(id, path) { + const { + createPageDependency, + } = require(`../redux/actions/add-page-dependency`) + const node = getNode(id) + createPageDependency({ path, nodeId: id }) + return node +} + +/** + * Determine if node has changed (by comparing its + * `internal.contentDigest` + * + * @param {string} id + * @param {string} digest + * @returns {boolean} + */ +function hasNodeChanged(id, digest) { + const node = getNode(id) + if (!node) { + return true + } else { + return node.internal.contentDigest !== digest + } +} + +module.exports = { + start, + getDb, + deleteAllCollections, + deleteEmptyCollections, + + createNode, + updateNode, + deleteNode, + + getNode, + getNodes, + getNodeTypes, + getNodesByType, + getNodeAndSavePathDependency, + hasNodeChanged, +} diff --git a/packages/gatsby/src/internal-plugins/internal-data-bridge/gatsby-node.js b/packages/gatsby/src/internal-plugins/internal-data-bridge/gatsby-node.js index a5af127a384de..449ffb2b8cb7a 100644 --- a/packages/gatsby/src/internal-plugins/internal-data-bridge/gatsby-node.js +++ b/packages/gatsby/src/internal-plugins/internal-data-bridge/gatsby-node.js @@ -6,7 +6,7 @@ const _ = require(`lodash`) const { emitter } = require(`../../redux`) const { boundActionCreators } = require(`../../redux/actions`) -const { getNode } = require(`../../redux`) +const { getNode } = require(`../../db`) function transformPackageJson(json) { const transformDeps = deps => diff --git a/packages/gatsby/src/redux/actions.js b/packages/gatsby/src/redux/actions.js index b54e66a61ffe6..9a17dfa26f42f 100644 --- a/packages/gatsby/src/redux/actions.js +++ b/packages/gatsby/src/redux/actions.js @@ -9,7 +9,7 @@ const path = require(`path`) const fs = require(`fs`) const url = require(`url`) const kebabHash = require(`kebab-hash`) -const { hasNodeChanged, getNode } = require(`./index`) +const { hasNodeChanged, getNode } = require(`../db`) const { trackInlineObjectsInRootNode } = require(`../schema/node-tracking`) const { store } = require(`./index`) const fileExistsSync = require(`fs-exists-cached`).sync @@ -601,6 +601,7 @@ actions.createNode = ( updateNodeAction = { type: `CREATE_NODE`, plugin, + oldNode, ...actionOptions, payload: node, } @@ -1093,7 +1094,8 @@ actions.createRedirect = ({ // url.parse will not cover protocol-relative 
urls so do a separate check for those const parsed = url.parse(toPath) const isRelativeProtocol = toPath.startsWith(`//`) - const toPathPrefix = parsed.protocol != null || isRelativeProtocol ? `` : pathPrefix + const toPathPrefix = + parsed.protocol != null || isRelativeProtocol ? `` : pathPrefix return { type: `CREATE_REDIRECT`, diff --git a/packages/gatsby/src/redux/index.js b/packages/gatsby/src/redux/index.js index 0198062671108..abb326078d280 100644 --- a/packages/gatsby/src/redux/index.js +++ b/packages/gatsby/src/redux/index.js @@ -120,44 +120,6 @@ exports.emitter = emitter /** Redux store */ exports.store = store -/** - * Get all nodes from redux store. - * - * @returns {Array} - */ -exports.getNodes = () => { - const nodes = store.getState().nodes - if (nodes) { - return Array.from(nodes.values()) - } else { - return [] - } -} -const getNode = id => store.getState().nodes.get(id) - -/** Get node by id from store. - * - * @param {string} id - * @returns {Object} - */ -exports.getNode = getNode - -/** - * Determine if node has changed. - * - * @param {string} id - * @param {string} digest - * @returns {boolean} - */ -exports.hasNodeChanged = (id, digest) => { - const node = store.getState().nodes.get(id) - if (!node) { - return true - } else { - return node.internal.contentDigest !== digest - } -} - /** * Get content for a node from the plugin that created it. * @@ -188,20 +150,6 @@ exports.loadNodeContent = node => { } } -/** - * Get node and save path dependency. - * - * @param {string} id - * @param {string} path - * @returns {Object} node - */ -exports.getNodeAndSavePathDependency = (id, path) => { - const { createPageDependency } = require(`./actions/add-page-dependency`) - const node = getNode(id) - createPageDependency({ path, nodeId: id }) - return node -} - // Start plugin runner which listens to the store // and invokes Gatsby API based on actions. require(`./plugin-runner`) diff --git a/packages/gatsby/src/redux/plugin-runner.js b/packages/gatsby/src/redux/plugin-runner.js index a12a197a8995a..89b5536b3ccbc 100644 --- a/packages/gatsby/src/redux/plugin-runner.js +++ b/packages/gatsby/src/redux/plugin-runner.js @@ -1,10 +1,11 @@ // Invoke plugins for certain actions. 
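+// Nodes are now looked up via the loki-backed db module rather than the redux store.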
-const { store, emitter } = require(`./index`) +const { emitter } = require(`./index`) +const { getNode } = require(`../db`) const apiRunnerNode = require(`../utils/api-runner-node`) emitter.on(`CREATE_NODE`, action => { - const node = store.getState().nodes.get(action.payload.id) + const node = getNode(action.payload.id) const traceTags = { nodeId: node.id, nodeType: node.internal.type } apiRunnerNode(`onCreateNode`, { node, diff --git a/packages/gatsby/src/redux/reducers/nodes.js b/packages/gatsby/src/redux/reducers/nodes.js index 86f4d1f5f710f..d677e4da1b23a 100644 --- a/packages/gatsby/src/redux/reducers/nodes.js +++ b/packages/gatsby/src/redux/reducers/nodes.js @@ -1,28 +1,34 @@ +const db = require(`../../db`) + module.exports = (state = new Map(), action) => { switch (action.type) { case `DELETE_CACHE`: + db.deleteAllCollections() return new Map() case `CREATE_NODE`: { - state.set(action.payload.id, action.payload) - return state + if (action.oldNode) { + db.updateNode(action.payload, action.oldNode) + } else { + db.createNode(action.payload) + } + return new Map() } case `ADD_FIELD_TO_NODE`: case `ADD_CHILD_NODE_TO_PARENT_NODE`: - state.set(action.payload.id, action.payload) - return state + db.updateNode(action.payload) + return new Map() case `DELETE_NODE`: { - state.delete(action.payload.id) - return state + db.deleteNode(action.payload) + return new Map() } case `DELETE_NODES`: { - action.payload.forEach(id => state.delete(id)) - return state + return new Map() } default: - return state + return new Map() } } diff --git a/packages/gatsby/src/schema/__tests__/query-loki.js b/packages/gatsby/src/schema/__tests__/query-loki.js new file mode 100644 index 0000000000000..18f02225e0a7c --- /dev/null +++ b/packages/gatsby/src/schema/__tests__/query-loki.js @@ -0,0 +1,200 @@ +const queryLoki = require(`../query-loki`) +const loki = require(`lokijs`) +const db = new loki(`loki-test.json`) +const dbModule = require(`../../db`) + +jest.mock(`../../db`) + +describe(`query-loki`, () => { + describe(`convertArgs`, () => { + it(`simple filter`, async () => { + const gqlArgs = { + filter: { + id: { + eq: `1`, + }, + }, + } + const expectedArgs = { + id: { + $eq: `1`, + }, + } + const result = queryLoki.convertArgs(gqlArgs) + expect(result).toEqual(expectedArgs) + }) + + it(`nested field`, async () => { + const gqlArgs = { + filter: { + fields: { + slug: { + eq: `1`, + }, + }, + }, + } + const expectedArgs = { + "fields.slug": { + $eq: `1`, + }, + } + const result = queryLoki.convertArgs(gqlArgs) + expect(result).toEqual(expectedArgs) + }) + + it(`nested fields`, async () => { + const gqlArgs = { + filter: { + fields: { + slug: { + eq: `1`, + }, + bar: { + eq: `2`, + }, + }, + }, + } + const expectedArgs = { + "fields.slug": { + $eq: `1`, + }, + "fields.bar": { + $eq: `2`, + }, + } + const result = queryLoki.convertArgs(gqlArgs) + expect(result).toEqual(expectedArgs) + }) + + it(`nested and simple fields`, async () => { + const gqlArgs = { + filter: { + fields: { + slug: { + eq: `1`, + }, + bar: { + eq: `2`, + }, + }, + id: { + eq: `3`, + }, + }, + } + const expectedArgs = { + "fields.slug": { + $eq: `1`, + }, + "fields.bar": { + $eq: `2`, + }, + id: { + $eq: `3`, + }, + } + const result = queryLoki.convertArgs(gqlArgs) + expect(result).toEqual(expectedArgs) + }) + }) + + describe(`queries`, () => { + it(`firstOnly`, async () => { + dbModule.getDb.mockReturnValue(db) + + const typeName = `testType` + const gqlType = { name: typeName } + const node1 = { id: `0` } + const node2 = { id: `1` } + 
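// Seed a collection with the two nodes so the query has data to match +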
const coll = db.addCollection(typeName) + coll.insert([node1, node2]) + + const rawGqlArgs = { + filter: { + id: { + gte: 0, + }, + }, + sort: { + fields: ["id"], + }, + } + + const result = await queryLoki.runQuery({ + gqlType, + rawGqlArgs, + firstOnly: true, + }) + + expect(result).toHaveLength(1) + expect(result[0]).toHaveProperty(`id`, `0`) + }) + + it(`regex`, async () => { + dbModule.getDb.mockReturnValue(db) + + const typeName = `testType` + const gqlType = { name: typeName } + const node = { + id: `0`, + foo: `src/foobar.js`, + } + + const coll = db.addCollection(typeName) + coll.insert(node) + + const rawGqlArgs = { + filter: { + foo: { + regex: `/src.*bar.js/`, + }, + }, + } + + const result = await queryLoki.runQuery({ gqlType, rawGqlArgs }) + + expect(result).toHaveLength(1) + expect(result[0]).toHaveProperty(`id`, `0`) + }) + + it(`nested sorting`, async () => { + dbModule.getDb.mockReturnValue(db) + + const typeName = `NestedSorting` + const gqlType = { name: typeName } + const nodes = [ + { + id: `0`, + foo: { bar: `2017` }, + }, + { + id: `1`, + foo: { bar: `2016` }, + }, + { + id: `2`, + foo: { bar: `2018` }, + }, + ] + + const coll = db.addCollection(typeName) + coll.insert(nodes) + + const rawGqlArgs = { + sort: { + order: `DESC`, + fields: [`foo___bar`], + }, + } + + const result = await queryLoki.runQuery({ gqlType, rawGqlArgs }) + + expect(result).toHaveLength(3) + expect(result[0]).toHaveProperty(`id`, `2`) + expect(result[1]).toHaveProperty(`id`, `0`) + expect(result[2]).toHaveProperty(`id`, `1`) + }) + }) +}) diff --git a/packages/gatsby/src/schema/build-node-connections.js b/packages/gatsby/src/schema/build-node-connections.js index 0ec2825e2bd3f..ff7fbe5c1714c 100644 --- a/packages/gatsby/src/schema/build-node-connections.js +++ b/packages/gatsby/src/schema/build-node-connections.js @@ -10,7 +10,27 @@ const { } = require(`./infer-graphql-input-fields-from-fields`) const createSortField = require(`./create-sort-field`) const buildConnectionFields = require(`./build-connection-fields`) -const { getNodes } = require(`../redux`) +const { runQuery } = require(`./run-query`) +const { createPageDependency } = require(`../redux/actions/add-page-dependency`) +const { connectionFromArray } = require(`graphql-skip-limit`) + +function handleQueryResult({ results, resolveArgs: queryArgs, path }) { + if (results && results.length) { + const connection = connectionFromArray(results, queryArgs) + connection.totalCount = results.length + + if (results.length > 0 && results[0].internal) { + const connectionType = connection.edges[0].node.internal.type + createPageDependency({ + path, + connection: connectionType, + }) + } + return connection + } else { + return null + } +} module.exports = (types: any) => { const connections = {} @@ -62,24 +82,19 @@ module.exports = (types: any) => { }), }, }, - resolve(object, resolveArgs, b, { rootValue }) { + async resolve(object, resolveArgs, context, { rootValue }) { + const results = await runQuery({ + gqlType: type.node.type, + queryArgs: resolveArgs, + context, + firstOnly: false, + }) + let path if (typeof rootValue !== `undefined`) { path = rootValue.path } - const runSift = require(`./run-sift`) - const latestNodes = _.filter( - getNodes(), - n => n.internal.type === type.name - ) - return runSift({ - args: resolveArgs, - nodes: latestNodes, - connection: true, - path, - typeName: typeName, - type: type.node.type, - }) + return handleQueryResult({ results, resolveArgs, path }) }, } }) diff --git 
a/packages/gatsby/src/schema/build-node-types.js b/packages/gatsby/src/schema/build-node-types.js index 7b2e8874b602b..f1b977a0b40b5 100644 --- a/packages/gatsby/src/schema/build-node-types.js +++ b/packages/gatsby/src/schema/build-node-types.js @@ -17,10 +17,17 @@ const { inferInputObjectStructureFromNodes, } = require(`./infer-graphql-input-fields`) const { nodeInterface } = require(`./node-interface`) -const { getNodes, getNode, getNodeAndSavePathDependency } = require(`../redux`) +const { + getNode, + getNodesByType, + getNodeTypes, + getNodeAndSavePathDependency, +} = require(`../db`) +const { pluginFieldTracking } = require(`./plugin-fields`) const { createPageDependency } = require(`../redux/actions/add-page-dependency`) const { setFileNodeRootType } = require(`./types/type-file`) const { clearTypeExampleValues } = require(`./data-tree-utils`) +const { runQuery } = require(`./run-query`) import type { ProcessedNodeType } from "./infer-graphql-type" @@ -28,16 +35,24 @@ type TypeMap = { [typeName: string]: ProcessedNodeType, } -const nodesCache = new Map() +// Returns a map of type to all the nodes of that type +function getNodeGroups() { + const typeNames = getNodeTypes() + return _.reduce( + typeNames, + (groups, typeName) => { + groups[typeName] = getNodesByType(typeName) + return groups + }, + {} + ) +} module.exports = async ({ parentSpan }) => { const spanArgs = parentSpan ? { childOf: parentSpan } : {} const span = tracer.startSpan(`build schema`, spanArgs) - const types = _.groupBy( - getNodes().filter(node => node.internal && !node.internal.ignoreType), - node => node.internal.type - ) + const types = getNodeGroups() const processedTypes: TypeMap = {} clearTypeExampleValues() @@ -160,6 +175,17 @@ module.exports = async ({ parentSpan }) => { fields: mergedFieldsFromPlugins, }) + const inferredFieldNames = _.keys( + inferredInputFieldsFromPlugins.inferredFields + ) + + // Track which fields are supplied by plugins. This will determine + // if we can query by pure data or whether we have to resolve the + // fields first. See `./run-query.js` + _.forEach(inferredFieldNames, fieldName => { + pluginFieldTracking.add(fieldName) + }) + const gqlType = new GraphQLObjectType({ name: typeName, description: `Node of type ${typeName}`, @@ -187,35 +213,35 @@ module.exports = async ({ parentSpan }) => { name: typeName, type: gqlType, args: filterFields, - resolve(a, args, context) { - const runSift = require(`./run-sift`) - let latestNodes - if ( - process.env.NODE_ENV === `production` && - nodesCache.has(typeName) - ) { - latestNodes = nodesCache.get(typeName) - } else { - latestNodes = _.filter( - getNodes(), - n => n.internal.type === typeName - ) - nodesCache.set(typeName, latestNodes) - } - if (!_.isObject(args)) { - args = {} + async resolve(a, args, context) { + try { + let path = context.path || `` + + // run-query expects queries to have a filter field. For + // connection fields, the field will already present. We + // have to manually add it to single result queries + let queryArgs = _.isObject(args) ? args : {} + queryArgs = { filter: queryArgs } + + const results = await runQuery({ + gqlType, + queryArgs, + context, + firstOnly: true, + }) + + if (results.length > 0) { + const result = results[0] + const nodeId = result.id + createPageDependency({ path, nodeId }) + return result + } else { + return null + } + } catch (e) { + console.log(e) + return null } - return runSift({ - args: { - filter: { - ...args, - }, - }, - nodes: latestNodes, - path: context.path ? 
context.path : ``, - typeName: typeName, - type: gqlType, - }) }, }, } diff --git a/packages/gatsby/src/schema/infer-graphql-input-fields.js b/packages/gatsby/src/schema/infer-graphql-input-fields.js index 1fc0184d40cea..96a2c90dd6f27 100644 --- a/packages/gatsby/src/schema/infer-graphql-input-fields.js +++ b/packages/gatsby/src/schema/infer-graphql-input-fields.js @@ -21,7 +21,7 @@ const { } = require(`./data-tree-utils`) const { findLinkedNode } = require(`./infer-graphql-type`) -const { getNodes } = require(`../redux`) +const { getNodesByType } = require(`../db`) const is32BitInteger = require(`../utils/is-32-bit-integer`) import type { @@ -277,9 +277,7 @@ export function inferInputObjectStructureFromNodes({ if (linkedNodeCache[linkedNode.internal.type]) { value = linkedNodeCache[linkedNode.internal.type] } else { - const relatedNodes = getNodes().filter( - node => node.internal.type === linkedNode.internal.type - ) + const relatedNodes = getNodesByType(linkedNode.internal.type) value = getExampleValues({ nodes: relatedNodes, typeName: linkedNode.internal.type, diff --git a/packages/gatsby/src/schema/infer-graphql-type.js b/packages/gatsby/src/schema/infer-graphql-type.js index 87e30304c66c9..19b11de7d1d3b 100644 --- a/packages/gatsby/src/schema/infer-graphql-type.js +++ b/packages/gatsby/src/schema/infer-graphql-type.js @@ -12,7 +12,8 @@ const _ = require(`lodash`) const invariant = require(`invariant`) const { oneLine } = require(`common-tags`) -const { store, getNode, getNodes } = require(`../redux`) +const { store } = require(`../redux`) +const { getNode, getNodes, getNodesByType } = require(`../db`) const { createPageDependency } = require(`../redux/actions/add-page-dependency`) const createTypeName = require(`./create-type-name`) const createKey = require(`./create-key`) @@ -154,9 +155,8 @@ function inferFromMapping( const findNode = (fieldValue, path) => { const linkedNode = _.find( - getNodes(), - n => - n.internal.type === linkedType && _.get(n, linkedField) === fieldValue + getNodesByType(linkedType), + n => _.get(n, linkedField) === fieldValue ) if (linkedNode) { createPageDependency({ path, nodeId: linkedNode.id }) diff --git a/packages/gatsby/src/schema/node-tracking.js b/packages/gatsby/src/schema/node-tracking.js index 495e720d44bab..e11cb0229a9b3 100644 --- a/packages/gatsby/src/schema/node-tracking.js +++ b/packages/gatsby/src/schema/node-tracking.js @@ -1,5 +1,5 @@ const _ = require(`lodash`) -const { getNode, getNodes } = require(`../redux`) +const { getNode, getNodes, getDb } = require(`../db`) /** * Map containing links between inline objects or arrays @@ -32,7 +32,7 @@ const addRootNodeToInlineObject = (data, nodeId) => { const trackInlineObjectsInRootNode = node => { _.each(node, (v, k) => { // Ignore the node internal object. - if (k === `internal`) { + if (k === `internal` || k === `$loki`) { return } addRootNodeToInlineObject(v, node.id) @@ -57,7 +57,9 @@ const findRootNodeAncestor = (obj, predicate = null) => { while ( (!predicate || !predicate(rootNode)) && (rootNodeId = getRootNodeId(rootNode) || rootNode.parent) && - (getNode(rootNode.parent) !== undefined || getNode(rootNodeId)) && + ((rootNode.parent !== undefined && + getNode(rootNode.parent) !== undefined) || + getNode(rootNodeId)) && whileCount < 101 ) { if (rootNodeId) { @@ -77,14 +79,16 @@ const findRootNodeAncestor = (obj, predicate = null) => { return !predicate || predicate(rootNode) ? 
rootNode : null } +function trackDbNodes() { + _.each(getNodes(), node => { + trackInlineObjectsInRootNode(node) + }) +} + /** * @callback nodePredicate * @param {Node} node Node that is examined */ exports.findRootNodeAncestor = findRootNodeAncestor - -// Track nodes that are already in store -_.each(getNodes(), node => { - trackInlineObjectsInRootNode(node) -}) +exports.trackDbNodes = trackDbNodes diff --git a/packages/gatsby/src/schema/plugin-fields.js b/packages/gatsby/src/schema/plugin-fields.js new file mode 100644 index 0000000000000..d185da8b72155 --- /dev/null +++ b/packages/gatsby/src/schema/plugin-fields.js @@ -0,0 +1,2 @@ +// Hack to track pluginFields +exports.pluginFieldTracking = new Set() diff --git a/packages/gatsby/src/schema/query-loki.js b/packages/gatsby/src/schema/query-loki.js new file mode 100644 index 0000000000000..74c41c91f9100 --- /dev/null +++ b/packages/gatsby/src/schema/query-loki.js @@ -0,0 +1,194 @@ +const _ = require(`lodash`) +const Promise = require(`bluebird`) +const { getDb } = require(`../db`) +const prepareRegex = require(`./prepare-regex`) + +// Takes a raw graphql filter and converts it into a mongo-like args +// object. E.g `eq` becomes `$eq`. gqlFilter should be the raw graphql +// filter returned from graphql-js. e.g: +// +// { +// internal: { +// type: { +// eq: "TestNode" +// }, +// content: { +// glob: "et" +// } +// }, +// id: { +// glob: "12*" +// } +// } +// +// would return +// +// { +// internal: { +// type: { +// $eq: "TestNode" // append $ to eq +// }, +// content: { +// $regex: new MiniMatch(v) // convert glob to regex +// } +// }, +// id: { +// $regex: // as above +// } +// } +function toMongoArgs(gqlFilter) { + const mongoArgs = {} + _.each(gqlFilter, (v, k) => { + if (_.isPlainObject(v)) { + if (k === `elemMatch`) { + k = `$elemMatch` + } + mongoArgs[k] = toMongoArgs(v) + } else { + // Compile regex first. + if (k === `regex`) { + mongoArgs[`$regex`] = prepareRegex(v) + } else if (k === `glob`) { + const Minimatch = require(`minimatch`).Minimatch + const mm = new Minimatch(v) + mongoArgs[`$regex`] = mm.makeRe() + } else if (k === `in`) { + mongoArgs[`$contains`] = v + } else { + mongoArgs[`$${k}`] = v + } + } + }) + return mongoArgs +} + +// Converts a nested mongo args object into a dotted notation. acc +// (accumulator) must be a reference to an empty object. The converted +// fields will be added to it. E.g +// +// { +// internal: { +// type: { +// $eq: "TestNode" +// }, +// content: { +// $regex: new MiniMatch(v) +// } +// }, +// id: { +// $regex: newMiniMatch(v) +// } +// } +// +// After execution, acc would be: +// +// { +// "internal.type": { +// $eq: "TestNode" +// }, +// "internal.content": { +// $regex: new MiniMatch(v) +// }, +// "id": { +// $regex: // as above +// } +// } +function dotNestedFields(acc, o, path = ``) { + if (_.isPlainObject(o)) { + if (_.isPlainObject(_.sample(o))) { + _.forEach(o, (v, k) => { + dotNestedFields(acc, v, path + `.` + k) + }) + } else { + acc[_.trimStart(path, `.`)] = o + } + } +} + +// Converts graphQL args to a loki query +function convertArgs(gqlArgs) { + const dottedFields = {} + dotNestedFields(dottedFields, toMongoArgs(gqlArgs.filter)) + return dottedFields +} + +// Converts graphql Sort args into the form expected by loki, which is +// a vector where the first value is a field name, and the second is a +// boolean `isDesc`. Nested fields delimited by `___` are replaced by +// periods. 
E.g +// +// { +// fields: [ `frontmatter___date`, `id` ], +// order: `desc` +// } +// +// would return +// +// [ [ `frontmatter.date`, true ], [ `id`, true ] ] +function toSortFields(sortArgs) { + const { fields, order } = sortArgs + return _.map(fields, field => [ + field.replace(/___/g, `.`), + _.lowerCase(order) === `desc`, + ]) +} + +// Ensure there is an index for each query field. If the index already +// exists, this is a noop (handled by lokijs). +function ensureIndexes(coll, findArgs) { + _.forEach(findArgs, (v, fieldName) => { + coll.ensureIndex(fieldName) + }) +} + +/** + * Runs the graphql query over the loki nodes db. + * + * @param {Object} args. Object with: + * + * {Object} gqlType: built during `./build-node-types.js` + * + * {Object} rawGqlArgs: The raw graphql query as a js object. E.g `{ + * filter: { fields { slug: { eq: "/somepath" } } } }` + * + * {Object} context: The context from the QueryJob + * + * {boolean} firstOnly: Whether to return the first found match, or + * all matching result. + * + * @returns {promise} A promise that will eventually be resolved with + * a collection of matching objects (even if `firstOnly` is true) + */ +function runQuery({ gqlType, rawGqlArgs, context = {}, firstOnly }) { + // Clone args as for some reason graphql-js removes the constructor + // from nested objects which breaks a check in sift.js. + const gqlArgs = JSON.parse(JSON.stringify(rawGqlArgs)) + + const lokiArgs = convertArgs(gqlArgs) + + const coll = getDb().getCollection(gqlType.name) + + // Allow page creators to specify that they want indexes + // automatically created for their pages. + if (context.useQueryIndex) { + ensureIndexes(coll, lokiArgs) + } + + let chain = coll.chain().find(lokiArgs, firstOnly) + + const { sort } = gqlArgs + if (sort) { + const sortFields = toSortFields(sort) + _.forEach(sortFields, ([fieldName]) => { + coll.ensureIndex(fieldName) + }) + chain = chain.compoundsort(sortFields) + } + + return Promise.resolve(chain.data()) +} + +module.exports = { + convertArgs, + runQuery, +} diff --git a/packages/gatsby/src/schema/run-sift.js b/packages/gatsby/src/schema/query-sift.js similarity index 70% rename from packages/gatsby/src/schema/run-sift.js rename to packages/gatsby/src/schema/query-sift.js index 76de76e85b73e..93e6e26218b92 100644 --- a/packages/gatsby/src/schema/run-sift.js +++ b/packages/gatsby/src/schema/query-sift.js @@ -1,12 +1,8 @@ -// @flow const sift = require(`sift`) const _ = require(`lodash`) -const { connectionFromArray } = require(`graphql-skip-limit`) -const { createPageDependency } = require(`../redux/actions/add-page-dependency`) const prepareRegex = require(`./prepare-regex`) -const Promise = require(`bluebird`) +const { getNodesByType } = require(`../db`) const { trackInlineObjectsInRootNode } = require(`./node-tracking`) -const { getNode } = require(`../redux`) const resolvedNodesCache = new Map() const enhancedNodeCache = new Map() @@ -38,22 +34,32 @@ function awaitSiftField(fields, node, k) { return undefined } -/* - * Filters a list of nodes using mongodb-like syntax. - * Returns a single unwrapped element if connection = false. +/** + * Runs the graphql query over the nodes in loki, but uses sift.js for + * querying instead of loki. It does this because it needs to first + * iterate over all nodes calling plugin field resolvers to make sure + * they have been realized before querying occurs * + * @param {Object} args. 
Object with: + * + * {Object} gqlType: built during `./build-node-types.js` + * + * {Object} rawGqlArgs: The raw graphql query as a js object. E.g `{ + * filter: { fields { slug: { eq: "/somepath" } } } }` + * + * {boolean} firstOnly: Whether to return the first found match, or + * all matching result. + * + * @returns {promise} A promise that will eventually be resolved with + * a collection of matching objects (even if `firstOnly` is true) */ -module.exports = ({ - args, - nodes, - type, - typeName, - connection = false, - path = ``, -}: Object) => { +function runQuery({ gqlType, rawGqlArgs, firstOnly }) { // Clone args as for some reason graphql-js removes the constructor // from nested objects which breaks a check in sift.js. - const clonedArgs = JSON.parse(JSON.stringify(args)) + const clonedArgs = JSON.parse(JSON.stringify(rawGqlArgs)) + const typeName = gqlType.name + + let nodes = getNodesByType(typeName) const siftifyArgs = object => { const newObject = {} @@ -83,7 +89,7 @@ module.exports = ({ // this avoids including { eq: x } when resolving fields. function extractFieldsToSift(prekey, key, preobj, obj, val) { if (_.isPlainObject(val)) { - _.forEach((val: any), (v, k) => { + _.forEach(val, (v, k) => { if (k === `elemMatch`) { // elemMatch is operator for arrays and not field we want to prepare // so we need to skip it @@ -166,33 +172,6 @@ module.exports = ({ }) } - // If the the query for single node only has a filter for an "id" - // using "eq" operator, then we'll just grab that ID and return it. - if ( - !connection && - Object.keys(fieldsToSift).length === 1 && - Object.keys(fieldsToSift)[0] === `id` && - Object.keys(siftArgs[0].id).length === 1 && - Object.keys(siftArgs[0].id)[0] === `$eq` - ) { - const nodePromise = resolveRecursive( - getNode(siftArgs[0].id[`$eq`]), - fieldsToSift, - type.getFields() - ) - - nodePromise.then(node => { - if (node) { - createPageDependency({ - path, - nodeId: node.id, - }) - } - }) - - return nodePromise - } - const nodesPromise = () => { const nodesCacheKey = JSON.stringify({ // typeName + count being the same is a pretty good @@ -220,7 +199,7 @@ module.exports = ({ } const enhancedNodeGenerationPromise = new Promise(resolve => { - resolveRecursive(node, fieldsToSift, type.getFields()).then( + resolveRecursive(node, fieldsToSift, gqlType.getFields()).then( resolvedNode => { trackInlineObjectsInRootNode(resolvedNode) if (cacheKey) { @@ -240,7 +219,7 @@ module.exports = ({ } } const tempPromise = nodesPromise().then(myNodes => { - if (!connection) { + if (firstOnly) { const index = _.isEmpty(siftArgs) ? 0 : sift.indexOf( @@ -250,52 +229,41 @@ module.exports = ({ myNodes ) - // If a node is found, create a dependency between the resulting node and - // the path. if (index !== -1) { - createPageDependency({ - path, - nodeId: myNodes[index].id, - }) - - return myNodes[index] + return [myNodes[index]] } else { - return null + return [] } - } - - let result = _.isEmpty(siftArgs) - ? myNodes - : sift( - { - $and: siftArgs, - }, - myNodes - ) + } else { + let result = _.isEmpty(siftArgs) + ? myNodes + : sift( + { + $and: siftArgs, + }, + myNodes + ) - if (!result || !result.length) return null + if (!result || !result.length) return null - // Sort results. - if (clonedArgs.sort) { - // create functions that return the item to compare on - // uses _.get so nested fields can be retrieved - const convertedFields = clonedArgs.sort.fields - .map(field => field.replace(/___/g, `.`)) - .map(field => v => _.get(v, field)) + // Sort results. 
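+ // Nested sort fields use `___` as a delimiter (e.g. `frontmatter___date`).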
+ if (clonedArgs.sort) { + // create functions that return the item to compare on + // uses _.get so nested fields can be retrieved + const convertedFields = clonedArgs.sort.fields + .map(field => field.replace(/___/g, `.`)) + .map(field => v => _.get(v, field)) - result = _.orderBy(result, convertedFields, clonedArgs.sort.order) - } + result = _.orderBy(result, convertedFields, clonedArgs.sort.order) + } - const connectionArray = connectionFromArray(result, args) - connectionArray.totalCount = result.length - if (result.length > 0 && result[0].internal) { - createPageDependency({ - path, - connection: result[0].internal.type, - }) + return result } - return connectionArray }) return tempPromise } + +module.exports = { + runQuery, +} diff --git a/packages/gatsby/src/schema/run-query.js b/packages/gatsby/src/schema/run-query.js new file mode 100644 index 0000000000000..40c26d2fe8879 --- /dev/null +++ b/packages/gatsby/src/schema/run-query.js @@ -0,0 +1,56 @@ +const _ = require(`lodash`) +const queryLoki = require(`./query-loki`) +const querySift = require(`./query-sift`) +const { pluginFieldTracking } = require(`./plugin-fields`) + +function hasPluginFields(queryArgs) { + return _.some(queryArgs.filter, (v, fieldName) => + pluginFieldTracking.has(fieldName) + ) +} + +function chooseQueryEngine(queryArgs) { + if (hasPluginFields(queryArgs)) { + return querySift.runQuery + } else { + return queryLoki.runQuery + } +} + +/** + * Runs the query over all nodes of type. It must first select the + * appropriate query engine. Sift, or Loki. Sift is used if the query + * includes plugin fields, i.e those declared by plugins during the + * `setFieldsOnGraphQLNodeType` API. If it does, then we must iterate + * through all nodes calling the plugin field to make sure it's + * realized, then we can perform the query. See `query-sift.js` for + * more. + * + * If the query does *not* include plugin fields, then we can perform + * a much faster pure data query using loki. See `query-loki.js` for + * more. + * + * @param {Object} args. Object with: + * + * {Object} gqlType: built during `./build-node-types.js` + * + * {Object} rawGqlArgs: The raw graphql query as a js object. E.g `{ + * filter: { fields { slug: { eq: "/somepath" } } } }` + * + * {Object} context: The context from the QueryJob + * + * {boolean} firstOnly: Whether to return the first found match, or + * all matching result. 
+ * + * @returns {promise} A promise that will eventually be resolved with + * a collection of matching objects (even if `firstOnly` is true) + */ +function runQuery(args) { + const { queryArgs, ...restArgs } = args + + const queryFunction = chooseQueryEngine(queryArgs) + + return queryFunction({ rawGqlArgs: queryArgs, ...restArgs }) +} + +module.exports.runQuery = runQuery diff --git a/packages/gatsby/src/schema/types/type-file.js b/packages/gatsby/src/schema/types/type-file.js index 301db11b6fce5..7bda49eac5b65 100644 --- a/packages/gatsby/src/schema/types/type-file.js +++ b/packages/gatsby/src/schema/types/type-file.js @@ -6,7 +6,7 @@ const isRelativeUrl = require(`is-relative-url`) const normalize = require(`normalize-path`) const systemPath = require(`path`) -const { getNodes } = require(`../../redux`) +const { getNodesByType } = require(`../../db`) const { findRootNodeAncestor } = require(`../node-tracking`) const { createPageDependency, @@ -108,7 +108,7 @@ function pointsToFile(nodes, key, value) { } const pathToOtherNode = normalize(joinPath(rootNode.dir, value)) - const otherFileExists = getNodes().some( + const otherFileExists = getNodesByType(`File`).some( n => n.absolutePath === pathToOtherNode ) return otherFileExists @@ -148,8 +148,8 @@ function createType(fileNodeRootType, isArray) { // Use that path to find the linked File node. const linkedFileNode = _.find( - getNodes(), - n => n.internal.type === `File` && n.absolutePath === fileLinkPath + getNodesByType(`File`), + n => n.absolutePath === fileLinkPath ) if (linkedFileNode) { createPageDependency({ diff --git a/packages/gatsby/src/utils/api-runner-node.js b/packages/gatsby/src/utils/api-runner-node.js index 906b3393d1025..e67a85bfa45f9 100644 --- a/packages/gatsby/src/utils/api-runner-node.js +++ b/packages/gatsby/src/utils/api-runner-node.js @@ -68,15 +68,14 @@ const runAPI = (plugin, api, args) => { pluginSpan.setTag(`plugin`, plugin.name) let pathPrefix = `` + const { store, emitter, loadNodeContent } = require(`../redux`) const { - store, - emitter, - loadNodeContent, getNodes, getNode, + getNodesByType, hasNodeChanged, getNodeAndSavePathDependency, - } = require(`../redux`) + } = require(`../db`) const { boundActionCreators } = require(`../redux/actions`) const doubleBoundActionCreators = doubleBind( @@ -111,6 +110,7 @@ const runAPI = (plugin, api, args) => { emitter, getNodes, getNode, + getNodesByType, hasNodeChanged, reporter, getNodeAndSavePathDependency, diff --git a/packages/gatsby/src/utils/source-nodes.js b/packages/gatsby/src/utils/source-nodes.js index a90fb7e132fb9..c865d1ac72523 100644 --- a/packages/gatsby/src/utils/source-nodes.js +++ b/packages/gatsby/src/utils/source-nodes.js @@ -2,7 +2,8 @@ const _ = require(`lodash`) const report = require(`gatsby-cli/lib/reporter`) const apiRunner = require(`./api-runner-node`) -const { store, getNode } = require(`../redux`) +const { store } = require(`../redux`) +const { getNode, getNodes, deleteEmptyCollections } = require(`../db`) const { boundActionCreators } = require(`../redux/actions`) const { deleteNode } = boundActionCreators @@ -18,7 +19,7 @@ function discoverPluginsWithoutNodes(storeState) { ) // Find out which plugins own already created nodes const nodeOwners = _.uniq( - Array.from(storeState.nodes.values()).reduce((acc, node) => { + Array.from(getNodes()).reduce((acc, node) => { acc.push(node.internal.owner) return acc }, []) @@ -45,11 +46,12 @@ module.exports = async ({ parentSpan } = {}) => { // Garbage collect stale data nodes const touchedNodes = 
Object.keys(state.nodesTouched) - const staleNodes = Array.from(state.nodes.values()).filter(node => { + const staleNodes = Array.from(getNodes()).filter(node => { // Find the root node. let rootNode = node let whileCount = 0 while ( + rootNode && rootNode.parent && getNode(rootNode.parent) !== undefined && whileCount < 101 @@ -69,5 +71,6 @@ module.exports = async ({ parentSpan } = {}) => { if (staleNodes.length > 0) { staleNodes.forEach(node => deleteNode({ node })) + deleteEmptyCollections() } } diff --git a/www/gatsby-node.js b/www/gatsby-node.js index c08c8800b4faa..efffbbdbeb7d1 100644 --- a/www/gatsby-node.js +++ b/www/gatsby-node.js @@ -244,6 +244,7 @@ exports.createPages = ({ graphql, actions }) => { skip: i * postsPerPage, numPages, currentPage: i + 1, + useQueryIndex: true, }, }) }) diff --git a/yarn.lock b/yarn.lock index 75c9464ba602d..207faf05f7c71 100644 --- a/yarn.lock +++ b/yarn.lock @@ -12093,6 +12093,11 @@ loglevel@^1.4.1: resolved "https://registry.yarnpkg.com/loglevel/-/loglevel-1.6.1.tgz#e0fc95133b6ef276cdc8887cdaf24aa6f156f8fa" integrity sha1-4PyVEztu8nbNyIh82vJKpvFW+Po= +lokijs@^1.5.5: + version "1.5.5" + resolved "https://registry.yarnpkg.com/lokijs/-/lokijs-1.5.5.tgz#1c21f82af7579037fade7b9e4813485c23708bb6" + integrity sha1-HCH4KvdXkDf63nueSBNIXCNwi7Y= + longest-streak@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/longest-streak/-/longest-streak-1.0.0.tgz#d06597c4d4c31b52ccb1f5d8f8fe7148eafd6965"