From 30f005bdbc1f7a26fb3060baf27cb1b1d9941750 Mon Sep 17 00:00:00 2001 From: kwajiehao <31984694+kwajiehao@users.noreply.github.com> Date: Tue, 8 Dec 2020 17:45:36 +0800 Subject: [PATCH] feat: improve `/sites` endpoint performance (#90) * feat: speed site retrieval up by making concurrent api calls Previously, site retrieval was slow because the GitHub API for retrieving org repos was paginated, and we retrieved the data sequentially, one page at a time. This meant that it often took up to 7 or even 8 seconds each time this endpoint is accessed (each page took around 3 seconds, perhaps due to the large amount of data being sent). This commit improves performance by making these api calls concurrently, so that it now only takes around 3 seconds for the endpoint to respond. This commit also introduces an optional env var, ISOMERPAGES_REPO_PAGE_COUNT, which determines how many pages of the GitHub API to comb simultaneously. Since we know the number of repos our github org has, we can use this info to speed up our endpoint by making concurrent calls instead of stepping through the API pagination. * refactor: remove unnecessary filter Co-authored-by: Jie Hao Kwa --- routes/sites.js | 84 +++++++++++++++++++++++++------------------------ 1 file changed, 43 insertions(+), 41 deletions(-) diff --git a/routes/sites.js b/routes/sites.js index 4a8a0e8bd..6f55824c4 100644 --- a/routes/sites.js +++ b/routes/sites.js @@ -1,11 +1,16 @@ const express = require('express'); const router = express.Router(); const axios = require('axios'); +const Bluebird = require('bluebird'); const _ = require('lodash'); const { attachRouteHandlerWrapper } = require('../middleware/routeHandler'); +const { flatten } = require('lodash'); + // Import error const { NotFoundError } = require('../errors/NotFoundError') +const GH_MAX_REPO_COUNT = 100 +const ISOMERPAGES_REPO_PAGE_COUNT = process.env.ISOMERPAGES_REPO_PAGE_COUNT || 3 const ISOMER_GITHUB_ORG_NAME = process.env.GITHUB_ORG_NAME const ISOMER_ADMIN_REPOS = [ 'isomercms-backend', @@ -46,50 +51,47 @@ const timeDiff = (lastUpdated) => { async function getSites (req, res, next) { const { accessToken } = req - // Variable to store user repos - let siteNames = [] - - // Variables to track pagination of user's repos in case user has more than 100 - let pageCount = 1 - let hasNextPage = true; - const endpoint = `https://api.github.com/orgs/${ISOMER_GITHUB_ORG_NAME}/repos`; - - // Loop through all user repos - while (hasNextPage) { - const resp = await axios.get(endpoint, { - params: { - per_page: 100, - page: pageCount, - sort: "full_name", - }, - headers: { - Authorization: `token ${accessToken}`, - "Content-Type": "application/json", - } - }) - - // Filter for isomer repos - const isomerRepos = resp.data.reduce((acc, repo) => { - const { permissions, updated_at, name } = repo - if (permissions.push === true) { - return acc.concat({ - repoName: name, - lastUpdated: timeDiff(updated_at), - }) + const endpoint = `https://api.github.com/orgs/${ISOMER_GITHUB_ORG_NAME}/repos`; + + const params = { + per_page: GH_MAX_REPO_COUNT, + sort: "full_name", + } + + // Simultaneously retrieve all isomerpages repos + const paramsArr = [] + for (i = 0; i < ISOMERPAGES_REPO_PAGE_COUNT; i++) { + paramsArr.push({ ...params, page: i + 1 }) + } + + const sites = await Bluebird.map(paramsArr, async (params) => { + const resp = await axios.get(endpoint, { + params, + headers: { + Authorization: `token ${accessToken}`, + "Content-Type": "application/json", + } + }) + + return resp.data + .map((repoData) => { + const { + updated_at, + permissions, + name + } = repoData + + return { + lastUpdated: timeDiff(updated_at), + permissions, + repoName: name, } - return acc - }, []) + }).filter((repoData) => repoData.permissions.push === true && !ISOMER_ADMIN_REPOS.includes(repoData.repoName)) + }) + const flattenedSites = _.flatten(sites) - siteNames = siteNames.concat(isomerRepos) - hasNextPage = resp.headers.link ? resp.headers.link.includes('next') : false - ++pageCount - } - - // Remove Isomer admin repositories from this list - siteNames = _.difference(siteNames, ISOMER_ADMIN_REPOS) - - res.status(200).json({ siteNames }) + res.status(200).json({ siteNames: flattenedSites }) } /* Checks if a user has access to a repo. */