Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(GGs): introduce GitFileSystemService as middleman interface #867

Merged
merged 16 commits into from
Aug 2, 2023
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@
"sequelize-typescript": "^2.1.3",
"serialize-error": "^7.0.1",
"serverless": "^3.33.0",
"simple-git": "^3.19.1",
"slugify": "^1.6.0",
"supertest": "^6.3.3",
"toml": "^3.0.0",
Expand All @@ -104,6 +105,7 @@
"@types/express-session": "^1.17.5",
"@types/jest": "^27.4.1",
"@types/lodash": "^4.14.186",
"@types/mock-fs": "^4.13.1",
"@types/node": "^17.0.21",
"@types/supertest": "^2.0.11",
"@types/validator": "^13.7.1",
Expand All @@ -126,6 +128,7 @@
"jest-extended": "^2.0.0",
"jest-mock-axios": "^4.5.0",
"lint-staged": "^11.1.2",
"mock-fs": "^5.2.0",
"prettier": "2.2.1",
"regenerator-runtime": "^0.13.9",
"serverless-step-functions": "^3.14.0",
Expand Down
8 changes: 8 additions & 0 deletions src/config/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,14 @@ const config = convict({
default: "site-launch",
},
},
efs: {
volPath: {
doc: "Path to the EFS volume for storing the Git repositories",
env: "EFS_VOL_PATH",
format: "required-string",
default: "/efs/repos",
},
},
stepFunctions: {
stepFunctionsArn: {
doc: "Amazon Resource Name (ARN) of the Step Functions state machine",
Expand Down
11 changes: 11 additions & 0 deletions src/errors/GitFileSystemError.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import { BaseIsomerError } from "./BaseError"

export default class GitFileSystemError extends BaseIsomerError {
constructor(message: string) {
super({
status: 500,
code: "GitFileSystemError",
message,
})
}
}
277 changes: 277 additions & 0 deletions src/services/db/GitFileSystemService.ts
seaerchin marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,277 @@
import fs from "fs"

import { errAsync, okAsync, ResultAsync } from "neverthrow"
import { SimpleGit } from "simple-git"

import { config } from "@config/config"

import GitFileSystemError from "@errors/GitFileSystemError"

import { ISOMER_GITHUB_ORG_NAME } from "@constants/constants"

import type { GitDirectoryItem, GitFile } from "@root/types/gitfilesystem"

/**
* Some notes:
* - Seems like getTree, updateTree and updateRepoState is always used together
*/

const EFS_VOL_PATH = config.get("aws.efs.volPath")
const BRANCH_REF = config.get("github.branchRef")

export default class GitFileSystemService {
private readonly git: SimpleGit

constructor(git: SimpleGit) {
this.git = git
}

private async isGitInitialized(
repoName: string
): Promise<ResultAsync<boolean, GitFileSystemError>> {
dcshzj marked this conversation as resolved.
Show resolved Hide resolved
try {
const isGitRepo = await this.git
.cwd(`${EFS_VOL_PATH}/${repoName}`)
.checkIsRepo()

return okAsync(isGitRepo)
} catch (error: unknown) {
const message = error instanceof Error ? error.message : "Unknown error"
return errAsync(new GitFileSystemError(message))
}
}

private async isOriginRemoteCorrect(
repoName: string
): Promise<ResultAsync<boolean, GitFileSystemError>> {
try {
const originUrl = `[email protected]:${ISOMER_GITHUB_ORG_NAME}/${repoName}.git`
const remoteUrl = await this.git
.cwd(`${EFS_VOL_PATH}/${repoName}`)
.remote(["get-url", "origin"])

return okAsync(!remoteUrl || remoteUrl.trim() !== originUrl)
} catch (error: unknown) {
const message = error instanceof Error ? error.message : "Unknown error"
return errAsync(new GitFileSystemError(message))
}
}

// Determine if the folder is a valid Git repository
async isValidGitRepo(
repoName: string
): Promise<ResultAsync<boolean, GitFileSystemError>> {
try {
const isFolderExisting = fs.existsSync(`${EFS_VOL_PATH}/${repoName}`)
if (!isFolderExisting) {
return okAsync(false)
}

const isGitInitialized = await this.isGitInitialized(repoName)
if (isGitInitialized.isErr() || !isGitInitialized.value) {
return okAsync(false)
}

const isOriginRemoteCorrect = await this.isOriginRemoteCorrect(repoName)
return okAsync(
isOriginRemoteCorrect.isOk() && isOriginRemoteCorrect.value
)
} catch (error: unknown) {
const message = error instanceof Error ? error.message : "Unknown error"
return errAsync(new GitFileSystemError(message))
}
}

// Obtain the Git blob hash of a file or directory
async getGitBlobHash(
repoName: string,
filePath: string
): Promise<ResultAsync<string, GitFileSystemError>> {
try {
const hash = await this.git
.cwd(`${EFS_VOL_PATH}/${repoName}`)
.checkout(BRANCH_REF)
.revparse([`HEAD:${filePath}`])
dcshzj marked this conversation as resolved.
Show resolved Hide resolved
return okAsync(hash)
} catch (error: unknown) {
const message = error instanceof Error ? error.message : "Unknown error"
return errAsync(new GitFileSystemError(message))
}
}

// Clone repository from upstream Git hosting provider
async clone(
repoName: string
): Promise<ResultAsync<string, GitFileSystemError>> {
const originUrl = `[email protected]:${ISOMER_GITHUB_ORG_NAME}/${repoName}.git`

try {
if (fs.existsSync(`${EFS_VOL_PATH}/${repoName}`)) {
const isGitInitialized = await this.isGitInitialized(repoName)
if (isGitInitialized.isErr() || !isGitInitialized.value) {
return errAsync(
new GitFileSystemError(
`An existing folder "${repoName}" exists but is not a Git repo`
)
)
}

const isOriginRemoteCorrect = await this.isOriginRemoteCorrect(repoName)
if (isOriginRemoteCorrect.isErr() || !isOriginRemoteCorrect.value) {
return errAsync(
new GitFileSystemError(
`An existing folder "${repoName}" exists but is not the correct Git repo`
)
)
}

return okAsync(`${EFS_VOL_PATH}/${repoName}`)
}

await this.git
.clone(originUrl, `${EFS_VOL_PATH}/${repoName}`)
.cwd(`${EFS_VOL_PATH}/${repoName}`)
dcshzj marked this conversation as resolved.
Show resolved Hide resolved
.checkout(BRANCH_REF)
return okAsync(`${EFS_VOL_PATH}/${repoName}`)
} catch (error: unknown) {
const message = error instanceof Error ? error.message : "Unknown error"
return errAsync(new GitFileSystemError(message))
}
}

// Pull the latest changes from upstream Git hosting provider
// TODO: Pulling is a very expensive operation, should find a way to optimise
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is an interesting note, out of curiosity why is this the case ah? I thought after the first git clone, the git pull should be ok no?
How expensive is this?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Expensive in terms of latency? Latency of network call or disk write? Curious - did you manage to benchmark this on ur local?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't manage to dive into the exact cause of the latency issues, but just a quick benchmarking on my local machine, the Git pull operation adds almost 2 seconds in latency for each request (281 ms without, 2.91 seconds with pulling)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As discussed, let's focus on latency once MVP is out as our baseline metric is the reduction of API calls

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought git checkout might help and I ran a simple benchmark, weirdly checkout is slower than pull?

❯ node benchmark.js
Time taken for git checkout: 3386.3763749599457 ms
Time taken for git pull: 3241.95516705513 ms
❯ node benchmark.js
Time taken for git checkout: 3356.351875066757 ms
Time taken for git pull: 3228.0880839824677 ms
❯ node benchmark.js
Time taken for git checkout: 3306.690124988556 ms
Time taken for git pull: 3079.9089580774307 ms
❯ node benchmark.js
Time taken for git pull: 2990.787334084511 ms
Time taken for git checkout: 3404.82679104805 ms

async pull(
repoName: string
): Promise<ResultAsync<string, GitFileSystemError>> {
const isValid = await this.isValidGitRepo(repoName)

if (isValid.isOk() && !isValid.value) {
return errAsync(
new GitFileSystemError(`Folder "${repoName}" is not a valid Git repo`)
)
}
if (isValid.isErr()) {
return errAsync(isValid.error)
}

try {
await this.git.cwd(`${EFS_VOL_PATH}/${repoName}`).pull()
return okAsync(`${EFS_VOL_PATH}/${repoName}`).checkout(BRANCH_REF)
} catch (error: unknown) {
const message = error instanceof Error ? error.message : "Unknown error"
return errAsync(new GitFileSystemError(message))
}
}

// TODO: Creates either directory or file
// ResourceDirectoryService used this to create a directory + file at the same time
async create() {}
dcshzj marked this conversation as resolved.
Show resolved Hide resolved

// Read the contents of a file
async read(
repoName: string,
filePath: string
): Promise<ResultAsync<GitFile, GitFileSystemError>> {
// Ensure that the repository is up-to-date first
const isUpdated = await this.pull(repoName)
dcshzj marked this conversation as resolved.
Show resolved Hide resolved

if (isUpdated.isErr()) {
return errAsync(isUpdated.error)
}

// Read the file and get the hash concurrently
try {
const [contents, sha] = await Promise.all([
fs.promises.readFile(
`${EFS_VOL_PATH}/${repoName}/${filePath}`,
"utf-8"
),
this.getGitBlobHash(repoName, filePath),
])

if (sha.isErr()) {
return errAsync(sha.error)
}

const result: GitFile = {
contents,
sha: sha.value,
}

return okAsync(result)
} catch (error: unknown) {
const message = error instanceof Error ? error.message : "Unknown error"
return errAsync(new GitFileSystemError(message))
}
}

// Read the contents of a directory
async listDirectoryContents(
repoName: string,
directoryPath: string
): Promise<ResultAsync<GitDirectoryItem[], GitFileSystemError>> {
// Check that the directory path provided exists and is a directory
try {
const stats = await fs.promises.stat(
`${EFS_VOL_PATH}/${repoName}/${directoryPath}`
)

if (!stats.isDirectory()) {
return errAsync(
new GitFileSystemError(
`Path "${directoryPath}" is not a directory in repo "${repoName}"`
)
)
}
} catch (error: unknown) {
const message = error instanceof Error ? error.message : "Unknown error"
return errAsync(new GitFileSystemError(message))
}

// Read the directory contents
try {
const directoryContents = await fs.promises.readdir(
`${EFS_VOL_PATH}/${repoName}/${directoryPath}`,
{ withFileTypes: true }
)

const result: GitDirectoryItem[] = await Promise.all(
directoryContents.map(async (directoryItem) => {
const isDirectory = directoryItem.isDirectory()
const { name } = directoryItem
const path = directoryPath === "" ? name : `${directoryPath}/${name}`
const type = isDirectory ? "dir" : "file"
const fileHash = await this.getGitBlobHash(repoName, path)
const sha = fileHash.isOk() ? fileHash.value : ""
seaerchin marked this conversation as resolved.
Show resolved Hide resolved

return {
name,
type,
sha,
path,
}
})
)

// Note: The sha is empty if the file is not tracked by Git
const gitTrackedResults = result.filter(
(directoryItem) => directoryItem.sha !== ""
)

return okAsync(gitTrackedResults)
} catch (error: unknown) {
const message = error instanceof Error ? error.message : "Unknown error"
return errAsync(new GitFileSystemError(message))
}
}

// TODO: Update the contents of a file
async update() {}

// TODO: Delete a file
async delete() {}

// TODO: Get the latest commit of branch
async getLatestCommitOfBranch() {}
}
Loading