Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Automate file uploading to a common knowledge documents repo #6

Merged
merged 2 commits into from
Jun 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ IL_MERLINITE_MODEL_NAME=<MERLINITE_MODEL_NAME>
GITHUB_TOKEN=<TOKEN FOR OAUTH INSTRUCTLAB MEMBER LOOKUP>
TAXONOMY_REPO_OWNER=<GITHUB_ACCOUNT>
TAXONOMY_REPO=<REPO_NAME>
TAXONOMY_DOCUMENTS_REPO=github.com/<USER_ID>/<REPO_NAME>
6 changes: 4 additions & 2 deletions src/app/api/auth/[...nextauth]/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,15 +77,17 @@ const authOptions: NextAuthOptions = {
if (user) {
token.id = user.id;
}
console.log('JWT Callback:', token);
// Uncomment for JWT debugging
// console.log('JWT Callback:', token);
return token;
},
async session({ session, token }) {
if (token) {
session.accessToken = token.accessToken;
session.id = token.id;
}
console.log('Session Callback:', session);
// Uncomment for session callback debugging
// console.log('Session Callback:', session);
return session;
},
async signIn({ account, profile }) {
Expand Down
11 changes: 8 additions & 3 deletions src/app/api/pr/knowledge/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,11 @@ export async function POST(req: NextRequest) {
const forkExists = await checkUserForkExists(headers, githubUsername);
if (!forkExists) {
await createFork(headers);
// Add a delay to ensure the fork operation completes to avoid a race condition when retrieving the base SHA
// This only occurs if this is the first time submitting and the fork isn't present.
// TODO change to a retry
console.log('Pause 5s for the forking operation to complete');
await new Promise((resolve) => setTimeout(resolve, 5000));
}

const branchName = `knowledge-contribution-${Date.now()}`;
Expand All @@ -72,7 +77,7 @@ export async function POST(req: NextRequest) {
}

const yamlData = {
created_by: email,
created_by: githubUsername,
domain: domain,
task_description: task_description,
seed_examples: questions.map((question: string, index: number) => {
Expand Down Expand Up @@ -103,7 +108,7 @@ Creator names: ${creators}
// Create a new branch in the user's fork
await createBranch(headers, githubUsername, branchName, baseBranchSha);

// Create both files in a single commit
// Create both files in a single commit with DCO sign-off
await createFilesInSingleCommit(
headers,
githubUsername,
Expand All @@ -112,7 +117,7 @@ Creator names: ${creators}
{ path: newAttributionFilePath, content: attributionContent }
],
branchName,
task_details
`${task_details}\n\nSigned-off-by: ${email}`
);

// Create a pull request from the user's fork to the upstream repository
Expand Down
245 changes: 245 additions & 0 deletions src/app/api/upload/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,245 @@
// src/app/api/upload/route.ts
import { NextResponse } from 'next/server';
import { getToken } from 'next-auth/jwt';
import { NextRequest } from 'next/server';

// Base URL prefixed to every GitHub REST API request made in this file.
const GITHUB_API_URL = 'https://api.github.com';
// Documents repository to fork, read from the environment. POST strips a 'github.com/' prefix and
// splits the remainder on '/' into owner and repo name. The non-null assertion only silences the
// compiler; a missing variable is not detected here.
const TAXONOMY_DOCUMENTS_REPO = process.env.TAXONOMY_DOCUMENTS_REPO!;
// Branch of the user's fork whose head is read and then advanced by the upload commit.
const BASE_BRANCH = 'main';

/**
 * Uploads documents to the caller's fork of the taxonomy documents repository.
 *
 * Flow: read the NextAuth JWT, resolve the caller's GitHub login and email, fork the
 * documents repository when the caller has no repository of that name, then commit the
 * submitted files (each suffixed with a timestamp) onto the fork's base branch.
 *
 * @param req - Request whose JSON body carries `files`, an array of `{ fileName, fileContent }`.
 * @returns 201 with `repoUrl`, `commitSha`, `documentNames` and `prUrl` on success;
 *   401 when the JWT or its access token is missing; 400 when `files` is missing, empty or
 *   malformed; 500 when any later step throws.
 */
export async function POST(req: NextRequest) {
  const token = await getToken({ req, secret: process.env.NEXTAUTH_SECRET! });
  // The token is intentionally NOT logged: it carries the user's GitHub access token.

  if (!token || !token.accessToken) {
    console.error('Unauthorized: Missing or invalid access token');
    return NextResponse.json({ error: 'Unauthorized' }, { status: 401 });
  }

  const githubToken = token.accessToken as string;
  const headers = {
    'Content-Type': 'application/json',
    Authorization: `Bearer ${githubToken}`,
    Accept: 'application/vnd.github+json',
    'X-GitHub-Api-Version': '2022-11-28'
  };

  try {
    const body = await req.json();
    const files: { fileName: string; fileContent: string }[] = body?.files;

    // Reject bad payloads before touching GitHub so a malformed request never triggers a fork.
    const filesAreValid =
      Array.isArray(files) &&
      files.length > 0 &&
      files.every((file) => typeof file?.fileName === 'string' && typeof file?.fileContent === 'string');
    if (!filesAreValid) {
      console.error('Bad request: files must be a non-empty array of { fileName, fileContent }');
      return NextResponse.json({ error: 'No files provided' }, { status: 400 });
    }

    // Fetch GitHub username and email
    const { githubUsername, userEmail } = await getGitHubUsernameAndEmail(headers);
    console.log('GitHub Username:', githubUsername);
    console.log('User Email:', userEmail);

    // Split the TAXONOMY_DOCUMENTS_REPO into owner and repo name
    const repoPath = TAXONOMY_DOCUMENTS_REPO.replace('github.com/', '');
    const [repoOwner, repoName] = repoPath.split('/');
    if (!repoOwner || !repoName) {
      throw new Error('TAXONOMY_DOCUMENTS_REPO must have the form github.com/<owner>/<repo>');
    }

    console.log(`Repo Owner: ${repoOwner}`);
    console.log(`Repo Name: ${repoName}`);

    // Check if the repository is already forked
    const repoForked = await checkIfRepoExists(headers, githubUsername, repoName);
    console.log(`Repository forked: ${repoForked}`);
    if (!repoForked) {
      // Fork the repository if it is not already forked
      await forkRepo(headers, repoOwner, repoName, githubUsername);
      // Add a delay to ensure the fork operation completes to avoid a race condition when retrieving the base SHA
      // This only occurs if this is the first time submitting and the fork isn't present.
      // TODO change to a retry
      console.log('Pause 5s for the forking operation to complete');
      await new Promise((resolve) => setTimeout(resolve, 5000));
      console.log('Repository forked');
    }

    // Fetch the latest commit SHA of the base branch
    const baseBranchSha = await getBranchSha(headers, githubUsername, repoName, BASE_BRANCH);
    console.log(`Base branch SHA: ${baseBranchSha}`);

    // Create files in the main branch with unique filenames e.g. foo-20240618T203521842.md
    // ISO string "2024-06-18T20:35:21.842Z" -> strip '-', ':' and '.' -> drop the trailing 'Z'.
    const timestamp = new Date().toISOString().replace(/[-:.]/g, '').slice(0, -1);
    const filesWithTimestamp = files.map((file) => {
      // Split on the last dot that is followed by a non-empty extension.
      const [name, extension] = file.fileName.split(/\.(?=[^.]+$)/);
      // Names without an extension must not end up as "<name>-<ts>.undefined".
      const fileName = extension === undefined ? `${name}-${timestamp}` : `${name}-${timestamp}.${extension}`;
      return {
        fileName,
        fileContent: file.fileContent
      };
    });

    const commitSha = await createFilesCommit(headers, githubUsername, repoName, BASE_BRANCH, filesWithTimestamp, userEmail, baseBranchSha);
    console.log(`Created files commit SHA: ${commitSha}`);

    return NextResponse.json(
      {
        repoUrl: `https://github.com/${githubUsername}/${repoName}`,
        commitSha,
        documentNames: filesWithTimestamp.map((file) => file.fileName),
        prUrl: `https://github.com/${githubUsername}/${repoName}`
      },
      { status: 201 }
    );
  } catch (error) {
    console.error('Failed to upload documents:', error);
    return NextResponse.json({ error: 'Failed to upload documents' }, { status: 500 });
  }
}

/**
 * Looks up the authenticated user's login and email with `GET /user`.
 *
 * @param headers - Request headers forwarded to the GitHub API call.
 * @returns The `login` and `email` fields of the response as `githubUsername` and `userEmail`.
 *   NOTE(review): `email` is passed through unchecked; the TODO in createFilesCommit suggests
 *   it can be absent for some accounts — confirm and handle at the call site.
 * @throws Error when the API responds with a non-2xx status.
 */
async function getGitHubUsernameAndEmail(headers: HeadersInit): Promise<{ githubUsername: string; userEmail: string }> {
  const userResponse = await fetch(`${GITHUB_API_URL}/user`, { headers });

  if (userResponse.ok) {
    const profile = await userResponse.json();
    return { githubUsername: profile.login, userEmail: profile.email };
  }

  // Log the status and raw body, then surface a generic error to the caller.
  const failureBody = await userResponse.text();
  console.error('Failed to fetch GitHub username and email:', userResponse.status, failureBody);
  throw new Error('Failed to fetch GitHub username and email');
}

/**
 * Reports whether `GET /repos/{owner}/{repo}` succeeds.
 *
 * Any non-2xx response counts as "does not exist"; its status and body are logged.
 *
 * @param headers - Request headers forwarded to the GitHub API call.
 * @param owner - Account expected to own the repository.
 * @param repo - Repository name.
 * @returns `true` when the response is OK, otherwise `false`.
 */
async function checkIfRepoExists(headers: HeadersInit, owner: string, repo: string): Promise<boolean> {
  const repoResponse = await fetch(`${GITHUB_API_URL}/repos/${owner}/${repo}`, { headers });

  if (repoResponse.ok) {
    return true;
  }

  const failureBody = await repoResponse.text();
  console.error('Repository does not exist:', repoResponse.status, failureBody);
  return false;
}

/**
 * Requests a fork of `owner/repo` and waits until it is visible under `forkOwner`.
 *
 * After the fork request succeeds, the fork is probed up to 10 times with a 3-second
 * pause after every unsuccessful probe.
 *
 * @param headers - Request headers forwarded to the GitHub API calls.
 * @param owner - Owner of the repository being forked.
 * @param repo - Name of the repository being forked (also the name probed under `forkOwner`).
 * @param forkOwner - Account under which the fork is expected to appear.
 * @throws Error when the fork request is rejected or the fork never becomes visible.
 */
async function forkRepo(headers: HeadersInit, owner: string, repo: string, forkOwner: string) {
  const forkResponse = await fetch(`${GITHUB_API_URL}/repos/${owner}/${repo}/forks`, {
    method: 'POST',
    headers
  });

  if (!forkResponse.ok) {
    const failureBody = await forkResponse.text();
    console.error('Failed to fork repository:', forkResponse.status, failureBody);
    throw new Error('Failed to fork repository');
  }

  // Wait for the fork to be created
  let attemptsLeft = 10;
  let forkVisible = false;
  while (attemptsLeft > 0 && !forkVisible) {
    attemptsLeft -= 1;
    forkVisible = await checkIfRepoExists(headers, forkOwner, repo);
    if (!forkVisible) {
      await new Promise((resolve) => setTimeout(resolve, 3000));
    }
  }

  if (!forkVisible) {
    throw new Error('Failed to confirm fork creation');
  }
}

/**
 * Reads the SHA that `heads/{branch}` points to in `owner/repo`.
 *
 * @param headers - Request headers forwarded to the GitHub API call.
 * @param owner - Repository owner.
 * @param repo - Repository name.
 * @param branch - Branch name without the `heads/` prefix.
 * @returns The `object.sha` field of the ref response.
 * @throws Error 'Git Repository is empty.' on a 409 whose body mentions an empty repository;
 *   Error 'Failed to get branch SHA' on any other non-2xx response.
 */
async function getBranchSha(headers: HeadersInit, owner: string, repo: string, branch: string): Promise<string> {
  console.log(`Fetching branch SHA for ${branch}...`);
  const refResponse = await fetch(`${GITHUB_API_URL}/repos/${owner}/${repo}/git/ref/heads/${branch}`, { headers });

  if (refResponse.ok) {
    const ref = await refResponse.json();
    console.log('Branch SHA:', ref.object.sha);
    return ref.object.sha;
  }

  const failureBody = await refResponse.text();
  console.error('Failed to get branch SHA:', refResponse.status, failureBody);

  // An empty repository is reported separately so the log/error names the actual cause.
  const repoIsEmpty = refResponse.status === 409 && failureBody.includes('Git Repository is empty');
  throw new Error(repoIsEmpty ? 'Git Repository is empty.' : 'Failed to get branch SHA');
}

/**
 * Commits `files` onto `branchName` of `owner/repo` using the Git database endpoints.
 *
 * Steps: create one blob per file, create a tree containing those blobs on top of
 * `baseSha`, create a commit whose only parent is `baseSha`, then move the branch ref to
 * the new commit.
 *
 * @param headers - Request headers forwarded to every GitHub API call.
 * @param owner - Owner of the repository that receives the commit.
 * @param repo - Repository name.
 * @param branchName - Branch (without `heads/`) whose ref is updated to the new commit.
 * @param files - Files to add; `fileName` is used as the tree path, `fileContent` as UTF-8 blob content.
 * @param userEmail - Value written into the commit message's `Signed-off-by:` trailer.
 * @param baseSha - SHA used both as `base_tree` and as the parent of the new commit.
 *   NOTE(review): the caller passes a commit SHA here; confirm the trees endpoint accepts a
 *   commit SHA for `base_tree`.
 * @returns SHA of the created commit.
 * @throws Error when blob, tree, commit or ref-update requests return a non-2xx status.
 */
async function createFilesCommit(
  headers: HeadersInit,
  owner: string,
  repo: string,
  branchName: string,
  files: { fileName: string; fileContent: string }[],
  userEmail: string,
  baseSha: string
): Promise<string> {
  console.log('Creating files commit...');
  // Create blobs for each file. Each response is checked so that a failed blob is reported
  // as such instead of surfacing later as a tree entry with an undefined sha.
  const blobs = await Promise.all(
    files.map(async (file) => {
      const createBlobResponse = await fetch(`${GITHUB_API_URL}/repos/${owner}/${repo}/git/blobs`, {
        method: 'POST',
        headers,
        body: JSON.stringify({
          content: file.fileContent,
          encoding: 'utf-8'
        })
      });

      if (!createBlobResponse.ok) {
        const errorText = await createBlobResponse.text();
        console.error('Failed to create blob:', createBlobResponse.status, errorText);
        throw new Error('Failed to create blob');
      }

      return createBlobResponse.json();
    })
  );
  console.log('Blobs created:', blobs);

  // Create tree
  const createTreeResponse = await fetch(`${GITHUB_API_URL}/repos/${owner}/${repo}/git/trees`, {
    method: 'POST',
    headers,
    body: JSON.stringify({
      base_tree: baseSha,
      // blobs[index] lines up with files[index] because Promise.all preserves input order.
      tree: files.map((file, index) => ({
        path: file.fileName,
        mode: '100644',
        type: 'blob',
        sha: blobs[index].sha
      }))
    })
  });

  if (!createTreeResponse.ok) {
    const errorText = await createTreeResponse.text();
    console.error('Failed to create tree:', createTreeResponse.status, errorText);
    throw new Error('Failed to create tree');
  }

  const treeData = await createTreeResponse.json();
  console.log('Tree created:', treeData);

  // Create commit with DCO sign-off
  // TODO: if the user's github does not have an associated github email, we need to specify one in the upload section
  // or reuse the one from the form. If we use the email field from the form, it needs to be null checked when
  // the user clicks the upload documents button.
  const createCommitResponse = await fetch(`${GITHUB_API_URL}/repos/${owner}/${repo}/git/commits`, {
    method: 'POST',
    headers,
    body: JSON.stringify({
      message: `Add files: ${files.map((file) => file.fileName).join(', ')}\n\nSigned-off-by: ${userEmail}`,
      tree: treeData.sha,
      parents: [baseSha]
    })
  });

  if (!createCommitResponse.ok) {
    const errorText = await createCommitResponse.text();
    console.error('Failed to create commit:', createCommitResponse.status, errorText);
    throw new Error('Failed to create commit');
  }

  const commitData = await createCommitResponse.json();
  console.log('Commit created:', commitData);

  // Update branch reference
  const updateBranchResponse = await fetch(`${GITHUB_API_URL}/repos/${owner}/${repo}/git/refs/heads/${branchName}`, {
    method: 'PATCH',
    headers,
    body: JSON.stringify({ sha: commitData.sha })
  });

  if (!updateBranchResponse.ok) {
    const errorText = await updateBranchResponse.text();
    console.error('Failed to update branch reference:', updateBranchResponse.status, errorText);
    throw new Error('Failed to update branch reference');
  }
  console.log('Branch reference updated');

  return commitData.sha;
}
Loading