Skip to content

Commit

Permalink
Add api for s3 processing
Browse files Browse the repository at this point in the history
Add github release actions

Add asynchronous file processing

Use file name as process identifier

Limit textract attempts

Rename env variables

Re-add script

Adjust release workflow

Add AWS authorization by access keys

Merge api and script code

Update README

Handle runtime errors while processing files

Fix `has_finished` field collect endpoint

Add download logging

Write PDF incrementally

Add `AWS_REGION` environment variable to API

Add `SKIP_PROCESSING` variable to api

Add logging to api

Remove aws session by tokens

Check if file exists before processing
  • Loading branch information
daniel-va committed Oct 16, 2024
1 parent 70316d3 commit 1844331
Show file tree
Hide file tree
Showing 31 changed files with 1,453 additions and 390 deletions.
8 changes: 8 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Paths listed here are left out of the Docker build context:
# CI configuration, the local virtualenv, scratch files, env files
# and the Docker/README files themselves.
/.github
/.venv
/tmp
/.env
/.env.*
/docker-compose.yml
/Dockerfile
/README.md
67 changes: 67 additions & 0 deletions .github/actions/create-image/action.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Composite action: builds a Docker image from the given Dockerfile,
# tags it with VERSION and TAG (plus any OTHER_TAGS), and pushes it to ghcr.io.
name: "Create Docker Image"
description: "Builds a docker image and tags it"
# All inputs except OTHER_TAGS are declared as required.
inputs:
IMAGE_NAME:
description: "The image name"
required: true
VERSION:
description: "The version of the image"
required: true
TAG:
description: "The tag of the image, in addition to the version"
required: true
OTHER_TAGS:
description: "Any additional tags, passed directly to docker/metadata-action"
DOCKERFILE:
description: "The path to the Dockerfile"
required: true
GITHUB_TOKEN:
description: "The github token"
required: true

runs:
using: "composite"
steps:
- name: Checkout repository
uses: actions/checkout@v4

# Exports COMMITED_AT (committer date of the triggering commit, strict ISO 8601)
# and REVISION (short hash of HEAD) to the job environment.
# Only COMMITED_AT is read again within this action (image "created" label).
- name: Set environment variables
shell: bash
run: |
echo COMMITED_AT=$(git show -s --format=%cI ${{ github.sha }}) >> $GITHUB_ENV
echo REVISION=$(git rev-parse --short HEAD) >> $GITHUB_ENV
# Computes the image tags and OCI labels that the build step below consumes.
# The `latest` flavor is enabled only when TAG is literally 'latest'.
- name: Collect docker image metadata
id: meta-data
uses: docker/metadata-action@v5
with:
images: ${{ inputs.IMAGE_NAME }}
labels: |
org.opencontainers.image.created=${{ env.COMMITED_AT }}
org.opencontainers.image.version=v${{ inputs.VERSION }}
org.opencontainers.image.maintainer=EBP Schweiz AG
flavor: |
latest=${{ inputs.TAG == 'latest' }}
tags: |
type=raw,value=${{ inputs.TAG }}
type=raw,value=${{ inputs.VERSION }}
${{ inputs.OTHER_TAGS }}
# Authenticates against ghcr.io as the repository owner using the provided token.
- name: Log in to the GitHub container registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ inputs.GITHUB_TOKEN }}

# Builds from the repository root without using the build cache and pushes the result.
# VERSION is handed to the Dockerfile as the APP_VERSION build argument.
- name: Build and push Docker image
uses: docker/build-push-action@v5
with:
context: ./
file: ${{ inputs.DOCKERFILE }}
push: true
tags: ${{ steps.meta-data.outputs.tags }}
labels: ${{ steps.meta-data.outputs.labels }}
no-cache: true
build-args: |
APP_VERSION=${{ inputs.VERSION }}
34 changes: 34 additions & 0 deletions .github/actions/tag-commit/action.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Composite action: points the tag TAG_NAME at commit SHA.
# The tag is created if it does not exist yet, otherwise the existing ref is updated.
name: "Tag Commit"
description: "Creates or updates a commit tag"
inputs:
  TAG_NAME:
    description: "The tag's name"
    required: true
  SHA:
    description: "The SHA of the commit to be tagged"
    required: true

runs:
  using: "composite"
  steps:
    - name: Create/update tag
      uses: actions/github-script@v7
      env:
        # Inputs are passed through the environment rather than interpolated
        # into the script text.
        TAG: ${{ inputs.TAG_NAME }}
        SHA: ${{ inputs.SHA }}
      with:
        script: |
          const { owner, repo } = context.repo;
          const { TAG: tag, SHA: sha } = process.env;
          try {
            // Await the request so the step only finishes once the tag is written
            // and so that failures fail the step.
            await github.rest.git.createRef({
              owner,
              repo,
              ref: `refs/tags/${tag}`,
              sha,
            });
          } catch (err) {
            // A 422 response is treated as "the tag already exists";
            // every other failure is rethrown unchanged.
            if (err.status !== 422) throw err;
            await github.rest.git.updateRef({
              owner,
              repo,
              ref: `tags/${tag}`,
              sha,
            });
          }
115 changes: 115 additions & 0 deletions .github/scripts/find-version.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
/**
 * Determines the next development version (`major.minor.patch-devN`) to publish.
 *
 * The most recent version found in `tags` is used as the starting point:
 * - no version at all: start at `1.0.0-dev1`;
 * - branch starts with `feature/`: treated as a minor change;
 * - any other branch: treated as a patch.
 *
 * Whenever a new minor or patch number is started, the pre-release counter
 * restarts at 1. Otherwise the previous dev release's number is kept and only
 * its pre-release counter is incremented.
 *
 * @param {string[]} tags - Tag names to search for version numbers.
 * @param {string} branch - Name of the source branch (without `refs/heads/`).
 * @returns {{major: number, minor: number, patch: number, preRelease: number}}
 *   The next version. This is always a fresh object; the version returned by
 *   `findMostRecentVersion` is not modified.
 */
const findNextVersion = (tags, branch) => {
  const previous = findMostRecentVersion(tags);
  if (previous == null) {
    return {
      major: 1,
      minor: 0,
      patch: 0,
      preRelease: 1,
    };
  }

  const next = { ...previous };
  const wasFullRelease = previous.preRelease == null;

  if (branch.startsWith("feature/")) {
    // It's a minor feature.

    // If the previous version was a full release or a patch dev release,
    // we are a completely new minor dev release, so the pre-release counter
    // starts over instead of continuing the previous version's counter.
    // Otherwise, the previous version was itself a minor dev release,
    // and we can reuse its number.
    if (wasFullRelease || previous.patch !== 0) {
      next.minor += 1;
      next.patch = 0;
      next.preRelease = 0;
    }
  } else if (wasFullRelease) {
    // It's a patch on top of a full release:
    // we are a completely new patch dev release.
    // (For a previous dev release, its number is simply reused.)
    next.patch += 1;
    next.preRelease = 0;
  }

  next.preRelease += 1;
  return next;
};

/**
 * Picks the highest version among the given tags.
 *
 * @param {string[]} tags - Tag names; entries that are not version numbers are ignored.
 * @returns {object|null} The first entry of the descending version list,
 *   or `null` when no tag is a version number.
 */
const findMostRecentVersion = (tags) => {
  const sortedVersions = findAllVersions(tags);
  // The list is sorted newest-first, so the head is the most recent version.
  return sortedVersions.length === 0 ? null : sortedVersions[0];
};

/**
 * Collects every pre-release version that is older than `recentTag`.
 *
 * @param {string[]} tags - Tag names to search for version numbers.
 * @param {string} recentTag - The tag that marks the most recent version.
 * @returns {object[]} The outdated pre-release versions, newest first.
 * @throws {Error} If `recentTag` is not a version number.
 */
const findOutdatedVersions = (tags, recentTag) => {
  const reference = parseVersion(recentTag);
  if (reference == null) {
    throw new Error(`recent tag '${recentTag}' is not a version number`);
  }

  const outdated = [];
  for (const candidate of findAllVersions(tags)) {
    // Full releases are never considered outdated; only pre-releases
    // that sort strictly before the reference version are selected.
    const isPreRelease = candidate.preRelease != null;
    if (isPreRelease && compareVersions(reference, candidate) > 0) {
      outdated.push(candidate);
    }
  }
  return outdated;
};

/**
 * Parses all tags that are version numbers and orders them newest-first.
 *
 * @param {string[]} tags - Tag names; anything `parseVersion` rejects is dropped.
 * @returns {object[]} Parsed versions in descending order.
 */
const findAllVersions = (tags) => {
  const parsed = [];
  for (const tag of tags) {
    const version = parseVersion(tag);
    if (version != null) {
      parsed.push(version);
    }
  }
  // Negate the ascending comparison to obtain a descending sort.
  parsed.sort((left, right) => -compareVersions(left, right));
  return parsed;
};

// Accepts `major.minor.patch` with an optional `-dev<N>` pre-release suffix.
const SEMANTIC_VERSION_PATTERN = /^\d+\.\d+\.\d+(?:-dev\d+)?$/;

/**
 * Parses a tag of the form `1.2.3` or `1.2.3-dev4`.
 *
 * @param {string} tag - The tag name to parse.
 * @returns {{major: number, minor: number, patch: number, preRelease: (number|undefined)}|null}
 *   The numeric components, with `preRelease` left `undefined` for full releases,
 *   or `null` if the tag does not match the version pattern.
 */
const parseVersion = (tag) => {
  if (SEMANTIC_VERSION_PATTERN.test(tag)) {
    const parts = tag.split(/[.\-]/);
    // The optional fourth part looks like `dev<N>`; drop the three-letter prefix.
    const suffix = parts[3];
    return {
      major: Number.parseInt(parts[0], 10),
      minor: Number.parseInt(parts[1], 10),
      patch: Number.parseInt(parts[2], 10),
      preRelease: suffix === undefined ? undefined : Number.parseInt(suffix.substring(3), 10),
    };
  }
  return null;
};

/**
 * Orders two parsed versions.
 *
 * Major, minor and patch are compared in that order. If they are all equal,
 * a version without a pre-release number ranks above one that has it, and two
 * pre-releases are ordered by their pre-release number.
 *
 * @param {object} a - First version.
 * @param {object} b - Second version.
 * @returns {number} Negative if `a` is older, positive if `a` is newer, `0` if equal.
 */
const compareVersions = (a, b) => {
  for (const part of ["major", "minor", "patch"]) {
    if (a[part] !== b[part]) {
      return a[part] - b[part];
    }
  }
  if (a.preRelease === b.preRelease) {
    return 0;
  }
  // A full release (no pre-release number) is newer than any of its dev releases.
  if (a.preRelease == null) {
    return 1;
  }
  if (b.preRelease == null) {
    return -1;
  }
  return a.preRelease - b.preRelease;
};

/**
 * Formats a parsed version back into its tag name.
 *
 * @param {{major: number, minor: number, patch: number, preRelease: ?number}} version
 * @returns {string} `major.minor.patch`, followed by `-dev<N>` when a
 *   pre-release number is present.
 */
const makeVersionTag = ({ major, minor, patch, preRelease }) => {
  const suffix = preRelease == null ? "" : `-dev${preRelease}`;
  return `${major}.${minor}.${patch}${suffix}`;
};

// Functions made available to callers that `require` this script.
// findAllVersions, parseVersion and compareVersions are not exported.
module.exports = {
findNextVersion,
findMostRecentVersion,
findOutdatedVersions,
makeVersionTag,
};
51 changes: 51 additions & 0 deletions .github/scripts/remove-packages.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
const { Octokit } = require("@octokit/rest");

/**
 * Deletes container package versions of an organization-owned image.
 *
 * Which versions get deleted is decided by `loadOutdatedVersionIds`; they are
 * then removed one request at a time.
 *
 * @param {string} imageUrl - Image reference of the form `host/owner/name`.
 * @param {Iterable<string>} imageVersions - Version tags to remove.
 * @returns {Promise<void>}
 */
const removePackageVersions = async (imageUrl, imageVersions) => {
  const client = new Octokit({ auth: process.env.GITHUB_TOKEN });

  // The first segment (the registry host) is not needed.
  const urlSegments = imageUrl.split("/");
  const owner = urlSegments[1];
  const packageName = urlSegments[2];

  const outdatedIds = await loadOutdatedVersionIds(client, owner, packageName, imageVersions);
  for (const versionId of outdatedIds) {
    await client.rest.packages.deletePackageVersionForOrg({
      package_type: "container",
      package_name: packageName,
      org: owner,
      package_version_id: versionId,
    });
  }
};

/**
 * Finds the ids of the package versions that should be deleted.
 *
 * A package version is selected when it carries at least one of the requested
 * version tags, or when it has no tags at all, i.e. is fully unused.
 *
 * @param {object} octokit - Client exposing
 *   `rest.packages.getAllPackageVersionsForPackageOwnedByOrg`.
 * @param {string} imageOwner - Organization that owns the package.
 * @param {string} imageName - Name of the container package.
 * @param {Iterable<string>} versions - Version tags to look for. Not modified.
 * @returns {Promise<Set<number>>} Ids of the matching package versions.
 */
const loadOutdatedVersionIds = async (octokit, imageOwner, imageName, versions) => {
  // Work on a private copy: a tag is removed from it once it has been matched.
  const pendingVersions = new Set(versions);
  const ids = new Set();

  // GitHub's REST pagination starts at page 1; starting at 0 would cost an
  // extra request. Pages are read until an empty one is returned.
  for (let page = 1; ; page += 1) {
    const response = await octokit.rest.packages.getAllPackageVersionsForPackageOwnedByOrg({
      package_type: "container",
      package_name: imageName,
      org: imageOwner,
      per_page: 100,
      page,
    });
    if (response.data.length === 0) {
      break;
    }
    for (const entry of response.data) {
      // Match any of the requested version's ids,
      // as well as any ids that do not have a tag anymore, i.e. are fully unused.
      const { tags } = entry.metadata.container;
      let hasRequestedTag = false;
      for (const tag of tags) {
        if (pendingVersions.delete(tag)) {
          hasRequestedTag = true;
        }
      }
      if (tags.length === 0 || hasRequestedTag) {
        ids.add(entry.id);
      }
    }
  }
  return ids;
};

// Only removePackageVersions is exported; loadOutdatedVersionIds stays internal.
module.exports = {
removePackageVersions,
};
113 changes: 113 additions & 0 deletions .github/workflows/publish-edge.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# Workflow that publishes a development ("edge") image.
name: Publish Edge

# Runs on pushes to `develop` and to branches matching `feature/asset-35-*`,
# and can also be started manually with an explicit version.
on:
push:
branches:
- "develop"
- "feature/asset-35-*"

workflow_dispatch:
inputs:
version:
type: string
description: |
Version number (e.g. 1.2.3-dev1).
Leave empty to determine the next version automatically.
required: false
default: ""
is-edge:
type: boolean
description: "Tag the commit and published image with `edge`."
default: true

permissions: write-all

# IS_EDGE is 'true' for every push, and for manual runs unless `is-edge` was unchecked.
env:
IS_EDGE: ${{ github.event_name == 'push' || github.event.inputs.is-edge == 'true' }}

jobs:
# Resolves the version to publish. Every step is skipped when a version was
# passed in manually; the job output then falls back to that input.
determine_version:
name: "determine version"
runs-on: ubuntu-latest
outputs:
version: ${{ steps.find_version.outputs.result || github.event.inputs.version }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
if: ${{ github.event.inputs.version == '' }}
# Resolves the commit the `edge` tag points to (HEAD if that lookup fails)
# and outputs all tags containing that commit as a comma-separated list.
- name: Get tags of edge commit
id: get_edge_tags
if: ${{ github.event.inputs.version == '' }}
run: |
git fetch --tags
EDGE_COMMIT=$(git rev-list -n 1 edge 2>/dev/null || git rev-parse HEAD)
EDGE_TAGS=$(printf "%s," $(git tag --contains $EDGE_COMMIT))
EDGE_TAGS=${EDGE_TAGS%,}
echo "edge_tags=$EDGE_TAGS" >> "$GITHUB_OUTPUT"
# Computes the next `-dev` version from those tags via find-version.js.
# The source branch is taken from the closed pull request whose merge commit
# equals the pushed commit; if none is found, the pushed branch itself is used.
- name: Find next version
id: find_version
if: ${{ github.event.inputs.version == '' }}
uses: actions/github-script@v7
env:
EDGE_TAGS: ${{ steps.get_edge_tags.outputs.edge_tags }}
with:
result-encoding: string
script: |
const { findNextVersion } = require('./.github/scripts/find-version.js');
const tags = process.env.EDGE_TAGS.split(',');
const targetBranch = context.payload.ref.replace('refs/heads/', '');
const pullRequests = await github.rest.pulls.list({
owner: context.repo.owner,
repo: context.repo.repo,
state: 'closed',
base: targetBranch,
sort: 'updated',
direction: 'desc'
});
const mergedPullRequest = pullRequests.data.find(pr => pr.merge_commit_sha === context.payload.after);
const sourceBranch = mergedPullRequest == null
? targetBranch
: mergedPullRequest.head.ref.replace('refs/heads/', '')
const version = findNextVersion(tags, sourceBranch);
return `${version.major}.${version.minor}.${version.patch}-dev${version.preRelease}`;
# Builds the API image from the root Dockerfile and pushes it under the
# resolved version, via the local create-image action.
# NOTE(review): TAG is an empty string when IS_EDGE is not 'true', although the
# create-image action declares TAG as required — confirm an empty tag is acceptable there.
build_and_push_api:
name: "build and push api"
needs:
- determine_version
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Create image
uses: ./.github/actions/create-image
with:
IMAGE_NAME: ${{ vars.BASE_IMAGE_NAME }}-api
TAG: ${{ env.IS_EDGE == 'true' && 'edge' || '' }}
VERSION: ${{ needs.determine_version.outputs.version }}
DOCKERFILE: Dockerfile
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

# After the image has been pushed, tags the triggering commit with the
# resolved version, and additionally with `edge` when IS_EDGE is 'true'.
tag_commit:
name: "tag commit"
needs:
- determine_version
- build_and_push_api
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: tag edge
if: ${{ env.IS_EDGE == 'true' }}
uses: ./.github/actions/tag-commit
with:
TAG_NAME: edge
SHA: ${{ github.sha }}
- name: tag version
uses: ./.github/actions/tag-commit
with:
TAG_NAME: ${{ needs.determine_version.outputs.version }}
SHA: ${{ github.sha }}
Loading

0 comments on commit 1844331

Please sign in to comment.