Skip to content

Commit

Permalink
Merge branch 'master' into qt/fix_pod_latest_version
Browse files Browse the repository at this point in the history
  • Loading branch information
qtomlinson authored Aug 12, 2024
2 parents 0817188 + ae32c98 commit 6a55052
Show file tree
Hide file tree
Showing 22 changed files with 1,357 additions and 202 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
name: Run Docker build and tests

on:
workflow_dispatch:
push:
branches:
- master
- prod
pull_request:
branches:
- master
- prod

permissions:
contents: read
Expand Down
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,17 @@ See `local.env.list`, `dev.env.list` and `prod.env.list` template files.
- Allowed resource types: Container, Object
- Allowed permissions: Read, Write, List, Add, Process

### Running Crawler Queues in separate Azure account

The crawler can be run with the queues in a different Azure account from the
storage blobs. This is useful when you are running the crawler and submitting
results to `clearlydefinedprod` _but_ you don't want to have queues in the same
Azure account, because anyone with access to `clearlydefinedprod` can get
access to your queues.

Set env var `CRAWLER_QUEUE_AZURE_CONNECTION_STRING` with a connection string
obtained from Azure.

## Build and run Docker image locally

`docker build --platform linux/amd64 -t cdcrawler:latest .`
Expand Down
2 changes: 1 addition & 1 deletion config/cdConfig.js
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ module.exports = {
},
storageQueue: {
weights: { immediate: 3, soon: 2, normal: 3, later: 2 },
connectionString: cd_azblob.connection,
connectionString: config.get('CRAWLER_QUEUE_AZURE_CONNECTION_STRING') || cd_azblob.connection,
queueName: config.get('CRAWLER_QUEUE_PREFIX') || 'cdcrawlerdev',
visibilityTimeout: 8 * 60 * 60, // 8 hours
visibilityTimeout_remainLocal: fetchedCacheTtlSeconds,
Expand Down
48 changes: 48 additions & 0 deletions lib/fetch.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// (c) Copyright 2024, SAP SE and ClearlyDefined contributors. Licensed under the MIT license.
// SPDX-License-Identifier: MIT

const axios = require('axios')

/**
 * Builds the options object that callFetch hands to axios from a
 * request-style descriptor.
 *
 * @param {object} request - Request descriptor. Fields read here:
 *   `method`, `url` (falling back to `uri`), `headers`, `body`,
 *   `json`, `encoding` and `simple`.
 * @returns {object} Options with `method`, `url`, `responseType`, `headers`
 *   and `data`, plus `validateStatus` when `request.simple === false`.
 */
function buildRequestOptions(request) {
  // A truthy `json` wins; otherwise `encoding: null` selects a stream and
  // everything else is treated as text.
  const responseType = request.json ? 'json' : request.encoding === null ? 'stream' : 'text'
  const acceptAnyStatus = request.simple === false

  const options = {
    method: request.method,
    url: request.url || request.uri,
    responseType,
    headers: request.headers,
    data: request.body
  }

  // `simple: false` installs a validator that accepts every status code.
  if (acceptAnyStatus) {
    options.validateStatus = () => true
  }
  return options
}

/**
 * Performs a single HTTP request through the supplied axios instance.
 *
 * @param {object} request - Request descriptor, converted with
 *   buildRequestOptions. When `request.resolveWithFullResponse` is truthy the
 *   whole response object is returned; otherwise only `response.data`.
 * @param {Function} [axiosInstance=axios] - Callable invoked with the built
 *   options; defaults to the module-level axios import.
 * @returns {Promise<*>} The response body, or the full response decorated
 *   with `statusCode` and `statusMessage` (copied from `status` and
 *   `statusText`).
 * @throws Rethrows whatever was thrown, after setting `statusCode` on it from
 *   `error.response.status` (undefined when there is no response).
 */
async function callFetch(request, axiosInstance = axios) {
  try {
    const response = await axiosInstance(buildRequestOptions(request))
    if (request.resolveWithFullResponse) {
      // Add the alias fields alongside the response's own status/statusText.
      const { status, statusText } = response
      response.statusCode = status
      response.statusMessage = statusText
      return response
    }
    return response.data
  } catch (failure) {
    failure.statusCode = failure.response?.status
    throw failure
  }
}

/**
 * Creates a fetch function bound to a dedicated axios instance.
 *
 * @param {object} opts - Passed straight to `axios.create`.
 * @returns {Function} A function taking a request descriptor and returning
 *   the result of `callFetch(request, instance)` for the created instance.
 */
function withDefaults(opts) {
  // The instance is created once here and reused by every call to the
  // returned function.
  const boundInstance = axios.create(opts)
  const fetchWithDefaults = request => callFetch(request, boundInstance)
  return fetchWithDefaults
}

// Public API: callFetch performs one request (optionally with a caller-supplied
// axios instance); withDefaults returns a fetcher bound to an instance created
// via axios.create(opts).
module.exports = { callFetch, withDefaults }
Loading

0 comments on commit 6a55052

Please sign in to comment.