Skip to content

Commit

Permalink
fix: Ensure fatal logs are issued reliably when processes crash (#399)
Browse files Browse the repository at this point in the history
* uncaughtException handler

* update docs

* refactor

* fix formatting

* Configure make-promises-safe

* silence TSLint warning

* Replace make-promises-safe with --unhandled-rejections=strict

* configure gRPC

* increase functional test suite timeout

* integrate CRC queue

* refactor

* integrate generate-keypairs.ts

* integrate pdc queue

* integrate fastify

* fix broken test

* fix broken tests

* fix broken test

* fix broken tests
  • Loading branch information
gnarea authored Feb 6, 2021
1 parent d9ffd55 commit 68ea854
Show file tree
Hide file tree
Showing 39 changed files with 209 additions and 79 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci-cd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ jobs:
- run: npm run test:functional
env:
IS_GITHUB: 'true'
timeout-minutes: 1
timeout-minutes: 2
- name: Output diagnostic information upon failure
if: failure()
run: .github/scripts/output-diagnostics.sh
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ FROM node:12.20.1-slim
WORKDIR /opt/gw
COPY --from=build /tmp/gw ./
USER node
ENTRYPOINT ["node"]
ENTRYPOINT ["node", "--unhandled-rejections=strict"]
EXPOSE 8080
3 changes: 1 addition & 2 deletions chart/templates/cogrpc-deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,7 @@ spec:
{{- toYaml .Values.securityContext | nindent 12 }}
image: {{ include "relaynet-internet-gateway.image" . }}
imagePullPolicy: IfNotPresent
command:
- node
args:
- build/main/bin/cogrpc-server.js
env:
- name: PUBLIC_ADDRESS
Expand Down
3 changes: 1 addition & 2 deletions chart/templates/crcin-deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ spec:
{{- toYaml .Values.securityContext | nindent 12 }}
image: {{ include "relaynet-internet-gateway.image" . }}
imagePullPolicy: IfNotPresent
command:
- node
args:
- build/main/bin/crc-queue-worker.js
env:
- name: WORKER_ID
Expand Down
3 changes: 1 addition & 2 deletions chart/templates/keygen-job.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@ spec:
- name: keygen
image: {{ include "relaynet-internet-gateway.image" . }}
imagePullPolicy: IfNotPresent
command:
- node
args:
- build/main/bin/generate-keypairs.js
envFrom:
- configMapRef:
Expand Down
3 changes: 1 addition & 2 deletions chart/templates/pdcout-deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,7 @@ spec:
{{- toYaml .Values.securityContext | nindent 12 }}
image: {{ include "relaynet-internet-gateway.image" . }}
imagePullPolicy: IfNotPresent
command:
- node
args:
- build/main/bin/pdc-outgoing-queue-worker.js
env:
- name: POHTTP_ADDRESS
Expand Down
3 changes: 1 addition & 2 deletions chart/templates/pohttp-deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ spec:
{{- toYaml .Values.securityContext | nindent 12 }}
image: {{ include "relaynet-internet-gateway.image" . }}
imagePullPolicy: IfNotPresent
command:
- node
args:
- build/main/bin/pohttp-server.js
env:
{{- if .Values.proxyRequestIdHeader }}
Expand Down
3 changes: 1 addition & 2 deletions chart/templates/poweb-deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,7 @@ spec:
{{- toYaml .Values.securityContext | nindent 12 }}
image: {{ include "relaynet-internet-gateway.image" . }}
imagePullPolicy: IfNotPresent
command:
- node
args:
- build/main/bin/poweb-server.js
env:
{{- if .Values.proxyRequestIdHeader }}
Expand Down
2 changes: 1 addition & 1 deletion docs/instrumentation.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@ We use log levels as follows:
- `info`: Events for any outcome observed outside the gateway, or any unusual interaction with a backing service.
- `warning`: Something has gone wrong but it's being handled gracefully. Triage can start on the next working day.
- `error`: Something has gone wrong and triage must start within a few minutes. Wake up an SRE if necessary.
- `fatal`: Not used.
- `fatal`: A process has crashed.
5 changes: 0 additions & 5 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,6 @@
"grpc": "^1.24.4",
"grpc-health-check": "^1.8.0",
"it-pipe": "^1.1.0",
"make-promises-safe": "^5.1.0",
"mongoose": "^5.10.18",
"node-nats-streaming": "^0.3.2",
"pino": "^6.11.0",
Expand Down
3 changes: 0 additions & 3 deletions src/bin/cogrpc-server.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
// tslint:disable-next-line:no-var-requires
require('make-promises-safe');

import { runServer } from '../services/cogrpc/server';

runServer();
6 changes: 0 additions & 6 deletions src/bin/crc-queue-worker.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,6 @@
// tslint:disable-next-line:no-var-requires
require('make-promises-safe');

import { get as getEnvVar } from 'env-var';

import { processIncomingCrcCargo } from '../services/crcQueueWorker';

const WORKER_ID = getEnvVar('WORKER_ID').required().asString();
processIncomingCrcCargo(WORKER_ID);

// tslint:disable-next-line:no-console
console.log(`Starting queue worker ${WORKER_ID}`);
21 changes: 11 additions & 10 deletions src/bin/generate-keypairs.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,3 @@
// tslint:disable:no-console

// tslint:disable-next-line:no-var-requires
require('make-promises-safe');

import {
Certificate,
generateECDHKeyPair,
Expand All @@ -17,6 +12,11 @@ import { get as getEnvVar } from 'env-var';
import { initVaultKeyStore } from '../backingServices/keyStores';
import { createMongooseConnectionFromEnv } from '../backingServices/mongo';
import { OwnCertificate } from '../services/models';
import { configureExitHandling } from '../utilities/exitHandling';
import { makeLogger } from '../utilities/logging';

const LOGGER = makeLogger();
configureExitHandling(LOGGER);

const NODE_CERTIFICATE_TTL_DAYS = 180;
const SESSION_CERTIFICATE_TTL_DAYS = 60;
Expand All @@ -29,10 +29,10 @@ async function main(): Promise<void> {
const keyId = Buffer.from(KEY_ID_BASE64, 'base64');
try {
await sessionStore.fetchNodeKey(keyId);
console.warn(`Gateway key ${KEY_ID_BASE64} already exists`);
LOGGER.warn(`Gateway key ${KEY_ID_BASE64} already exists`);
return;
} catch (error) {
console.log(`Gateway key will be created because it doesn't already exist`);
LOGGER.info(`Gateway key will be created because it doesn't already exist`);
}

const gatewayKeyPair = await generateRSAKeyPair();
Expand Down Expand Up @@ -66,12 +66,13 @@ async function main(): Promise<void> {
});
await sessionStore.saveInitialSessionKey(initialSessionKeyPair.privateKey, initialKeyCertificate);

console.log(
JSON.stringify({
LOGGER.info(
{
gatewayCertificate: base64Encode(gatewayCertificate.serialize()),
initialSessionCertificate: base64Encode(initialKeyCertificate.serialize()),
keyPairId: KEY_ID_BASE64,
}),
},
'Key pairs were successfully generated',
);
}

Expand Down
3 changes: 0 additions & 3 deletions src/bin/pdc-outgoing-queue-worker.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
// tslint:disable-next-line:no-var-requires
require('make-promises-safe');

import { get as getEnvVar } from 'env-var';

import { processInternetBoundParcels } from '../services/internetBoundParcelsQueueWorker';
Expand Down
3 changes: 0 additions & 3 deletions src/bin/pohttp-server.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
// tslint:disable-next-line:no-var-requires
require('make-promises-safe');

import { makeServer } from '../services/pohttp/server';
import { runFastify } from '../utilities/fastify';

Expand Down
3 changes: 0 additions & 3 deletions src/bin/poweb-server.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
// tslint:disable-next-line:no-var-requires
require('make-promises-safe');

import { makeServer } from '../services/poweb/server';
import { runFastify } from '../utilities/fastify';

Expand Down
10 changes: 10 additions & 0 deletions src/services/cogrpc/server.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import { Logger } from 'pino';
import selfsigned from 'selfsigned';

import { makeMockLogging, mockSpy, partialPinoLog } from '../../_test_utils';
import * as exitHandling from '../../utilities/exitHandling';
import * as logging from '../../utilities/logging';
import { configureMockEnvVars, getMockContext } from '../_test_utils';
import { MAX_RAMF_MESSAGE_SIZE } from '../constants';
Expand Down Expand Up @@ -33,6 +34,8 @@ const mockSelfSignedOutput = {
};
const mockSelfSigned = mockSpy(jest.spyOn(selfsigned, 'generate'), () => mockSelfSignedOutput);

const mockExitHandler = mockSpy(jest.spyOn(exitHandling, 'configureExitHandling'));

const BASE_ENV_VARS = {
GATEWAY_KEY_ID: 'base64-encoded key id',
NATS_CLUSTER_ID: 'nats-cluster-id',
Expand All @@ -47,6 +50,13 @@ const mockLogger = makeMockLogging().logger;
const mockMakeLogger = mockSpy(jest.spyOn(logging, 'makeLogger'), () => mockLogger);

describe('runServer', () => {
test('Exit handler should be configured as the very first step', async () => {
mockEnvVars({});

await expect(runServer()).toReject();
expect(mockExitHandler).toBeCalledWith(mockLogger);
});

test.each([
'GATEWAY_KEY_ID',
'NATS_SERVER_URL',
Expand Down
5 changes: 4 additions & 1 deletion src/services/cogrpc/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { KeyCertPair, Server, ServerCredentials } from 'grpc';
import grpcHealthCheck from 'grpc-health-check';
import { Logger } from 'pino';
import * as selfsigned from 'selfsigned';
import { configureExitHandling } from '../../utilities/exitHandling';

import { makeLogger } from '../../utilities/logging';
import { MAX_RAMF_MESSAGE_SIZE } from '../constants';
Expand All @@ -19,6 +20,9 @@ const MAX_CONNECTION_AGE_GRACE_SECONDS = 30;
const MAX_CONNECTION_IDLE_SECONDS = 5;

export async function runServer(logger?: Logger): Promise<void> {
const baseLogger = logger ?? makeLogger();
configureExitHandling(baseLogger);

const gatewayKeyIdBase64 = getEnvVar('GATEWAY_KEY_ID').required().asString();
const publicAddress = getEnvVar('PUBLIC_ADDRESS').required().asString();
const parcelStoreBucket = getEnvVar('OBJECT_STORE_BUCKET').required().asString();
Expand All @@ -34,7 +38,6 @@ export async function runServer(logger?: Logger): Promise<void> {
'grpc.max_receive_message_length': MAX_RECEIVED_MESSAGE_LENGTH,
});

const baseLogger = logger ?? makeLogger();
const serviceImplementation = await makeServiceImplementation({
baseLogger,
gatewayKeyIdBase64,
Expand Down
28 changes: 27 additions & 1 deletion src/services/crcQueueWorker.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import * as privateKeyStore from '../backingServices/keyStores';
import * as mongo from '../backingServices/mongo';
import { NatsStreamingClient } from '../backingServices/natsStreaming';
import * as objectStorage from '../backingServices/objectStorage';
import * as exitHandling from '../utilities/exitHandling';
import * as logging from '../utilities/logging';
import {
castMock,
Expand Down Expand Up @@ -61,7 +62,10 @@ beforeEach(() => {
makeQueueConsumer: jest.fn().mockImplementation(mockMakeQueueConsumer),
});
});
mockSpy(jest.spyOn(NatsStreamingClient, 'initFromEnv'), () => mockNatsClient);
const mockNatsInitFromEnv = mockSpy(
jest.spyOn(NatsStreamingClient, 'initFromEnv'),
() => mockNatsClient,
);

//region Mongoose-related fixtures

Expand Down Expand Up @@ -143,13 +147,35 @@ beforeAll(() => {
});
const mockMakeLogger = mockSpy(jest.spyOn(logging, 'makeLogger'), () => mockLogging.logger);

const mockExitHandler = mockSpy(jest.spyOn(exitHandling, 'configureExitHandling'));

describe('Queue subscription', () => {
test('Logger should be configured', async () => {
await processIncomingCrcCargo(STUB_WORKER_NAME);

expect(mockMakeLogger).toBeCalledWith();
});

test('Exit handler should be configured as the very first step', async () => {
const error = new Error('oh noes');
mockNatsInitFromEnv.mockImplementation(() => {
throw error;
});

await expect(processIncomingCrcCargo(STUB_WORKER_NAME)).rejects.toEqual(error);
expect(mockExitHandler).toBeCalledWith(
expect.toSatisfy((logger) => logger.bindings().worker === STUB_WORKER_NAME),
);
});

test('Start of the queue should be logged', async () => {
await processIncomingCrcCargo(STUB_WORKER_NAME);

expect(mockLogging.logs).toContainEqual(
partialPinoLog('info', 'Starting queue worker', { worker: STUB_WORKER_NAME }),
);
});

test('Worker should subscribe to channel "crc-cargo"', async () => {
await processIncomingCrcCargo(STUB_WORKER_NAME);

Expand Down
13 changes: 8 additions & 5 deletions src/services/crcQueueWorker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,16 @@ import { initVaultKeyStore } from '../backingServices/keyStores';
import { createMongooseConnectionFromEnv } from '../backingServices/mongo';
import { NatsStreamingClient } from '../backingServices/natsStreaming';
import { initObjectStoreFromEnv } from '../backingServices/objectStorage';
import { configureExitHandling } from '../utilities/exitHandling';
import { makeLogger } from '../utilities/logging';
import { MongoPublicKeyStore } from './MongoPublicKeyStore';
import { ParcelStore } from './parcelStore';

export async function processIncomingCrcCargo(workerName: string): Promise<void> {
const logger = makeLogger().child({ worker: workerName });
configureExitHandling(logger);
logger.info('Starting queue worker');

const natsStreamingClient = NatsStreamingClient.initFromEnv(workerName);

const queueConsumer = natsStreamingClient.makeQueueConsumer(
Expand All @@ -32,16 +37,14 @@ export async function processIncomingCrcCargo(workerName: string): Promise<void>
undefined,
'-consumer',
);
await pipe(queueConsumer, makeCargoProcessor(workerName, natsStreamingClient));
await pipe(queueConsumer, makeCargoProcessor(natsStreamingClient, logger));
}

function makeCargoProcessor(
workerName: string,
natsStreamingClient: NatsStreamingClient,
logger: Logger,
): (messages: AsyncIterable<Message>) => Promise<void> {
return async (messages) => {
const logger = makeLogger();

const mongooseConnection = await createMongooseConnectionFromEnv();
const gateway = new Gateway(initVaultKeyStore(), new MongoPublicKeyStore(mongooseConnection));

Expand All @@ -54,7 +57,7 @@ function makeCargoProcessor(
await processCargo(
message,
gateway,
logger.child({ worker: workerName }),
logger,
parcelStore,
mongooseConnection,
natsStreamingClient,
Expand Down
Loading

0 comments on commit 68ea854

Please sign in to comment.