Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Ensure fatal logs are issued reliably when processes crash #399

Merged
merged 18 commits into from
Feb 6, 2021
Merged
2 changes: 1 addition & 1 deletion .github/workflows/ci-cd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ jobs:
- run: npm run test:functional
env:
IS_GITHUB: 'true'
timeout-minutes: 1
timeout-minutes: 2
- name: Output diagnostic information upon failure
if: failure()
run: .github/scripts/output-diagnostics.sh
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ FROM node:12.20.1-slim
WORKDIR /opt/gw
COPY --from=build /tmp/gw ./
USER node
ENTRYPOINT ["node"]
ENTRYPOINT ["node", "--unhandled-rejections=strict"]
EXPOSE 8080
3 changes: 1 addition & 2 deletions chart/templates/cogrpc-deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,7 @@ spec:
{{- toYaml .Values.securityContext | nindent 12 }}
image: {{ include "relaynet-internet-gateway.image" . }}
imagePullPolicy: IfNotPresent
command:
- node
args:
- build/main/bin/cogrpc-server.js
env:
- name: PUBLIC_ADDRESS
Expand Down
3 changes: 1 addition & 2 deletions chart/templates/crcin-deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ spec:
{{- toYaml .Values.securityContext | nindent 12 }}
image: {{ include "relaynet-internet-gateway.image" . }}
imagePullPolicy: IfNotPresent
command:
- node
args:
- build/main/bin/crc-queue-worker.js
env:
- name: WORKER_ID
Expand Down
3 changes: 1 addition & 2 deletions chart/templates/keygen-job.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@ spec:
- name: keygen
image: {{ include "relaynet-internet-gateway.image" . }}
imagePullPolicy: IfNotPresent
command:
- node
args:
- build/main/bin/generate-keypairs.js
envFrom:
- configMapRef:
Expand Down
3 changes: 1 addition & 2 deletions chart/templates/pdcout-deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,7 @@ spec:
{{- toYaml .Values.securityContext | nindent 12 }}
image: {{ include "relaynet-internet-gateway.image" . }}
imagePullPolicy: IfNotPresent
command:
- node
args:
- build/main/bin/pdc-outgoing-queue-worker.js
env:
- name: POHTTP_ADDRESS
Expand Down
3 changes: 1 addition & 2 deletions chart/templates/pohttp-deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ spec:
{{- toYaml .Values.securityContext | nindent 12 }}
image: {{ include "relaynet-internet-gateway.image" . }}
imagePullPolicy: IfNotPresent
command:
- node
args:
- build/main/bin/pohttp-server.js
env:
{{- if .Values.proxyRequestIdHeader }}
Expand Down
3 changes: 1 addition & 2 deletions chart/templates/poweb-deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,7 @@ spec:
{{- toYaml .Values.securityContext | nindent 12 }}
image: {{ include "relaynet-internet-gateway.image" . }}
imagePullPolicy: IfNotPresent
command:
- node
args:
- build/main/bin/poweb-server.js
env:
{{- if .Values.proxyRequestIdHeader }}
Expand Down
2 changes: 1 addition & 1 deletion docs/instrumentation.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@ We use log levels as follows:
- `info`: Events for any outcome observed outside the gateway, or any unusual interaction with a backing service.
- `warning`: Something has gone wrong but it's being handled gracefully. Triage can start on the next working day.
- `error`: Something has gone wrong and triage must start within a few minutes. Wake up an SRE if necessary.
- `fatal`: Not used.
- `fatal`: A process has crashed.
5 changes: 0 additions & 5 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,6 @@
"grpc": "^1.24.4",
"grpc-health-check": "^1.8.0",
"it-pipe": "^1.1.0",
"make-promises-safe": "^5.1.0",
"mongoose": "^5.10.18",
"node-nats-streaming": "^0.3.2",
"pino": "^6.11.0",
Expand Down
3 changes: 0 additions & 3 deletions src/bin/cogrpc-server.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
// tslint:disable-next-line:no-var-requires
require('make-promises-safe');

import { runServer } from '../services/cogrpc/server';

runServer();
6 changes: 0 additions & 6 deletions src/bin/crc-queue-worker.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,6 @@
// tslint:disable-next-line:no-var-requires
require('make-promises-safe');

import { get as getEnvVar } from 'env-var';

import { processIncomingCrcCargo } from '../services/crcQueueWorker';

const WORKER_ID = getEnvVar('WORKER_ID').required().asString();
processIncomingCrcCargo(WORKER_ID);

// tslint:disable-next-line:no-console
console.log(`Starting queue worker ${WORKER_ID}`);
21 changes: 11 additions & 10 deletions src/bin/generate-keypairs.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,3 @@
// tslint:disable:no-console

// tslint:disable-next-line:no-var-requires
require('make-promises-safe');

import {
Certificate,
generateECDHKeyPair,
Expand All @@ -17,6 +12,11 @@ import { get as getEnvVar } from 'env-var';
import { initVaultKeyStore } from '../backingServices/keyStores';
import { createMongooseConnectionFromEnv } from '../backingServices/mongo';
import { OwnCertificate } from '../services/models';
import { configureExitHandling } from '../utilities/exitHandling';
import { makeLogger } from '../utilities/logging';

const LOGGER = makeLogger();
configureExitHandling(LOGGER);

const NODE_CERTIFICATE_TTL_DAYS = 180;
const SESSION_CERTIFICATE_TTL_DAYS = 60;
Expand All @@ -29,10 +29,10 @@ async function main(): Promise<void> {
const keyId = Buffer.from(KEY_ID_BASE64, 'base64');
try {
await sessionStore.fetchNodeKey(keyId);
console.warn(`Gateway key ${KEY_ID_BASE64} already exists`);
LOGGER.warn(`Gateway key ${KEY_ID_BASE64} already exists`);
return;
} catch (error) {
console.log(`Gateway key will be created because it doesn't already exist`);
LOGGER.info(`Gateway key will be created because it doesn't already exist`);
}

const gatewayKeyPair = await generateRSAKeyPair();
Expand Down Expand Up @@ -66,12 +66,13 @@ async function main(): Promise<void> {
});
await sessionStore.saveInitialSessionKey(initialSessionKeyPair.privateKey, initialKeyCertificate);

console.log(
JSON.stringify({
LOGGER.info(
{
gatewayCertificate: base64Encode(gatewayCertificate.serialize()),
initialSessionCertificate: base64Encode(initialKeyCertificate.serialize()),
keyPairId: KEY_ID_BASE64,
}),
},
'Key pairs were successfully generated',
);
}

Expand Down
3 changes: 0 additions & 3 deletions src/bin/pdc-outgoing-queue-worker.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
// tslint:disable-next-line:no-var-requires
require('make-promises-safe');

import { get as getEnvVar } from 'env-var';

import { processInternetBoundParcels } from '../services/internetBoundParcelsQueueWorker';
Expand Down
3 changes: 0 additions & 3 deletions src/bin/pohttp-server.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
// tslint:disable-next-line:no-var-requires
require('make-promises-safe');

import { makeServer } from '../services/pohttp/server';
import { runFastify } from '../utilities/fastify';

Expand Down
3 changes: 0 additions & 3 deletions src/bin/poweb-server.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
// tslint:disable-next-line:no-var-requires
require('make-promises-safe');

import { makeServer } from '../services/poweb/server';
import { runFastify } from '../utilities/fastify';

Expand Down
10 changes: 10 additions & 0 deletions src/services/cogrpc/server.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import { Logger } from 'pino';
import selfsigned from 'selfsigned';

import { makeMockLogging, mockSpy, partialPinoLog } from '../../_test_utils';
import * as exitHandling from '../../utilities/exitHandling';
import * as logging from '../../utilities/logging';
import { configureMockEnvVars, getMockContext } from '../_test_utils';
import { MAX_RAMF_MESSAGE_SIZE } from '../constants';
Expand Down Expand Up @@ -33,6 +34,8 @@ const mockSelfSignedOutput = {
};
const mockSelfSigned = mockSpy(jest.spyOn(selfsigned, 'generate'), () => mockSelfSignedOutput);

const mockExitHandler = mockSpy(jest.spyOn(exitHandling, 'configureExitHandling'));

const BASE_ENV_VARS = {
GATEWAY_KEY_ID: 'base64-encoded key id',
NATS_CLUSTER_ID: 'nats-cluster-id',
Expand All @@ -47,6 +50,13 @@ const mockLogger = makeMockLogging().logger;
const mockMakeLogger = mockSpy(jest.spyOn(logging, 'makeLogger'), () => mockLogger);

describe('runServer', () => {
test('Exit handler should be configured as the very first step', async () => {
mockEnvVars({});

await expect(runServer()).toReject();
expect(mockExitHandler).toBeCalledWith(mockLogger);
});

test.each([
'GATEWAY_KEY_ID',
'NATS_SERVER_URL',
Expand Down
5 changes: 4 additions & 1 deletion src/services/cogrpc/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { KeyCertPair, Server, ServerCredentials } from 'grpc';
import grpcHealthCheck from 'grpc-health-check';
import { Logger } from 'pino';
import * as selfsigned from 'selfsigned';
import { configureExitHandling } from '../../utilities/exitHandling';

import { makeLogger } from '../../utilities/logging';
import { MAX_RAMF_MESSAGE_SIZE } from '../constants';
Expand All @@ -19,6 +20,9 @@ const MAX_CONNECTION_AGE_GRACE_SECONDS = 30;
const MAX_CONNECTION_IDLE_SECONDS = 5;

export async function runServer(logger?: Logger): Promise<void> {
const baseLogger = logger ?? makeLogger();
configureExitHandling(baseLogger);

const gatewayKeyIdBase64 = getEnvVar('GATEWAY_KEY_ID').required().asString();
const publicAddress = getEnvVar('PUBLIC_ADDRESS').required().asString();
const parcelStoreBucket = getEnvVar('OBJECT_STORE_BUCKET').required().asString();
Expand All @@ -34,7 +38,6 @@ export async function runServer(logger?: Logger): Promise<void> {
'grpc.max_receive_message_length': MAX_RECEIVED_MESSAGE_LENGTH,
});

const baseLogger = logger ?? makeLogger();
const serviceImplementation = await makeServiceImplementation({
baseLogger,
gatewayKeyIdBase64,
Expand Down
28 changes: 27 additions & 1 deletion src/services/crcQueueWorker.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import * as privateKeyStore from '../backingServices/keyStores';
import * as mongo from '../backingServices/mongo';
import { NatsStreamingClient } from '../backingServices/natsStreaming';
import * as objectStorage from '../backingServices/objectStorage';
import * as exitHandling from '../utilities/exitHandling';
import * as logging from '../utilities/logging';
import {
castMock,
Expand Down Expand Up @@ -61,7 +62,10 @@ beforeEach(() => {
makeQueueConsumer: jest.fn().mockImplementation(mockMakeQueueConsumer),
});
});
mockSpy(jest.spyOn(NatsStreamingClient, 'initFromEnv'), () => mockNatsClient);
const mockNatsInitFromEnv = mockSpy(
jest.spyOn(NatsStreamingClient, 'initFromEnv'),
() => mockNatsClient,
);

//region Mongoose-related fixtures

Expand Down Expand Up @@ -143,13 +147,35 @@ beforeAll(() => {
});
const mockMakeLogger = mockSpy(jest.spyOn(logging, 'makeLogger'), () => mockLogging.logger);

const mockExitHandler = mockSpy(jest.spyOn(exitHandling, 'configureExitHandling'));

describe('Queue subscription', () => {
test('Logger should be configured', async () => {
await processIncomingCrcCargo(STUB_WORKER_NAME);

expect(mockMakeLogger).toBeCalledWith();
});

test('Exit handler should be configured as the very first step', async () => {
const error = new Error('oh noes');
mockNatsInitFromEnv.mockImplementation(() => {
throw error;
});

await expect(processIncomingCrcCargo(STUB_WORKER_NAME)).rejects.toEqual(error);
expect(mockExitHandler).toBeCalledWith(
expect.toSatisfy((logger) => logger.bindings().worker === STUB_WORKER_NAME),
);
});

test('Start of the queue should be logged', async () => {
await processIncomingCrcCargo(STUB_WORKER_NAME);

expect(mockLogging.logs).toContainEqual(
partialPinoLog('info', 'Starting queue worker', { worker: STUB_WORKER_NAME }),
);
});

test('Worker should subscribe to channel "crc-cargo"', async () => {
await processIncomingCrcCargo(STUB_WORKER_NAME);

Expand Down
13 changes: 8 additions & 5 deletions src/services/crcQueueWorker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,16 @@ import { initVaultKeyStore } from '../backingServices/keyStores';
import { createMongooseConnectionFromEnv } from '../backingServices/mongo';
import { NatsStreamingClient } from '../backingServices/natsStreaming';
import { initObjectStoreFromEnv } from '../backingServices/objectStorage';
import { configureExitHandling } from '../utilities/exitHandling';
import { makeLogger } from '../utilities/logging';
import { MongoPublicKeyStore } from './MongoPublicKeyStore';
import { ParcelStore } from './parcelStore';

export async function processIncomingCrcCargo(workerName: string): Promise<void> {
const logger = makeLogger().child({ worker: workerName });
configureExitHandling(logger);
logger.info('Starting queue worker');

const natsStreamingClient = NatsStreamingClient.initFromEnv(workerName);

const queueConsumer = natsStreamingClient.makeQueueConsumer(
Expand All @@ -32,16 +37,14 @@ export async function processIncomingCrcCargo(workerName: string): Promise<void>
undefined,
'-consumer',
);
await pipe(queueConsumer, makeCargoProcessor(workerName, natsStreamingClient));
await pipe(queueConsumer, makeCargoProcessor(natsStreamingClient, logger));
}

function makeCargoProcessor(
workerName: string,
natsStreamingClient: NatsStreamingClient,
logger: Logger,
): (messages: AsyncIterable<Message>) => Promise<void> {
return async (messages) => {
const logger = makeLogger();

const mongooseConnection = await createMongooseConnectionFromEnv();
const gateway = new Gateway(initVaultKeyStore(), new MongoPublicKeyStore(mongooseConnection));

Expand All @@ -54,7 +57,7 @@ function makeCargoProcessor(
await processCargo(
message,
gateway,
logger.child({ worker: workerName }),
logger,
parcelStore,
mongooseConnection,
natsStreamingClient,
Expand Down
Loading