Skip to content

Commit

Permalink
Added alarm on average CPU utilization of the EC2 instance (#182)
Browse files: browse the repository at this point in the history
Signed-off-by: Rishabh Singh <[email protected]>
  • Loading branch information
rishabh6788 authored Aug 8, 2022
1 parent cc20cdc commit 1168888
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 16 deletions.
5 changes: 0 additions & 5 deletions lib/compute/jenkins-main-node.ts
Original file line number Diff line number Diff line change
Expand Up @@ -86,17 +86,12 @@ export class JenkinsMainNode {
public readonly ec2Instance: Instance;

public readonly ec2InstanceMetrics: {
cpuTime: Metric,
memUsed: Metric,
foundJenkinsProcessCount: Metric
}

constructor(stack: Stack, props: JenkinsMainNodeProps, agentNode: AgentNodeProps[], macAgent: string, assumeRole?: string[]) {
this.ec2InstanceMetrics = {
cpuTime: new Metric({
metricName: 'procstat_cpu_usage',
namespace: `${stack.stackName}/JenkinsMainNode`,
}),
memUsed: new Metric({
metricName: 'mem_used_percent',
namespace: `${stack.stackName}/JenkinsMainNode`,
Expand Down
27 changes: 17 additions & 10 deletions lib/monitoring/ci-alarms.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
*/

import {
Alarm, AlarmWidget, ComparisonOperator, Dashboard, TreatMissingData,
Alarm, AlarmWidget, ComparisonOperator, Dashboard, Metric, TreatMissingData,
} from '@aws-cdk/aws-cloudwatch';
import { Stack } from '@aws-cdk/core';
import { JenkinsExternalLoadBalancer } from '../network/ci-external-load-balancer';
Expand All @@ -19,6 +19,22 @@ export class JenkinsMonitoring {
constructor(stack: Stack, externalLoadBalancer: JenkinsExternalLoadBalancer, mainNode: JenkinsMainNode) {
const dashboard = new Dashboard(stack, 'AlarmDashboard');

const cpuMetric = new Metric({
namespace: 'AWS/EC2',
metricName: 'CPUUtilization',
dimensionsMap: {
InstanceId: mainNode.ec2Instance.instanceId,
},
});

this.alarms.push(new Alarm(stack, 'AverageMainNodeCpuUtilization', {
alarmDescription: 'Overall EC2 avg CPU Utilization',
evaluationPeriods: 3,
metric: cpuMetric,
threshold: 50,
comparisonOperator: ComparisonOperator.GREATER_THAN_THRESHOLD,
}));

this.alarms.push(new Alarm(stack, 'ExternalLoadBalancerUnhealthyHosts', {
alarmDescription: 'If any hosts behind the load balancer are unhealthy',
metric: externalLoadBalancer.targetGroup.metricUnhealthyHostCount(),
Expand All @@ -37,15 +53,6 @@ export class JenkinsMonitoring {
treatMissingData: TreatMissingData.IGNORE,
}));

this.alarms.push(new Alarm(stack, 'MainNodeHighCpuUtilization', {
alarmDescription: 'The jenkins process is using much more CPU that expected, it should be investigated for a stuck process/job',
metric: mainNode.ec2InstanceMetrics.cpuTime.with({ statistic: 'avg' }),
evaluationPeriods: 5,
threshold: 50,
comparisonOperator: ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
treatMissingData: TreatMissingData.IGNORE,
}));

this.alarms.push(new Alarm(stack, 'MainNodeHighMemoryUtilization', {
alarmDescription: 'The jenkins process is using more memory than expected, it should be investigated for a large number of jobs or heavy weight jobs',
metric: mainNode.ec2InstanceMetrics.memUsed.with({ statistic: 'avg' }),
Expand Down
2 changes: 1 addition & 1 deletion test/ci-stack.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ test('CloudwatchCpuAlarm', () => {

// THEN
expect(stack).to(haveResourceLike('AWS::CloudWatch::Alarm', {
MetricName: 'procstat_cpu_usage',
MetricName: 'CPUUtilization',
Statistic: 'Average',
}, ResourcePart.Properties));
});
Expand Down

0 comments on commit 1168888

Please sign in to comment.