forked from sensu-plugins/sensu-plugins-mesos
-
Notifications
You must be signed in to change notification settings - Fork 0
/
check-mesos-lost-tasks.rb
executable file
·146 lines (128 loc) · 3.59 KB
/
check-mesos-lost-tasks.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#! /usr/bin/env ruby
#
# check-mesos-lost-tasks
#
# DESCRIPTION:
# This plugin checks that the number of lost tasks on a Mesos cluster does not exceed the provided value
#
# OUTPUT:
# plain text
#
# PLATFORMS:
# Linux
#
# DEPENDENCIES:
# gem: sensu-plugin
# gem: rest-client
# gem: json
# gem: daybreak
#
# USAGE:
# check-mesos-lost-tasks.rb -s SERVER -p PORT -P PROTOCOL -u URI -t TIMEOUT -v VALUE [--delta]
#
# NOTES:
#
# LICENSE:
# Copyright 2016, Oskar Flores ([email protected])
# Released under the same terms as Sensu (the MIT license); see LICENSE
# for details.
#
require 'sensu-plugin/check/cli'
require 'rest-client'
require 'json'
require 'daybreak'
# Sensu check that goes critical when the `master/tasks_lost` metric reported
# by Mesos is bigger than the value given with --value.
#
# The check resolves a URL through the `/redirect` endpoint of the configured
# server (presumably the current leading master), fetches the metrics
# document from --uri on that URL and reads the lost-tasks counter from it.
# With --delta the counter is compared against the value stored by the
# previous run instead of being used as an absolute number.
class MesosLostTasksCheck < Sensu::Plugin::Check::CLI
  check_name 'CheckMesosLostTasks'

  # Key of the lost-tasks counter inside the metrics JSON document.
  @metrics_name = 'master/tasks_lost'.freeze

  # Daybreak file used to remember the counter between runs in --delta mode.
  DELTA_DB_PATH = '/tmp/mesos-metrics.db'.freeze

  class << self
    attr_reader :metrics_name
  end

  option :server,
         description: 'Mesos server',
         short: '-s SERVER',
         long: '--server SERVER',
         default: 'localhost'

  option :port,
         description: 'port (default 5050)',
         short: '-p PORT',
         long: '--port PORT',
         default: 5050,
         required: false

  option :timeout,
         description: 'timeout in seconds',
         short: '-t TIMEOUT',
         long: '--timeout TIMEOUT',
         proc: proc(&:to_i),
         default: 5

  option :protocol,
         description: 'Mesos protocol [http/https]',
         short: '-P PROTOCOL',
         long: '--protocol PROTOCOL',
         required: false,
         default: 'http'

  option :uri,
         description: 'Endpoint URI',
         short: '-u URI',
         long: '--uri URI',
         default: '/metrics/snapshot'

  option :value,
         description: 'value to check against',
         short: '-v VALUE',
         long: '--value VALUE',
         default: 0,
         proc: proc(&:to_i),
         required: false

  option :delta,
         short: '-d',
         long: '--delta',
         description: 'Use this flag to compare the metric with the previously retreived value',
         boolean: true

  # Entry point of the check: validates --value, fetches the metric and
  # reports critical when it is bigger than --value, ok otherwise.
  def run
    threshold = config[:value]
    if threshold < 0
      unknown 'Number of lost tasks cannot be negative, please set --value to a number greater or equal to 0'
    end

    leader = get_leader_url(config[:server], config[:port])
    # The second argument of RestClient::Resource.new is an options hash;
    # the timeout has to be passed by name to take effect.
    response = RestClient::Resource.new("#{leader}#{config[:uri]}", timeout: config[:timeout]).get
    tasks_lost = check_tasks(response)
    tasks_lost = delta_since_last_run(tasks_lost) if config[:delta]

    # Strictly bigger: a count equal to --value (e.g. 0 lost tasks with the
    # default --value 0) is acceptable.
    if tasks_lost > threshold
      critical "The number of LOST tasks [#{tasks_lost}] is bigger than provided [#{threshold}]!"
    end
    ok
  end

  # Asks `server:port` for `/redirect` and returns the URL of the request
  # that finally answered (the method name suggests this is the leader).
  # @param server [String] host name or address of a Mesos node
  # @param port [Integer, String] port of that node
  # @return [String] URL reported by the completed request
  def get_leader_url(server, port)
    redirect_url = "#{config[:protocol]}://#{server}:#{port}/redirect"
    # Use the same --timeout as the metrics request so the lookup is bounded too.
    RestClient::Resource.new(redirect_url, timeout: config[:timeout]).get.request.url
  end

  # Parses JSON data as returned from Mesos's metrics API
  # @param data [String] Server response
  # @return [Integer] Number of lost tasks in Mesos
  # @raise [RuntimeError] when the response is not valid JSON or lacks the metric
  def check_tasks(data)
    begin
      tasks_lost = JSON.parse(data)[MesosLostTasksCheck.metrics_name]
    rescue JSON::ParserError
      raise "Could not parse JSON response: #{data}"
    end
    if tasks_lost.nil?
      raise "No metrics for [#{MesosLostTasksCheck.metrics_name}] in server response: #{data}"
    end

    tasks_lost.round.to_i
  end

  private

  # Stores `current` in the Daybreak file and returns how much it differs
  # from the value stored by the previous run (0 when nothing was stored).
  # @param current [Integer] counter value read in this run
  # @return [Integer] current value minus the previously stored value
  def delta_since_last_run(current)
    key = "task_#{MesosLostTasksCheck.metrics_name}"
    db = Daybreak::DB.new DELTA_DB_PATH, default: 0
    begin
      # Read and replace inside one lock so the stored value cannot change
      # between the read and the write.
      previous = db.lock do
        stored = db[key]
        db[key] = current
        stored
      end
      db.flush
      db.compact
    ensure
      # Always release the file handle, even when the update fails.
      db.close
    end
    current - previous
  end
end