root/resources/check_resmon_metric

Revision d1676da2e0d716d56185d00ab5e64cca6cb6d2de, 9.5 kB (checked in by Mark Harrison <mark@omniti.com>, 4 years ago)

Integers are numeric too

(Reported by Bryan Horstmann-Allen)

git-svn-id: https://labs.omniti.com/resmon/trunk@429 8c0face9-b7db-6ec6-c4b3-d5f7145c7d55

  • Property mode set to 100755
Line 
1 #!/usr/bin/perl -w
2 # Remove the following line to disable embedded perl
3 # nagios: +epn
4
5 =pod
6
7 =head1 NAME
8
9 check_resmon_metric - Nagios check to monitor a resmon metric
10
11 =head1 SYNOPSIS
12
13 check_resmon_metric -H hostname [-P port] -M module_name -C check_name
14                     -m metric_name [ -r regex | [-w warning_threshold]
15                     [-c critical_threshold] ] [ -A age ] [ -u alternate_url ]
16
17 =head1 DESCRIPTION
18
19 This is a nagios check script that will connect to a running resmon instance,
20 fetch information for a single check, and compare a metric against rules
21 provided on the command line, returning the status to nagios based on the
22 result.
23
24 For numeric metrics, there are options for warning/critical thresholds, and
25 for string metrics, there is an option to match against a regular expression.
26
27 =head1 OPTIONS
28
29 =over
30
31 =item -H hostname
32
33 The hostname of the resmon instance to connect to. Required.
34
35 =item -P port
36
37 The port that resmon is listening on. Defaults to 81.
38
39 =item -M module_name
40
41 The module name of the check you wish to fetch. Required.
42
43 =item -C check_name
44
45 The check name you wish to fetch. Required.
46
47 =item -m metric_name
48
49 The name of the metric you wish to evaluate rules against. Required.
50
51 =item -w warning_threshold
52
53 A numeric threshold (see below) to test the metric against. Will return a
54 warning status if the threshold matches. This is only applicable for numeric
55 metrics. An error will be returned for string metrics. Optional.
56
57 =item -c critical_threshold
58
59 A numeric threshold (see below) to test the metric against. Will return a
60 critical status if the threshold matches. This is only applicable for numeric
61 metrics. An error will be returned for string metrics. If both the critical
62 and warning thresholds match a given metric, then the status returned is
63 critical.  Optional.
64
65 =item -r regex
66
67 A regular expression to match the metric against. This is most useful for
68 string based metrics. If the regular expression matches, then an ok status
69 will be returned, otherwise the status will be critical. This cannot be used
70 in conjunction with the warning/critical thresholds. Optional.
71
72 =item -A age
73
74 This option will cause the check to go critical if the metrics were last
75 updated more than age seconds ago. Optional.
76
77 =item -u url
78
79 Normally this check will fetch metrics from http://host:port/module/check, but
80 if you are using this to check a different system that exposes metrics in a
81 similar fashion to resmon, you can specify an alternate url here. You only
82 need to specify the part after hostname/port. For example, if you need to hit
83 http://host:port/resmon then pass -u /resmon.
84
85 =back
86
87 =head1 THRESHOLDS
88
89 The warning and critical thresholds are specified in the same way as for other
90 nagios plugins, as described at
91 http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT
92
93 Most of the time, you just want to put a number for the threshold. For
94 example, '-w 10 -c 20' would go warning if the value is over 10, and critical
95 if it is over 20.
96
97 For more complex ranges, use the more generalized format of: [@]start:end
98
99 =over
100
101 =item *
102
103 Start and end are both numbers, and either can be negative
104
105 =item *
106
107 If start is missing, then the colon isn't required, and the range is from 0 to
108 end. In other words, if you specify a single number, the range is from 0 to
109 the number you specified.
110
111 =item *
112
113 If end is missing (E.g. 10:), then the range is from start to infinity.
114
115 =item *
116
117 If start is '~', then the range is from minus infinity to end (as opposed to
118 0-end if start is omitted)
119
120 =item *
121
122 An alert is generated if the test value lies outside the specified range.
123
124 =item *
125
126 If @ is specified at the beginning, the alerting behavior is flipped. In other
127 words, an alert is generated if the value lies within the range.
128
129 =back
130
131 =head2 EXAMPLES
132
133 The following lists example thresholds, and the conditions under which an
134 alert is generated.
135
136 =over
137
138 =item '10'
139
140 Ok if: 0 <= X <= 10, Alert if: X < 0 or X > 10
141
142 =item '10:'
143
144 Ok if: X >= 10, Alert if: X < 10
145
146 =item '~:10'
147
148 Ok if: X <= 10, Alert if: X > 10
149
150 =item '10:20'
151
152 Ok if: 10 <= X <= 20, Alert if: X < 10 or X > 20
153
154 =item '@10:20'
155
156 Ok if: X < 10 or X > 20, Alert if: 10 <= X <= 20
157
158 =back
159
160 =cut
161
# $PROGNAME holds the name of this script without its directory part.
# Take the last path component of $0 (either / or \ as separator) so that the
# name is also set when the script is invoked without any directory prefix.
use vars qw($PROGNAME);
($PROGNAME) = $0 =~ m{([^/\\]+)\z};
166
167 use strict;
168 use warnings;
169
170 use LWP::UserAgent;
171 use HTTP::Request;
172 use HTTP::Response;
173 use Time::HiRes qw( gettimeofday tv_interval );
174 use XML::Simple;
175 use Getopt::Long;
176
177 use utils qw($TIMEOUT %ERRORS &print_revision &support);
178
# Remove the shell-related environment variables that perlsec lists as unsafe
# for scripts that may run external commands.
delete @ENV{qw(PATH IFS CDPATH ENV BASH_ENV)};

# Command line settings and their defaults; GetOptions fills these in.
my $port     = 81;   # -P
my $host     = "";   # -H
my $module   = "";   # -M
my $check    = "";   # -C
my $age      = 0;    # -A
my $warning  = "";   # -w
my $critical = "";   # -c
my $regex    = "";   # -r
my $metric   = "";   # -m
my $url;             # -u (left undefined until given or derived later)
183
# Print the full option summary and exit with the nagios UNKNOWN status.
# This is also installed as the handler for -h/--help, so any arguments
# passed in by the option parser are ignored.
sub help {
    # The long form of -H is listed as --host because that is the name the
    # option parser is configured with.
    print <<"USAGE";
Usage: $0 [options]
 -H | --host         host to check
 -P | --port         port that resmon runs on (default 81)
 -M | --module       module to check
 -C | --check        name of individual check
 -A | --age          how recently should the check have been updated
 -m | --metric       metric name to check
 -w | --warning      warning threshold (numeric metrics only)
 -c | --critical     critical threshold (numeric metrics only)
 -r | --regex        regex match against the metric (string metrics only)
 -u | --url          specify an alternate URL to fetch
USAGE
    exit $ERRORS{'UNKNOWN'};
}
200
# Print a brief usage reminder pointing at --help, then exit with the nagios
# UNKNOWN status.
sub short_help {
    print "Usage: $0 -H host -M module -C check -m metric [options]\n",
          "run $0 --help for more information\n";
    exit $ERRORS{'UNKNOWN'};
}
206
# Evaluate a value against the optional warning and critical thresholds.
#
# Arguments: the metric value, the warning threshold and the critical
# threshold. A threshold that is undefined or the empty string is skipped.
#
# Returns a two element list: the status ("OK", "WARNING" or "CRITICAL") and
# a message describing the comparison. When both thresholds alert, CRITICAL
# is reported.
sub check_threshold {
    my ($value, $warning, $critical) = @_;
    my @ok_messages;

    # Critical is evaluated first so that it wins over warning.
    for my $rule (["CRITICAL", $critical], ["WARNING", $warning]) {
        my ($status, $threshold) = @$rule;

        # Test for presence rather than truth, so a threshold of "0" is
        # still evaluated instead of being silently ignored.
        next unless defined $threshold && length $threshold;

        my ($in_bounds, $message) =
            check_single_threshold($value, $threshold);
        return ($status, $message) unless $in_bounds;
        push @ok_messages, $message;
    }

    return ("OK", join(" and ", @ok_messages));
}
230
# Test a value against one nagios-style threshold of the form [@]start:end.
#
# Arguments: the numeric value and the threshold string.
#
# Returns a two element list ($state, $message). $state is 1 when the value
# should not raise an alert and 0 when it should. $message describes where
# the value lies relative to the bounds of the threshold.
sub check_single_threshold {
    my ($value, $threshold) = @_;
    my ($inclusive, $start, $end) = (
        $threshold =~ /(\@?)(?:(-?[0-9.]+|~):)?(-?[0-9.]+|~)?/);

    # Default only the parts that are really absent. Using ||= here would
    # also throw away a literal "0" upper bound, turning thresholds such as
    # "0", "~:0" or "-5:0" into open-ended ranges.
    $start = 0  unless defined $start;
    $end   = "" unless defined $end;

    my $message;
    my $goodmessage;
    my $badmessage;

    my $state = 1;
    if ($start eq "~") {
        # No lower bound: only the upper bound is checked.
        $badmessage = "$value > $end";
        $goodmessage = "$value <= $end";
        if ($value > $end) {
            $state = 0;
        }
    } elsif ($end eq "") {
        # No upper bound: only the lower bound is checked.
        $badmessage = "$value < $start";
        $goodmessage = "$value >= $start";
        if ($value < $start) {
            $state = 0;
        }
    } else {
        $badmessage = "$value outside range ($start to $end)";
        $goodmessage = "$start <= $value <= $end";
        if ($value < $start || $value > $end) {
            $state = 0;
        }
    }

    # The message always describes the position relative to the range, even
    # when the alerting sense is flipped below.
    $message = $state ? $goodmessage : $badmessage;

    # Negate the result if inclusive (leading @): alert inside the range.
    if ($inclusive) {
        $state = $state ? 0 : 1;
    }
    return ($state, $message);
}
270
# Parse the command line. Short options may be bundled and option names are
# case sensitive, which keeps -m/-M, -c/-C and -h/-H apart.
Getopt::Long::Configure('bundling', 'no_ignore_case');
GetOptions (
    "h|help"            => \&help,
    # --hostname is accepted as an alias because the usage text has
    # advertised it.
    "H|host|hostname=s" => \$host,
    "P|port=i"          => \$port,
    "M|module=s"        => \$module,
    "C|check=s"         => \$check,
    "A|age=i"           => \$age,
    "m|metric=s"        => \$metric,
    "w|warning=s"       => \$warning,
    "c|critical=s"      => \$critical,
    "r|regex=s"         => \$regex,
    "u|url=s"           => \$url)
    # An unknown or malformed option is a usage error, not something to
    # carry on past.
    or short_help();

# Host, module, check and metric are all documented as required.
unless ($host && $module && $check && length $metric) {
    short_help();
}

# Thresholds are tested by length so that a threshold of "0" counts as given.
if ((length $warning || length $critical) && length $regex) {
    print "Cannot specify both numeric thresholds and a string based match\n";
    exit $ERRORS{'UNKNOWN'};
}
293
# Fetch the check result from resmon, evaluate the requested rule against the
# chosen metric, print one status line and exit with the matching nagios
# status code. Any failure inside the eval is reported as CRITICAL.
my $ua = LWP::UserAgent->new;
# Apply the standard plugin timeout instead of the much longer LWP default.
$ua->timeout($TIMEOUT) if $TIMEOUT;
$url = "/$module/$check" unless $url;
my $t = HTTP::Request->new('GET', "http://$host:$port$url");
my $xs = XML::Simple->new();
my $state = "UNKNOWN";
eval {
    # Error messages end in a newline so perl does not append the script
    # name and line number, and carry no status prefix because the handler
    # below adds "CRITICAL: " itself.

    # Make the HTTP request
    my $res = $ua->request($t);
    die "could not fetch\n" unless($res && $res->is_success);

    # Parse the xml
    my $ref = eval { $xs->XMLin($res->content, ForceArray => 1) };
    die "error parsing XML\n" if($@ || ref($ref) ne 'HASH');

    # Only the first ResmonResult element is examined
    my $results = $ref->{ResmonResult};
    my $result = ref($results) eq 'ARRAY' ? $results->[0] : undef;
    die "no ResmonResult found in response\n" unless ref($result) eq 'HASH';

    # If we have stale information, then go critical
    if ($age) {
        my $updated = ref($result->{last_update}) eq 'ARRAY'
            ? $result->{last_update}->[0] : undef;
        die "no last_update found in response\n" unless defined $updated;
        my $last_update = time() - $updated;
        die "Stale metrics. Last updated $last_update seconds ago\n"
            if($age < $last_update);
    }

    # Get the metric; a metric missing from the response is an error rather
    # than an undefined value to compare against.
    my $metrics = $result->{metric};
    my $metricval = ref($metrics) eq 'HASH' ? $metrics->{$metric} : undef;
    die "metric $metric not found\n" unless ref($metricval) eq 'HASH';
    my $value = $metricval->{content};
    $value = "" unless defined $value;
    my $type = $metricval->{type} || "0";

    # Rules are tested by length so that a threshold of "0" counts as given.
    my $has_threshold = (length $warning || length $critical);

    # Note: if type is auto (0), then we assume it can be treated as a number
    # of some sort. If you're specifying a warning/critical threshold, then
    # you are too.
    die "Numeric threshold specified for a non-numeric metric\n"
        if ($has_threshold && $type !~ /[0IlLni]/);

    if (length $regex) {
        $state = ($value =~ /$regex/) ? "OK" : "CRITICAL";
        print "$state: $value\n";
    } elsif ($has_threshold) {
        die "metric $metric has no value\n" unless length $value;
        my $message;
        ($state, $message) = check_threshold($value, $warning, $critical);
        print "$state: $metric - $message\n";
    } else {
        # Nothing to evaluate: keep the UNKNOWN state but say why.
        print "UNKNOWN: no regex or threshold given for $metric\n";
    }
};

if($@) {
    chomp(my $error = $@);
    print "CRITICAL: $error\n";
    exit $ERRORS{'CRITICAL'};
}
exit $ERRORS{$state};
Note: See TracBrowser for help on using the browser.