root/resources/check_resmon_metric

Revision 8ef3e590f3af9743cdd47f509206555222b1923a, 5.4 kB (checked in by Mark Harrison <mark@omniti.com>, 5 years ago)

Nagios check for resmon metrics

git-svn-id: https://labs.omniti.com/resmon/branches/resmon2@283 8c0face9-b7db-6ec6-c4b3-d5f7145c7d55

  • Property mode set to 100755
Line 
1 #!/usr/bin/perl -w
2 # Nagios script to check a resmon monitor's metrics and trigger alerts based
3 # on various rules
4
5 # Remove the following line to disable embedded perl
6 # nagios: +epn
7
8 use vars qw($PROGNAME);
# Derive the program name from $0 by keeping only the part after the last
# directory separator (either slash style). When $0 contains no separator at
# all (e.g. "perl check_resmon_metric"), fall back to $0 itself so that
# $PROGNAME is never left undefined.
$PROGNAME = ($0 =~ m{([^/\\]+)$}) ? $1 : $0;
12
13 use strict;
14 use warnings;
15 use LWP::UserAgent;
16 use HTTP::Request;
17 use HTTP::Response;
18 use Time::HiRes qw( gettimeofday tv_interval );
19 use XML::Simple;
20 use Getopt::Long;
21
22 use utils qw($TIMEOUT %ERRORS &print_revision &support);
23
24 delete @ENV{'PATH', 'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};
25
# Command line option values and their defaults. Every variable gets an
# explicit initialiser: previously the list on the right was one element
# short, which left $metric undefined.
my ($port, $host, $module, $check, $age, $warning, $critical, $regex,
    $metric) = (81, "", "", "", 0, "", "", "", "");
28
# Print the full usage text and exit with the Nagios UNKNOWN status.
# Also installed as the Getopt::Long handler for -h/--help; any arguments
# passed in by the caller are ignored. Never returns.
sub help {
    # The usage line needs its own newline, otherwise the first option is
    # printed on the same line as "Usage: ...".
    print "Usage: $0 [options]\n";
    print " -H | --hostname     host to check\n";
    print " -P | --port         port that resmon runs on (default 81)\n";
    print " -M | --module       module to check\n";
    print " -C | --check        name of individual check\n";
    print " -A | --age          how recently should the check have been";
    print " updated\n";
    print " -m | --metric       metric name to check\n";
    print " -w | --warning      warning threshold (numeric metrics only)\n";
    print " -c | --critical     critical threshold (numeric metrics only)\n";
    print " -r | --regex        regex match against the metric (string";
    print " metrics only)\n";
    exit $ERRORS{'UNKNOWN'};
}
44
# Emit the two-line usage reminder shown when required options are missing,
# then terminate with the Nagios UNKNOWN exit status. Never returns.
sub short_help {
    my @lines = (
        "Usage: $0 -H host -M module -C check -m metric [options]\n",
        "run $0 --help for more information\n",
    );
    print $_ for @lines;
    exit $ERRORS{'UNKNOWN'};
}
50
# Evaluate a value against the optional warning and critical thresholds.
#
# Parameters:
#   $value    - the metric value to test
#   $warning  - warning threshold string, or undef/"" for none
#   $critical - critical threshold string, or undef/"" for none
#
# Returns a two element list (STATE, MESSAGE) where STATE is "CRITICAL",
# "WARNING" or "OK". The critical threshold is tested first, so a value that
# violates both reports CRITICAL. When everything passes, MESSAGE joins the
# per-threshold messages with " and ".
sub check_threshold {
    my ($value, $warning, $critical) = @_;
    my @passed;

    for my $level (['CRITICAL', $critical], ['WARNING', $warning]) {
        my ($name, $threshold) = @$level;

        # Test for "was a threshold given" by length rather than truthiness,
        # so that a threshold of "0" is honoured instead of silently skipped.
        next unless defined $threshold && length $threshold;

        my ($ok, $message) = check_single_threshold($value, $threshold);
        return ($name, $message) unless $ok;
        push @passed, $message;
    }

    return ("OK", join(" and ", @passed));
}
74
# Test a value against one threshold specification.
#
# Threshold forms handled (S and E are numbers):
#   "E"     - violated when value < 0 or value > E (start defaults to 0)
#   "S:"    - violated when value < S (no upper bound)
#   "~:E"   - violated when value > E (no lower bound)
#   "S:E"   - violated when value < S or value > E
#   "@..."  - a leading "@" inverts the resulting state
#
# Parameters:
#   $value     - the numeric value to test
#   $threshold - the threshold specification string
#
# Returns a two element list (STATE, MESSAGE). STATE is 1 when the threshold
# is satisfied and 0 when it is violated. MESSAGE describes how the value
# relates to the bounds; it is chosen before the "@" inversion is applied, so
# it always states the actual relation between value and range.
sub check_single_threshold {
    my ($value, $threshold) = @_;
    my ($inclusive, $start, $end) = (
        $threshold =~ /(\@?)(?:(-?[0-9.]+|~):)?(-?[0-9.]+|~)?/);

    # Only fill in defaults for bounds that were really absent. Using ||=
    # here would also replace an explicit upper bound of "0" with "", turning
    # e.g. "~:0" or "-5:0" into a range without an upper limit.
    $start = 0  unless defined $start && length $start;
    $end   = "" unless defined $end;

    my $message;
    my $goodmessage;
    my $badmessage;

    my $state = 1;
    if ($start eq "~") {
        # No lower bound: only the upper bound is checked.
        $badmessage = "$value > $end";
        $goodmessage = "$value <= $end";
        if ($value > $end) {
            $state = 0;
        }
    } elsif ($end eq "") {
        # No upper bound: only the lower bound is checked.
        $badmessage = "$value < $start";
        $goodmessage = "$value >= $start";
        if ($value < $start) {
            $state = 0;
        }
    } else {
        $badmessage = "$value outside range ($start to $end)";
        $goodmessage = "$start <= $value <= $end";
        if ($value < $start || $value > $end) {
            $state = 0;
        }
    }

    $message = $state ? $goodmessage : $badmessage;

    # Negate the result if inclusive
    if ($inclusive) {
        $state = $state ? 0 : 1;
    }
    return ($state, $message);
}
114
# Parse the command line. Option names are case sensitive and single letter
# options may be bundled. An unparsable command line is a usage error, so it
# ends in short_help() (exit UNKNOWN) instead of carrying on with defaults.
Getopt::Long::Configure('bundling', 'no_ignore_case');
GetOptions (
    "h|help"            => \&help,
    # --hostname is what the help text documents; --host is kept as an alias
    # for existing callers.
    "H|host|hostname=s" => \$host,
    "P|port=i"          => \$port,
    "M|module=s"        => \$module,
    "C|check=s"         => \$check,
    "A|age=i"           => \$age,
    "m|metric=s"        => \$metric,
    "w|warning=s"       => \$warning,
    "c|critical=s"      => \$critical,
    "r|regex=s"         => \$regex) or short_help();

# Host, module, check and metric are all required; the metric name is tested
# by length because it may legitimately be "0" and may be undefined when the
# option was not given.
unless ($host && $module && $check && defined $metric && length $metric) {
    short_help();
}

# Thresholds are tested by length so that a threshold of "0" still counts as
# "a numeric threshold was given".
if (((defined $warning && length $warning)
        || (defined $critical && length $critical)) && $regex) {
    print "Cannot specify both numeric thresholds and a string based match\n";
    exit $ERRORS{'UNKNOWN'};
}
136
# Fetch the resmon XML for the requested module/check over HTTP, pick out the
# requested metric and evaluate it against the regex or numeric thresholds.
# Any failure inside the eval is reported as a single CRITICAL line.
my $ua = LWP::UserAgent->new;
# Bound the HTTP request with the plugin timeout imported from utils.
# NOTE(review): assumes $TIMEOUT is a number of seconds - confirm against the
# installed utils.pm.
$ua->timeout($TIMEOUT) if defined $TIMEOUT;
my $t = HTTP::Request->new('GET', "http://$host:$port/$module/$check");
my $xs = XML::Simple->new();
my $state = "UNKNOWN";
eval {
    # die messages end in a newline so that Perl does not append
    # "at ... line N" to the text shown in Nagios, and they carry no status
    # prefix because the handler below adds "CRITICAL: " itself.
    die "No metric name specified\n"
        unless (defined $metric && length $metric);

    # Make the HTTP request
    my $res = $ua->request($t);
    die "could not fetch\n" unless ($res && $res->is_success);

    # Parse the xml
    my $ref = eval { $xs->XMLin($res->content, ForceArray => 1) };
    die "error parsing XML\n" if ($@ || ref($ref) ne 'HASH');

    # Debugging
    #use Data::Dumper;
    #print Dumper($ref->{ResmonResult});

    my $results = $ref->{ResmonResult};
    my $result = (ref($results) eq 'ARRAY') ? $results->[0] : undef;
    die "No ResmonResult found in response\n"
        unless (ref($result) eq 'HASH');

    # If we have stale information, then go critical
    if ($age) {
        my $updated = $result->{last_update};
        $updated = $updated->[0] if (ref($updated) eq 'ARRAY');
        die "No last_update found in response\n" unless (defined $updated);
        my $last_update = time() - $updated;
        die "Stale metrics. Last updated $last_update seconds ago\n"
            if ($age < $last_update);
    }

    # Get the metrics
    my $metrics = $result->{metric};
    my $metricval = (ref($metrics) eq 'HASH') ? $metrics->{$metric} : undef;
    die "Metric $metric not found\n" unless (ref($metricval) eq 'HASH');
    my $value = $metricval->{content};
    $value = "" unless (defined $value);
    my $type = $metricval->{type} || "0";

    # Thresholds are tested by length so that a threshold of "0" counts.
    my $numeric = ((defined $warning && length $warning)
        || (defined $critical && length $critical)) ? 1 : 0;

    # Note: if type is auto (0), then we assume it can be treated as a number
    # of some sort. If you're specifying a warning/critical threshold, then
    # you are too.
    # NOTE(review): "i" was absent from the original type list; it is
    # presumably resmon's signed integer type - confirm against resmon.
    die "Numeric threshold specified for a non-numeric metric\n"
        if ($numeric && $type !~ /^[0iIlLn]$/);
    die "Metric $metric has no value\n" if ($numeric && $value eq "");

    if ($regex) {
        if ($value =~ /$regex/) {
            $state = "OK";
            print "$state: $metric - '$value' matches /$regex/\n";
        } else {
            $state = "CRITICAL";
            print "$state: $metric - '$value' does not match /$regex/\n";
        }
    }

    if ($numeric) {
        my $message;
        ($state, $message) = check_threshold($value, $warning, $critical);
        print "$state: $metric - $message\n";
    }

    # Nothing to compare against: still emit a status line so Nagios has
    # something to show; the state stays UNKNOWN.
    if (!$regex && !$numeric) {
        print "$state: $metric - $value (no threshold or regex specified)\n";
    }
};

if (my $error = $@) {
    chomp($error);
    print "CRITICAL: $error\n";
    exit $ERRORS{'CRITICAL'};
} else {
    exit $ERRORS{$state};
}
Note: See TracBrowser for help on using the browser.