root/resmon

Revision 4f6b9a6769b1dda1b3f51e76ed640faa700aa6c0, 4.1 kB (checked in by Mark Harrison <mark@omniti.com>, 5 years ago)

Change the check data structure from a list to a hash.

This means that if a particular check is loaded twice (Same module, same check
name), the most recently loaded one will take precedence. This allows included
files to override default behavior with certain checks if required.

git-svn-id: https://labs.omniti.com/resmon/branches/resmon2@372 8c0face9-b7db-6ec6-c4b3-d5f7145c7d55

  • Property mode set to 100755
Line 
1 #!/usr/bin/perl
2
# Add the "lib" directory that sits next to this script to @INC at compile
# time, so the Resmon::* modules loaded below can be found regardless of the
# current working directory.
BEGIN {
    require File::Basename;
    require lib;

    # dirname() yields "." for a bare script name (e.g. "perl resmon"), where
    # a hand-rolled regex would yield an empty string and point at "/lib".
    my $dir = File::Basename::dirname($0);

    # Call lib's import directly instead of string-eval'ing a "use lib"
    # statement, so a quote character in the path cannot break (or inject
    # into) the evaluated code.
    lib->import("$dir/lib");
}
8
9 use strict;
10 use warnings;
11 use Time::HiRes qw( gettimeofday tv_interval sleep );
12 use POSIX qw( :sys_wait_h setsid );
13 use Getopt::Long;
14 use Data::Dumper;
15 use vars qw($config_file $debug $status_file $interface $port $config
16 $status $update);
17
18 use Resmon::Config;
19 use Resmon::ExtComm;
20 use Resmon::Status;
21
# Parse command-line switches into the package variables declared above:
#   -i <interface>  interface override (applied in configure())
#   -p <port>       port override (applied in configure())
#   -c <file>       configuration file (defaults to "$0.conf")
#   -d              debug flag; also keeps the process in the foreground
#   -f <file>       status file override (applied in configure())
#   -u              run the updater and exit
# GetOptions returns false when it meets an unknown or malformed option;
# stop there rather than silently starting with defaults.
GetOptions(
    "i=s" => \$interface,
    "p=i" => \$port,
    "c=s" => \$config_file,
    "d"   => \$debug,
    "f=s" => \$status_file,
    "u"   => \$update,
) or die "Usage: $0 [-i interface] [-p port] [-c config_file] "
       . "[-f status_file] [-d] [-u]\n";
30
# -u: hand control to the updater and exit with whatever it returns.
if ($update) {
    # "use" is executed at compile time no matter which branch it appears in,
    # so it would load the updater on every start. "require" defers the load
    # until the update was actually requested.
    require Resmon::Updater;

    # Directory containing this script (script name stripped from $0).
    (my $resmondir = $0) =~ s/\/?[^\/]+$//;
    exit(Resmon::Updater::update($debug, $resmondir));
}
36
# Fall back to "<script path>.conf" when no -c option was given, and refuse
# to start if the chosen file is not readable.
$config_file = "$0.conf" unless $config_file;
-r $config_file
    or die "Cannot open configuration file: $config_file";

# Build the global $config from $config_file, then let any command-line
# values (-f, -p, -i) take precedence over what the file provided.
sub configure {
    $config = Resmon::Config->new($config_file);

    if ($status_file) {
        $config->{statusfile} = $status_file;
    }
    if ($port) {
        $config->{port} = $port;
    }
    if ($interface) {
        $config->{interface} = $interface;
    }
}

# Initial configuration load.
configure();
48
# Signal flags. The handlers only record that a signal arrived; the main loop
# inspects the flags between passes and acts on them there.
my $sighup = 0;    # set by SIGHUP: reload requested
my $sigint = 0;    # set by SIGINT: shutdown requested

sub sighup_handler {
    $sighup = 1;
}

sub sigint_handler {
    $sigint = 1;
}

$SIG{HUP} = \&sighup_handler;
$SIG{INT} = \&sigint_handler;
56
# Timestamp ([seconds, microseconds]) taken at the end of the previous wait;
# undefined until wait_interval() runs for the first time.
my $rmlast;

# Sleep for whatever is left of $config->{interval} since the previous call,
# then record the current time as the new reference point. If more than one
# interval has already passed, return without sleeping.
sub wait_interval {
    if (!defined $rmlast) {
        $rmlast = [gettimeofday];
    }

    my $remaining = $config->{interval} - tv_interval($rmlast);
    sleep($remaining) if $remaining > 0;

    $rmlast = [gettimeofday];
}
66
# Collect every child process that has already exited, without blocking.
# Stops as soon as waitpid reports nothing left to collect (0 or -1).
sub reap_zombies {
    while (1) {
        my $reaped = waitpid(-1, WNOHANG);
        last if $reaped <= 0;
    }
}
73
# Detach from the terminal unless running in debug mode: fork, start a new
# session, point the standard handles at /dev/null, and fork once more.
# The working directory is intentionally left alone so that a relative
# configuration path keeps resolving when the configuration is reloaded.
unless ($debug) {
    # fork returns undef on failure; without this check a failed fork would
    # leave the original process running in the foreground.
    my $pid = fork;
    die "Cannot fork: $!\n" unless defined $pid;
    exit if $pid;

    setsid;

    open(STDIN,  '<', '/dev/null')
        or die "Cannot reopen STDIN from /dev/null: $!\n";
    open(STDOUT, '>', '/dev/null')
        or die "Cannot reopen STDOUT to /dev/null: $!\n";
    open(STDERR, '>', '/dev/null')
        or die "Cannot reopen STDERR to /dev/null: $!\n";

    # Second fork: the surviving process is no longer a session leader.
    $pid = fork;
    die "Cannot fork: $!\n" unless defined $pid;
    exit if $pid;
}
82
my $list = [];

# Create the status object backed by the configured status file and open it.
$status = Resmon::Status->new($config->{statusfile});
$status->open();

# Only start the HTTP side when a port has been configured.
if ($config->{port}) {
    $status->serve_http_on(
        @{$config}{qw( interface port authuser authpass )}
    );
}
89
# Main loop. Each pass walks every configured check, obtains its metrics
# (cached if still valid, otherwise by running the check under a timeout),
# records the result in $status, and then either reloads the configuration
# (after SIGHUP) or waits out the remainder of the interval. SIGINT ends the
# process once the current pass has finished.
while (1) {
    while (my ($module_name, $mod_configs) = each %{ $config->{Module} }) {
        while (my ($check_name, $monitor_obj) = each %$mod_configs) {
            my $starttime = [gettimeofday];

            # Error text for THIS check only. It must start empty: $@ is a
            # global and still holds the outcome of whichever eval ran last,
            # so reading it for a check served from cache would report some
            # other check's failure.
            my $checkproblem = '';

            # Get old status if it hasn't expired
            my $check_metric = $monitor_obj->get_cached_metrics();

            # Otherwise, run the check
            if (!$check_metric) {
                my $timeout = $monitor_obj->{'check_timeout'} ||
                    $config->{'timeout'};
                eval {
                    # Install the handler before arming the alarm so the
                    # signal can never arrive while the default action
                    # (terminate the process) is still in effect.
                    local $SIG{ALRM} = sub { die "alarm\n" };
                    alarm($timeout);
                    $check_metric = $monitor_obj->handler();
                    alarm 0;
                };
                # Also disarm on the failure path, where the eval was left
                # before reaching its own "alarm 0".
                alarm 0;
                # Capture the error before any further method call has a
                # chance to overwrite $@.
                $checkproblem = $@;
                # Store the last metrics for use by fresh_status_msg later
                $monitor_obj->cache_metrics($check_metric);
            }

            my $results = {
                last_runtime_seconds => sprintf("%.6f",
                    tv_interval($starttime)),
                metric => $check_metric
            };
            if ($checkproblem) {
                chomp $checkproblem;
                # Report the failure as a string-typed "error" metric; a
                # timeout gets a fixed, friendlier message.
                $results->{metric} = { "error" => ["$checkproblem", "s"]};
                if ($checkproblem eq "alarm") {
                    $results->{metric} = { "error" => ["Check timeout", "s"]};
                }
                Resmon::ExtComm::clean_up();
            }
            $status->store($module_name,$monitor_obj->{'check_name'}, $results);
            $status->write($module_name,$monitor_obj->{'check_name'},
                $results->{'metric'}, $debug);
        }
    }
    $status->close();
    die "Exiting.\n" if($sigint);
    if ($sighup) {
        # Reload configuration (and modules) on SIGHUP
        $sighup = 0;
        print STDERR "Reloading modules\n";
        # Go through configure() so the command-line overrides (-f, -p, -i)
        # are re-applied to the freshly loaded configuration.
        configure();
        # Needed to ensure any removed modules do not continue to show in the
        # web interface
        $status->clear();
    } else {
        # Reap before and after the wait so children that exit during the
        # sleep are collected as well.
        reap_zombies();
        wait_interval();
        reap_zombies();
    }
    die "Exiting.\n" if($sigint);
    print "\n---- ".localtime(time)."----------\n"
        unless $status->open();
}
Note: See TracBrowser for help on using the browser.