root/resmon

Revision 117de5423d008dc004459bff76cfcd44a3ab4068, 4.1 kB (checked in by Mark Harrison <mark@omniti.com>, 5 years ago)

Allow die in modules to return error messages.

Note that you should add "\n" to die messages in order to avoid having the
line number and filename included in the message.

git-svn-id: https://labs.omniti.com/resmon/branches/resmon2@313 8c0face9-b7db-6ec6-c4b3-d5f7145c7d55

  • Property mode set to 100755
Line 
1 #!/usr/bin/perl
2
# Add the lib/ directory that sits next to this script to @INC at compile
# time, so the "use Resmon::..." lines below can find the bundled modules.
BEGIN {
    require File::Basename;
    require lib;
    # dirname() yields "." when $0 carries no directory part (for example
    # "perl resmon"); stripping the last path component by hand left an empty
    # string in that case and pointed @INC at "/lib".  Calling lib->import
    # directly also avoids pasting the path into a string eval, which failed
    # to compile for paths containing a single quote.
    lib->import(File::Basename::dirname($0) . '/lib');
};
8
9 use strict;
10 use warnings;
11 use Time::HiRes qw( gettimeofday tv_interval sleep );
12 use POSIX qw( :sys_wait_h setsid );
13 use Getopt::Long;
14 use Data::Dumper;
15 use vars qw($config_file $debug $status_file $interface $port $config
16 $status $update);
17
18 use Resmon::Config;
19 use Resmon::ExtComm;
20 use Resmon::Status;
21
# Command-line switches:
#   -i <addr>  interface override        -p <port>  port override
#   -c <file>  configuration file        -f <file>  status file override
#   -d         debug flag (also keeps the process in the foreground)
#   -u         run the updater and exit
# GetOptions reports the offending switch itself and returns false; stop here
# instead of carrying on with a half-parsed command line.
GetOptions(
    "i=s" => \$interface,
    "p=i" => \$port,
    "c=s" => \$config_file,
    "d"   => \$debug,
    "f=s" => \$status_file,
    "u"   => \$update,
) or die "Invalid command line options\n";

if ($update) {
    # "use" is executed at compile time even inside a runtime conditional, so
    # the updater used to be loaded on every start.  "require" loads it only
    # when -u was actually given.
    require Resmon::Updater;
    # Directory part of $0 (empty when $0 has no directory component).
    (my $resmondir = $0) =~ s/\/?[^\/]+$//;
    exit(Resmon::Updater::update($debug, $resmondir));
}

# Default configuration file is "<script path>.conf".
$config_file ||= "$0.conf";
die "Cannot open configuration file: $config_file" unless (-r $config_file);
39
# (Re)build the global $config from $config_file, then let any true values
# given with -f, -p and -i take precedence over what was loaded.
sub configure {
    $config = Resmon::Config->new($config_file);

    my %override = (
        statusfile => $status_file,
        port       => $port,
        interface  => $interface,
    );
    foreach my $key (qw(statusfile port interface)) {
        $config->{$key} = $override{$key} if ($override{$key});
    }
}
46
# Load the initial configuration before anything below consults $config.
configure();

# The handlers only raise a flag; the flags are inspected by the main loop
# between passes over the configured checks.
#   $sighup - set by SIGHUP, requests a configuration reload
#   $sigint - set by SIGINT, requests an exit
my ($sighup, $sigint) = (0, 0);

sub sighup_handler { $sighup = 1; }
sub sigint_handler { $sigint = 1; }

$SIG{'HUP'} = \&sighup_handler;
$SIG{'INT'} = \&sigint_handler;
56
# [seconds, microseconds] timestamp taken when wait_interval() last returned;
# undef until the first call.
my $rmlast = undef;

# Sleep for whatever is left of $config->{interval} (seconds) since the
# previous call returned, then restart the clock.  sleep() here is the
# Time::HiRes version imported at the top of the script.
sub wait_interval {
    $rmlast ||= [gettimeofday];
    my $remaining = $config->{interval} - tv_interval($rmlast);
    sleep($remaining) if ($remaining > 0);
    $rmlast = [gettimeofday];
}
66
# Collect every child process that has already exited, without blocking.
# waitpid(-1, WNOHANG) returns a positive pid for each child it reaps; the
# loop stops at the first non-positive result.
sub reap_zombies {
    1 while waitpid(-1, WNOHANG) > 0;
}
73
# Unless -d was given, detach from the terminal: fork, start a new session,
# point the standard handles at /dev/null, and fork once more.  Each parent
# exits and only the final child carries on.
unless ($debug) {
    # fork returns undef on failure; "fork && exit" treated that like being
    # the child and carried on without ever detaching.
    my $pid = fork;
    die "Cannot fork: $!\n" unless defined($pid);
    exit if $pid;

    defined(setsid()) or die "Cannot start a new session: $!\n";

    # Three-argument open keeps mode and path separate, and each redirection
    # is now checked.
    open(STDIN,  '<', '/dev/null') or die "Cannot redirect STDIN: $!\n";
    open(STDOUT, '>', '/dev/null') or die "Cannot redirect STDOUT: $!\n";
    open(STDERR, '>', '/dev/null') or die "Cannot redirect STDERR: $!\n";

    # From here on STDERR is /dev/null, so a failure is only visible through
    # the exit status.
    $pid = fork;
    die "Cannot fork: $!\n" unless defined($pid);
    exit if $pid;
}
82
# NOTE(review): $list is not referenced anywhere else in the visible script.
my $list = [];

# Create the status object for the configured status file and open it.
$status = Resmon::Status->new($config->{statusfile});
$status->open();

# Serve status over HTTP only when a port is configured.
if ($config->{port}) {
    $status->serve_http_on(@{$config}{qw(interface port authuser authpass)});
}
89
# Main loop.  Each pass visits every monitor object of every configured
# module, takes its cached metrics or runs its handler under a timeout,
# stores and writes the result, and closes the status object.  It then exits
# on SIGINT, reloads on SIGHUP, or sleeps until the next interval.
while (1) {
    while (my ($module_name, $mod_configs) = each %{$config->{Module}}) {
        foreach my $monitor_obj (@$mod_configs) {
            my $starttime = [gettimeofday];
            # Error raised by THIS monitor's handler, if it runs.  It has to
            # start out empty for every monitor: $@ is a global, so reading
            # it when no eval ran (cached metrics) would report whatever
            # error an earlier monitor left behind.
            my $checkproblem = '';
            # Get old status if it hasn't expired
            my $check_metric = $monitor_obj->get_cached_metrics();
            # Otherwise, run the check
            if (!$check_metric) {
                my $timeout = $monitor_obj->{'check_timeout'} ||
                    $config->{'timeout'};
                eval {
                    # Install the handler before arming the alarm so the
                    # signal can never arrive while no handler is in place.
                    local $SIG{ALRM} = sub { die "alarm\n" };
                    alarm($timeout);
                    $check_metric = $monitor_obj->handler();
                    alarm 0;
                };
                # Capture $@ right away, before any later call can reset it.
                $checkproblem = $@;
                # Also disarm when the handler died before reaching alarm 0.
                alarm 0;
                # Store the last metrics for use by fresh_status_msg later
                $monitor_obj->cache_metrics($check_metric);
            }
            my $results = {
                last_runtime_seconds => sprintf("%.6f",
                    tv_interval($starttime)),
                metric => $check_metric
            };
            if ($checkproblem) {
                chomp $checkproblem;
                # "alarm" is the text our own ALRM handler dies with; any
                # other text is the message the module died with.
                my $message = ($checkproblem eq "alarm")
                    ? "Check timeout"
                    : "$checkproblem";
                $results->{metric} = { "error" => [$message, "s"]};
                Resmon::ExtComm::clean_up();
            }
            $status->store($module_name,$monitor_obj->{'check_name'}, $results);
            $status->write($module_name,$monitor_obj->{'check_name'},
                $results->{'metric'}, $debug);
        }
    }
    $status->close();
    die "Exiting.\n" if($sigint);
    if ($sighup) {
        # Reload configuration (and modules) on SIGHUP
        $sighup = 0;
        print STDERR "Reloading modules\n";
        # Go through configure() so the -f/-p/-i command-line overrides are
        # re-applied to the reloaded configuration, as they were at startup.
        configure();
        # Needed to ensure any removed modules do not continue to show in the
        # web interface
        $status->clear();
    } else {
        reap_zombies();
        wait_interval();
        reap_zombies();
    }
    die "Exiting.\n" if($sigint);
    # Print a timestamped separator when $status->open() returns false.
    print "\n---- ".localtime(time)."----------\n"
        unless $status->open();
}
Note: See TracBrowser for help on using the browser.