| 1 |
#!/usr/bin/perl |
|---|
| 2 |
|
|---|
| 3 |
# Make the "lib" directory that sits next to this script available in @INC
# before any of the Resmon::* modules below are compiled.
BEGIN {
    # Drop the last path component of $0 to get the script's directory.
    (my $dir = $0) =~ s/\/?[^\/]+$//;

    # A bare script name with no slash at all (e.g. "perl resmon") leaves
    # $dir empty, which would turn "$dir/lib" into the unrelated absolute
    # path "/lib".  In that case the script lives in the current directory.
    $dir = '.' if $dir eq '' && $0 !~ m{/};

    # Same effect as "use lib", but without assembling Perl code from a
    # string, so a quote character in the path can neither break nor inject
    # into the statement.  Any failure propagates as a normal die.
    require lib;
    lib->import("$dir/lib");
}
|---|
| 8 |
|
|---|
| 9 |
use strict; |
|---|
| 10 |
use warnings; |
|---|
| 11 |
use Time::HiRes qw( gettimeofday tv_interval sleep ); |
|---|
| 12 |
use POSIX qw( :sys_wait_h setsid ); |
|---|
| 13 |
use Getopt::Long; |
|---|
| 14 |
use Data::Dumper; |
|---|
| 15 |
use vars qw($config_file $debug $status_file $interface $port $config |
|---|
| 16 |
$status $update); |
|---|
| 17 |
|
|---|
| 18 |
use Resmon::Config; |
|---|
| 19 |
use Resmon::ExtComm; |
|---|
| 20 |
use Resmon::Status; |
|---|
| 21 |
|
|---|
| 22 |
# Parse the command line into the package globals declared above:
#   -i <string>  interface (overrides the configured interface)
#   -p <int>     port (overrides the configured port)
#   -c <string>  configuration file path
#   -d           debug flag (when set, the process does not daemonize)
#   -f <string>  status file (overrides the configured status file)
#   -u           run the updater and exit
# GetOptions returns false when it meets an unknown or malformed option (it
# has already printed a warning by then); stop instead of silently starting
# with a half-parsed command line.
GetOptions(
    "i=s" => \$interface,
    "p=i" => \$port,
    "c=s" => \$config_file,
    "d"   => \$debug,
    "f=s" => \$status_file,
    "u"   => \$update,
) or die "Usage: $0 [-i interface] [-p port] [-c config_file] "
       . "[-f status_file] [-d] [-u]\n";
|---|
| 30 |
|
|---|
| 31 |
# -u: hand control to the updater and exit with whatever status it returns.
if ($update) {
    # "use" is executed at compile time no matter where it appears, so it
    # loaded Resmon::Updater on every start.  "require" loads it only when an
    # update was actually requested.
    require Resmon::Updater;

    # Directory containing this script: $0 minus its last path component.
    (my $resmondir = $0) =~ s/\/?[^\/]+$//;
    # A slash-less $0 (e.g. "perl resmon -u") yields an empty string; pass
    # the current directory instead.
    # NOTE(review): assumes the updater treats this as a directory path --
    # confirm against Resmon::Updater::update.
    $resmondir = '.' if $resmondir eq '' && $0 !~ m{/};

    exit(Resmon::Updater::update($debug, $resmondir));
}
|---|
| 36 |
|
|---|
| 37 |
# Without -c, fall back to "<path of this script>.conf"; refuse to start when
# the chosen file is not readable.
$config_file = "$0.conf" unless $config_file;
unless (-r $config_file) {
    die "Cannot open configuration file: $config_file";
}
|---|
| 39 |
|
|---|
| 40 |
# Build the global $config from $config_file, then let the command-line
# values (-f, -p, -i) take precedence over what the file says.  An option
# that was not given (or is false) leaves the file's value untouched.
sub configure {
    $config = Resmon::Config->new($config_file);

    if ($status_file) {
        $config->{statusfile} = $status_file;
    }
    if ($port) {
        $config->{port} = $port;
    }
    if ($interface) {
        $config->{interface} = $interface;
    }
}

# Load the configuration once at startup.
configure();
|---|
| 48 |
|
|---|
| 49 |
# Set by SIGHUP.  The handler only records the request; the main loop checks
# the flag after each pass and reloads the configuration there.
my $sighup = 0;

sub sighup_handler {
    $sighup = 1;
}

$SIG{HUP} = \&sighup_handler;
|---|
| 52 |
|
|---|
| 53 |
# Set by SIGINT.  The handler only records the request; the main loop dies
# with "Exiting." once it sees the flag.
my $sigint = 0;

sub sigint_handler {
    $sigint = 1;
}

$SIG{INT} = \&sigint_handler;
|---|
| 56 |
|
|---|
| 57 |
# Timestamp ([seconds, microseconds]) taken at the end of the previous
# wait_interval() call; undefined until the first call.
my $rmlast;

# Sleep for whatever is left of $config->{interval} since the previous call
# returned, then record the current time as the new reference point.  On the
# first call the reference point is "now", so nearly the whole interval is
# slept.  sleep() is the Time::HiRes version, so fractional seconds work.
sub wait_interval {
    if (!defined $rmlast) {
        $rmlast = [gettimeofday];
    }

    my $remaining = $config->{interval} - tv_interval($rmlast);
    sleep($remaining) if $remaining > 0;

    $rmlast = [gettimeofday];
}
|---|
| 66 |
|
|---|
| 67 |
# Collect every child process that has already exited, without blocking.
# waitpid(-1, WNOHANG) returns a pid for each reaped child; the loop ends at
# the first non-positive result.
sub reap_zombies {
    while (1) {
        my $pid = waitpid(-1, WNOHANG);
        last if $pid <= 0;
    }
}
|---|
| 73 |
|
|---|
| 74 |
# Detach from the terminal unless -d (debug) was given: fork, start a new
# session, point the standard handles at /dev/null, then fork once more.
unless ($debug) {
    # First fork: the parent exits, the child carries on.  fork returns undef
    # on failure; without this check the foreground process would continue
    # as if it had been daemonized.
    my $pid = fork;
    die "Cannot fork: $!\n" unless defined $pid;
    exit if $pid;

    defined(setsid()) or die "Cannot start a new session: $!\n";

    # Three-argument open, each one checked.  STDERR goes last so the
    # earlier failures can still be reported.
    open(STDIN,  '<', '/dev/null') or die "Cannot read /dev/null: $!\n";
    open(STDOUT, '>', '/dev/null') or die "Cannot write /dev/null: $!\n";
    open(STDERR, '>', '/dev/null') or die "Cannot write /dev/null: $!\n";

    # Second fork: the session leader exits.  A failure here is tolerated:
    # the process is already detached and there is nowhere left to report
    # the error, so it keeps running as the original code did.
    my $grandchild = fork;
    exit if $grandchild;
}
|---|
| 82 |
|
|---|
| 83 |
# NOTE(review): $list is not referenced anywhere in the code visible here.
my $list = [];

# Create the status object backed by the configured status file and open it.
$status = Resmon::Status->new($config->{statusfile});
$status->open();

# Serve status over HTTP only when a port is configured.
if ($config->{port}) {
    $status->serve_http_on(@{$config}{qw(interface port authuser authpass)});
}
|---|
| 89 |
|
|---|
| 90 |
# Main loop.  Each pass walks every configured check ($config->{Module} maps
# module name -> { check name -> monitor object }), reuses its cached metrics
# or runs it under a timeout, and stores/writes the result through $status.
# After the pass it closes the status, honours SIGINT/SIGHUP, otherwise reaps
# children and waits for the next interval, then reopens the status.
while (1) {
    while (my ($module_name, $mod_configs) = each %{ $config->{Module} }) {
        while (my ($check_name, $monitor_obj) = each %$mod_configs) {
            my $starttime = [gettimeofday];

            # Error text belonging to THIS check only.  It starts empty so a
            # check served from the cache can never inherit the $@ left
            # behind by a previously failed check.
            my $checkproblem = '';

            # Get old status if it hasn't expired
            my $check_metric = $monitor_obj->get_cached_metrics();

            # Otherwise, run the check
            if (!$check_metric) {
                my $timeout = $monitor_obj->{'check_timeout'} ||
                    $config->{'timeout'};
                eval {
                    # Install the handler before arming the timer so the
                    # alarm can never hit the default SIGALRM action.
                    local $SIG{ALRM} = sub { die "alarm\n" };
                    alarm($timeout);
                    $check_metric = $monitor_obj->handler();
                    alarm 0;
                };
                # Also cancel the timer when the handler died for a reason
                # other than the timeout.
                alarm 0;
                # Capture $@ right away, before any later call can clobber it.
                $checkproblem = $@;
                # Store the last metrics for use by fresh_status_msg later
                $monitor_obj->cache_metrics($check_metric);
            }

            my $results = {
                last_runtime_seconds => sprintf("%.6f",
                    tv_interval($starttime)),
                metric => $check_metric
            };

            if ($checkproblem) {
                chomp $checkproblem;
                # Report the failure as an "error" metric; a timeout gets a
                # fixed, friendlier message.
                $results->{metric} = { "error" => ["$checkproblem", "s"] };
                if ($checkproblem eq "alarm") {
                    $results->{metric} = { "error" => ["Check timeout", "s"] };
                }
                Resmon::ExtComm::clean_up();
            }

            $status->store($module_name, $monitor_obj->{'check_name'},
                $results);
            $status->write($module_name, $monitor_obj->{'check_name'},
                $results->{'metric'}, $debug);
        }
    }
    $status->close();

    die "Exiting.\n" if ($sigint);

    if ($sighup) {
        # Reload configuration (and modules) on SIGHUP
        $sighup = 0;
        print STDERR "Reloading modules\n";
        # Go through configure() rather than building the config object
        # directly, so the command-line overrides (-f, -p, -i) survive the
        # reload exactly as they were applied at startup.
        configure();
        # Needed to ensure any removed modules do not continue to show in the
        # web interface
        $status->clear();
    } else {
        reap_zombies();
        wait_interval();
        reap_zombies();
    }

    die "Exiting.\n" if ($sigint);

    # Reopen the status for the next pass; a timestamp separator is printed
    # to STDOUT whenever open() returns false.
    print "\n---- ".localtime(time)."----------\n"
        unless $status->open();
}
|---|