1 |
#!/usr/bin/perl -w |
---|
2 |
|
---|
3 |
# Script entry point: build the monitor object and hand control over to it.
package main;

# The pragmas further down in this file only start at the Monitoring package,
# so the entry code needs its own strictures.
use strict;
use warnings;

my $program = Monitoring->new();
$program->run();    # enters the endless main loop of the monitor

exit;
---|
8 |
|
---|
9 |
package Monitoring; |
---|
10 |
use strict; |
---|
11 |
use English qw( -no_match_vars ); |
---|
12 |
use Time::HiRes qw( time ); |
---|
13 |
use POSIX qw( strftime ); |
---|
14 |
use File::Spec; |
---|
15 |
use File::Path qw( mkpath ); |
---|
16 |
use Data::Dumper; |
---|
17 |
use IO::Select; |
---|
18 |
use IO::Handle; |
---|
19 |
|
---|
20 |
# Constructor.
#
# Returns an empty hash-based object blessed into the invoking class.
# All state is added afterwards by the other methods.
sub new {
    my ( $class ) = @_;

    my $self = {};
    bless $self, $class;

    return $self;
}
---|
24 |
|
---|
25 |
# Top-level driver of the monitor.
#
# Loads and validates the configuration, creates the IO::Select set that is
# used to watch outputs of checks, starts the persistent checks and finally
# enters the main loop.
sub run {
    my ( $self ) = @_;

    # configuration has to be loaded before it can be validated
    for my $step ( qw( read_config validate_config ) ) {
        $self->$step();
    }

    my $selector = IO::Select->new();
    $self->{ 'select' } = $selector;

    $self->start_persistent_processes();
    $self->main_loop();

    return;
}
---|
37 |
|
---|
38 |
# Endless processing loop.
#
# Every iteration:
#   1. records the current time in $self->{'current_time'},
#   2. lets update_logger_filehandles() switch log files if needed,
#   3. waits (for at most the calculated timeout) for output of checks,
#   4. passes every readable handle to handle_read(),
#   5. starts periodic checks that are due.
sub main_loop {
    my ( $self ) = @_;

    while ( 1 ) {
        $self->{ 'current_time' } = time();
        $self->update_logger_filehandles();

        my $wait_for = $self->calculate_timeout();
        my $selector = $self->{ 'select' };

        foreach my $readable ( $selector->can_read( $wait_for ) ) {
            $self->handle_read( $readable );
        }

        $self->start_periodic_processes();
    }
}
---|
52 |
|
---|
53 |
# Handle one filehandle that the select loop reported as readable.
#
# Arguments:
#   $fh - readable filehandle; it has to be the 'input' handle of one of the
#         checks stored in $self->{'checks'}.
#
# Behaviour:
#   * reads the data that is currently available on $fh, without ever
#     blocking on a handle that has nothing more to offer,
#   * when nothing could be read (end of stream or read error) the handle is
#     removed from the select set and closed, and a periodic check gets its
#     next start rescheduled,
#   * otherwise, unless the check has 'ignore' set, the data is appended to
#     the log filehandle of the check, with every line prefixed by a
#     timestamp built from $self->{'current_time'}.
#
# Dies when $fh does not belong to any known check.
sub handle_read {
    my $self = shift;
    my $fh   = shift;

    # Find the check that owns this handle. Handles are compared by their
    # stringified references.
    my $C;
    for my $candidate ( values %{ $self->{ 'checks' } } ) {
        my $candidate_fh = $candidate->{ 'input' };
        next unless $candidate_fh;
        next if "$candidate_fh" ne "$fh";
        $C = $candidate;
        last;
    }
    die "Data from unknown input?! It shouldn't *ever* happen\n" unless $C;

    my $chunk_size = 8192;
    my $read_data  = '';
    my $probe      = IO::Select->new( $fh );
    while ( 1 ) {
        my $buffer;
        my $read_bytes = sysread( $fh, $buffer, $chunk_size );

        # undef (read error) and 0 (end of stream) both end the reading
        last unless $read_bytes;
        $read_data .= $buffer;

        # short read - everything that was available has been consumed
        last if $read_bytes < $chunk_size;

        # A full chunk does not prove that more data is waiting. Reading
        # again blindly could block the whole monitor, so ask first.
        last unless $probe->can_read( 0 );
    }

    if ( '' eq $read_data ) {

        # Nothing to read: the source is finished.
        $self->{ 'select' }->remove( $fh );
        close $fh;
        delete $C->{ 'input' };
        return unless 'periodic' eq $C->{ 'type' };

        # When the check finished before its next scheduled start, the next
        # start is moved to one interval from now. Overdue checks keep their
        # (past) next_call, so they are started again right away.
        $C->{ 'next_call' } = $self->{ 'current_time' } + $C->{ 'interval' } if $self->{ 'current_time' } < $C->{ 'next_call' };
        return;
    }
    return if $C->{ 'ignore' };

    my $line_prefix = strftime( '%Y-%m-%d %H:%M:%S %Z | ', localtime( $self->{ 'current_time' } ) );
    $read_data =~ s/^/$line_prefix/gm;

    # make sure that the logged chunk always ends with a newline
    $read_data =~ s/([^\n])\z/$1\n/;
    print { $C->{ 'fh' } } $read_data;
    $C->{ 'fh' }->flush();
    return;
}
---|
92 |
|
---|
93 |
# Start a single check.
#
# Arguments:
#   $C - hashref describing the check; its 'exec' value is either a command
#        line, or (when it starts with '<') a name of file to be read.
#
# The opened handle is added to the select set, and remembered as
# $C->{'input'}. Dies when the command/file cannot be opened.
sub run_check {
    my ( $self, $C ) = @_;

    # work on a copy, so that the configured 'exec' value stays untouched
    my $command = $C->{ 'exec' };

    # leading '<' means: read the file, instead of running a command
    my $reads_file = ( $command =~ s/\A\s*<\s*// );
    my $mode       = $reads_file ? '<' : '-|';

    my $fh;
    unless ( open $fh, $mode, $command ) {
        die "Cannot open [$command] in mode [$mode]: $OS_ERROR\n";
    }

    $C->{ 'input' } = $fh;
    $self->{ 'select' }->add( $fh );

    return;
}
---|
107 |
|
---|
108 |
# Start all periodic checks that are due.
#
# A periodic check is due when it is not running (has no 'input' handle), and
# its 'next_call' is either not set yet, or not later than current_time.
# Every started check gets 'next_call' set to one interval from current_time.
sub start_periodic_processes {
    my ( $self ) = @_;
    my $now = $self->{ 'current_time' };

    my @due = grep {
               'periodic' eq $_->{ 'type' }
            && !defined $_->{ 'input' }
            && !( defined $_->{ 'next_call' } && $_->{ 'next_call' } > $now )
    } values %{ $self->{ 'checks' } };

    for my $C ( @due ) {
        $self->run_check( $C );
        $C->{ 'next_call' } = $now + $C->{ 'interval' };
    }

    return;
}
---|
119 |
|
---|
120 |
# Start every check of type 'persistent'. Checks of other types are left
# alone.
sub start_persistent_processes {
    my ( $self ) = @_;

    my @persistent = grep { 'persistent' eq $_->{ 'type' } } values %{ $self->{ 'checks' } };
    for my $C ( @persistent ) {
        $self->run_check( $C );
    }

    return;
}
---|
128 |
|
---|
129 |
# Calculate how long the main loop may wait for data.
#
# Only non-persistent checks that are not running now (no 'input' handle) are
# taken into account.
#
# Returns:
#   0   - when any such check was never scheduled (no 'next_call'),
#   0.5 - when the nearest 'next_call' is less than 0.5s away (or when there
#         is nothing to wait for),
#   otherwise the number of seconds from current_time to the nearest
#   'next_call'.
sub calculate_timeout {
    my ( $self ) = @_;
    my $now = $self->{ 'current_time' };

    my @waiting = grep { 'persistent' ne $_->{ 'type' } && !defined $_->{ 'input' } } values %{ $self->{ 'checks' } };

    # check that was never scheduled has to be started without waiting
    return 0 if grep { !defined $_->{ 'next_call' } } @waiting;

    my ( $nearest ) = sort { $a <=> $b } map { $_->{ 'next_call' } } @waiting;
    $nearest = $now unless defined $nearest;

    my $sleep_time = $nearest - $now;

    # never wait less than 0.5s, to avoid too aggressive calls.
    return 0.5 if $sleep_time < 0.5;
    return $sleep_time;
}
---|
148 |
|
---|
149 |
# Make sure that every logged check writes to the log file of the current
# hour.
#
# Name of log file contains date and hour, so the work is done only when the
# suffix built from current_time differs from the one used previously. Then
# the day directory (logdir/YYYY/MM/DD) is created if needed, and for every
# check without 'ignore' set the old log handle is closed and a new file is
# opened for appending.
#
# Dies when a log file cannot be opened.
sub update_logger_filehandles {
    my ( $self ) = @_;

    my @now = localtime( $self->{ 'current_time' } );

    my $file_suffix = strftime( '-%Y-%m-%d-%H.log', @now );
    my $previous    = $self->{ 'previous-suffix' };
    return if defined $previous && $previous eq $file_suffix;
    $self->{ 'previous-suffix' } = $file_suffix;

    my $full_directory = File::Spec->catfile( $self->{ 'logdir' }, strftime( '%Y/%m/%d', @now ) );
    unless ( -e $full_directory ) {
        mkpath( [ $full_directory ], 0, oct( "750" ) );
    }

    my $checks = $self->{ 'checks' };
    for my $check ( keys %{ $checks } ) {
        my $C = $checks->{ $check };
        next if $C->{ 'ignore' };

        # get rid of handle to log file of the previous hour
        my $old_fh = $C->{ 'fh' };
        if ( $old_fh ) {
            close $old_fh;
            delete $C->{ 'fh' };
        }

        my $full_name = File::Spec->catfile( $full_directory, $check . $file_suffix );
        my $fh;
        unless ( open $fh, '>>', $full_name ) {
            die "Cannot write to $full_name: $OS_ERROR\n";
        }
        $C->{ 'fh' } = $fh;
    }

    return;
}
---|
177 |
|
---|
178 |
# Validate the configuration loaded into the object.
#
# Requirements:
#   * 'logdir' has to be set,
#   * there has to be at least one check,
#   * every check needs 'type' (persistent or periodic) and 'exec',
#   * every periodic check needs 'interval' being a positive integer.
#
# As a side effect every check gets its own name stored under 'name' key.
#
# Dies with a descriptive message on the first problem found (checks are
# processed in order of their names).
sub validate_config {
    my $self = shift;

    die "GLOBAL.logdir was not provided in config!\n" unless defined $self->{ 'logdir' };
    die "There are no checks to be run!\n" unless defined $self->{ 'checks' };

    for my $check ( sort keys %{ $self->{ 'checks' } } ) {
        my $C = $self->{ 'checks' }->{ $check };
        $C->{ 'name' } = $check;

        # missing type gets its own message, instead of warnings about
        # uninitialized value and an empty "Bad type" complaint
        die "Undefined type for check $check!\n" unless defined $C->{ 'type' };
        die "Bad type " . $C->{ 'type' } . " in check $check!\n" unless $C->{ 'type' } =~ m{\A(?:persistent|periodic)\z};

        # without exec there is nothing to run, so fail before starting
        die "Undefined exec for check $check!\n" unless defined $C->{ 'exec' };

        next unless $C->{ 'type' } eq 'periodic';

        die "Undefined interval for check $check!\n" unless defined $C->{ 'interval' };
        die "Bad interval (" . $C->{ 'interval' } . ") in check $check!\n" unless $C->{ 'interval' } =~ m{\A[1-9]\d*\z};
    }
    return;
}
---|
196 |
|
---|
197 |
# Load configuration from the file named by the first command line argument.
#
# The argument is removed from @ARGV. Comments (lines starting with #) and
# empty lines are skipped; leading and trailing whitespace is removed from
# all other lines, which have to be one of:
#   GLOBAL.logdir = VALUE   - stored as $self->{'logdir'}
#   check.NAME.PARAM = VALUE - stored as $self->{'checks'}->{NAME}->{PARAM},
#                              where PARAM is one of: type, exec, interval,
#                              ignore
#
# Parameter names are recognized regardless of letter case, and stored in
# lower case, so that "check.ps.TYPE" sets the same parameter as
# "check.ps.type". Names of checks are stored exactly as written.
#
# Dies when no config file was given, when it cannot be opened, or when it
# contains a line that is not recognized.
sub read_config {
    my $self = shift;

    die "You have to provide name of config file! Check: perldoc $PROGRAM_NAME\n" if 0 == scalar @ARGV;
    my $config_file_name = shift @ARGV;

    open my $fh, '<', $config_file_name or die "Cannot open config file ($config_file_name) : $OS_ERROR\n";
    while ( my $line = <$fh> ) {
        next if $line =~ m{^\s*#};      # comment
        next if $line =~ m{^\s*\z};     # empty line
        $line =~ s{\A\s*}{};            # removing leading spaces
        $line =~ s{\s*\z}{};            # removing trailing spaces

        if ( my ( $logdir ) = $line =~ m{ \A GLOBAL\.logdir \s* = \s* (\S.*) \z }xmsi ) {
            $self->{ 'logdir' } = $logdir;
            next;
        }

        if ( my ( $name, $param, $value ) = $line =~ m{ \A check\.([A-Za-z0-9_]+)\.(type|exec|interval|ignore) \s* = \s* (\S.*) \z }xmsi ) {

            # Match above is case insensitive, while the rest of the program
            # looks for lower case keys - normalize the parameter name.
            $self->{ 'checks' }->{ $name }->{ lc $param } = $value;
            next;
        }

        die "Unknown line: [ $line ]\n";
    }
    close $fh;
    return;
}
---|
222 |
|
---|
223 |
1; |
---|
224 |
|
---|
225 |
=head1 system_monitoring.pl |
---|
226 |
|
---|
227 |
=head2 USAGE |
---|
228 |
|
---|
229 |
system_monitoring.pl <config_file> |
---|
230 |
|
---|
231 |
=head2 DESCRIPTION |
---|
232 |
|
---|
233 |
system_monitoring.pl script is meant to provide a single, simple solution for logging system data which changes more often than is practical for systems like cacti/nagios. |
---|
234 |
|
---|
235 |
It is meant to be run on some low-privilege account, and gather the data, which are partitioned automatically by source, and time, and stored in simple text files. |
---|
236 |
|
---|
237 |
After running, system_monitoring.pl will check config, and if there are no errors - will start processing checks. |
---|
238 |
|
---|
239 |
All checks work in parallel, so there is no chance single check could lock whole system_monitoring.pl. |
---|
240 |
|
---|
241 |
=head2 Configuration file |
---|
242 |
|
---|
243 |
Format of the configuration file is kept as simple as possible, to make this script very portable - which in this particular case means: no external (aside from core perl) dependencies. |
---|
244 |
|
---|
245 |
Each line should be one of: |
---|
246 |
|
---|
247 |
=over |
---|
248 |
|
---|
249 |
=item * Comment (starts with #) |
---|
250 |
|
---|
251 |
=item * Empty line (just white space characters) |
---|
252 |
|
---|
253 |
=item * Setting |
---|
254 |
|
---|
255 |
=back |
---|
256 |
|
---|
257 |
Where setting line looks like: |
---|
258 |
|
---|
259 |
PARAM=value |
---|
260 |
|
---|
261 |
with optional leading, trailing or around "=" whitespace. |
---|
262 |
|
---|
263 |
Recognized parameters are: |
---|
264 |
|
---|
265 |
=over |
---|
266 |
|
---|
267 |
=item * GLOBAL.logdir - full path to log directory |
---|
268 |
|
---|
269 |
=item * check.XXX.type - type of check with name XXX |
---|
270 |
|
---|
271 |
=item * check.XXX.exec - what should be executed to get data for check XXX |
---|
272 |
|
---|
273 |
=item * check.XXX.interval - how often to run check XXX |
---|
274 |
|
---|
275 |
=item * check.XXX.ignore - should output be ignored? |
---|
276 |
|
---|
277 |
=back |
---|
278 |
|
---|
279 |
There are only two supported types: |
---|
280 |
|
---|
281 |
=over |
---|
282 |
|
---|
283 |
=item * persistent - which means given program is to be run in background, and whatever it will return should be logged. Such program "interval" will be ignored. |
---|
284 |
|
---|
285 |
=item * periodic - which means that given program is to be run periodically as it will exit after returning data |
---|
286 |
|
---|
287 |
=back |
---|
288 |
|
---|
289 |
"exec" parameter is simply command line, to be run via shell, that will run the program. |
---|
290 |
|
---|
291 |
If exec parameter starts with '<' character (with optional whitespace characters after), it is treated as filename to be read, and logged. |
---|
292 |
|
---|
293 |
Due to the way it is internally processed - using "<" approach makes sense only for periodic checks - in case of persistent checks it would simply copy the file at start of system_monitoring.pl, and |
---|
294 |
ignore any changes to it afterwards. If you'd like to have something like 'tail -f' - use tail -f. |
---|
295 |
|
---|
296 |
interval is time (in seconds) how often given program (of periodic type) should be run. |
---|
297 |
|
---|
298 |
ignore is optional parameter which is checked using Perl boolean logic (any value other than empty string or 0 is treated as true). Since system_monitoring doesn't let setting empty string as value |
---|
299 |
for option - it's best to not include ignore option for checks you want to log, and just add '...ignore=1' for those that you want to ignore. |
---|
300 |
|
---|
301 |
If ignore is set, system_monitoring will not log output from such check. |
---|
302 |
|
---|
303 |
This is helpful to build-in compression of older logs, using for example: |
---|
304 |
|
---|
305 |
check.cleanup.type=periodic |
---|
306 |
check.cleanup.interval=300 |
---|
307 |
check.cleanup.exec=find /var/log/monitoring -type f -name '*.log' -mmin +120 -print0 | xargs -0 gzip |
---|
308 |
check.cleanup.ignore=1 |
---|
309 |
|
---|
310 |
"XXX" (name of check) can consist only of upper and lower case letters, digits, and character _. That is it has to match regular expression: |
---|
311 |
|
---|
312 |
/\A[A-Za-z0-9_]+\z/ |
---|
313 |
|
---|
314 |
Output from all programs will be logged in files named: |
---|
315 |
|
---|
316 |
/logdir/YYYY/MM/DD/XXX-YYYY-MM-DD-HH.log |
---|
317 |
|
---|
318 |
where YYYY, MM, DD and HH are date and time parts of current (as of logging moment) time. |
---|
319 |
|
---|
320 |
HH is 0 padded 24-hour style hour. |
---|
321 |
|
---|
322 |
Example configuration: |
---|
323 |
|
---|
324 |
# Global configuration, log directory |
---|
325 |
GLOBAL.logdir=/var/tmp/monitoring |
---|
326 |
|
---|
327 |
# Logging iostat output in 10 second intervals |
---|
328 |
check.iostat.type=persistent |
---|
329 |
check.iostat.exec=iostat -kx 10 |
---|
330 |
|
---|
331 |
# Logging "ps auxwwn" every 30 seconds. |
---|
332 |
check.ps.type=periodic |
---|
333 |
check.ps.exec=ps auxwwn |
---|
334 |
check.ps.interval=30 |
---|
335 |
|
---|
336 |
=head2 LICENSE |
---|
337 |
|
---|
338 |
Copyright (c) 2010, OmniTI, Inc. |
---|
339 |
|
---|
340 |
Permission to use, copy, modify, and distribute this software and its |
---|
341 |
documentation for any purpose, without fee, and without a written agreement |
---|
342 |
is hereby granted, provided that the above copyright notice and this |
---|
343 |
paragraph and the following two paragraphs appear in all copies. |
---|
344 |
|
---|
345 |
IN NO EVENT SHALL OmniTI, Inc. BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, |
---|
346 |
SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, |
---|
347 |
ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF |
---|
348 |
OmniTI, Inc. HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
---|
349 |
|
---|
350 |
OmniTI, Inc. SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT |
---|
351 |
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A |
---|
352 |
PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, |
---|
353 |
AND OmniTI, Inc. HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, |
---|
354 |
UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
---|
355 |
|
---|
356 |
=head2 COPYRIGHT |
---|
357 |
|
---|
358 |
The OmniPITR project is Copyright (c) 2010 OmniTI. All rights reserved. |
---|
359 |
|
---|