Changeset 201

Show
Ignore:
Timestamp:
11/26/10 14:16:18 (3 years ago)
Author:
depesz
Message:

- some cleanup to make the code a bit more readable
- added documentation for methods in case anyone ever would need to modify it

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/tools/system_monitoring.pl

    r200 r201  
    1414use File::Spec; 
    1515use File::Path qw( mkpath ); 
    16 use Data::Dumper; 
    1716use IO::Select; 
    1817use IO::Handle; 
     
    5655 
    5756    my $C; 
    58     for my $check ( keys %{ $self->{ 'checks' } } ) { 
    59         my $tmp = $self->{ 'checks' }->{ $check }; 
     57    for my $tmp ( $self->checks ) { 
    6058        next unless $tmp->{ 'input' }; 
    6159        my $tmp_fh = $tmp->{ 'input' }; 
     
    108106sub start_periodic_processes { 
    109107    my $self = shift; 
    110     for my $C ( values %{ $self->{ 'checks' } } ) { 
     108    for my $C ( $self->checks ) { 
    111109        next unless 'periodic' eq $C->{ 'type' }; 
    112110        next if defined $C->{ 'input' }; 
     
    120118sub start_persistent_processes { 
    121119    my $self = shift; 
    122     for my $C ( values %{ $self->{ 'checks' } } ) { 
     120    for my $C ( $self->checks ) { 
    123121        next unless 'persistent' eq $C->{ 'type' }; 
    124122        $self->run_check( $C ); 
     
    128126 
    129127sub calculate_timeout { 
    130     my $self    = shift; 
     128    my $self = shift; 
     129 
    131130    my $nearest = undef; 
    132     for my $check ( keys %{ $self->{ 'checks' } } ) { 
    133         my $C = $self->{ 'checks' }->{ $check }; 
     131 
     132    for my $C ( $self->checks ) { 
    134133        next if 'persistent' eq $C->{ 'type' }; 
    135134        next if defined $C->{ 'input' }; 
     
    142141        } 
    143142    } 
     143 
    144144    $nearest = $self->{ 'current_time' } unless defined $nearest; 
    145145    my $sleep_time = $nearest - $self->{ 'current_time' }; 
     146 
    146147    return $sleep_time < 0.5 ? 0.5 : $sleep_time;    # limit sleep time to 0.5s to avoid too aggresive calls. 
    147148} 
     
    159160    mkpath( [ $full_directory ], 0, oct( "750" ) ) unless -e $full_directory; 
    160161 
    161     for my $check ( keys %{ $self->{ 'checks' } } ) { 
    162         my $C = $self->{ 'checks' }->{ $check }; 
     162    for my $C ( $self->checks ) { 
    163163        next if $C->{ 'ignore' }; 
    164164 
     
    168168        } 
    169169 
    170         my $full_name = File::Spec->catfile( $full_directory, $check . $file_suffix ); 
     170        my $full_name = File::Spec->catfile( $full_directory, $C->{ 'name' } . $file_suffix ); 
    171171        open my $fh, '>>', $full_name or die "Cannot write to $full_name: $OS_ERROR\n"; 
    172172        $C->{ 'fh' } = $fh; 
     
    176176} 
    177177 
     178sub checks { 
     179    my $self = shift; 
     180    return @{ $self->{ 'checks' } }; 
     181} 
     182 
    178183sub validate_config { 
    179184    my $self = shift; 
    180185 
    181186    die "GLOBAL.logdir was not provided in config!\n" unless defined $self->{ 'logdir' }; 
    182     die "There are no checks to be run!\n"            unless defined $self->{ 'checks' }; 
    183  
    184     for my $check ( sort keys %{ $self->{ 'checks' } } ) { 
    185         my $C = $self->{ 'checks' }->{ $check }; 
     187    die "There are no checks to be run!\n"            unless defined $self->{ 'pre_checks' }; 
     188 
     189    my @checks = (); 
     190    while ( my ( $check, $C ) = each %{ $self->{ 'pre_checks' } } ) { 
    186191        $C->{ 'name' } = $check; 
     192        push @checks, $C; 
    187193 
    188194        die "Bad type " . $C->{ 'type' } . " in check $check!\n" unless $C->{ 'type' } =~ m{\A(?:persistent|periodic)\z}; 
     
    192198        die "Bad interval (" . $C->{ 'interval' } . ") in check $check!\n" unless $C->{ 'interval' } =~ m{\A[1-9]\d*\z}; 
    193199    } 
     200 
     201    $self->{ 'checks' } = \@checks; 
     202    delete $self->{ 'pre_checks' }; 
     203 
    194204    return; 
    195205} 
     
    212222        } 
    213223        elsif ( $line =~ m{ \A check\.([A-Za-z0-9_]+)\.(type|exec|interval|ignore) \s* = \s* (\S.*) \z }xmsi ) { 
    214             $self->{ 'checks' }->{ $1 }->{ $2 } = $3; 
     224            $self->{ 'pre_checks' }->{ $1 }->{ $2 } = $3; 
    215225            next; 
    216226        } 
     
    231241=head2 DESCRIPTION 
    232242 
    233 system_monitoring.pl script is meant to provide single and solution for logging system data which change more often than it's practical for systems like cacti/nagios. 
    234  
    235 It is meant to be run on some low-privilege account, and gather the data, which are partitioned automatically by source, and time, and stored in simple text files. 
    236  
    237 After running, system_monitor.pl will check config, and if there are no errors - will start processing checks. 
    238  
    239 All checks work in parallel, so there is no chance single check could lock whole system_monitoring.pl. 
     243system_monitoring.pl script is meant to provide a single solution for 
     244logging system data which change more often than is practical for systems 
     245like cacti/nagios. 
     246 
     247It is meant to be run on some low-privilege account, and gather the data, 
     248which are partitioned automatically by source, and time, and stored in 
     249simple text files. 
     250 
     251Once started, system_monitoring.pl will check its config, and if there are no 
     252errors - will start processing checks. 
     253 
     254All checks work in parallel, so there is no chance that a single check could 
     255lock up the whole system_monitoring.pl. 
    240256 
    241257=head2 Configuration file 
    242258 
    243 Format of the configuration file is kept as simple as possible, to make this script very portable - which in this particular case means: no external (aside from core perl) dependencies. 
     259Format of the configuration file is kept as simple as possible, to make this 
     260script very portable - which in this particular case means: no external 
     261(aside from core perl) dependencies. 
    244262 
    245263Each line should be one of: 
     
    281299=over 
    282300 
    283 =item * persistent - which means given program is to be run in background, and whatever it will return should be logged. Such program "interval" will be ignored. 
    284  
    285 =item * periodic - which means that given program is to be run periodically as it will exit after returning data 
     301=item * persistent - which means given program is to be run in background, 
     302and whatever it will return should be logged. Such program "interval" will 
     303be ignored. 
     304 
     305=item * periodic - which means that given program is to be run periodically 
     306as it will exit after returning data 
    286307 
    287308=back 
    288309 
    289 "exec" parameter is simply command line, to be run via shell, that will run the program. 
    290  
    291 If exec parameter starts with '<' character (with optional whitespace characters after), it is treated as filename to be read, and logged. 
    292  
    293 Due to the way it is internally processed - using "<" approach makes sense only for periodic checks - in case of permenent checks it would simply copy the file at start of system_monitoring.pl, and 
    294 ignore any changes to it afterwards. If you'd like to have something like 'tail -f' - use tail -f. 
    295  
    296 interval is time (in seconds) how often given program (of periodic type) should be run. 
    297  
    298 ignore is optional parameter which is checked using Perl boolean logic (any value other than empty string or 0 ar treated as true). Since system_monitoring doesn't let setting empty string as value 
    299 for option - it's best to not include ignore option for checks you want to log, and just add '...ignore=1' for those that you want to ignore. 
     310"exec" parameter is simply command line, to be run via shell, that will run 
     311the program. 
     312 
     313If exec parameter starts with '<' character (with optional whitespace 
     314characters after), it is treated as filename to be read, and logged. 
     315 
     316Due to the way it is internally processed - using "<" approach makes sense 
     317only for periodic checks - in case of persistent checks it would simply copy 
     318the file at start of system_monitoring.pl, and ignore any changes to it 
     319afterwards. If you'd like to have something like 'tail -f' - use tail -f. 
     320 
     321interval is time (in seconds) how often given program (of periodic type) 
     322should be run. 
     323 
     324ignore is optional parameter which is checked using Perl boolean logic (any 
     325value other than empty string or 0 is treated as true). Since 
     326system_monitoring doesn't let setting empty string as value for option - 
     327it's best to not include ignore option for checks you want to log, and just 
     328add '...ignore=1' for those that you want to ignore. 
    300329 
    301330If ignore is set, system_monitoring will not log output from such check. 
     
    308337    check.cleanup.ignore=1 
    309338 
    310 "XXX" (name of check) can consist only of upper and lower case letters, digits, and character _. That is it has to match regular expression: 
     339"XXX" (name of check) can consist only of upper and lower case letters, 
     340digits, and character _. That is it has to match regular expression: 
    311341 
    312342    /\A[A-Za-z0-9_]+\z/ 
     
    316346    /logdir/YYYY/MM/DD/XXX-YYYY-MM-DD-HH.log 
    317347 
    318 where YYYY, MM, DD and HH are date and time parts of current (as of logging moment) time. 
     348where YYYY, MM, DD and HH are date and time parts of current (as of logging 
     349moment) time. 
    319350 
    320351HH is 0 padded 24-hour style hour. 
     
    333364    check.ps.exec=ps auxwwn 
    334365    check.ps.interval=30 
     366 
     367=head2 INTERNALS 
     368 
     369Program itself is very short: 
     370 
     371    my $program = Monitoring->new(); 
     372    $program->run(); 
     373 
     374This creates $program as object of Monitoring class (defined in the same 
     375file), and calls method run() on it. 
     376 
     377=head3 METHODS 
     378 
     379=head4 new 
     380 
     381Just object constructor. Nothing to see there. 
     382 
     383=head4 run 
     384 
     385Initialization of stuff, and call to main_loop. Reads and validates config 
     386(by calls to appropriate methods), initializes IO::Select object for 
     387asynchronous I/O, starts persistent checks (again, using a special method), and 
     388enters main_loop(); 
     389 
     390=head4 main_loop 
     391 
     392The core of the program. Infinite loop, which - upon every iteration: 
     393 
     394=over 
     395 
     396=item * updates logging filehandles 
     397 
     398=item * checks if there is anything to read in input filehandles (from 
     399checks) 
     400 
     401=item * reads whatever is to be read from checks 
     402 
     403=item * runs new periodic checks if the time has come to do it 
     404 
     405=back 
     406 
     407Checking for data in input filehandles is done with timeout, which is 
     408calculated to finish when next check will have to be run, so the program 
     409uses virtually no CPU unless there is some data to be worked on. 
     410 
     411=head4 handle_read 
     412 
     413Since all we get from IO::Select is filehandle to read from, this method has 
     414first to find which check given filehandle belongs to. 
     415 
     416Afterwards, it reads whatever is available in the filehandle. In case there 
     417is error on the filehandle - it closes the filehandle - as it means that 
     418output for given check ended. 
     419 
     420Every line from check is prefixed with timestamp and logged to appropriate 
     421logfile. 
     422 
     423Additionally, when closing the filehandle (on error), it sets when given 
     424check should be run next time. 
     425 
     426=head4 run_check 
     427 
     428Simple helper function which runs external program (or opens filehandle for 
     429reading from file), and puts it into check data. 
     430 
     431=head4 start_periodic_processes 
     432 
     433Iterates over all periodic processes, checks which should be already run, 
     434and runs them. 
     435 
     436=head4 start_persistent_processes 
     437 
     438Iterates over all persistent processes and runs them. This is done only 
     439once, from run() method. 
     440 
     441=head4 calculate_timeout 
     442 
     443Helper function which calculates how long should main_loop() wait for data 
     444from IO::Select before it has to run another round of 
     445start_periodic_processes(). 
     446 
     447=head4 update_logger_filehandles 
     448 
     449Checks if current timestamp has changed enough to require swapping files, 
     450and if yes - closes old ones and opens new ones - making all necessary 
     451directories to make it happen. 
     452 
     453=head4 checks 
     454 
     455Wrapper to be able to write: 
     456 
     457    for my $C ( $self->checks ) { 
     458 
     459instead of: 
     460 
     461    for my $C ( @{ $self->{ 'checks'} } ) { 
     462 
     463=head4 validate_config 
     464 
     465Verifies that config values make sense, and reorganizes them into final data 
     466structure (checks hashes in $self->{'checks'} arrayref). 
     467 
     468=head4 read_config 
     469 
     470Just like name suggests - reads given config to memory. Very simple parser 
     471based on regular expressions. 
    335472 
    336473=head2 LICENSE