root/trunk/tools/archive_old_tables_to_csvs

Revision 206, 8.2 kB (checked in by depesz, 3 years ago)

initial, written and tested, ersion of the script to archive tables to .csv files

  • Property svn:executable set to *
Line 
1 #!/usr/bin/perl -w
2 my $program = OmniTI::TableArchiver->new();
3 $program->run();
4
5 exit;
6
7 package OmniTI::TableArchiver;
8 use strict;
9 use warnings;
10 use Carp;
11 use English qw( -no_match_vars );
12 use Data::Dumper;
13 use Getopt::Long;
14 use Sys::Hostname;
15 use File::Basename;
16 use File::Temp qw( tempdir );
17 use File::Spec;
18 use POSIX qw( strftime );
19
20 sub new {
21     my $class = shift;
22     my $self  = {};
23     return bless $self, $class;
24 }
25
26 sub slurp {
27     my $self     = shift;
28     my $filename = shift;
29
30     open my $fh, '<', $filename or croak "Cannot open $filename : $OS_ERROR\n";
31     local $INPUT_RECORD_SEPARATOR = undef;
32     my $reply = <$fh>;
33     close $fh;
34
35     return $reply;
36 }
37
38 sub run {
39     my $self = shift;
40     $self->read_arguments();
41     $self->prepare_working_dir();
42     $self->verify_db_connection();
43     $self->get_list_of_tables_to_archive();
44
45     for my $table ( @{ $self->{ 'tables' } } ) {
46         $self->verbose_msg( 2, 'Archiving: %s', $table );
47         $self->archive_table( $table );
48     }
49
50     $self->verbose_msg( 1, 'All done.' );
51     return;
52 }
53
54 sub archive_table {
55     my $self  = shift;
56     my $table = shift;
57
58     my $hostname = hostname();
59     $hostname =~ s/\..*//;
60
61     my $filename = sprintf '%s.%s.csv%s', $hostname, $table, $self->{ 'extension' };
62
63     my $output_filepath = File::Spec->catfile( $self->{ 'archive' }, $filename );
64
65     my $sql = sprintf 'COPY %s TO stdout;', $table;
66
67     $self->get_psql_output_to_file( $sql, $output_filepath, 1 );
68
69     my $drop_return = $self->get_psql_output_to_scalar( 'DROP TABLE ' . $table );
70
71     return;
72 }
73
74 sub get_list_of_tables_to_archive {
75     my $self = shift;
76     my $sql  = sprintf q{
77 SET search_path = pg_catalog;
78 SELECT
79     c.oid::regclass
80 FROM
81     pg_class c
82     join pg_namespace n on c.relnamespace = n.oid
83 WHERE
84     n.nspname !~ '^pg_'
85     and n.nspname <> 'information_schema'
86     and n.nspname ~ '%s'
87     and c.relname ~ '%s'
88     and c.relname ~ '[0-9]{8}'
89     and c.relkind = 'r'
90     and to_date( regexp_replace( c.relname, '^.*([0-9]{8})$', E'\\\\1'), 'YYYYMMDD') < ( select now() - '%s'::interval )
91         }, $self->{ 'schema' }, $self->{ 'table' }, $self->{ 'keep' };
92
93     my $tables_as_string = $self->get_psql_output_to_scalar( $sql );
94     my @tables = grep { /\S/ } split /\r?\n/, $tables_as_string;
95
96     if ( 0 == scalar @tables ) {
97         $self->verbose_msg( 1, 'There are no tables matching given criteria.' );
98         exit;
99     }
100
101     $self->verbose_msg( 1, '%u tables found to work on.', scalar @tables );
102     $self->{ 'tables' } = \@tables;
103
104     return;
105 }
106
107 sub prepare_working_dir {
108     my $self = shift;
109     my $dir = tempdir( basename( $PROGRAM_NAME ) . '.XXXXXXXX', 'CLEANUP' => 1, 'TMPDIR' => 1, );
110     $self->{ 'workdir' } = $dir;
111     return;
112 }
113
114 sub get_psql_output_to_file {
115     my $self = shift;
116     my ( $query, $stdout_file, $compress ) = @_;
117
118     my $query_file = File::Spec->catfile( $self->{ 'workdir' }, 'query.sql' );
119     open my $fh, '>', $query_file or croak( "Cannot write to $query_file : $OS_ERROR\n" );
120     print $fh $query;
121     close $fh;
122     my $stderr_file = File::Spec->catfile( $self->{ 'workdir' }, 'query.stderr' );
123
124     my $compressor = $compress ? ( "| " . $self->{ 'compressor' } . ' - ' ) : "";
125     my $command = sprintf '%s -qAtX -f %s %s > %s 2> %s', $self->{ 'psql' }, quotemeta( $query_file ), $compressor, quotemeta( $stdout_file ), quotemeta( $stderr_file );
126     $self->verbose_msg( 3, 'Running query: %s',   $query );
127     $self->verbose_msg( 4, 'Running command: %s', $command );
128
129     my $result = system $command;
130     my $stderr = $self->slurp( $stderr_file );
131
132     croak "Running [$command] failed with status $result.\n$stderr\n\n" if ( $result ) || ( $stderr );
133     unlink $query_file, $stderr_file;
134
135     return;
136 }
137
138 sub get_psql_output_to_scalar {
139     my $self  = shift;
140     my $query = shift;
141
142     my $filename = File::Spec->catfile( $self->{ 'workdir' }, 'query.output' );
143     $self->get_psql_output_to_file( $query, $filename );
144     my $result = $self->slurp( $filename );
145
146     unlink $filename;
147     return $result;
148 }
149
150 sub verbose_msg {
151     my $self  = shift;
152     my $level = shift;
153     return if $level > $self->{ 'verbose' };
154
155     my $format = shift;
156     $format =~ s/([^\n])\z/$1\n/;
157
158     print strftime( '%Y-%m-%d %H:%M:%S %Z : ', localtime time );
159     printf $format, @_;
160     return;
161 }
162
163 sub verify_db_connection {
164     my $self = shift;
165
166     my $reply = $self->get_psql_output_to_scalar( 'SELECT current_user, current_database()' );
167
168     croak( "Got unparseable output from psql when trying connection verification:\n$reply\n" ) unless $reply =~ m{\A([^|]+)\|([^|]+?)\r?\n\z};
169     my ( $user, $database ) = ( $1, $2 );
170     $self->verbose_msg( 1, 'Working with: username=%s ; database=%s', $user, $database );
171     return;
172 }
173
174 sub read_arguments {
175     my $self = shift;
176
177     my $vars = { 'verbose' => 0, 'psql' => 'psql', };
178     $self->show_help_and_die()
179         unless GetOptions( $vars, qw( dbname|d=s host|h=s port|p=i username|U=s schema|n=s table|t=s keep|k=s archive|a=s compressor|c=s extension|x=s verbose|v+ psql|P=s help|? ) );
180     $self->show_help_and_die() if $vars->{ 'help' };
181     $self->{ 'verbose' } = $vars->{ 'verbose' };
182
183     $ENV{ 'PGDATABASE' } = $vars->{ 'dbname' }   if defined $vars->{ 'dbname' };
184     $ENV{ 'PGHOST' }     = $vars->{ 'host' }     if defined $vars->{ 'host' };
185     $ENV{ 'PGPORT' }     = $vars->{ 'port' }     if defined $vars->{ 'port' };
186     $ENV{ 'PGUSER' }     = $vars->{ 'username' } if defined $vars->{ 'username' };
187
188     croak "You didn't provide --schema, and it's obligatory. Use --schema=^ to match all schemata.\n" unless $vars->{ 'schema' };
189     croak "You didn't provide --table, and it's obligatory. Use --table=^ to match all tables.\n"     unless $vars->{ 'table' };
190     croak "You didn't provide --keep, and it's obligatory.\n"                                         unless $vars->{ 'keep' };
191     croak "You didn't provide --archive, and it's obligatory.\n"                                      unless $vars->{ 'archive' };
192     croak "You didn't provide --compressor, and it's obligatory.\n"                                   unless $vars->{ 'compressor' };
193
194     croak "Given archive is not directory!\n" unless -d $vars->{ 'archive' };
195     croak "Given archive is not writable!\n"  unless -w $vars->{ 'archive' };
196
197     $vars->{ 'extension' } = '' unless defined $vars->{ 'extension' };
198     $vars->{ 'extension' } =~ s/^([^.])/.$1/;
199
200     @{ $self }{ qw(schema table keep archive compressor extension psql) } = @{ $vars }{ qw(schema table keep archive compressor extension psql) };
201
202     return;
203 }
204
205 sub show_help_and_die {
206     my $self = shift;
207     printf STDERR @_ if 0 < scalar @_;
208     print STDERR $self->help_msg();
209     exit( 1 );
210 }
211
212 sub help_msg {
213     return <<__EO_HELP__;
214 Syntax:
215     $PROGRAM_NAME [options]
216
217 [options]
218   * db connection *
219   --dbname     : -d : database name to connect to
220   --host       : -h : database server host or socket directory
221   --port       : -p : database server port
222   --username   : -U : database user name
223
224   * table selection *
225   --schema     : -n : regexp to match schema which will be scanned for
226                       tables to be archived
227   --table      : -t : regexp to match which tables should be archived
228   --keep       : -k : how old tables should be kept. Should match PostgreSQL
229                       INTERVAL syntax.
230
231   * output *
232   --archive    : -a : directory name where to put archive files
233   --compressor : -c : name of program to use for compression. Has to support
234                       compressing from stdin to stdout.
235   --extension  : -x : what extension should be added to generated filename
236                     : after compression. Leading dot is irrelevant.
237
238   * other *
239   --psql       : -P : path to psql program
240   --verbose    : -v : show debugging output. Add more -v to make it more
241                       verbose (up to -v -v -v -v)
242   --help       : -? : show this help page
243
244 Description:
245 This program will archive all tables matching given --schema and --table, as
246 long as their names and with 8 digits.
247
248 These 8 digits should represent date (format: YYYYMMDD), that is older than --keep.
249
250 If table is in schema that doesn't match --schema, or it's name doesn't
251 match --table, or its name doesn't end with 8 digits, or the digits don't
252 represent date older than --keep - it will be left untouched.
253
254
255 __EO_HELP__
256 }
257
258 1;
Note: See TracBrowser for help on using the browser.