root/trunk/tools/archive_old_tables_to_csvs

Revision 212, 8.2 kB (checked in by depesz, 3 years ago)

make it dump the tables in chronological order

  • Property svn:executable set to *
Line 
1 #!/usr/bin/perl -w
2 my $program = OmniTI::TableArchiver->new();
3 $program->run();
4
5 exit;
6
7 package OmniTI::TableArchiver;
8 use strict;
9 use warnings;
10 use Carp;
11 use English qw( -no_match_vars );
12 use Data::Dumper;
13 use Getopt::Long qw( :config no_ignore_case );
14 use Sys::Hostname;
15 use File::Basename;
16 use File::Temp qw( tempdir );
17 use File::Spec;
18 use POSIX qw( strftime );
19
20 sub new {
21     my $class = shift;
22     my $self  = {};
23     return bless $self, $class;
24 }
25
26 sub slurp {
27     my $self     = shift;
28     my $filename = shift;
29
30     open my $fh, '<', $filename or croak "Cannot open $filename : $OS_ERROR\n";
31     local $INPUT_RECORD_SEPARATOR = undef;
32     my $reply = <$fh>;
33     close $fh;
34
35     return $reply;
36 }
37
38 sub run {
39     my $self = shift;
40     $self->read_arguments();
41     $self->prepare_working_dir();
42     $self->verify_db_connection();
43     $self->get_list_of_tables_to_archive();
44
45     for my $table ( @{ $self->{ 'tables' } } ) {
46         $self->verbose_msg( 2, 'Archiving: %s', $table );
47         $self->archive_table( $table );
48     }
49
50     $self->verbose_msg( 1, 'All done.' );
51     return;
52 }
53
54 sub archive_table {
55     my $self  = shift;
56     my $table = shift;
57
58     my $hostname = hostname();
59     $hostname =~ s/\..*//;
60
61     my $filename = sprintf '%s.%s.csv%s', $hostname, $table, $self->{ 'extension' };
62
63     my $output_filepath = File::Spec->catfile( $self->{ 'archive' }, $filename );
64
65     my $sql = sprintf 'COPY %s TO stdout;', $table;
66
67     $self->get_psql_output_to_file( $sql, $output_filepath, 1 );
68
69     my $drop_return = $self->get_psql_output_to_scalar( 'DROP TABLE ' . $table );
70
71     return;
72 }
73
74 sub get_list_of_tables_to_archive {
75     my $self = shift;
76     my $sql  = sprintf q{
77 SET search_path = pg_catalog;
78 SELECT
79     c.oid::regclass
80 FROM
81     pg_class c
82     join pg_namespace n on c.relnamespace = n.oid
83 WHERE
84     n.nspname !~ '^pg_'
85     and n.nspname <> 'information_schema'
86     and n.nspname ~ '%s'
87     and c.relname ~ '%s'
88     and c.relname ~ '[0-9]{8}'
89     and c.relkind = 'r'
90     and to_date( regexp_replace( c.relname, '^.*([0-9]{8})$', E'\\\\1'), 'YYYYMMDD') < ( select now() - '%s'::interval )
91 ORDER BY 1
92         }, $self->{ 'schema' }, $self->{ 'table' }, $self->{ 'keep' };
93
94     my $tables_as_string = $self->get_psql_output_to_scalar( $sql );
95     my @tables = grep { /\S/ } split /\r?\n/, $tables_as_string;
96
97     if ( 0 == scalar @tables ) {
98         $self->verbose_msg( 1, 'There are no tables matching given criteria.' );
99         exit;
100     }
101
102     $self->verbose_msg( 1, '%u tables found to work on.', scalar @tables );
103     $self->{ 'tables' } = \@tables;
104
105     return;
106 }
107
108 sub prepare_working_dir {
109     my $self = shift;
110     my $dir = tempdir( basename( $PROGRAM_NAME ) . '.XXXXXXXX', 'CLEANUP' => 1, 'TMPDIR' => 1, );
111     $self->{ 'workdir' } = $dir;
112     return;
113 }
114
115 sub get_psql_output_to_file {
116     my $self = shift;
117     my ( $query, $stdout_file, $compress ) = @_;
118
119     my $query_file = File::Spec->catfile( $self->{ 'workdir' }, 'query.sql' );
120     open my $fh, '>', $query_file or croak( "Cannot write to $query_file : $OS_ERROR\n" );
121     print $fh $query;
122     close $fh;
123     my $stderr_file = File::Spec->catfile( $self->{ 'workdir' }, 'query.stderr' );
124
125     my $compressor = $compress ? ( "| " . $self->{ 'compressor' } . ' - ' ) : "";
126     my $command = sprintf '%s -qAtX -f %s %s > %s 2> %s', $self->{ 'psql' }, quotemeta( $query_file ), $compressor, quotemeta( $stdout_file ), quotemeta( $stderr_file );
127     $self->verbose_msg( 3, 'Running query: %s',   $query );
128     $self->verbose_msg( 4, 'Running command: %s', $command );
129
130     my $result = system $command;
131     my $stderr = $self->slurp( $stderr_file );
132
133     croak "Running [$command] failed with status $result.\n$stderr\n\n" if ( $result ) || ( $stderr );
134     unlink $query_file, $stderr_file;
135
136     return;
137 }
138
139 sub get_psql_output_to_scalar {
140     my $self  = shift;
141     my $query = shift;
142
143     my $filename = File::Spec->catfile( $self->{ 'workdir' }, 'query.output' );
144     $self->get_psql_output_to_file( $query, $filename );
145     my $result = $self->slurp( $filename );
146
147     unlink $filename;
148     return $result;
149 }
150
151 sub verbose_msg {
152     my $self  = shift;
153     my $level = shift;
154     return if $level > $self->{ 'verbose' };
155
156     my $format = shift;
157     $format =~ s/([^\n])\z/$1\n/;
158
159     print strftime( '%Y-%m-%d %H:%M:%S %Z : ', localtime time );
160     printf $format, @_;
161     return;
162 }
163
164 sub verify_db_connection {
165     my $self = shift;
166
167     my $reply = $self->get_psql_output_to_scalar( 'SELECT current_user, current_database()' );
168
169     croak( "Got unparseable output from psql when trying connection verification:\n$reply\n" ) unless $reply =~ m{\A([^|]+)\|([^|]+?)\r?\n\z};
170     my ( $user, $database ) = ( $1, $2 );
171     $self->verbose_msg( 1, 'Working with: username=%s ; database=%s', $user, $database );
172     return;
173 }
174
175 sub read_arguments {
176     my $self = shift;
177
178     my $vars = { 'verbose' => 0, 'psql' => 'psql', };
179     $self->show_help_and_die()
180         unless GetOptions( $vars, qw( dbname|d=s host|h=s port|p=i username|U=s schema|n=s table|t=s keep|k=s archive|a=s compressor|c=s extension|x=s verbose|v+ psql|P=s help|? ) );
181     $self->show_help_and_die() if $vars->{ 'help' };
182     $self->{ 'verbose' } = $vars->{ 'verbose' };
183
184     $ENV{ 'PGDATABASE' } = $vars->{ 'dbname' }   if defined $vars->{ 'dbname' };
185     $ENV{ 'PGHOST' }     = $vars->{ 'host' }     if defined $vars->{ 'host' };
186     $ENV{ 'PGPORT' }     = $vars->{ 'port' }     if defined $vars->{ 'port' };
187     $ENV{ 'PGUSER' }     = $vars->{ 'username' } if defined $vars->{ 'username' };
188
189     croak "You didn't provide --schema, and it's obligatory. Use --schema=^ to match all schemata.\n" unless $vars->{ 'schema' };
190     croak "You didn't provide --table, and it's obligatory. Use --table=^ to match all tables.\n"     unless $vars->{ 'table' };
191     croak "You didn't provide --keep, and it's obligatory.\n"                                         unless $vars->{ 'keep' };
192     croak "You didn't provide --archive, and it's obligatory.\n"                                      unless $vars->{ 'archive' };
193     croak "You didn't provide --compressor, and it's obligatory.\n"                                   unless $vars->{ 'compressor' };
194
195     croak "Given archive is not directory!\n" unless -d $vars->{ 'archive' };
196     croak "Given archive is not writable!\n"  unless -w $vars->{ 'archive' };
197
198     $vars->{ 'extension' } = '' unless defined $vars->{ 'extension' };
199     $vars->{ 'extension' } =~ s/^([^.])/.$1/;
200
201     @{ $self }{ qw(schema table keep archive compressor extension psql) } = @{ $vars }{ qw(schema table keep archive compressor extension psql) };
202
203     return;
204 }
205
206 sub show_help_and_die {
207     my $self = shift;
208     printf STDERR @_ if 0 < scalar @_;
209     print STDERR $self->help_msg();
210     exit( 1 );
211 }
212
213 sub help_msg {
214     return <<__EO_HELP__;
215 Syntax:
216     $PROGRAM_NAME [options]
217
218 [options]
219   * db connection *
220   --dbname     : -d : database name to connect to
221   --host       : -h : database server host or socket directory
222   --port       : -p : database server port
223   --username   : -U : database user name
224
225   * table selection *
226   --schema     : -n : regexp to match schema which will be scanned for
227                       tables to be archived
228   --table      : -t : regexp to match which tables should be archived
229   --keep       : -k : how old tables should be kept. Should match PostgreSQL
230                       INTERVAL syntax.
231
232   * output *
233   --archive    : -a : directory name where to put archive files
234   --compressor : -c : name of program to use for compression. Has to support
235                       compressing from stdin to stdout.
236   --extension  : -x : what extension should be added to generated filename
237                     : after compression. Leading dot is irrelevant.
238
239   * other *
240   --psql       : -P : path to psql program
241   --verbose    : -v : show debugging output. Add more -v to make it more
242                       verbose (up to -v -v -v -v)
243   --help       : -? : show this help page
244
245 Description:
246 This program will archive all tables matching given --schema and --table, as
247 long as their names and with 8 digits.
248
249 These 8 digits should represent date (format: YYYYMMDD), that is older than --keep.
250
251 If table is in schema that doesn't match --schema, or it's name doesn't
252 match --table, or its name doesn't end with 8 digits, or the digits don't
253 represent date older than --keep - it will be left untouched.
254
255
256 __EO_HELP__
257 }
258
259 1;
Note: See TracBrowser for help on using the browser.