root/trunk/tools/compact_table

Revision 191, 9.2 kB (checked in by depesz, 4 years ago)

add ability to turn on/off vacuums, and choose better field for updates

  • Property svn:executable set to *
Line 
1 #!/bin/bash
2
3 # Function definitions have to be here.
4 # For actual code ran when you run this script, search for "# MAIN PROGRAM #"
5
6 # Function    : show_reindexation_sql
7 # Description : Post-compating indexes of the table are usuallt quite
8 #             : bloated, so they should be reindexed. This functions shows
9 #             : how to do it.
10 show_reindexation_sql() {
11     $PSQL -q -A -t -X -c "select pg_get_indexdef(indexrelid) from pg_index where indrelid = '$TABLE_SCHEMA.$TABLE_NAME'::regclass" | while read LINE
12     do
13         OLD_INDEX_NAME="$( echo "$LINE" | sed 's/ ON .*//' | awk '{print $NF}' )"
14         echo "SELECT 'Size of index ($TABLE_SCHEMA.$OLD_INDEX_NAME) before reindex:', pg_relation_size('$TABLE_SCHEMA.$OLD_INDEX_NAME');"
15         echo "$LINE" | sed 's/ ON /_new ON /;s/ INDEX / INDEX CONCURRENTLY /;s/$/;/'
16         echo "DROP INDEX $TABLE_SCHEMA.$OLD_INDEX_NAME;"
17         echo "ALTER INDEX $TABLE_SCHEMA.${OLD_INDEX_NAME}_new RENAME TO $OLD_INDEX_NAME;"
18         echo "SELECT 'Size of index ($TABLE_SCHEMA.$OLD_INDEX_NAME) after reindex:', pg_relation_size('$TABLE_SCHEMA.$OLD_INDEX_NAME');"
19     done
20 }
21
22 # Function    : clean_page
23 # Description : does updates on in the table, for every row in given page,
24 #             : up to the moment, when *all* rows are moved to earlier page,
25 #             : or *any* row is moved to later page
26 #             : Finishes also if there are no rows on the page.
27 clean_page() {
28     USE_PAGE="$1"
29     if (( "$USE_PAGE" < 0 ))
30     then
31         RETURN="ERROR"
32         return
33     fi
34     ti=0
35     while (( $ti < $TUPLES_PER_PAGE ))
36     do
37         ti=$(( $ti + 1 ))
38         NEW_CTID="$( $PSQL -q -A -t -X -c "UPDATE $TABLE_SCHEMA.$TABLE_NAME SET $COLUMN_TO_UPDATE = $COLUMN_TO_UPDATE WHERE ctid = '($USE_PAGE,$ti)'::tid RETURNING ctid" | tr -d "()" )"
39         if [[ -z "$NEW_CTID" ]]
40         then
41             continue
42         fi
43         NEW_PAGE=${NEW_CTID%%,*}
44         NEW_TUPLE=${NEW_CTID##*,}
45         if (( $NEW_PAGE > $USE_PAGE ))
46         then
47             RETURN="nok"
48             return
49         fi
50         if (( $NEW_PAGE < $USE_PAGE ))
51         then
52             continue
53         fi
54         if (( $NEW_TUPLE < $ti ))
55         then
56             ti=$(( $NEW_TUPLE - 1 ))
57         fi
58     done
59     RETURN="ok"
60     return
61 }
62
63 # Function    : get_relation_page_count
64 # Description : Returns (in global RETURN variable) number of pages in relation
65 get_relation_page_count() {
66     RETURN=$( $PSQL -q -A -t -X -c "SELECT pg_relation_size( '$TABLE_SCHEMA.$TABLE_NAME') / current_setting('block_size')::int4" )
67 }
68
69 # Function    : get_base_settings
70 # Description : Gets base information from database - size of relation
71 #             : before all processing, page size, header size.
72 get_base_settings() {
73     TUPLES_PER_PAGE=$( psql -q -A -t -X -c "select current_setting('block_size')::int4 / sum(attlen) from pg_attribute where attrelid = '$TABLE_SCHEMA.$TABLE_NAME'::regclass and attnum < 0" )
74     verbose_msg "At most, we can have %d tuples per page.\n" "$TUPLES_PER_PAGE"
75     COLUMN_TO_UPDATE=$( psql -q -A -t -X -c "select quote_ident(attname) from pg_attribute where attrelid = '$TABLE_SCHEMA.$TABLE_NAME'::regclass and attnum > 0 and not attisdropped and attnum::text not in (select regexp_split_to_table(indkey::text, ' ') from pg_index where indrelid = '$TABLE_SCHEMA.$TABLE_NAME'::regclass) order by attnum asc limit 1" )
76     if [[ -z "$COLUMN_TO_UPDATE" ]]
77     then
78         COLUMN_TO_UPDATE=$( psql -q -A -t -X -c "select quote_ident(attname) from pg_attribute where attrelid = '$TABLE_SCHEMA.$TABLE_NAME'::regclass and attnum > 0 and not attisdropped order by attnum asc limit 1" )
79     fi
80     if [[ -z "$COLUMN_TO_UPDATE" ]]
81     then
82         show_help_and_exit "Cannot find column to update. Does this table have any columns?"
83     fi
84     verbose_msg "Updates will be done on column: %s\n" "$COLUMN_TO_UPDATE"
85 }
86
87
88 # Function    : show_help_and_exit
89 # Description : Like the name suggests, it prints help page, and exits script
90 #             : If given args, treats them as format and arguments for printf
91 #             : and prints before help page
92 show_help_and_exit () {
93     if (( $# > 0 ))
94     then
95         FORMAT="ERROR:\n$1\n\n"
96         printf "$FORMAT" "${@:2:$#}" >&2
97     fi
98     cat <<_EO_HELP_
99 Syntax:
100     $0 [arguments] [options]
101
102 Arguments:
103     -n       : namespace in which table exists
104     -t       : table name
105     -k       : number of pages to try to free
106 Options:
107     -U       : database user name
108     -h       : database server host or socket directory
109     -p       : database server port
110     -d       : database name to connect to
111     -c       : path to psql program
112     -v       : show information while processing log files
113     -i       : turn on initial vacuum
114     -I       : turn off initial vacuum
115     -f       : turn on final vacuum
116     -F       : turn off final vacuum
117
118 Defaults:
119     -c psql -n public -k 10 -f
120
121 Description:
122
123 $0 connects to given table, and tries to compact given table, as described
124 in this blogpost:
125 http://blog.endpoint.com/2010/09/reducing-bloat-without-locking.html
126 _EO_HELP_
127     exit
128 }
129
130 # Function    : verbose_msg
131 # Description : Calls printf on given args, but only if VERBOSE is on.
132 verbose_msg () {
133     if (( $VERBOSE == 1 ))
134     then
135         printf "%s : " "$( date '+%Y-%m-%d %H:%M:%S' )"
136         printf "$@"
137     fi
138 }
139
140 # Function    : read_arguments
141 # Description : Reads arguments from command line, and validates them
142 #             : default values are in "MAIN PROGRAM" to simplify finding them
143 read_arguments () {
144     while getopts ':U:h:p:d:c:n:t:k:xviIfF' opt "$@"
145     do
146         case "$opt" in
147             U)
148                 export PGUSER="$OPTARG"
149                 ;;
150             h)
151                 export PGHOST="$OPTARG"
152                 ;;
153             p)
154                 export PGPORT="$OPTARG"
155                 ;;
156             d)
157                 export PGDATABASE="$OPTARG"
158                 ;;
159             c)
160                 export PSQL="$OPTARG"
161                 ;;
162             n)
163                 export TABLE_SCHEMA="$OPTARG"
164                 ;;
165             t)
166                 export TABLE_NAME="$OPTARG"
167                 ;;
168             k)
169                 export CLEAN_PAGES="$OPTARG"
170                 ;;
171             i)
172                 export INITIAL_VACUUM="1"
173                 ;;
174             I)
175                 export INITIAL_VACUUM="0"
176                 ;;
177             f)
178                 export FINAL_VACUUM="1"
179                 ;;
180             F)
181                 export FINAL_VACUUM="0"
182                 ;;
183             x)
184                 EXTENDED_DEBUG=1
185                 ;;
186             v)
187                 VERBOSE=1
188                 ;;
189             :)
190                 show_help_and_exit "Option -%s requires argument" "$OPTARG"
191                 ;;
192             \?)
193                 if [[ "$OPTARG" == "?" ]]
194                 then
195                     show_help_and_exit
196                 fi
197                 show_help_and_exit "Unknown option -%s" "$OPTARG"
198                 ;;
199         esac
200     done
201     if [[ -z "$TABLE_SCHEMA" ]]
202     then
203         show_help_and_exit "Table schema (-n) cannot be empty!"
204     fi
205     if [[ -z "$TABLE_NAME" ]]
206     then
207         show_help_and_exit "Table name (-t) cannot be empty!"
208     fi
209     if [[ -z "$CLEAN_PAGES" ]]
210     then
211         show_help_and_exit "Clean pages (-k) cannot be empty!"
212     fi
213     if [[ ! "$CLEAN_PAGES" =~ ^[1-9][0-9]*$ ]]
214     then
215         show_help_and_exit "Number of pages to clean (%s) is not a valid number (1+, integer)" "$CLEAN_PAGES"
216     fi
217 }
218
219 # MAIN PROGAM #
220
221 # default values for options
222 EXTENDED_DEBUG=0
223 VERBOSE=0
224 export TABLE_SCHEMA=public
225 export CLEAN_PAGES=10
226 export PSQL='psql'
227 export FINAL_VACUUM=1
228
229 # Set locale to sane one, to speed up comparisons, and be sure that < and > on
230 # strings work ok.
231 export LC_ALL=C
232
233 # Read arguments from command line
234 read_arguments "$@"
235
236 # Print settings
237 verbose_msg "$0 Settings:
238   - CLEAN_PAGES       : $CLEAN_PAGES
239   - EXTENDED_DEBUG    : $EXTENDED_DEBUG
240   - FINAL_VACUUM      : $FINAL_VACUUM
241   - INITIAL_VACUUM    : $INITIAL_VACUUM
242   - PGDATABASE        : $PGDATABASE
243   - PGHOST            : $PGHOST
244   - PGPORT            : $PGPORT
245   - PGUSER            : $PGUSER
246   - PSQL              : $PSQL
247   - TABLE_NAME        : $TABLE_NAME
248   - TABLE_SCHEMA      : $TABLE_SCHEMA
249   - VERBOSE           : $VERBOSE
250 "
251
252 # Make sure every error past this line is critical - this is to avoid having to
253 # check return codes from all calls to external programs
254 set -e
255
256 # Turn on extended debug
257 if (( $EXTENDED_DEBUG == 1 ))
258 then
259     set -x
260 fi
261
262 get_base_settings
263
264 verbose_msg "Entering main loop.\n"
265
266 if [[ "$INITIAL_VACUUM" -eq 1 ]]
267 then
268     verbose_msg "Initial vacuuming\n"
269     $PSQL -q -A -t -X -c "VACUUM $TABLE_SCHEMA.$TABLE_NAME"
270 fi
271
272 get_relation_page_count
273 CURRENT_PAGE_COUNT=$RETURN
274 verbose_msg "Current table size: %d pages.\n" "$CURRENT_PAGE_COUNT"
275 i=0
276 while [[ $i -lt $CLEAN_PAGES ]]
277 do
278     i=$(( $i + 1 ))
279     PAGE_TO_WORK_ON=$(( $CURRENT_PAGE_COUNT - $i ))
280     verbose_msg "Working on page %d (%d of %d)\n" "$PAGE_TO_WORK_ON" "$i" "$CLEAN_PAGES"
281     clean_page $PAGE_TO_WORK_ON
282     if [[ "$RETURN" != "ok" ]]
283     then
284         break
285     fi
286 done
287
288 if [[ "$FINAL_VACUUM" -eq 1 ]]
289 then
290     verbose_msg "Final vacuuming\n"
291     $PSQL -q -A -t -X -c "VACUUM $TABLE_SCHEMA.$TABLE_NAME"
292 fi
293
294 get_relation_page_count
295 verbose_msg "Final table size: %d pages.\n" "$RETURN"
296
297 show_reindexation_sql
298
299 verbose_msg "All done.\n"
Note: See TracBrowser for help on using the browser.