pg-recover/pg-recover.sh

135 lines
4.2 KiB
Bash
Raw Permalink Normal View History

2025-01-11 21:20:07 +00:00
#!/bin/bash
#
# Argument handling for pg-recover.sh.
#
# Usage:
#   pg-recover.sh <user> <host> <database> <table> <primary key> <nonnull col> [<commit size=500>] [<start at>]

# Directory containing this script; the pg-recover.sql template is read from it.
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )

# Positional parameters. The first six are required; the last two are optional.
PGR_USER="${1:-}"
PGR_HOST="${2:-}"
PGR_BASE="${3:-}"
PGR_TABLE="${4:-}"
PGR_PRIMARY_KEY="${5:-}"
PGR_NONNULL_COL="${6:-}"
PGR_COMMIT_SIZE="${7:-500}"
PGR_START_AT="${8:-}"

# State shared with the functions below.
PGR_SKIPS_SQL=""      # accumulated SQL that jumps over primary key ranges to skip
PGR_START_AT_SQL=""   # SQL that seeds the starting primary key (empty when not requested)
PASS_NO=0             # number of recovery passes attempted so far

# Every required argument must be non-empty; otherwise show the usage and stop.
if [[ -z "$PGR_USER" || -z "$PGR_HOST" || -z "$PGR_BASE" \
  || -z "$PGR_TABLE" || -z "$PGR_PRIMARY_KEY" || -z "$PGR_NONNULL_COL" ]]; then
  cat <<'USAGE'
USAGE: pg-recover.sh <user> <host> <database> <table> <primary key> <nonnull col> [<commit size=500>] [<start at>]

user - the Postgres user to connect with
host - the Postgres server host
database - the Postgres database
table - the Postgres table
primary key - the name of the SERIAL primary key column
nonnull col - the name of a DIFFERENT non-null column on the table
commit size - how many rows to recover before committing the transaction (default: 500)
start at - start at the specific primary key (descending)

Copyright (c) 2025 Garrett Mills <shout@garrettmills.dev>
https://code.garrettmills.dev/garrettmills/pg-recover
USAGE
  exit 1
fi

# The commit size is used in shell arithmetic and substituted into the SQL
# template, so reject anything that is not a positive integer up front.
if ! [[ "$PGR_COMMIT_SIZE" =~ ^[0-9]+$ ]] || (( 10#$PGR_COMMIT_SIZE < 1 )); then
  echo "ERROR: commit size must be a positive integer (got: ${PGR_COMMIT_SIZE})" >&2
  exit 1
fi
# Force base 10 so a value such as "0500" is not later read as octal.
PGR_COMMIT_SIZE="$((10#$PGR_COMMIT_SIZE))"

if [[ -n "$PGR_START_AT" ]]; then
  # The start key is pasted verbatim into the generated SQL; only accept an integer.
  if ! [[ "$PGR_START_AT" =~ ^-?[0-9]+$ ]]; then
    echo "ERROR: start at must be an integer primary key (got: ${PGR_START_AT})" >&2
    exit 1
  fi
  PGR_START_AT_SQL="cnt := ${PGR_START_AT};"
fi
# Throw away any existing "<table>_recovery" table and create a fresh, empty
# one declared as (like <table>).
# Globals read: PGR_USER, PGR_HOST, PGR_BASE, PGR_TABLE
pgr_reset_recovery() {
  local recovery_table="${PGR_TABLE}_recovery"
  local -a conn_opts=(-U "$PGR_USER" -h "$PGR_HOST")

  printf '%s\n' "Resetting recovery table..."

  # While the table is rebuilt, stop on the first failing command and trace
  # each psql invocation; both options are switched off again afterwards.
  set -ex
  psql "${conn_opts[@]}" -c "drop table if exists ${recovery_table}" "$PGR_BASE"
  psql "${conn_opts[@]}" -c "create table ${recovery_table} (like ${PGR_TABLE})" "$PGR_BASE"
  set +ex

  printf '%s\n' "Done."
}
# Escape a string so it is taken literally as the replacement half of a sed
# "s/pattern/replacement/" command that uses "/" as the delimiter.
# Backslash, "/" and "&" are the characters sed would otherwise interpret.
_pgr_sed_escape() {
  printf '%s' "$1" | sed -e 's|[\\/&]|\\&|g'
}

# Instantiate the pg-recover.sql template and load it into the database.
#
# Copies ${SCRIPT_DIR}/pg-recover.sql to ./pg-recover.sql.inst, replaces the
# pgr_* placeholders with the configured values, replaces the line containing
# "pgr_skips" with the accumulated skip SQL, and feeds the result to psql.
#
# Globals read: SCRIPT_DIR, PGR_USER, PGR_HOST, PGR_BASE, PGR_TABLE,
#               PGR_PRIMARY_KEY, PGR_NONNULL_COL, PGR_COMMIT_SIZE,
#               PGR_START_AT_SQL, PGR_SKIPS_SQL
# Errexit is enabled for the duration of the function (any failing step ends
# the script) and is switched off again before returning.
pgr_populate_proc() {
  local inst_file="pg-recover.sql.inst"
  local primary_key table nonnull_col commit_size start_at_sql

  set -e

  # Escape every substituted value so names containing "/", "&" or "\" do not
  # break or silently alter the sed expressions below.
  primary_key="$(_pgr_sed_escape "$PGR_PRIMARY_KEY")"
  table="$(_pgr_sed_escape "$PGR_TABLE")"
  nonnull_col="$(_pgr_sed_escape "$PGR_NONNULL_COL")"
  commit_size="$(_pgr_sed_escape "$PGR_COMMIT_SIZE")"
  start_at_sql="$(_pgr_sed_escape "$PGR_START_AT_SQL")"

  rm -f "$inst_file"
  cp "${SCRIPT_DIR}/pg-recover.sql" "$inst_file"

  # One pass over the file: the substitutions run in the same order as before,
  # then the "pgr_skips" line is swapped for the skip SQL read from stdin.
  sed -i.bak \
    -e "s/pgr_primary_key/${primary_key}/g" \
    -e "s/pgr_table/${table}/g" \
    -e "s/pgr_nonnull_col/${nonnull_col}/g" \
    -e "s/pgr_commit_size/${commit_size}/g" \
    -e "s/pgr_start_at/${start_at_sql}/g" \
    -e "/pgr_skips/ {r /dev/stdin" -e "d}" \
    "$inst_file" <<< "$PGR_SKIPS_SQL"

  psql -U "$PGR_USER" -h "$PGR_HOST" "$PGR_BASE" < "$inst_file"

  set +e
}
# Block until the Postgres server answers a trivial query (for example while it
# restarts after a crash). Polls up to 100 times with a 10 second pause after
# each failed attempt; if the server never answers, prints a message and exits
# the script with status 1.
# Globals read: PGR_USER, PGR_HOST, PGR_BASE
pgr_wait_online() {
  local attempts_left

  for (( attempts_left = 100; attempts_left > 0; attempts_left-- )); do
    echo " - Waiting for Postgres server (tries: $attempts_left)"

    # Only the exit status of the probe matters; discard its output.
    if psql -U "$PGR_USER" -h "$PGR_HOST" -c "select 1;" "$PGR_BASE" &> /dev/null; then
      return
    fi

    sleep 10
  done

  echo "Postgres server did not come back online in time"
  exit 1
}
# Try to recover rows from the bad table into the recovery table, repeating
# until a pass completes.
#
# Each pass resets the recovery table, re-creates the stored procedure and
# calls pg_recover_proc(), with all output captured in ./pg-recover.log.
# When a pass fails (e.g. the server crashes), the last "PGR_" line in the tail
# of the log supplies the last primary key reported; a block of
# PGR_COMMIT_SIZE keys on either side of it is added to PGR_SKIPS_SQL, the
# partial recovery table is dumped to pgr-attempt-<pass>.sql, and another pass
# is started.
#
# If the log holds no usable primary key there is nothing to base a skip range
# on, so the script exits with status 1 instead of retrying forever with a
# meaningless range.
#
# Globals read:    PGR_USER, PGR_HOST, PGR_BASE, PGR_TABLE, PGR_COMMIT_SIZE
# Globals written: PASS_NO, PGR_SKIPS_SQL
# Returns: 0 once a pass completes successfully.
pgr_recovery_pass() {
  local proc_stat last_id prev_id next_id

  while true; do
    PASS_NO="$((PASS_NO + 1))"
    echo "Attempting recovery pass #${PASS_NO}..."
    pgr_reset_recovery
    pgr_populate_proc

    # Capture the status explicitly so a failed pass is handled here whether
    # or not errexit happens to be active.
    proc_stat=0
    psql -U "$PGR_USER" -h "$PGR_HOST" -c "call pg_recover_proc()" "$PGR_BASE" > pg-recover.log 2>&1 || proc_stat="$?"
    if [[ "$proc_stat" -eq 0 ]]; then
      return 0
    fi

    echo " - Recovery pass failed. Attempting to skip invalid primary key range."

    # Last whitespace-separated field of the most recent PGR_ line in the log tail.
    last_id="$(tail -n 50 pg-recover.log | grep 'PGR_' | tail -n 1 | awk '{ print $NF }')"
    if ! [[ "$last_id" =~ ^-?[0-9]+$ ]]; then
      echo "ERROR: Could not determine the last processed primary key from pg-recover.log (exit status ${proc_stat}); cannot choose a range to skip." >&2
      exit 1
    fi

    prev_id="$((last_id - PGR_COMMIT_SIZE))"
    next_id="$((last_id + PGR_COMMIT_SIZE))"
    echo " - LAST ID: $last_id | BLOCK RANGE: $prev_id - $next_id"

    # Append a rule that jumps the cursor past the suspect block.
    PGR_SKIPS_SQL="
$PGR_SKIPS_SQL
IF cnt <= $next_id AND cnt > $prev_id THEN
cnt := $prev_id;
END IF;
"

    pgr_wait_online
    echo " - Dumping recovery data from last attempt"
    # Best effort: a failed intermediate dump must not stop the recovery.
    pg_dump -U "$PGR_USER" -h "$PGR_HOST" --table "${PGR_TABLE}_recovery" "$PGR_BASE" > "pgr-attempt-${PASS_NO}.sql" || echo " - WARN: Failed to dump recovery data"
  done
}
# Let 'er rip!
# Wait for the server, run recovery passes until one completes, then export
# the contents of the recovery table to pgr-final-attempt.sql.
pgr_wait_online
pgr_recovery_pass
echo "Exporting recovery data..."
if ! pg_dump -U "$PGR_USER" -h "$PGR_HOST" --table "${PGR_TABLE}_recovery" "$PGR_BASE" > "pgr-final-attempt.sql"; then
  # A failed final export means there is no usable output; report it on stderr
  # and make the exit status reflect it.
  echo "ERROR: Failed to dump final recovery data" >&2
  exit 1
fi