#!/bin/bash
#
# pg-recover.sh - drives the stored procedure shipped in pg-recover.sql to
# recover rows from a damaged table into "<table>_recovery", retrying with
# extra skip ranges whenever a pass fails. Run without arguments for usage.

# Absolute path of the directory holding this script; pg-recover.sql is
# loaded from there.
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )

# Positional parameters. The first six are mandatory and are checked right
# after this section; the last two are optional.
PGR_USER="${1:-}"             # Postgres user to connect with
PGR_HOST="${2:-}"             # Postgres server host
PGR_BASE="${3:-}"             # Postgres database
PGR_TABLE="${4:-}"            # table to recover
PGR_PRIMARY_KEY="${5:-}"      # name of the SERIAL primary key column
PGR_NONNULL_COL="${6:-}"      # name of a different non-null column
PGR_COMMIT_SIZE="${7:-500}"   # rows recovered per committed transaction
PGR_START_AT="${8:-}"         # optional primary key value to start at

# Mutable state shared by the functions defined later in this script.
PGR_SKIPS_SQL=""      # skip blocks accumulated after failed passes
PGR_START_AT_SQL=""   # replacement text for the pgr_start_at placeholder
PASS_NO=0             # number of recovery passes started so far

# Every one of the first six arguments is mandatory: when any is missing,
# print the usage text and stop with a non-zero status.
if [[ -z "$PGR_USER" || -z "$PGR_HOST" || -z "$PGR_BASE" \
    || -z "$PGR_TABLE" || -z "$PGR_PRIMARY_KEY" || -z "$PGR_NONNULL_COL" ]]; then
    # Quoted delimiter: the text is emitted verbatim, nothing is expanded.
    cat <<'EOF'
USAGE: pg-recover.sh <user> <host> <database> <table> <primary key> <nonnull col> [<commit size=500>] [<start at>]

user - the Postgres user to connect with
host - the Postgres server host
database - the Postgres database
table - the Postgres table
primary key - the name of the SERIAL primary key column
nonnull col - the name of a DIFFERENT non-null column on the table
commit size - how many rows to recover before committing the transaction (default: 500)
start at - start at the specific primary key (descending)

Copyright (c) 2025 Garrett Mills <shout@garrettmills.dev>
https://code.garrettmills.dev/garrettmills/pg-recover
EOF
    exit 1
fi

# When a starting key was given, turn it into an assignment to the "cnt"
# variable; pgr_populate_proc later substitutes this text for the pgr_start_at
# placeholder of pg-recover.sql. Without a starting key the placeholder is
# replaced by the empty string.
if [[ -n "$PGR_START_AT" ]]; then
    PGR_START_AT_SQL="cnt := ${PGR_START_AT};"
fi

# Drop and re-create the recovery table based on the original table's DDL.
#
# Globals:  PGR_USER, PGR_HOST, PGR_BASE, PGR_TABLE (all read-only here)
# Outputs:  progress messages on stdout (plus whatever psql prints)
# Returns:  0 on success; terminates the script with psql's exit status when
#           either statement fails.
pgr_reset_recovery() {
    local recovery_table="${PGR_TABLE}_recovery"

    echo "Resetting recovery table..."

    # Failures are checked explicitly instead of toggling `set -e`/`set -x`,
    # so the shell options the script was started with stay untouched.
    psql -U "$PGR_USER" -h "$PGR_HOST" \
        -c "drop table if exists ${recovery_table}" "$PGR_BASE" || exit
    psql -U "$PGR_USER" -h "$PGR_HOST" \
        -c "create table ${recovery_table} (like ${PGR_TABLE})" "$PGR_BASE" || exit

    echo "Done."
}

# Escape a value so sed treats it literally in the replacement half of an
# s/// command: backslash, "/" (the delimiter used below) and "&" each get a
# leading backslash. "|" is used as the delimiter here so that the bracket
# expression can list "/" and "\" without ambiguity.
_pgr_sed_escape() {
    printf '%s' "$1" | sed -e 's|[\\/&]|\\&|g'
}

# Replace the placeholders in the pg-recover stored procedure and (re-)create it.
#
# Reads ${SCRIPT_DIR}/pg-recover.sql and writes ./pg-recover.sql.inst with:
#   pgr_primary_key -> $PGR_PRIMARY_KEY     pgr_commit_size -> $PGR_COMMIT_SIZE
#   pgr_table       -> $PGR_TABLE           pgr_start_at    -> $PGR_START_AT_SQL
#   pgr_nonnull_col -> $PGR_NONNULL_COL
# and every line containing "pgr_skips" swapped for the text of $PGR_SKIPS_SQL.
# The instantiated file is then fed to psql on stdin.
#
# Returns: 0 on success; terminates the script with the failing command's
#          status when the template cannot be processed or psql fails.
pgr_populate_proc() {
    local template="${SCRIPT_DIR}/pg-recover.sql"
    local instance="pg-recover.sql.inst"
    local key table column size start

    key="$(_pgr_sed_escape "$PGR_PRIMARY_KEY")" || exit
    table="$(_pgr_sed_escape "$PGR_TABLE")" || exit
    column="$(_pgr_sed_escape "$PGR_NONNULL_COL")" || exit
    size="$(_pgr_sed_escape "$PGR_COMMIT_SIZE")" || exit
    start="$(_pgr_sed_escape "$PGR_START_AT_SQL")" || exit

    # One pass, in the same order the substitutions were always applied.
    # Writing to a fresh file (rather than editing in place) avoids the
    # GNU-only form of `sed -i` and leaves no .bak file behind. The skip SQL
    # arrives on stdin: `r` queues it for output and `d` drops the
    # placeholder line itself.
    sed \
        -e "s/pgr_primary_key/${key}/g" \
        -e "s/pgr_table/${table}/g" \
        -e "s/pgr_nonnull_col/${column}/g" \
        -e "s/pgr_commit_size/${size}/g" \
        -e "s/pgr_start_at/${start}/g" \
        -e '/pgr_skips/{' \
        -e 'r /dev/stdin' \
        -e 'd' \
        -e '}' \
        "$template" > "$instance" <<< "$PGR_SKIPS_SQL" || exit

    psql -U "$PGR_USER" -h "$PGR_HOST" "$PGR_BASE" < "$instance" || exit
}

# Wait for the Postgres server to come back online (e.g. after a crash).
#
# Arguments (both optional; the defaults match the previously fixed values):
#   $1 - maximum number of connection attempts (default: 100)
#   $2 - seconds to sleep between attempts (default: 10)
# Globals:  PGR_USER, PGR_HOST, PGR_BASE (read)
# Outputs:  one progress line per attempt on stdout
# Returns:  0 as soon as a trivial query succeeds; terminates the script with
#           status 1 (message on stderr) once every attempt has failed.
pgr_wait_online() {
    local tries="${1:-100}"
    local interval="${2:-10}"

    while (( tries > 0 )); do
        echo " - Waiting for Postgres server (tries: $tries)"
        tries=$((tries - 1))

        if psql -U "$PGR_USER" -h "$PGR_HOST" -c "select 1;" "$PGR_BASE" > /dev/null 2>&1; then
            return 0
        fi

        # Only pause when another attempt will follow.
        if (( tries > 0 )); then
            sleep "$interval"
        fi
    done

    echo "Postgres server did not come back online in time" >&2
    exit 1
}

# Try to recover rows from the bad table to the recovery table, repeating
# until a pass finishes without error.
#
# Every pass resets the recovery table, re-installs the stored procedure
# (with all skip blocks gathered so far) and calls pg_recover_proc(), sending
# its output to pg-recover.log. When the call fails (e.g. the server
# crashed), the last line containing "PGR_" within the final 50 lines of that
# log is taken as the progress marker; its last space-separated field is
# assumed to be the primary key being processed. A skip block covering one
# commit size on either side of that key is appended to PGR_SKIPS_SQL, the
# function waits for the server, dumps what the failed pass recovered to
# pgr-attempt-<pass>.sql and starts the next pass.
#
# Globals:  reads PGR_USER, PGR_HOST, PGR_BASE, PGR_TABLE, PGR_COMMIT_SIZE;
#           updates PASS_NO and PGR_SKIPS_SQL.
# Returns:  0 once a pass succeeds. Terminates the script with status 1 when
#           a failed pass left no numeric marker in the log: without one the
#           skip range would be meaningless and the passes would repeat
#           forever.
pgr_recovery_pass() {
    local status marker last_id prev_id next_id

    while true; do
        PASS_NO="$((PASS_NO + 1))"
        echo "Attempting recovery pass #${PASS_NO}..."

        pgr_reset_recovery
        pgr_populate_proc

        psql -U "$PGR_USER" -h "$PGR_HOST" -c "call pg_recover_proc()" "$PGR_BASE" > pg-recover.log 2>&1
        status=$?
        if (( status == 0 )); then
            return 0
        fi

        echo " - Recovery pass failed. Attempting to skip invalid primary key range."

        # Last marker line, then everything after its final space.
        marker="$(tail -n 50 pg-recover.log | grep 'PGR_' | tail -n 1)"
        last_id="${marker##* }"
        if ! [[ "$last_id" =~ ^-?[0-9]+$ ]]; then
            echo "ERROR: No numeric PGR_ progress marker found in the last 50 lines of pg-recover.log; cannot determine a primary key range to skip. Aborting." >&2
            exit 1
        fi

        prev_id=$((last_id - PGR_COMMIT_SIZE))
        next_id=$((last_id + PGR_COMMIT_SIZE))
        echo " - LAST ID: $last_id | BLOCK RANGE: $prev_id - $next_id"

        # Append a block that jumps "cnt" past the suspect range.
        printf -v PGR_SKIPS_SQL '\n%s\nIF cnt <= %s AND cnt > %s THEN\ncnt := %s;\nEND IF;\n' \
            "$PGR_SKIPS_SQL" "$next_id" "$prev_id" "$prev_id"

        pgr_wait_online

        echo " - Dumping recovery data from last attempt"
        pg_dump -U "$PGR_USER" -h "$PGR_HOST" --table "${PGR_TABLE}_recovery" "$PGR_BASE" > "pgr-attempt-${PASS_NO}.sql" || echo " - WARN: Failed to dump recovery data"
    done
}

# Let 'er rip!
# Wait until the server answers, run recovery passes until one succeeds, then
# export whatever ended up in the recovery table.
pgr_wait_online
pgr_recovery_pass

echo "Exporting recovery data..."
if ! pg_dump -U "$PGR_USER" -h "$PGR_HOST" --table "${PGR_TABLE}_recovery" "$PGR_BASE" > "pgr-final-attempt.sql"; then
    # Report the failure through the exit status as well, so callers and
    # automation can tell that no usable export was produced.
    echo "ERROR: Failed to dump final recovery data" >&2
    exit 1
fi