Add script

This commit is contained in:
Garrett Mills 2025-01-11 16:20:07 -05:00
parent 6b862196f8
commit d8455cf6a0
9 changed files with 259 additions and 2 deletions

8
.idea/.gitignore vendored Normal file
View File

@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

6
.idea/misc.xml Normal file
View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" languageLevel="JDK_17" default="true" project-jdk-name="17" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/out" />
</component>
</project>

8
.idea/modules.xml Normal file
View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/pg-recover.iml" filepath="$PROJECT_DIR$/.idea/pg-recover.iml" />
</modules>
</component>
</project>

9
.idea/pg-recover.iml Normal file
View File

@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="true">
<exclude-output />
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

6
.idea/vcs.xml Normal file
View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
</component>
</project>

View File

@ -1,6 +1,6 @@
MIT License
Copyright (c) 2025 garrettmills
Copyright (c) 2025 Garrett Mills <shout@garrettmills.dev>
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

View File

@ -1,3 +1,47 @@
# pg-recover
A dirty, terrible, dangerous Postgres recovery tool.
> ⚠️ **DO NOT DO THIS…** well ever really, but especially on a server with failing disks.
> This is done on a server with perfectly fine disks, but corrupted Postgres blocks.
A dirty, terrible, dangerous Postgres recovery tool.
This is designed to recover as much data as possible from a Postgres table with bad disk blocks
BUT good physical disks (say after an unclean exit, or you've snapshotted bad disks and mounted
them on a good server). It does this by creating a clean "recovery" table with the same schema
and reading rows from the bad table one at a time, skipping rows and blocks with bad disk errors.
For more specifics on how this works technically, see my blog post.
Requirements:
- Bash + tools (cat, tac, cut, sed, &c.)
- Postgres client (`psql` and `pg_dump`)
- Table must have a `SERIAL` primary key and at least one other non-`NULL` column
Usage:
```text
USAGE: pg-recover.sh <user> <host> <database> <table> <primary key> <nonnull col> [<commit size>=500] [<start at>]
user - the Postgres user to connect with
host - the Postgres server host
database - the Postgres database
table - the Postgres table
primary key - the name of the SERIAL primary key column
nonnull col - the name of a DIFFERENT non-null column on the table
commit size - how many rows to recover before committing the transaction (default: 500)
start at - start at the specific primary key (descending)
Copyright (c) 2025 Garrett Mills <shout@garrettmills.dev>
https://code.garrettmills.dev/garrettmills/pg-recover
```
Once the script finishes, you can import the recovered data like so:
```shell
psql [...] < pqr-final-attempt.sql
```
This will create a new table `<original table>_recovery` with the recovered data.
License: See the `LICENSE` file.

134
pg-recover.sh Normal file
View File

@ -0,0 +1,134 @@
#!/bin/bash
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
# Make sure we have all the right parameters:
PGR_USER="${1:-}"
PGR_HOST="${2:-}"
PGR_BASE="${3:-}"
PGR_TABLE="${4:-}"
PGR_PRIMARY_KEY="${5:-}"
PGR_NONNULL_COL="${6:-}"
PGR_COMMIT_SIZE="${7:-500}"
PGR_START_AT="${8:-}"
PGR_SKIPS_SQL=""
PGR_START_AT_SQL=""
PASS_NO=0
if [ -z "$PGR_USER" ] || [ -z "$PGR_HOST" ] || [ -z "$PGR_BASE" ] || [ -z "$PGR_TABLE" ] || [ -z "$PGR_PRIMARY_KEY" ] || [ -z "$PGR_NONNULL_COL" ]; then
echo "USAGE: pg-recover.sh <user> <host> <database> <table> <primary key> <nonnull col> [<commit size=500>] [<start at>]"
echo ""
echo "user - the Postgres user to connect with"
echo "host - the Postgres server host"
echo "database - the Postgres database"
echo "table - the Postgres table"
echo "primary key - the name of the SERIAL primary key column"
echo "nonnull col - the name of a DIFFERENT non-null column on the table"
echo "commit size - how many rows to recover before committing the transaction (default: 500)"
echo "start at - start at the specific primary key (descending)"
echo ""
echo "Copyright (c) 2025 Garrett Mills <shout@garrettmills.dev>"
echo "https://code.garrettmills.dev/garrettmills/pg-recover"
exit 1
fi
if [ -n "$PGR_START_AT" ]; then
PGR_START_AT_SQL="cnt := ${PGR_START_AT};"
fi
# Drop and re-create the recovery table based on the original table's DDL
pgr_reset_recovery() {
echo "Resetting recovery table..."
set -ex
psql -U "$PGR_USER" -h "$PGR_HOST" -c "drop table if exists ${PGR_TABLE}_recovery" "$PGR_BASE"
psql -U "$PGR_USER" -h "$PGR_HOST" -c "create table ${PGR_TABLE}_recovery (like ${PGR_TABLE})" "$PGR_BASE"
set +ex
echo "Done."
}
# Replace the placeholders in the pg-recover stored procedure and (re-)create it
pgr_populate_proc() {
set -e
rm -f pg-recover.sql.inst
cp "${SCRIPT_DIR}/pg-recover.sql" pg-recover.sql.inst
sed -i "s/pgr_primary_key/${PGR_PRIMARY_KEY}/g" pg-recover.sql.inst
sed -i "s/pgr_table/${PGR_TABLE}/g" pg-recover.sql.inst
sed -i "s/pgr_nonnull_col/${PGR_NONNULL_COL}/g" pg-recover.sql.inst
sed -i "s/pgr_commit_size/${PGR_COMMIT_SIZE}/g" pg-recover.sql.inst
sed -i "s/pgr_start_at/${PGR_START_AT_SQL}/g" pg-recover.sql.inst
sed -i.bak -e "/pgr_skips/ {r /dev/stdin" -e "d}" pg-recover.sql.inst <<< "$PGR_SKIPS_SQL"
psql -U "$PGR_USER" -h "$PGR_HOST" "$PGR_BASE" < pg-recover.sql.inst
set +e
}
# Wait for the Postgres server to come back online (e.g. after a crash)
pgr_wait_online() {
local TRIES
TRIES=100
while [[ "$TRIES" -gt 0 ]]; do
echo " - Waiting for Postgres server (tries: $TRIES)"
TRIES="$((TRIES - 1))"
if psql -U "$PGR_USER" -h "$PGR_HOST" -c "select 1;" "$PGR_BASE" > /dev/null 2>&1; then
return
fi
sleep 10
done
echo "Postgres server did not come back online in time"
exit 1
}
# (Recursive) Try to recover rows from the bad table to the recovery table. If the server crashes,
# then add a region of the primary key to skip and try again. Dumps the data from each attempt.
pgr_recovery_pass() {
PASS_NO="$((PASS_NO + 1))"
echo "Attempting recovery pass #${PASS_NO}..."
pgr_reset_recovery
pgr_populate_proc
psql -U "$PGR_USER" -h "$PGR_HOST" -c "call pg_recover_proc()" "$PGR_BASE" > pg-recover.log 2>&1
proc_stat="$?"
if [ "$proc_stat" != 0 ]; then
echo " - Recovery pass failed. Attempting to skip invalid primary key range."
LAST_ID="$(tail -n 50 pg-recover.log | grep 'PGR_' | tac | head -n 1 | rev | cut -d' ' -f1 | rev)"
PREV_ID="$((LAST_ID - PGR_COMMIT_SIZE))"
NEXT_ID="$((LAST_ID + PGR_COMMIT_SIZE))"
echo " - LAST ID: $LAST_ID | BLOCK RANGE: $PREV_ID - $NEXT_ID"
PGR_SKIPS_SQL="
$PGR_SKIPS_SQL
IF cnt <= $NEXT_ID AND cnt > $PREV_ID THEN
cnt := $PREV_ID;
END IF;
"
pgr_wait_online
echo " - Dumping recovery data from last attempt"
pg_dump -U "$PGR_USER" -h "$PGR_HOST" --table "${PGR_TABLE}_recovery" "$PGR_BASE" > "pgr-attempt-${PASS_NO}.sql" || echo " - WARN: Failed to dump recovery data"
pgr_recovery_pass
fi
}
# Let 'er rip!
pgr_wait_online
pgr_recovery_pass
echo "Exporting recovery data..."
pg_dump -U "$PGR_USER" -h "$PGR_HOST" --table "${PGR_TABLE}_recovery" "$PGR_BASE" > "pgr-final-attempt.sql" || echo "ERROR: Failed to dump final recovery data"

42
pg-recover.sql Normal file
View File

@ -0,0 +1,42 @@
CREATE OR REPLACE PROCEDURE pg_recover_proc()
LANGUAGE plpgsql AS $$
DECLARE
cnt BIGINT := 0;
BEGIN
-- Get the maximum page_view_id from the page_views table
cnt := (SELECT MAX(pgr_primary_key) FROM pgr_table);
pgr_start_at
-- Loop through the page_views table in reverse order by page_view_id
LOOP
pgr_skips
BEGIN
-- Insert the row with the current page_view_id into page_views2
INSERT INTO pgr_table_recovery
SELECT * FROM pgr_table WHERE pgr_primary_key = cnt and pgr_nonnull_col is not null;
-- Decrement the counter
cnt := cnt - 1;
-- Exit the loop when cnt < 1
EXIT WHEN cnt < 1;
EXCEPTION
WHEN OTHERS THEN
-- Handle exceptions (e.g., data corruption)
IF POSITION('block' in SQLERRM) > 0 OR POSITION('status of transaction' in SQLERRM) > 0 OR POSITION('memory alloc' in SQLERRM) > 0 OR POSITION('data is corrupt' in SQLERRM) > 0 OR POSITION('MultiXactId' in SQLERRM) > 0 THEN
RAISE WARNING 'PGR_SKIP: %', cnt;
cnt := cnt - 1;
CONTINUE;
ELSE
RAISE;
END IF;
END;
IF MOD(cnt, pgr_commit_size) = 0 THEN
RAISE WARNING 'PGR_COMMIT: %', cnt;
COMMIT;
END IF;
END LOOP;
END;
$$;