Postgres: How to backup and restore.

Natarajan Santhosh
5 min read · Nov 21, 2019

--

FYI: there is a great article on using pg_dump and sending the dump to AWS S3.

Create a postgres container.

We will be working inside postgres container

data size of the public schema: ~200 MB

row counts: ~126k and ~264k

docker run --rm   --name pg-docker -e POSTGRES_PASSWORD=docker -d -p 5432:5432 -v $HOME/docker/volumes/postgres:/var/lib/postgresql/data -v $HOME/docker/volumes/temp:/datastore  postgres

Now download github_events and github_users from the sample CSV files and save them in the host directory below as events.csv and users.csv respectively. These files will then be available inside the Docker container under /datastore.

$HOME/docker/volumes/temp

Login to psql and create 2 tables

# enter docker container shell
docker exec -it pg-docker bash
# specify the postgres password in the config file
# (.pgpass format is hostname:port:database:username:password)
echo localhost:5432:postgres:postgres:docker >> ~/.pgpass
# psql
psql -h localhost -U postgres -d postgres
# create github_events and github_users tables
CREATE TABLE github_events
(
event_id bigint,
event_type text,
event_public boolean,
repo_id bigint,
payload jsonb,
repo jsonb,
user_id bigint,
org jsonb,
created_at timestamp
);

CREATE TABLE github_users
(
user_id bigint,
url text,
login text,
avatar_url text,
gravatar_id text,
display_login text
);
# import table data from csv files
\copy github_events from '/datastore/events.csv' DELIMITER ',' CSV HEADER;
\copy github_users from '/datastore/users.csv' DELIMITER ',' CSV HEADER;

Verify that the CSV files were imported into the tables successfully, then create a pg_dump of the public tables.

# now run the below sql commands
postgres=# select count(*) from github_events;
count
--------
126244
(1 row)
postgres=# select count(*) from github_users;
count
--------
264307
(1 row)
# exit out of psql and run pg_dump
pg_dump -h localhost -p 5432 -U postgres > ~/users-events.sql

Modify a user record.

# enter docker container shell
docker exec -it pg-docker bash
# psql
psql -h localhost -U postgres -d postgres
# update one record (we will verify later that this update is not present once we restore the database)
update github_users set url='https://api.github.com/users/macournoyerremove' where user_id = 22;

Update the dump file by adding the following lines to the top of the file. The `set role dba` line is only needed if the current user does not have permission to drop the schema; ask your DB admin for the name of the role.

set role dba;
DROP SCHEMA IF EXISTS public CASCADE;
CREATE SCHEMA public;
GRANT ALL ON SCHEMA public TO postgres;
GRANT ALL ON SCHEMA public TO public;

Restore

Time the restore from the dump file by prepending the `time` command.

real means "wall-clock time"; user and sys show CPU time, split between regular code and system calls.

root@5a2d84492e15:/# time psql -h localhost -U postgres -d postgres -f /datastore/users-events.sql
psql:/datastore/users-events.sql:1: ERROR: role "dba" does not exist
psql:/datastore/users-events.sql:2: NOTICE: drop cascades to 2 other objects
DETAIL: drop cascades to table github_events
drop cascades to table github_users
DROP SCHEMA
CREATE SCHEMA
GRANT
GRANT
SET
SET
SET
SET
SET
set_config
------------
(1 row)SET
SET
SET
SET
SET
SET
CREATE TABLE
ALTER TABLE
CREATE TABLE
ALTER TABLE
COPY 126244
COPY 264307
CREATE INDEX
CREATE INDEX
real 1m22.012s
user 0m0.260s
sys 0m1.670s

Speed up the restore by running in a single transaction

wall clock time: 82s vs 69s

user time: 0.26s vs 0.37s

sys time: 1.67s vs 0.6s

root@5a2d84492e15:/# time psql -1 -h localhost -U postgres -d postgres -f /datastore/users-events.sql -L ~/restore_txn_r.log
psql:/datastore/users-events.sql:1: ERROR: role "dba" does not exist
psql:/datastore/users-events.sql:2: NOTICE: drop cascades to 2 other objects
DETAIL: drop cascades to table github_events
drop cascades to table github_users
DROP SCHEMA
CREATE SCHEMA
GRANT
GRANT
SET
SET
SET
SET
SET
set_config
------------
(1 row)SET
SET
SET
SET
SET
SET
CREATE TABLE
ALTER TABLE
CREATE TABLE
ALTER TABLE
COPY 126244
COPY 264307
CREATE INDEX
CREATE INDEX
real 1m8.815s
user 0m0.370s
sys 0m0.600s

Backup the database using the custom file format

# enter docker container shell
docker exec -it pg-docker bash
# specify the postgres password in the config file
# (.pgpass format is hostname:port:database:username:password)
echo localhost:5432:postgres:postgres:docker >> ~/.pgpass
root@5a2d84492e15:~# time pg_dump -Fc -h localhost -p 5432 -U postgres > ~/users-events.gz
real 0m12.570s
user 0m8.320s
sys 0m0.510s
root@5a2d84492e15:~# time pg_restore -v -c -Fc --role=postgres --single-transaction -h localhost -U postgres -d postgres ~/users-events.gz
pg_restore: connecting to database for restore
pg_restore: dropping INDEX payload_index
pg_restore: dropping INDEX event_type_index
pg_restore: dropping TABLE github_users
pg_restore: dropping TABLE github_events
pg_restore: creating TABLE "public.github_events"
pg_restore: creating TABLE "public.github_users"
pg_restore: processing data for table "public.github_events"
pg_restore: processing data for table "public.github_users"
pg_restore: creating INDEX "public.event_type_index"
pg_restore: creating INDEX "public.payload_index"
pg_restore: creating ACL "SCHEMA public"
real 0m56.566s
user 0m1.020s
sys 0m0.260s

multiple jobs

# multiple jobs
root@5a2d84492e15:~# time pg_dump -Fd -j 8 -h localhost -p 5432 -U postgres -f ~/users-events0;
real 0m10.658s
user 0m6.960s
sys 0m0.510s
# single transaction
root@d7a21c8e5498:/# time pg_restore -v -c -Fd --role=postgres --single-transaction -h localhost -U postgres -d postgres ~/users-events0
pg_restore: connecting to database for restore
pg_restore: dropping INDEX payload_index
pg_restore: dropping INDEX event_type_index
pg_restore: dropping TABLE github_users
pg_restore: dropping TABLE github_events
pg_restore: creating TABLE "public.github_events"
pg_restore: creating TABLE "public.github_users"
pg_restore: processing data for table "public.github_events"
pg_restore: processing data for table "public.github_users"
pg_restore: creating INDEX "public.event_type_index"
pg_restore: creating INDEX "public.payload_index"
pg_restore: creating ACL "SCHEMA public"
real 0m35.968s
user 0m1.060s
sys 0m0.090s
# multiple jobs
root@d7a21c8e5498:/# time pg_restore -v -c -Fd --role=postgres -j 8 -h localhost -U postgres -d postgres ~/users-events0
pg_restore: connecting to database for restore
pg_restore: dropping INDEX payload_index
pg_restore: dropping INDEX event_type_index
pg_restore: dropping TABLE github_users
pg_restore: dropping TABLE github_events
pg_restore: processing item 2913 ENCODING ENCODING
pg_restore: processing item 2914 STDSTRINGS STDSTRINGS
pg_restore: processing item 2915 SEARCHPATH SEARCHPATH
pg_restore: processing item 2916 DATABASE postgres
pg_restore: processing item 2917 COMMENT DATABASE postgres
pg_restore: processing item 202 TABLE github_events
pg_restore: creating TABLE "public.github_events"
pg_restore: processing item 203 TABLE github_users
pg_restore: creating TABLE "public.github_users"
pg_restore: entering main parallel loop
pg_restore: launching item 2909 TABLE DATA github_events
pg_restore: launching item 2910 TABLE DATA github_users
pg_restore: processing data for table "public.github_events"
pg_restore: processing data for table "public.github_users"
pg_restore: finished item 2910 TABLE DATA github_users
pg_restore: finished item 2909 TABLE DATA github_events
pg_restore: launching item 2781 INDEX event_type_index
pg_restore: launching item 2782 INDEX payload_index
pg_restore:pg_restore: creating INDEX "public.payload_index"
creating INDEX "public.event_type_index"
pg_restore: finished item 2781 INDEX event_type_index
pg_restore: finished item 2782 INDEX payload_index
pg_restore: launching item 2918 ACL SCHEMA public
pg_restore: creating ACL "SCHEMA public"
pg_restore: finished item 2918 ACL SCHEMA public
pg_restore: finished main parallel loop
real 0m37.396s
user 0m0.990s
sys 0m0.110s
real 0m52.986s
user 0m1.200s
sys 0m0.140s

postgres schema size

SELECT schema_name,
pg_size_pretty(sum(table_size)::bigint),
(sum(table_size) / pg_database_size(current_database())) * 100 as percent
FROM (
SELECT pg_catalog.pg_namespace.nspname as schema_name,
pg_relation_size(pg_catalog.pg_class.oid) as table_size
FROM pg_catalog.pg_class
JOIN pg_catalog.pg_namespace ON relnamespace = pg_catalog.pg_namespace.oid
) t
GROUP BY schema_name
ORDER BY schema_name;

List of tables

SELECT
table_schema || '.' || table_name as show_tables
FROM
information_schema.tables
WHERE
table_type = 'BASE TABLE'
AND
table_schema NOT IN ('pg_catalog', 'information_schema');

Describe a table

cluster2=>\d schema_name.table_name

References:

https://paquier.xyz/postgresql-2/postgres-9-3-feature-highlight-parallel-pg_dump/

https://unix.stackexchange.com/questions/10745/how-do-i-time-a-specific-command

--

--

No responses yet