From 891867130efac51d90bd21cca5c6e9b00cf642be Mon Sep 17 00:00:00 2001 From: Tobias Bieniek Date: Thu, 3 Jul 2025 15:22:45 +0200 Subject: [PATCH] db-dump: Reset sequences after data import This automatically resets all `id` column sequences to the maximum `id` column value of the corresponding table after the data was imported. --- script/import-database-dump.sh | 7 ++++++- script/reset-sequences.sql | 38 ++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 script/reset-sequences.sql diff --git a/script/import-database-dump.sh b/script/import-database-dump.sh index 413e31df471..e158ed2a0f0 100755 --- a/script/import-database-dump.sh +++ b/script/import-database-dump.sh @@ -58,11 +58,16 @@ psql -a "$DATABASE_NAME" < schema.sql echo "Importing data" psql -a "$DATABASE_NAME" < import.sql +cd "$ORIG_WD" + # Importing the database doesn't cause materialised views to be refreshed, so # let's do that. psql --command="REFRESH MATERIALIZED VIEW recent_crate_downloads" "$DATABASE_NAME" +# Reset all ID sequence values to match the imported data +echo "Resetting sequence values" +psql -a "$DATABASE_NAME" < "$(dirname "$0")/reset-sequences.sql" + # Importing the database also doesn't insert Diesel migration metadata, but we # can infer that from the dump metadata and an up to date crates.io repo. -cd "$ORIG_WD" python3 "$(dirname "$0")/infer-database-dump-version.py" -m "$DUMP_PATH/metadata.json" | psql -a "$DATABASE_NAME" diff --git a/script/reset-sequences.sql b/script/reset-sequences.sql new file mode 100644 index 00000000000..9ce5f8a488d --- /dev/null +++ b/script/reset-sequences.sql @@ -0,0 +1,38 @@ +-- Reset all ID column sequences to their maximum values or 1 +-- This script dynamically discovers all sequences associated with 'id' columns +-- and sets them to either the maximum id value in the table or 1 if empty + +DO $$ +DECLARE + rec RECORD; + max_id BIGINT; +BEGIN + FOR rec IN + WITH id_sequences AS ( + SELECT + pg_class.relname AS sequence_name, + pg_class_tables.relname AS table_name + FROM pg_class + JOIN pg_depend ON pg_depend.objid = pg_class.oid + JOIN pg_class pg_class_tables ON pg_depend.refobjid = pg_class_tables.oid + JOIN pg_attribute ON pg_depend.refobjid = pg_attribute.attrelid AND pg_depend.refobjsubid = pg_attribute.attnum + JOIN pg_namespace ON pg_class_tables.relnamespace = pg_namespace.oid + WHERE pg_class.relkind = 'S' -- sequences + AND pg_attribute.attname = 'id' -- only 'id' columns + AND pg_namespace.nspname = 'public' -- only public schema + ) + SELECT * FROM id_sequences + LOOP + -- Get the maximum id value from the table + EXECUTE format('SELECT MAX(id) FROM public.%I', rec.table_name) INTO max_id; + + -- Reset the sequence to the max value or 1 if empty + -- + -- Use is_called = false for empty tables (max_id IS NULL), + -- and is_called = true for populated tables + PERFORM setval('public.' || rec.sequence_name, COALESCE(max_id, 1), max_id IS NOT NULL); + + -- Log the action + RAISE NOTICE 'Reset sequence % to % (table: %, is_called: %)', rec.sequence_name, COALESCE(max_id, 1), rec.table_name, max_id IS NOT NULL; + END LOOP; +END $$;