From 75057138e85c5bfec04e7487eb771762f448d280 Mon Sep 17 00:00:00 2001 From: Andrea-Oliveri Date: Wed, 14 May 2025 17:42:39 +0200 Subject: [PATCH 01/20] Added tests for vacuuming functionality of dbm --- Lib/test/test_dbm.py | 50 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/Lib/test/test_dbm.py b/Lib/test/test_dbm.py index 4be7c5649da68a..9d1e88db7bc453 100644 --- a/Lib/test/test_dbm.py +++ b/Lib/test/test_dbm.py @@ -135,6 +135,56 @@ def test_anydbm_access(self): assert(f[key] == b"Python:") f.close() + def test_anydbm_readonly_vacuum(self): + self.init_db() + with dbm.open(_fname, 'r') as d: + self.assertRaises(dbm.error, lambda: d.vacuum()) + + def test_anydbm_vacuum_not_changed_content(self): + self.init_db() + with dbm.open(_fname, 'c') as d: + keys_before = sorted(d.keys()) + values_before = [d[k] for k in keys_before] + d.vacuum() + keys_after = sorted(d.keys()) + values_after = [d[k] for k in keys_before] + self.assertEqual(keys_before, keys_after) + self.assertEqual(values_before, values_after) + + def test_anydbm_vacuum_decreased_size(self): + + def _calculate_db_size(db_path): + if os.path.isfile(db_path): + return os.path.getsize(db_path) + total_size = 0 + for root, _, filenames in os.walk(db_path): + for filename in filenames: + file_path = os.path.join(root, filename) + total_size += os.path.getsize(file_path) + return total_size + + # This test requires relatively large databases to reliably show difference in size before and after vacuum. + with dbm.open(_fname, 'n') as f: + for k in self._dict: + f[k.encode('ascii')] = self._dict[k] * 100000 + db_keys = list(f.keys()) + + # Make sure to calculate size of database only after file is closed to ensure file content are flushed to disk. + size_before = _calculate_db_size(_fname) + + # Delete some elements from the start of the database. 
+ keys_to_delete = db_keys[:len(db_keys) // 2] + with dbm.open(_fname, 'c') as f: + for k in keys_to_delete: + del f[k] + f.vacuum() + + # Make sure to calculate size of database only after file is closed to ensure file content are flushed to disk. + size_after = _calculate_db_size(_fname) + + # Less or equal because not all submodules support vacuuming. + self.assertLessEqual(size_after, size_before) + def test_open_with_bytes(self): dbm.open(os.fsencode(_fname), "c").close() From 114777417980e934c9fad30c2cab8233ad30ebb1 Mon Sep 17 00:00:00 2001 From: Andrea-Oliveri Date: Wed, 14 May 2025 17:46:20 +0200 Subject: [PATCH 02/20] Added vacuuming logic to dbm.sqlite --- Lib/dbm/sqlite3.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Lib/dbm/sqlite3.py b/Lib/dbm/sqlite3.py index 7e0ae2a29e3a64..db41baccbac1f4 100644 --- a/Lib/dbm/sqlite3.py +++ b/Lib/dbm/sqlite3.py @@ -15,6 +15,7 @@ STORE_KV = "REPLACE INTO Dict (key, value) VALUES (CAST(? AS BLOB), CAST(? AS BLOB))" DELETE_KEY = "DELETE FROM Dict WHERE key = CAST(? AS BLOB)" ITER_KEYS = "SELECT key FROM Dict" +VACUUM = "VACUUM" class error(OSError): @@ -122,6 +123,9 @@ def __enter__(self): def __exit__(self, *args): self.close() + def vacuum(self): + self._execute(VACUUM) + def open(filename, /, flag="r", mode=0o666): """Open a dbm.sqlite3 database and return the dbm object. From 109a3788aa75b8371955f3d8f8838bcb420c1ee0 Mon Sep 17 00:00:00 2001 From: Andrea-Oliveri Date: Wed, 14 May 2025 18:34:10 +0200 Subject: [PATCH 03/20] Added vacuuming logic to dbm.dumb --- Lib/dbm/dumb.py | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/Lib/dbm/dumb.py b/Lib/dbm/dumb.py index def120ffc3778b..55ea2cca7caa81 100644 --- a/Lib/dbm/dumb.py +++ b/Lib/dbm/dumb.py @@ -8,17 +8,12 @@ - seems to contain a bug when updating... 
-- reclaim free space (currently, space once occupied by deleted or expanded -items is never reused) - - support concurrent access (currently, if two processes take turns making updates, they can mess up the index) - support efficient access to large databases (currently, the whole index is read when the database is opened, and some updates rewrite the whole index) -- support opening for read-only (flag = 'm') - """ import ast as _ast @@ -289,6 +284,34 @@ def __enter__(self): def __exit__(self, *args): self.close() + def vacuum(self): + if self._readonly: + raise error('The database is opened for reading only') + self._verify_open() + # Ensure all changes are committed before vacuuming. + self._commit() + # Open file in r+ to allow changing in-place. + with _io.open(self._datfile, 'rb+') as f: + vacuum_pos = 0 + + # Iterate over existing keys, sorted by starting byte. + for key in sorted(self._index.keys(), key = lambda k: self._index[k][0]): + pos, siz = self._index[key] + f.seek(pos) + val = f.read(siz) + + f.seek(vacuum_pos) + f.write(val) + self._index[key] = (vacuum_pos, siz) + + blocks_occupied = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE + vacuum_pos += blocks_occupied * _BLOCKSIZE + + f.truncate(vacuum_pos) + # Commit changes to index, which were not in-place. + self._commit() + + def open(file, flag='c', mode=0o666): """Open the database file, filename, and return corresponding object. 
From cdacb530eabab485600fdf0e36c80d4c3da7a0df Mon Sep 17 00:00:00 2001 From: Andrea-Oliveri Date: Wed, 14 May 2025 23:45:46 +0200 Subject: [PATCH 04/20] Updated documentation of dbm --- Doc/library/dbm.rst | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/Doc/library/dbm.rst b/Doc/library/dbm.rst index 36221c026d6d4b..53eac9995c9bf8 100644 --- a/Doc/library/dbm.rst +++ b/Doc/library/dbm.rst @@ -15,10 +15,17 @@ * :mod:`dbm.ndbm` If none of these modules are installed, the -slow-but-simple implementation in module :mod:`dbm.dumb` will be used. There +slow-but-simple implementation in module :mod:`dbm.dumb` will be used. There is a `third party interface `_ to the Oracle Berkeley DB. +.. note:: + None of the underlying modules will automatically shrink the disk space used by + the database file. However, :mod:`dbm.sqlite3` and :mod:`dbm.dumb` provide + a :meth:`!vacuum` method that can be used for this purpose. :mod:`dbm.gnu` can + do the same with its :meth:`!reorganize`, called like this for retro-compatibility. + + .. exception:: error A tuple containing the exceptions that can be raised by each of the supported @@ -186,6 +193,16 @@ or any other SQLite browser, including the SQLite CLI. The Unix file access mode of the file (default: octal ``0o666``), used only when the database has to be created. + .. method:: sqlite3.vacuum() + + If you have carried out a lot of deletions and would like to shrink the space + used on disk, this method will reorganize the database; therwise, deleted file + space will be kept and reused as new (key, value) pairs are added. + + .. note:: + During vacuuming, as much as twice the size of the original database is required + in free disk space. + :mod:`dbm.gnu` --- GNU database manager --------------------------------------- @@ -438,6 +455,9 @@ The :mod:`!dbm.dumb` module defines the following: with a sufficiently large/complex entry due to stack depth limitations in Python's AST compiler. 
+ .. warning:: + :mod:`dbm.dumb` does not support concurrent writes, which can corrupt the database. + .. versionchanged:: 3.5 :func:`~dbm.dumb.open` always creates a new database when *flag* is ``'n'``. @@ -460,3 +480,9 @@ The :mod:`!dbm.dumb` module defines the following: .. method:: dumbdbm.close() Close the database. + + .. method:: dumbdbm.vacuum() + + If you have carried out a lot of deletions and would like to shrink the space + used on disk, this method will reorganize the database; otherwise, deleted file + space will not be reused. From 02a7b8a41394ab3ad89a6437580714b407d357e8 Mon Sep 17 00:00:00 2001 From: Andrea-Oliveri Date: Wed, 14 May 2025 23:52:03 +0200 Subject: [PATCH 05/20] Adapted vacuum tests to allow for submodules missing method --- Lib/test/test_dbm.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Lib/test/test_dbm.py b/Lib/test/test_dbm.py index 9d1e88db7bc453..81babd1f6534c8 100644 --- a/Lib/test/test_dbm.py +++ b/Lib/test/test_dbm.py @@ -138,11 +138,19 @@ def test_anydbm_access(self): def test_anydbm_readonly_vacuum(self): self.init_db() with dbm.open(_fname, 'r') as d: + # Early stopping. + if not hasattr(d, 'vacuum'): + return + self.assertRaises(dbm.error, lambda: d.vacuum()) def test_anydbm_vacuum_not_changed_content(self): self.init_db() with dbm.open(_fname, 'c') as d: + # Early stopping. + if not hasattr(d, 'vacuum'): + return + keys_before = sorted(d.keys()) values_before = [d[k] for k in keys_before] d.vacuum() @@ -165,6 +173,10 @@ def _calculate_db_size(db_path): # This test requires relatively large databases to reliably show difference in size before and after vacuum. with dbm.open(_fname, 'n') as f: + # Early stopping. 
+ if not hasattr(f, 'vacuum'): + return + for k in self._dict: f[k.encode('ascii')] = self._dict[k] * 100000 db_keys = list(f.keys()) From dcb43a2c66f276646e7b125693f3a49a58619042 Mon Sep 17 00:00:00 2001 From: Andrea-Oliveri Date: Thu, 15 May 2025 07:40:29 +0200 Subject: [PATCH 06/20] Pushing news and acks entries --- Misc/ACKS | 1 + .../next/Library/2025-05-15-00-27-09.gh-issue-134004.e8k4-R.rst | 2 ++ 2 files changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2025-05-15-00-27-09.gh-issue-134004.e8k4-R.rst diff --git a/Misc/ACKS b/Misc/ACKS index 610dcf9f4238de..210b25a8503301 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1362,6 +1362,7 @@ Milan Oberkirch Pascal Oberndoerfer Géry Ogam Seonkyo Ok +Andrea Oliveri Jeffrey Ollie Adam Olsen Bryan Olson diff --git a/Misc/NEWS.d/next/Library/2025-05-15-00-27-09.gh-issue-134004.e8k4-R.rst b/Misc/NEWS.d/next/Library/2025-05-15-00-27-09.gh-issue-134004.e8k4-R.rst new file mode 100644 index 00000000000000..7e3c94035eeaaf --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-15-00-27-09.gh-issue-134004.e8k4-R.rst @@ -0,0 +1,2 @@ +:mod:`dbm.dumb` and :mod:`dbm.sqlite` now have :meth:`!vacuum` methods to +recover unused free space previously occupied by deleted entries. 
From 89fb2db352ffa745139972c161e271351e1a036a Mon Sep 17 00:00:00 2001 From: Andrea-Oliveri Date: Thu, 15 May 2025 07:52:39 +0200 Subject: [PATCH 07/20] Changed News entry to avoid failure during Doc testing due to reference to module links not found --- .../next/Library/2025-05-15-00-27-09.gh-issue-134004.e8k4-R.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2025-05-15-00-27-09.gh-issue-134004.e8k4-R.rst b/Misc/NEWS.d/next/Library/2025-05-15-00-27-09.gh-issue-134004.e8k4-R.rst index 7e3c94035eeaaf..053f93a00965c4 100644 --- a/Misc/NEWS.d/next/Library/2025-05-15-00-27-09.gh-issue-134004.e8k4-R.rst +++ b/Misc/NEWS.d/next/Library/2025-05-15-00-27-09.gh-issue-134004.e8k4-R.rst @@ -1,2 +1,2 @@ -:mod:`dbm.dumb` and :mod:`dbm.sqlite` now have :meth:`!vacuum` methods to +:mod:`!dbm.dumb` and :mod:`!dbm.sqlite` now have :meth:`!vacuum` methods to recover unused free space previously occupied by deleted entries. From 476dc55d3a05afb0fb199ea392f4650d2acec8c6 Mon Sep 17 00:00:00 2001 From: Andrea-Oliveri Date: Thu, 15 May 2025 12:32:27 +0200 Subject: [PATCH 08/20] Changed method names from .vacuum to .reorganize in dbm.sqlite and dbm.dumb for consistency with dbm.gnu. Also updated documentations and tests to reflect the change --- Doc/library/dbm.rst | 13 +++++---- Lib/dbm/dumb.py | 14 +++++----- Lib/dbm/sqlite3.py | 6 ++--- Lib/test/test_dbm.py | 27 +++++++++---------- ...-05-15-00-27-09.gh-issue-134004.e8k4-R.rst | 2 +- 5 files changed, 30 insertions(+), 32 deletions(-) diff --git a/Doc/library/dbm.rst b/Doc/library/dbm.rst index 53eac9995c9bf8..40789c29a14a3e 100644 --- a/Doc/library/dbm.rst +++ b/Doc/library/dbm.rst @@ -21,9 +21,8 @@ the Oracle Berkeley DB. .. note:: None of the underlying modules will automatically shrink the disk space used by - the database file. However, :mod:`dbm.sqlite3` and :mod:`dbm.dumb` provide - a :meth:`!vacuum` method that can be used for this purpose. 
:mod:`dbm.gnu` can - do the same with its :meth:`!reorganize`, called like this for retro-compatibility. + the database file. However, :mod:`dbm.sqlite3`, :mod:`dbm.gnu` and :mod:`dbm.dumb` + provide a :meth:`!reorganize` method that can be used for this purpose. .. exception:: error @@ -193,14 +192,14 @@ or any other SQLite browser, including the SQLite CLI. The Unix file access mode of the file (default: octal ``0o666``), used only when the database has to be created. - .. method:: sqlite3.vacuum() + .. method:: sqlite3.reorganize() If you have carried out a lot of deletions and would like to shrink the space - used on disk, this method will reorganize the database; therwise, deleted file + used on disk, this method will reorganize the database; otherwise, deleted file space will be kept and reused as new (key, value) pairs are added. .. note:: - During vacuuming, as much as twice the size of the original database is required + While reorganizing, as much as twice the size of the original database is required in free disk space. @@ -481,7 +480,7 @@ The :mod:`!dbm.dumb` module defines the following: Close the database. - .. method:: dumbdbm.vacuum() + .. method:: dumbdbm.reorganize() If you have carried out a lot of deletions and would like to shrink the space used on disk, this method will reorganize the database; otherwise, deleted file diff --git a/Lib/dbm/dumb.py b/Lib/dbm/dumb.py index 55ea2cca7caa81..7b1a5a31801bca 100644 --- a/Lib/dbm/dumb.py +++ b/Lib/dbm/dumb.py @@ -284,15 +284,15 @@ def __enter__(self): def __exit__(self, *args): self.close() - def vacuum(self): + def reorganize(self): if self._readonly: raise error('The database is opened for reading only') self._verify_open() - # Ensure all changes are committed before vacuuming. + # Ensure all changes are committed before reorganizing. self._commit() # Open file in r+ to allow changing in-place. 
with _io.open(self._datfile, 'rb+') as f: - vacuum_pos = 0 + reorganize_pos = 0 # Iterate over existing keys, sorted by starting byte. for key in sorted(self._index.keys(), key = lambda k: self._index[k][0]): @@ -300,14 +300,14 @@ def vacuum(self): f.seek(pos) val = f.read(siz) - f.seek(vacuum_pos) + f.seek(reorganize_pos) f.write(val) - self._index[key] = (vacuum_pos, siz) + self._index[key] = (reorganize_pos, siz) blocks_occupied = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE - vacuum_pos += blocks_occupied * _BLOCKSIZE + reorganize_pos += blocks_occupied * _BLOCKSIZE - f.truncate(vacuum_pos) + f.truncate(reorganize_pos) # Commit changes to index, which were not in-place. self._commit() diff --git a/Lib/dbm/sqlite3.py b/Lib/dbm/sqlite3.py index db41baccbac1f4..b296a1bcd1bbfa 100644 --- a/Lib/dbm/sqlite3.py +++ b/Lib/dbm/sqlite3.py @@ -15,7 +15,7 @@ STORE_KV = "REPLACE INTO Dict (key, value) VALUES (CAST(? AS BLOB), CAST(? AS BLOB))" DELETE_KEY = "DELETE FROM Dict WHERE key = CAST(? AS BLOB)" ITER_KEYS = "SELECT key FROM Dict" -VACUUM = "VACUUM" +REORGANIZE = "VACUUM" class error(OSError): @@ -123,8 +123,8 @@ def __enter__(self): def __exit__(self, *args): self.close() - def vacuum(self): - self._execute(VACUUM) + def reorganize(self): + self._execute(REORGANIZE) def open(filename, /, flag="r", mode=0o666): diff --git a/Lib/test/test_dbm.py b/Lib/test/test_dbm.py index 81babd1f6534c8..7c4fbfa5456d0f 100644 --- a/Lib/test/test_dbm.py +++ b/Lib/test/test_dbm.py @@ -135,31 +135,31 @@ def test_anydbm_access(self): assert(f[key] == b"Python:") f.close() - def test_anydbm_readonly_vacuum(self): + def test_anydbm_readonly_reorganize(self): self.init_db() with dbm.open(_fname, 'r') as d: # Early stopping. 
- if not hasattr(d, 'vacuum'): + if not hasattr(d, 'reorganize'): return - self.assertRaises(dbm.error, lambda: d.vacuum()) + self.assertRaises(dbm.error, lambda: d.reorganize()) - def test_anydbm_vacuum_not_changed_content(self): + def test_anydbm_reorganize_not_changed_content(self): self.init_db() with dbm.open(_fname, 'c') as d: # Early stopping. - if not hasattr(d, 'vacuum'): + if not hasattr(d, 'reorganize'): return keys_before = sorted(d.keys()) values_before = [d[k] for k in keys_before] - d.vacuum() + d.reorganize() keys_after = sorted(d.keys()) values_after = [d[k] for k in keys_before] self.assertEqual(keys_before, keys_after) self.assertEqual(values_before, values_after) - def test_anydbm_vacuum_decreased_size(self): + def test_anydbm_reorganize_decreased_size(self): def _calculate_db_size(db_path): if os.path.isfile(db_path): @@ -171,10 +171,10 @@ def _calculate_db_size(db_path): total_size += os.path.getsize(file_path) return total_size - # This test requires relatively large databases to reliably show difference in size before and after vacuum. + # This test requires relatively large databases to reliably show difference in size before and after reorganizing. with dbm.open(_fname, 'n') as f: # Early stopping. - if not hasattr(f, 'vacuum'): + if not hasattr(f, 'reorganize'): return for k in self._dict: @@ -182,20 +182,19 @@ def _calculate_db_size(db_path): db_keys = list(f.keys()) # Make sure to calculate size of database only after file is closed to ensure file content are flushed to disk. - size_before = _calculate_db_size(_fname) + size_before = _calculate_db_size(os.path.dirname(_fname)) # Delete some elements from the start of the database. keys_to_delete = db_keys[:len(db_keys) // 2] with dbm.open(_fname, 'c') as f: for k in keys_to_delete: del f[k] - f.vacuum() + f.reorganize() # Make sure to calculate size of database only after file is closed to ensure file content are flushed to disk. 
- size_after = _calculate_db_size(_fname) + size_after = _calculate_db_size(os.path.dirname(_fname)) - # Less or equal because not all submodules support vacuuming. - self.assertLessEqual(size_after, size_before) + self.assertLess(size_after, size_before) def test_open_with_bytes(self): dbm.open(os.fsencode(_fname), "c").close() diff --git a/Misc/NEWS.d/next/Library/2025-05-15-00-27-09.gh-issue-134004.e8k4-R.rst b/Misc/NEWS.d/next/Library/2025-05-15-00-27-09.gh-issue-134004.e8k4-R.rst index 053f93a00965c4..aaee4ea16e8007 100644 --- a/Misc/NEWS.d/next/Library/2025-05-15-00-27-09.gh-issue-134004.e8k4-R.rst +++ b/Misc/NEWS.d/next/Library/2025-05-15-00-27-09.gh-issue-134004.e8k4-R.rst @@ -1,2 +1,2 @@ -:mod:`!dbm.dumb` and :mod:`!dbm.sqlite` now have :meth:`!vacuum` methods to +:mod:`!dbm.dumb` and :mod:`!dbm.sqlite` now have :meth:`!reorganize` methods to recover unused free space previously occupied by deleted entries. From 19c0c8da63b69aaf27ce749ee12c5ad4b812f762 Mon Sep 17 00:00:00 2001 From: Andrea-Oliveri Date: Thu, 15 May 2025 12:53:24 +0200 Subject: [PATCH 09/20] Added .reorganize() method in shelve to expose dbm submodule's own .reorganize() --- Lib/shelve.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Lib/shelve.py b/Lib/shelve.py index 50584716e9ea64..b53dc8b7a8ece9 100644 --- a/Lib/shelve.py +++ b/Lib/shelve.py @@ -171,6 +171,11 @@ def sync(self): if hasattr(self.dict, 'sync'): self.dict.sync() + def reorganize(self): + self.sync() + if hasattr(self.dict, 'reorganize'): + self.dict.reorganize() + class BsdDbShelf(Shelf): """Shelf implementation using the "BSD" db interface. 
From 88b40141a725c9ca232382a308cd40fa1d0850c2 Mon Sep 17 00:00:00 2001 From: Andrea-Oliveri Date: Thu, 15 May 2025 13:03:23 +0200 Subject: [PATCH 10/20] Added documentation for shelve.reorganize --- Doc/library/shelve.rst | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/Doc/library/shelve.rst b/Doc/library/shelve.rst index 6e74a59b82b8ec..1e890ce91e3436 100644 --- a/Doc/library/shelve.rst +++ b/Doc/library/shelve.rst @@ -75,8 +75,13 @@ Two additional methods are supported: Write back all entries in the cache if the shelf was opened with *writeback* set to :const:`True`. Also empty the cache and synchronize the persistent - dictionary on disk, if feasible. This is called automatically when the shelf - is closed with :meth:`close`. + dictionary on disk, if feasible. This is called automatically when + :meth:`reorganize` is called or the shelf is closed with :meth:`close`. + +.. method:: Shelf.reorganize() + + Calls :meth:`sync` and attempts to shrink space used on disk by removing empty + space resulting from deletions. .. method:: Shelf.close() @@ -116,6 +121,11 @@ Restrictions * On macOS :mod:`dbm.ndbm` can silently corrupt the database file on updates, which can cause hard crashes when trying to read from the database. +* :meth:`reorganize` may not be available for all database packages and + may temporarely increase resource usage (especially disk space) when called. + Additionally, it will never run automatically and instead needs to be called + explicitly. + .. 
class:: Shelf(dict, protocol=None, writeback=False, keyencoding='utf-8') From 5c1d45f1a20ca50d84acf37816bb74a5c9121147 Mon Sep 17 00:00:00 2001 From: Andrea-Oliveri Date: Thu, 15 May 2025 13:11:35 +0200 Subject: [PATCH 11/20] Fixed link in doc --- Doc/library/shelve.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/shelve.rst b/Doc/library/shelve.rst index 1e890ce91e3436..2a48815c4e0b64 100644 --- a/Doc/library/shelve.rst +++ b/Doc/library/shelve.rst @@ -121,7 +121,7 @@ Restrictions * On macOS :mod:`dbm.ndbm` can silently corrupt the database file on updates, which can cause hard crashes when trying to read from the database. -* :meth:`reorganize` may not be available for all database packages and +* :meth:`Shelf.reorganize` may not be available for all database packages and may temporarely increase resource usage (especially disk space) when called. Additionally, it will never run automatically and instead needs to be called explicitly. From 992e7aa6d3598a4f5a720827eeca1084be92abe8 Mon Sep 17 00:00:00 2001 From: Andrea-Oliveri Date: Thu, 15 May 2025 13:14:12 +0200 Subject: [PATCH 12/20] Updated news --- .../next/Library/2025-05-15-00-27-09.gh-issue-134004.e8k4-R.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2025-05-15-00-27-09.gh-issue-134004.e8k4-R.rst b/Misc/NEWS.d/next/Library/2025-05-15-00-27-09.gh-issue-134004.e8k4-R.rst index aaee4ea16e8007..a9a56d9239b305 100644 --- a/Misc/NEWS.d/next/Library/2025-05-15-00-27-09.gh-issue-134004.e8k4-R.rst +++ b/Misc/NEWS.d/next/Library/2025-05-15-00-27-09.gh-issue-134004.e8k4-R.rst @@ -1,2 +1,2 @@ -:mod:`!dbm.dumb` and :mod:`!dbm.sqlite` now have :meth:`!reorganize` methods to +:mod:`shelve` as well as underlying :mod:`!dbm.dumb` and :mod:`!dbm.sqlite` now have :meth:`!reorganize` methods to recover unused free space previously occupied by deleted entries. 
From b96480b13463b88f8bf8b9edf0a6d534527e3585 Mon Sep 17 00:00:00 2001 From: Andrea-Oliveri Date: Thu, 15 May 2025 13:45:50 +0200 Subject: [PATCH 13/20] PR review: removed unnecessary .keys() --- Lib/dbm/dumb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/dbm/dumb.py b/Lib/dbm/dumb.py index 7b1a5a31801bca..a42628db72f1a9 100644 --- a/Lib/dbm/dumb.py +++ b/Lib/dbm/dumb.py @@ -295,7 +295,7 @@ def reorganize(self): reorganize_pos = 0 # Iterate over existing keys, sorted by starting byte. - for key in sorted(self._index.keys(), key = lambda k: self._index[k][0]): + for key in sorted(self._index, key = lambda k: self._index[k][0]): pos, siz = self._index[key] f.seek(pos) val = f.read(siz) From 4c23b648367660e2a752c6f4aaa518f2ef72dd85 Mon Sep 17 00:00:00 2001 From: Andrea-Oliveri Date: Sat, 17 May 2025 09:52:19 +0200 Subject: [PATCH 14/20] Updated documentation to correct notes indentation --- Doc/library/dbm.rst | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/Doc/library/dbm.rst b/Doc/library/dbm.rst index 40789c29a14a3e..995ac44faf5234 100644 --- a/Doc/library/dbm.rst +++ b/Doc/library/dbm.rst @@ -198,9 +198,9 @@ or any other SQLite browser, including the SQLite CLI. used on disk, this method will reorganize the database; otherwise, deleted file space will be kept and reused as new (key, value) pairs are added. - .. note:: - While reorganizing, as much as twice the size of the original database is required - in free disk space. + .. note:: + While reorganizing, as much as two times the size of the original database is required + in free disk space. However, be aware that this factor changes for each :mod:`dbm` submodule. :mod:`dbm.gnu` --- GNU database manager @@ -300,6 +300,10 @@ functionality like crash tolerance. reorganization; otherwise, deleted file space will be kept and reused as new (key, value) pairs are added. + .. 
note:: + While reorganizing, as much as one time the size of the original database is required + in free disk space. However, be aware that this factor changes for each :mod:`dbm` submodule. + .. method:: gdbm.sync() When the database has been opened in fast mode, this method forces any @@ -485,3 +489,7 @@ The :mod:`!dbm.dumb` module defines the following: If you have carried out a lot of deletions and would like to shrink the space used on disk, this method will reorganize the database; otherwise, deleted file space will not be reused. + + .. note:: + While reorganizing, no additional free disk space is required. However, be aware + that this factor changes for each :mod:`dbm` submodule. From 8a809770e3dfe62752e469dfb22c0b2760b1b91c Mon Sep 17 00:00:00 2001 From: Andrea-Oliveri Date: Sat, 17 May 2025 09:56:17 +0200 Subject: [PATCH 15/20] Left previously removed comment as requested in PR --- Lib/dbm/dumb.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Lib/dbm/dumb.py b/Lib/dbm/dumb.py index a42628db72f1a9..1bc239a84fff83 100644 --- a/Lib/dbm/dumb.py +++ b/Lib/dbm/dumb.py @@ -8,6 +8,9 @@ - seems to contain a bug when updating... +- reclaim free space (currently, space once occupied by deleted or expanded +items is not reused except if .reorganize() is called) + - support concurrent access (currently, if two processes take turns making updates, they can mess up the index) From 166a55305c90916ddc7b5535a4151382ccd386c8 Mon Sep 17 00:00:00 2001 From: Andrea-Oliveri Date: Sat, 17 May 2025 10:37:06 +0200 Subject: [PATCH 16/20] Modified documentation of dbm.dumb warning to align with shelve warning --- Doc/library/dbm.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Doc/library/dbm.rst b/Doc/library/dbm.rst index 995ac44faf5234..faf58a96cef461 100644 --- a/Doc/library/dbm.rst +++ b/Doc/library/dbm.rst @@ -459,7 +459,9 @@ The :mod:`!dbm.dumb` module defines the following: Python's AST compiler. .. 
warning:: - :mod:`dbm.dumb` does not support concurrent writes, which can corrupt the database. + :mod:`dbm.dumb` does not support concurrent read/write access. (Multiple + simultaneous read accesses are safe.) When a program has the database open + for writing, no other program should have it open for reading or writing. .. versionchanged:: 3.5 :func:`~dbm.dumb.open` always creates a new database when *flag* is ``'n'``. From 6f34de54bf9a61e07856020cf24e4434708877f8 Mon Sep 17 00:00:00 2001 From: Andrea-Oliveri Date: Wed, 28 May 2025 18:40:10 +0200 Subject: [PATCH 17/20] Skipping test instead of succeeding if method not implemented for submodule --- Lib/test/test_dbm.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_dbm.py b/Lib/test/test_dbm.py index 7c4fbfa5456d0f..fc70a9711238ae 100644 --- a/Lib/test/test_dbm.py +++ b/Lib/test/test_dbm.py @@ -140,7 +140,7 @@ def test_anydbm_readonly_reorganize(self): with dbm.open(_fname, 'r') as d: # Early stopping. if not hasattr(d, 'reorganize'): - return + self.skipTest(f"method reorganize not available this dbm submodule") self.assertRaises(dbm.error, lambda: d.reorganize()) @@ -149,7 +149,7 @@ def test_anydbm_reorganize_not_changed_content(self): with dbm.open(_fname, 'c') as d: # Early stopping. if not hasattr(d, 'reorganize'): - return + self.skipTest(f"method reorganize not available this dbm submodule") keys_before = sorted(d.keys()) values_before = [d[k] for k in keys_before] @@ -175,7 +175,7 @@ def _calculate_db_size(db_path): with dbm.open(_fname, 'n') as f: # Early stopping. 
if not hasattr(f, 'reorganize'): - return + self.skipTest(f"method reorganize not available this dbm submodule") for k in self._dict: f[k.encode('ascii')] = self._dict[k] * 100000 From 059ad8221e7e3994b46e5038154a5d310d1f98b0 Mon Sep 17 00:00:00 2001 From: Andrea-Oliveri Date: Wed, 28 May 2025 18:44:59 +0200 Subject: [PATCH 18/20] Converted redundant f-string to regular string --- Lib/test/test_dbm.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_dbm.py b/Lib/test/test_dbm.py index fc70a9711238ae..1503100530f895 100644 --- a/Lib/test/test_dbm.py +++ b/Lib/test/test_dbm.py @@ -140,7 +140,7 @@ def test_anydbm_readonly_reorganize(self): with dbm.open(_fname, 'r') as d: # Early stopping. if not hasattr(d, 'reorganize'): - self.skipTest(f"method reorganize not available this dbm submodule") + self.skipTest("method reorganize not available this dbm submodule") self.assertRaises(dbm.error, lambda: d.reorganize()) @@ -149,7 +149,7 @@ def test_anydbm_reorganize_not_changed_content(self): with dbm.open(_fname, 'c') as d: # Early stopping. if not hasattr(d, 'reorganize'): - self.skipTest(f"method reorganize not available this dbm submodule") + self.skipTest("method reorganize not available this dbm submodule") keys_before = sorted(d.keys()) values_before = [d[k] for k in keys_before] @@ -175,7 +175,7 @@ def _calculate_db_size(db_path): with dbm.open(_fname, 'n') as f: # Early stopping. 
if not hasattr(f, 'reorganize'): - self.skipTest(f"method reorganize not available this dbm submodule") + self.skipTest("method reorganize not available this dbm submodule") for k in self._dict: f[k.encode('ascii')] = self._dict[k] * 100000 From 3e7049fe9e4e00ed506647061ffc723a706b1f3d Mon Sep 17 00:00:00 2001 From: Andrea-Oliveri Date: Wed, 28 May 2025 19:33:32 +0200 Subject: [PATCH 19/20] Added versionadded to method documentations --- Doc/library/dbm.rst | 3 +++ Doc/library/shelve.rst | 2 ++ 2 files changed, 5 insertions(+) diff --git a/Doc/library/dbm.rst b/Doc/library/dbm.rst index faf58a96cef461..6f548fbb1b39d8 100644 --- a/Doc/library/dbm.rst +++ b/Doc/library/dbm.rst @@ -202,6 +202,7 @@ or any other SQLite browser, including the SQLite CLI. While reorganizing, as much as two times the size of the original database is required in free disk space. However, be aware that this factor changes for each :mod:`dbm` submodule. + .. versionadded:: next :mod:`dbm.gnu` --- GNU database manager --------------------------------------- @@ -495,3 +496,5 @@ The :mod:`!dbm.dumb` module defines the following: .. note:: While reorganizing, no additional free disk space is required. However, be aware that this factor changes for each :mod:`dbm` submodule. + + .. versionadded:: next diff --git a/Doc/library/shelve.rst b/Doc/library/shelve.rst index 2a48815c4e0b64..23a2e0c3d0c758 100644 --- a/Doc/library/shelve.rst +++ b/Doc/library/shelve.rst @@ -83,6 +83,8 @@ Two additional methods are supported: Calls :meth:`sync` and attempts to shrink space used on disk by removing empty space resulting from deletions. + .. versionadded:: next + .. method:: Shelf.close() Synchronize and close the persistent *dict* object. 
Operations on a closed From 2f5af3805ed077b4604067577d13935184a19717 Mon Sep 17 00:00:00 2001 From: Andrea-Oliveri Date: Wed, 28 May 2025 20:02:38 +0200 Subject: [PATCH 20/20] Added whatsnew entries --- Doc/whatsnew/3.15.rst | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 8cf5238e6cc49a..5c5e64b88511ee 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -89,6 +89,22 @@ New modules Improved modules ================ +dbm +--- + +* Added new :meth:`!reorganize` methods to :mod:`dbm.dumb` and :mod:`dbm.sqlite3` + which allow recovering unused free space previously occupied by deleted entries. + (Contributed by Andrea Oliveri in :gh:`134004`.) + + +shelve +------ + +* Added new :meth:`!reorganize` method to :mod:`shelve` used to recover unused free + space previously occupied by deleted entries. + (Contributed by Andrea Oliveri in :gh:`134004`.) + + ssl ---