Update docs and version tick 0.2.0

ethho · Jan 20, 2023 · cfe557d · cfe557d
1 parent d8c6982
commit cfe557d
Show file tree

Hide file tree

Showing 3 changed files with 61 additions and 3 deletions.
diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
 # memoize.py
 
-This repo contains a single decorator factory `memoize` that manages a local file cache of function results.
+This repo contains a decorator factory `memoize` that manages a local file cache of function results.
 The cache is stored as a JSON file.
 
 ## Quick Start
@@ -27,11 +27,63 @@ def my_func(s: str, b: bool = True, opt=None):
     return {"s": s, "b": b, "opt": opt}
 ```
 
+## Memoize Pandas DataFrames
+
+The `memoize_df` decorator caches the `pandas.DataFrame` returned from a function to a CSV file.
+The `pandas` module must be installed to use this feature:
+
+```bash
+python3 -m pip install pandas
+```
+
+The `memoize_df` decorator factory can be used for any function that returns a `pandas.DataFrame`.
+While `memoize` stores the results of many calls in one cache file, `memoize_df` stores the DataFrame produced for exactly one call in the cache file.
+Also note that the DataFrame index will be written to the CSV cache _if and only if_ the index has a non-null `name` attribute.
+
+```python
+import pandas as pd
+from memoize.dataframe import memoize_df
+
+
+@memoize_df(cache_dir='/tmp/memoize')
+def make_dataframe(foo: int):
+    df = pd.DataFrame(data=reversed(range(foo)), index=range(foo), columns=['my_column'])
+    df.index.name = 'my_index'
+    return df
+
+
+print(make_dataframe(4))
+# Using cache fp='/tmp/memoize/make_dataframe_20230120.csv' to write results of function make_dataframe
+#           my_column
+# my_index
+# 0                 3
+# 1                 2
+# 2                 1
+# 3                 0
+
+print(make_dataframe(3))  # arguments are ignored on a cache hit: the 4-row result cached above is returned
+# Using cached call from /tmp/memoize/make_dataframe_20230120.csv
+#    my_index  my_column
+# 0         0          3
+# 1         1          2
+# 2         2          1
+# 3         3          0
+
+print(make_dataframe(4))
+# Using cached call from /tmp/memoize/make_dataframe_20230120.csv
+#    my_index  my_column
+# 0         0          3
+# 1         1          2
+# 2         2          1
+# 3         3          0
+```
+
 ## License
 
 MIT
 
 ## Limitations
 
-Args, kwargs, and function return value must be JSON-serializable.
+Args, kwargs, and function return value must be JSON-serializable if using the `memoize` decorator.
+The return value of the wrapped function must be a `pandas.DataFrame` when using the `memoize_df` decorator.
 The entire contents of the date-stamped cache file will be read and written on every function call, which may pose I/O challenges.
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "memoize"
-version = "0.1.0"
+version = "0.2.0"
 description = "Python3 memoization decorator"
 authors = ["Ethan Ho <[email protected]>"]
 license = "MIT"

diff --git a/src/memoize/dataframe.py b/src/memoize/dataframe.py
@@ -64,6 +64,12 @@ def memoize_dec(*args, **kwargs):
 
             # Else run the function and store cached result
             result = func(*args, **kwargs)
+
+            if not isinstance(result, pd.DataFrame):
+                raise Exception(
+                    f"Failed to write return value of function '{funcname}' to CSV file. "
+                    f"Expected a pandas.DataFrame, received {type(result)}."
+                )
             _write_csv(fp, result)
             return result
         return memoize_dec