ruff format
sslivkoff committed Feb 11, 2024
1 parent 470f190 commit 4a0a8e4
Showing 13 changed files with 51 additions and 78 deletions.
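The diffs below are the result of rerunning the formatter over the package: double quotes become single quotes, and chained calls and wrapped expressions are collapsed onto one line when they fit within the line length. A minimal sketch of the configuration and commands that would produce output like this — the repository's actual ruff settings are not included in this commit, so the option values here are assumptions:

# pyproject.toml (assumed settings; not part of this commit)
[tool.ruff]
line-length = 88

[tool.ruff.format]
quote-style = "single"

# reformat the package in place, then verify nothing is left unformatted
ruff format state_growth/
ruff format --check state_growth/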
5 changes: 1 addition & 4 deletions state_growth/datasets/balance_diffs.py
@@ -24,8 +24,5 @@ def aggregate_balance_diffs(
     df: state_growth.FrameType, *, group_by: str = 'block_number'
 ) -> state_growth.FrameType:
     return (
-        df.group_by(group_by)
-        .agg(**state_growth.get_schema_agg(schema))
-        .sort(group_by)
+        df.group_by(group_by).agg(**state_growth.get_schema_agg(schema)).sort(group_by)
     )
-
5 changes: 1 addition & 4 deletions state_growth/datasets/balance_reads.py
@@ -19,8 +19,5 @@ def aggregate_balance_reads(
     df: state_growth.FrameType, *, group_by: str = 'block_number'
 ) -> state_growth.FrameType:
     return (
-        df.group_by(group_by)
-        .agg(**state_growth.get_schema_agg(schema))
-        .sort(group_by)
+        df.group_by(group_by).agg(**state_growth.get_schema_agg(schema)).sort(group_by)
     )
-
5 changes: 1 addition & 4 deletions state_growth/datasets/contracts.py
@@ -32,8 +32,5 @@ def aggregate_contracts(
     df: state_growth.FrameType, *, group_by: str = 'block_number'
 ) -> state_growth.FrameType:
     return (
-        df.group_by(group_by)
-        .agg(**state_growth.get_schema_agg(schema))
-        .sort(group_by)
+        df.group_by(group_by).agg(**state_growth.get_schema_agg(schema)).sort(group_by)
     )
-
13 changes: 6 additions & 7 deletions state_growth/datasets/io_per_contract/agg_by_contract.py
@@ -8,9 +8,9 @@
 def join_contracts(df: pl.DataFrame, contracts: pl.DataFrame) -> pl.DataFrame:
     return df.join(
         contracts,
-        left_on="address",
-        right_on="contract_address",
-        how="left",
+        left_on='address',
+        right_on='contract_address',
+        how='left',
     )
 
 
@@ -43,9 +43,9 @@ def agg_by_contract_field(
 def agg_by_contract_fields(
     agg: pl.DataFrame, contracts: pl.DataFrame
 ) -> typing.Mapping[str, pl.DataFrame]:
-    agg_by_init_code = agg_by_contract_field(
-        agg, contracts, 'init_code_hash'
-    ).sort('prop_of_n_creates', descending=True)
+    agg_by_init_code = agg_by_contract_field(agg, contracts, 'init_code_hash').sort(
+        'prop_of_n_creates', descending=True
+    )
     agg_by_factory = agg_by_contract_field(agg, contracts, 'factory').sort(
         'prop_of_n_creates', descending=True
     )
@@ -86,4 +86,3 @@ def plot_cumulative_creates_by_contract_fields(
     plt.legend(loc='lower right')
     toolplot.add_tick_grid()
     plt.title('Cumulative distribution of creates\nby contract fields')
-
28 changes: 9 additions & 19 deletions state_growth/datasets/io_per_contract/io_agg.py
@@ -40,39 +40,29 @@ def aggregate_contract_slot_diffs(df: pl.DataFrame, time_column: str) -> pl.DataFrame:
 
 def compute_contract_slot_agg_proportions(agg: pl.DataFrame) -> pl.DataFrame:
     exprs = dict(
-        prop_slot_writes=pl.sum("n_slot_writes") / agg["n_slot_writes"].sum(),
-        prop_slot_creates=pl.sum("n_slot_creates")
-        / agg["n_slot_creates"].sum(),
-        prop_slot_updates=pl.sum("n_slot_updates")
-        / agg["n_slot_updates"].sum(),
-        prop_slot_deletes=pl.sum("n_slot_deletes")
-        / agg["n_slot_deletes"].sum(),
+        prop_slot_writes=pl.sum('n_slot_writes') / agg['n_slot_writes'].sum(),
+        prop_slot_creates=pl.sum('n_slot_creates') / agg['n_slot_creates'].sum(),
+        prop_slot_updates=pl.sum('n_slot_updates') / agg['n_slot_updates'].sum(),
+        prop_slot_deletes=pl.sum('n_slot_deletes') / agg['n_slot_deletes'].sum(),
     )
 
     if 'n_unique_written_slots' in agg.columns:
         exprs['prop_unique_slot_writes'] = (
-            pl.sum("n_unique_written_slots")
-            / agg["n_unique_written_slots"].sum()
+            pl.sum('n_unique_written_slots') / agg['n_unique_written_slots'].sum()
         )
     if 'n_unique_created_slots' in agg.columns:
         exprs['prop_unique_slot_creates'] = (
-            pl.sum("n_unique_created_slots")
-            / agg["n_unique_created_slots"].sum()
+            pl.sum('n_unique_created_slots') / agg['n_unique_created_slots'].sum()
        )
    if 'n_unique_updated_slots' in agg.columns:
        exprs['prop_unique_slot_updates'] = (
-            pl.sum("n_unique_updated_slots")
-            / agg["n_unique_updated_slots"].sum()
+            pl.sum('n_unique_updated_slots') / agg['n_unique_updated_slots'].sum()
        )
    if 'n_unique_deleted_slots' in agg.columns:
        exprs['prop_unique_slot_deletes'] = (
-            pl.sum("n_unique_deleted_slots")
-            / agg["n_unique_deleted_slots"].sum()
+            pl.sum('n_unique_deleted_slots') / agg['n_unique_deleted_slots'].sum()
        )

    return (
-        agg.group_by("address")
-        .agg(**exprs)
-        .sort("prop_slot_writes", descending=True)
+        agg.group_by('address').agg(**exprs).sort('prop_slot_writes', descending=True)
    )
-
1 change: 0 additions & 1 deletion state_growth/datasets/logs.py
@@ -114,4 +114,3 @@ def aggregate_logs(
         .join(erc721_transfers_per_block, on=group_by, how='outer_coalesce')
         .join(erc721_approvals_per_block, on=group_by, how='outer_coalesce')
     )
-
13 changes: 6 additions & 7 deletions state_growth/datasets/node_dbs/agg_reth_data.py
@@ -9,22 +9,22 @@
 
 def aggregate_by_entity(reth: pl.DataFrame) -> pl.DataFrame:
     return (
-        reth.group_by("Scales with # of")
-        .agg(pl.sum("total_bytes"), pl.max("# Entries"))
+        reth.group_by('Scales with # of')
+        .agg(pl.sum('total_bytes'), pl.max('# Entries'))
         .sort('total_bytes', descending=True)
-        .filter(pl.col("Scales with # of") != "-")
+        .filter(pl.col('Scales with # of') != '-')
         .with_columns(
-            bytes_per_entity=pl.col.total_bytes / pl.col("# Entries"),
+            bytes_per_entity=pl.col.total_bytes / pl.col('# Entries'),
             total_bytes_str=pl.col.total_bytes.map_elements(
                 lambda x: toolstr.format_nbytes(int(x))
-            )
+            ),
         )
     )
 
 
 def compute_bytes_per_entity(reth: pl.DataFrame) -> typing.Mapping[str, float]:
     agg = aggregate_by_entity(reth)
-    return dict(agg[["Scales with # of", "bytes_per_entity"]].rows())
+    return dict(agg[['Scales with # of', 'bytes_per_entity']].rows())
 
 
 # more precise aggregations, including logarithmic model, WIP
@@ -136,4 +136,3 @@ def compute_bytes_per_entity(reth: pl.DataFrame) -> typing.Mapping[str, float]:
 # print('n_bytes_per_slot: ', n_bytes_per_slot)
 # print('n_bytes_per_block: ', n_bytes_per_block)
 # print('n_bytes_per_transaction: ', n_bytes_per_transaction)
-
24 changes: 12 additions & 12 deletions state_growth/datasets/node_dbs/load_reth_data.py
@@ -7,24 +7,24 @@ def load_reth_db_sizes(path: str) -> pl.DataFrame:
     reth = pl.read_csv(path)
 
     reth = reth.with_columns(
-        pl.col("# Entries").map_elements(lambda x: float(x.replace(",", "")))
+        pl.col('# Entries').map_elements(lambda x: float(x.replace(',', '')))
     )
 
     total_bytes = (
         reth.with_columns(
-            byte_number=pl.col.Size.str.split(" ").list.get(0),
-            byte_suffix=pl.col.Size.str.split(" ").list.get(1),
+            byte_number=pl.col.Size.str.split(' ').list.get(0),
+            byte_suffix=pl.col.Size.str.split(' ').list.get(1),
         )
         .with_columns(
-            multiplier=pl.when(pl.col.byte_suffix == "B")
+            multiplier=pl.when(pl.col.byte_suffix == 'B')
             .then(1)
-            .when(pl.col.byte_suffix == "KiB")
+            .when(pl.col.byte_suffix == 'KiB')
             .then(1024)
-            .when(pl.col.byte_suffix == "MiB")
+            .when(pl.col.byte_suffix == 'MiB')
             .then(1024**2)
-            .when(pl.col.byte_suffix == "GiB")
+            .when(pl.col.byte_suffix == 'GiB')
            .then(1024**3)
-            .when(pl.col.byte_suffix == "TiB")
+            .when(pl.col.byte_suffix == 'TiB')
            .then(1024**4)
            .otherwise(-9999999999999),
            byte_number=pl.col.byte_number.cast(pl.Float64),
@@ -35,10 +35,10 @@ def load_reth_db_sizes(path: str) -> pl.DataFrame:
    reth = reth.with_columns(total_bytes)

    reth = reth.with_columns(
-        pl.when(pl.col("Scales with # of") == pl.lit("TxSenders"))
-        .then(pl.lit("Transactions"))
-        .otherwise(pl.col("Scales with # of"))
-        .alias("Scales with # of")
+        pl.when(pl.col('Scales with # of') == pl.lit('TxSenders'))
+        .then(pl.lit('Transactions'))
+        .otherwise(pl.col('Scales with # of'))
+        .alias('Scales with # of')
    )

    return reth
5 changes: 1 addition & 4 deletions state_growth/datasets/storage_diffs.py
@@ -40,8 +40,5 @@ def aggregate_storage_diffs(
     df: state_growth.FrameType, *, group_by: str = 'block_number'
 ) -> state_growth.FrameType:
     return (
-        df.group_by(group_by)
-        .agg(**state_growth.get_schema_agg(schema))
-        .sort(group_by)
+        df.group_by(group_by).agg(**state_growth.get_schema_agg(schema)).sort(group_by)
     )
-
5 changes: 1 addition & 4 deletions state_growth/datasets/storage_reads.py
@@ -24,8 +24,5 @@ def aggregate_storage_reads(
     df: state_growth.FrameType, *, group_by: str = 'block_number'
 ) -> state_growth.FrameType:
     return (
-        df.group_by(group_by)
-        .agg(**state_growth.get_schema_agg(schema))
-        .sort(group_by)
+        df.group_by(group_by).agg(**state_growth.get_schema_agg(schema)).sort(group_by)
     )
-
5 changes: 1 addition & 4 deletions state_growth/datasets/transactions.py
@@ -35,8 +35,5 @@ def aggregate_transactions(
     df: state_growth.FrameType, *, group_by: str = 'block_number'
 ) -> state_growth.FrameType:
     return (
-        df.group_by(group_by)
-        .agg(**state_growth.get_schema_agg(schema))
-        .sort(group_by)
+        df.group_by(group_by).agg(**state_growth.get_schema_agg(schema)).sort(group_by)
     )
-
1 change: 0 additions & 1 deletion state_growth/timestamps/timestamps_load.py
@@ -44,4 +44,3 @@ def load_block_timestamps(
     )
 
     return block_timestamps
-
19 changes: 12 additions & 7 deletions state_growth/transforms/transform_utils.py
@@ -48,14 +48,18 @@ def transform_chunks(
         print('transforming', len(chunks), 'chunks:')
         for chunk in chunks:
             print('-', chunk)
+        print()
 
     block_timestamps = state_growth.load_block_timestamps(**context)
 
     for interval_type in time_columns:
         output_datatype = output_datatype_template.format(
             interval=state_growth.interval_type_names[interval_type]
         )
-
+        if verbose:
+            print()
+            print('doing', output_datatype)
+
         time_column_name = interval_type
         raw_time_data = state_growth.get_block_time_interval(
             block_timestamps, state_growth.interval_type_names[interval_type]
@@ -112,11 +116,13 @@ def transform_chunk(
         context['network'] + '_' + 'state_growth',
         output_datatype,
     )
-    filename = '{network}__{datatype}__{start_block:08}_to_{end_block:08}.parquet'.format(
-        network=context['network'],
-        datatype=output_datatype,
-        start_block=start_block,
-        end_block=end_block,
+    filename = (
+        '{network}__{datatype}__{start_block:08}_to_{end_block:08}.parquet'.format(
+            network=context['network'],
+            datatype=output_datatype,
+            start_block=start_block,
+            end_block=end_block,
+        )
     )
     os.makedirs(parent_dir, exist_ok=True)
     path = os.path.join(parent_dir, filename)
@@ -191,4 +197,3 @@ def transform_chunk(
     tmp_path = path + '_tmp'
     transformed.write_parquet(tmp_path)
     shutil.move(tmp_path, path)
-