ruff format
sslivkoff committed Feb 11, 2024
1 parent 470f190 commit 4a0a8e4
Showing 13 changed files with 51 additions and 78 deletions.
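The diffs below are the result of rerunning the formatter over the package: double quotes become single quotes, and chained calls and wrapped expressions are collapsed onto one line when they fit within the line length. A minimal sketch of the configuration and commands that would produce output like this — the repository's actual ruff settings are not included in this commit, so the option values here are assumptions:

# pyproject.toml (assumed settings; not part of this commit)
[tool.ruff]
line-length = 88

[tool.ruff.format]
quote-style = "single"

# reformat the package in place, then verify nothing is left unformatted
ruff format state_growth/
ruff format --check state_growth/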
5 changes: 1 addition & 4 deletions state_growth/datasets/balance_diffs.py
@@ -24,8 +24,5 @@ def aggregate_balance_diffs(
     df: state_growth.FrameType, *, group_by: str = 'block_number'
 ) -> state_growth.FrameType:
     return (
-        df.group_by(group_by)
-        .agg(**state_growth.get_schema_agg(schema))
-        .sort(group_by)
+        df.group_by(group_by).agg(**state_growth.get_schema_agg(schema)).sort(group_by)
     )
-
5 changes: 1 addition & 4 deletions state_growth/datasets/balance_reads.py
@@ -19,8 +19,5 @@ def aggregate_balance_reads(
     df: state_growth.FrameType, *, group_by: str = 'block_number'
 ) -> state_growth.FrameType:
     return (
-        df.group_by(group_by)
-        .agg(**state_growth.get_schema_agg(schema))
-        .sort(group_by)
+        df.group_by(group_by).agg(**state_growth.get_schema_agg(schema)).sort(group_by)
     )
-
5 changes: 1 addition & 4 deletions state_growth/datasets/contracts.py
@@ -32,8 +32,5 @@ def aggregate_contracts(
     df: state_growth.FrameType, *, group_by: str = 'block_number'
 ) -> state_growth.FrameType:
     return (
-        df.group_by(group_by)
-        .agg(**state_growth.get_schema_agg(schema))
-        .sort(group_by)
+        df.group_by(group_by).agg(**state_growth.get_schema_agg(schema)).sort(group_by)
     )
-
13 changes: 6 additions & 7 deletions state_growth/datasets/io_per_contract/agg_by_contract.py
@@ -8,9 +8,9 @@
 def join_contracts(df: pl.DataFrame, contracts: pl.DataFrame) -> pl.DataFrame:
     return df.join(
         contracts,
-        left_on="address",
-        right_on="contract_address",
-        how="left",
+        left_on='address',
+        right_on='contract_address',
+        how='left',
     )
 
 
@@ -43,9 +43,9 @@ def agg_by_contract_field(
 def agg_by_contract_fields(
     agg: pl.DataFrame, contracts: pl.DataFrame
 ) -> typing.Mapping[str, pl.DataFrame]:
-    agg_by_init_code = agg_by_contract_field(
-        agg, contracts, 'init_code_hash'
-    ).sort('prop_of_n_creates', descending=True)
+    agg_by_init_code = agg_by_contract_field(agg, contracts, 'init_code_hash').sort(
+        'prop_of_n_creates', descending=True
+    )
     agg_by_factory = agg_by_contract_field(agg, contracts, 'factory').sort(
         'prop_of_n_creates', descending=True
     )
@@ -86,4 +86,3 @@ def plot_cumulative_creates_by_contract_fields(
     plt.legend(loc='lower right')
     toolplot.add_tick_grid()
     plt.title('Cumulative distribution of creates\nby contract fields')
-
28 changes: 9 additions & 19 deletions state_growth/datasets/io_per_contract/io_agg.py
@@ -40,39 +40,29 @@ def aggregate_contract_slot_diffs(df: pl.DataFrame, time_column: str) -> pl.DataFrame:
 
 def compute_contract_slot_agg_proportions(agg: pl.DataFrame) -> pl.DataFrame:
     exprs = dict(
-        prop_slot_writes=pl.sum("n_slot_writes") / agg["n_slot_writes"].sum(),
-        prop_slot_creates=pl.sum("n_slot_creates")
-        / agg["n_slot_creates"].sum(),
-        prop_slot_updates=pl.sum("n_slot_updates")
-        / agg["n_slot_updates"].sum(),
-        prop_slot_deletes=pl.sum("n_slot_deletes")
-        / agg["n_slot_deletes"].sum(),
+        prop_slot_writes=pl.sum('n_slot_writes') / agg['n_slot_writes'].sum(),
+        prop_slot_creates=pl.sum('n_slot_creates') / agg['n_slot_creates'].sum(),
+        prop_slot_updates=pl.sum('n_slot_updates') / agg['n_slot_updates'].sum(),
+        prop_slot_deletes=pl.sum('n_slot_deletes') / agg['n_slot_deletes'].sum(),
     )
 
     if 'n_unique_written_slots' in agg.columns:
         exprs['prop_unique_slot_writes'] = (
-            pl.sum("n_unique_written_slots")
-            / agg["n_unique_written_slots"].sum()
+            pl.sum('n_unique_written_slots') / agg['n_unique_written_slots'].sum()
         )
     if 'n_unique_created_slots' in agg.columns:
         exprs['prop_unique_slot_creates'] = (
-            pl.sum("n_unique_created_slots")
-            / agg["n_unique_created_slots"].sum()
+            pl.sum('n_unique_created_slots') / agg['n_unique_created_slots'].sum()
        )
    if 'n_unique_updated_slots' in agg.columns:
        exprs['prop_unique_slot_updates'] = (
-            pl.sum("n_unique_updated_slots")
-            / agg["n_unique_updated_slots"].sum()
+            pl.sum('n_unique_updated_slots') / agg['n_unique_updated_slots'].sum()
        )
    if 'n_unique_deleted_slots' in agg.columns:
        exprs['prop_unique_slot_deletes'] = (
-            pl.sum("n_unique_deleted_slots")
-            / agg["n_unique_deleted_slots"].sum()
+            pl.sum('n_unique_deleted_slots') / agg['n_unique_deleted_slots'].sum()
        )

    return (
-        agg.group_by("address")
-        .agg(**exprs)
-        .sort("prop_slot_writes", descending=True)
+        agg.group_by('address').agg(**exprs).sort('prop_slot_writes', descending=True)
    )
-
1 change: 0 additions & 1 deletion state_growth/datasets/logs.py
@@ -114,4 +114,3 @@ def aggregate_logs(
         .join(erc721_transfers_per_block, on=group_by, how='outer_coalesce')
         .join(erc721_approvals_per_block, on=group_by, how='outer_coalesce')
     )
-
13 changes: 6 additions & 7 deletions state_growth/datasets/node_dbs/agg_reth_data.py
@@ -9,22 +9,22 @@
 
 def aggregate_by_entity(reth: pl.DataFrame) -> pl.DataFrame:
     return (
-        reth.group_by("Scales with # of")
-        .agg(pl.sum("total_bytes"), pl.max("# Entries"))
+        reth.group_by('Scales with # of')
+        .agg(pl.sum('total_bytes'), pl.max('# Entries'))
         .sort('total_bytes', descending=True)
-        .filter(pl.col("Scales with # of") != "-")
+        .filter(pl.col('Scales with # of') != '-')
         .with_columns(
-            bytes_per_entity=pl.col.total_bytes / pl.col("# Entries"),
+            bytes_per_entity=pl.col.total_bytes / pl.col('# Entries'),
             total_bytes_str=pl.col.total_bytes.map_elements(
                 lambda x: toolstr.format_nbytes(int(x))
-            )
+            ),
         )
     )
 
 
 def compute_bytes_per_entity(reth: pl.DataFrame) -> typing.Mapping[str, float]:
     agg = aggregate_by_entity(reth)
-    return dict(agg[["Scales with # of", "bytes_per_entity"]].rows())
+    return dict(agg[['Scales with # of', 'bytes_per_entity']].rows())
 
 
 # more precise aggregations, including logarithmic model, WIP
@@ -136,4 +136,3 @@ def compute_bytes_per_entity(reth: pl.DataFrame) -> typing.Mapping[str, float]:
 # print('n_bytes_per_slot: ', n_bytes_per_slot)
 # print('n_bytes_per_block: ', n_bytes_per_block)
 # print('n_bytes_per_transaction: ', n_bytes_per_transaction)
-
24 changes: 12 additions & 12 deletions state_growth/datasets/node_dbs/load_reth_data.py
@@ -7,24 +7,24 @@ def load_reth_db_sizes(path: str) -> pl.DataFrame:
     reth = pl.read_csv(path)
 
     reth = reth.with_columns(
-        pl.col("# Entries").map_elements(lambda x: float(x.replace(",", "")))
+        pl.col('# Entries').map_elements(lambda x: float(x.replace(',', '')))
     )
 
     total_bytes = (
         reth.with_columns(
-            byte_number=pl.col.Size.str.split(" ").list.get(0),
-            byte_suffix=pl.col.Size.str.split(" ").list.get(1),
+            byte_number=pl.col.Size.str.split(' ').list.get(0),
+            byte_suffix=pl.col.Size.str.split(' ').list.get(1),
         )
         .with_columns(
-            multiplier=pl.when(pl.col.byte_suffix == "B")
+            multiplier=pl.when(pl.col.byte_suffix == 'B')
             .then(1)
-            .when(pl.col.byte_suffix == "KiB")
+            .when(pl.col.byte_suffix == 'KiB')
             .then(1024)
-            .when(pl.col.byte_suffix == "MiB")
+            .when(pl.col.byte_suffix == 'MiB')
             .then(1024**2)
-            .when(pl.col.byte_suffix == "GiB")
+            .when(pl.col.byte_suffix == 'GiB')
            .then(1024**3)
-            .when(pl.col.byte_suffix == "TiB")
+            .when(pl.col.byte_suffix == 'TiB')
            .then(1024**4)
            .otherwise(-9999999999999),
            byte_number=pl.col.byte_number.cast(pl.Float64),
@@ -35,10 +35,10 @@ def load_reth_db_sizes(path: str) -> pl.DataFrame:
    reth = reth.with_columns(total_bytes)

    reth = reth.with_columns(
-        pl.when(pl.col("Scales with # of") == pl.lit("TxSenders"))
-        .then(pl.lit("Transactions"))
-        .otherwise(pl.col("Scales with # of"))
-        .alias("Scales with # of")
+        pl.when(pl.col('Scales with # of') == pl.lit('TxSenders'))
+        .then(pl.lit('Transactions'))
+        .otherwise(pl.col('Scales with # of'))
+        .alias('Scales with # of')
    )

    return reth
5 changes: 1 addition & 4 deletions state_growth/datasets/storage_diffs.py
@@ -40,8 +40,5 @@ def aggregate_storage_diffs(
     df: state_growth.FrameType, *, group_by: str = 'block_number'
 ) -> state_growth.FrameType:
     return (
-        df.group_by(group_by)
-        .agg(**state_growth.get_schema_agg(schema))
-        .sort(group_by)
+        df.group_by(group_by).agg(**state_growth.get_schema_agg(schema)).sort(group_by)
     )
-
5 changes: 1 addition & 4 deletions state_growth/datasets/storage_reads.py
@@ -24,8 +24,5 @@ def aggregate_storage_reads(
     df: state_growth.FrameType, *, group_by: str = 'block_number'
 ) -> state_growth.FrameType:
     return (
-        df.group_by(group_by)
-        .agg(**state_growth.get_schema_agg(schema))
-        .sort(group_by)
+        df.group_by(group_by).agg(**state_growth.get_schema_agg(schema)).sort(group_by)
     )
-
5 changes: 1 addition & 4 deletions state_growth/datasets/transactions.py
@@ -35,8 +35,5 @@ def aggregate_transactions(
     df: state_growth.FrameType, *, group_by: str = 'block_number'
 ) -> state_growth.FrameType:
     return (
-        df.group_by(group_by)
-        .agg(**state_growth.get_schema_agg(schema))
-        .sort(group_by)
+        df.group_by(group_by).agg(**state_growth.get_schema_agg(schema)).sort(group_by)
     )
-
1 change: 0 additions & 1 deletion state_growth/timestamps/timestamps_load.py
@@ -44,4 +44,3 @@ def load_block_timestamps(
     )
 
     return block_timestamps
-
19 changes: 12 additions & 7 deletions state_growth/transforms/transform_utils.py
@@ -48,14 +48,18 @@ def transform_chunks(
         print('transforming', len(chunks), 'chunks:')
         for chunk in chunks:
             print('-', chunk)
+        print()
 
     block_timestamps = state_growth.load_block_timestamps(**context)
 
     for interval_type in time_columns:
         output_datatype = output_datatype_template.format(
             interval=state_growth.interval_type_names[interval_type]
         )
-
+        if verbose:
+            print()
+            print('doing', output_datatype)
+
         time_column_name = interval_type
         raw_time_data = state_growth.get_block_time_interval(
             block_timestamps, state_growth.interval_type_names[interval_type]
@@ -112,11 +116,13 @@ def transform_chunk(
         context['network'] + '_' + 'state_growth',
         output_datatype,
     )
-    filename = '{network}__{datatype}__{start_block:08}_to_{end_block:08}.parquet'.format(
-        network=context['network'],
-        datatype=output_datatype,
-        start_block=start_block,
-        end_block=end_block,
+    filename = (
+        '{network}__{datatype}__{start_block:08}_to_{end_block:08}.parquet'.format(
+            network=context['network'],
+            datatype=output_datatype,
+            start_block=start_block,
+            end_block=end_block,
+        )
     )
     os.makedirs(parent_dir, exist_ok=True)
     path = os.path.join(parent_dir, filename)
@@ -191,4 +197,3 @@ def transform_chunk(
     tmp_path = path + '_tmp'
     transformed.write_parquet(tmp_path)
     shutil.move(tmp_path, path)
-