Skip to content

Commit

Permalink
Programaticaly extract common FIO parameters from the scenario string.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 369767822
  • Loading branch information
NathanTeeuwen authored and copybara-github committed Apr 22, 2021
1 parent d443825 commit 3d67bdf
Show file tree
Hide file tree
Showing 3 changed files with 177 additions and 18 deletions.
2 changes: 2 additions & 0 deletions CHANGES.next.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@
- Support wiring properties into DPB clusters with `--dpb_clusters_properties`
in addition to `--dpb_job_properties`.
- Add support for GCS and S3 I/O in PKB managed Spark and Hadoop clusters.
- Update FIO workload to support extraction of common benchmark parameters
from the scenario string.

### Bug fixes and maintenance updates:

Expand Down
154 changes: 136 additions & 18 deletions perfkitbenchmarker/linux_benchmarks/fio_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,9 @@
'scenario \'all\' generates all scenarios. Available '
'scenarios are sequential_write, sequential_read, '
'random_write, and random_read. Cannot use with '
'--fio_jobfile.')
'--fio_jobfile. You can also specify a scenario in the '
'format accesspattern_blocksize_operation_workingset '
'for a custom workload.')
flags.DEFINE_enum('fio_target_mode', AGAINST_FILE_WITHOUT_FILL_MODE,
[AGAINST_DEVICE_WITH_FILL_MODE,
AGAINST_DEVICE_WITHOUT_FILL_MODE,
Expand Down Expand Up @@ -260,13 +262,12 @@ def FillDevice(vm, disk, fill_size, exec_path):
{%- if scenario['rwmixread'] is defined %}
rwmixread={{scenario['rwmixread']}}
{%- endif%}
{%- if scenario['rwmixwrite'] is defined %}
rwmixwrite={{scenario['rwmixwrite']}}
{%- endif%}
blocksize={{scenario['blocksize']}}
iodepth={{iodepth}}
{%- if scenario['size'] is defined %}
size={{scenario['size']}}
{%- else %}
size={{size}}
{%- endif%}
numjobs={{numjob}}
{%- endfor %}
{%- endfor %}
Expand All @@ -275,6 +276,115 @@ def FillDevice(vm, disk, fill_size, exec_path):

SECONDS_PER_MINUTE = 60

# known rwkind fio parameters
RWKIND_SEQUENTIAL_READ = 'read'
RWKIND_SEQUENTIAL_WRITE = 'write'
RWKIND_RANDOM_READ = 'randread'
RWKIND_RANDOM_WRITE = 'randwrite'
RWKIND_SEQUENTIAL_READ_WRITE = 'rw' # 'readwrite' is also valid
RWKIND_RANDOM_READ_WRITE = 'randrw'
RWKIND_SEQUENTIAL_TRIM = 'trim'

# define fragments from scenario_strings
OPERATION_READ = 'read'
OPERATION_WRITE = 'write'
OPERATION_TRIM = 'trim'
OPERATION_READWRITE = 'readwrite' # mixed read and writes
ALL_OPERATIONS = frozenset([
OPERATION_READ, OPERATION_WRITE, OPERATION_TRIM, OPERATION_READWRITE])

ACCESS_PATTERN_SEQUENTIAL = 'seq'
ACCESS_PATTERN_RANDOM = 'rand'
ALL_ACCESS_PATTERNS = frozenset([
ACCESS_PATTERN_SEQUENTIAL, ACCESS_PATTERN_RANDOM])

# map from scenario_string fragments to rwkind fio parameter
MAP_ACCESS_OP_TO_RWKIND = {
(ACCESS_PATTERN_SEQUENTIAL, OPERATION_READ): RWKIND_SEQUENTIAL_READ,
(ACCESS_PATTERN_SEQUENTIAL, OPERATION_WRITE): RWKIND_SEQUENTIAL_WRITE,
(ACCESS_PATTERN_RANDOM, OPERATION_READ): RWKIND_RANDOM_READ,
(ACCESS_PATTERN_RANDOM, OPERATION_WRITE): RWKIND_RANDOM_WRITE,
(ACCESS_PATTERN_SEQUENTIAL, OPERATION_READWRITE):
RWKIND_SEQUENTIAL_READ_WRITE,
(ACCESS_PATTERN_RANDOM, OPERATION_READWRITE): RWKIND_RANDOM_READ_WRITE,
(ACCESS_PATTERN_SEQUENTIAL, RWKIND_SEQUENTIAL_TRIM): RWKIND_SEQUENTIAL_TRIM,
}

# check for known fields, as the JOB_FILE_TEMPLATE looks for these
# fields explicitly and needs to be updated
FIO_KNOWN_FIELDS_IN_JINJA = [
'rwmixread',
'rwmixwrite'
]


def GetScenarioFromScenarioString(scenario_string):
"""Extract rwkind,blocksize,size from scenario string."""
# look for legacy entries in the scenario map first
result = SCENARIOS.get(scenario_string, None)
if result:
# return a copy so that the scenario can be mutated further if needed
# without modifying the SCENARIO map
return result.copy()

# decode the parameters from the scenario name
# in the format accesspattern_blocksize_operation_workingset
# example pattern would be:
# rand_16k_write_100%
# seq_1M_write_100%
fields = scenario_string.split('_')
if len(fields) < 4:
raise errors.Setup.InvalidFlagConfigurationError(
f'Unexpected Scenario string format: {scenario_string}')
(access_pattern, blocksize_str, operation, workingset_str) = fields[0:4]

if access_pattern not in ALL_ACCESS_PATTERNS:
raise errors.Setup.InvalidFlagConfigurationError(
f'Unexpected access pattern {access_pattern} '
f'in scenario {scenario_string}')

if operation not in ALL_OPERATIONS:
raise errors.Setup.InvalidFlagConfigurationError(
f'Unexpected operation {operation}'
f'in scenario {scenario_string}')

access_op = (access_pattern, operation)
rwkind = MAP_ACCESS_OP_TO_RWKIND.get(access_op, None)
if not rwkind:
raise errors.Setup.InvalidFlagConfigurationError(
f'{access_pattern} and {operation} could not be mapped '
f'to a rwkind fio parameter from '
f'scenario {scenario_string}')

# required fields of JOB_FILE_TEMPLATE
result = {
'name': scenario_string,
'rwkind': rwkind,
'blocksize': blocksize_str,
'size': workingset_str
}

# The first four fields are well defined - after that, we use
# key value pairs to encode any extra fields we need
# The format is key-value for any additional fields appended
# e.g. rand_16k_readwrite_5TB_rwmixread-65
# random access pattern
# 16k block size
# readwrite operation
# 5 TB working set
# rwmixread of 65. (so 65% reads and 35% writes)
for extra_fields in fields[4:]:
key_value = extra_fields.split('-')
key = key_value[0]
value = key_value[1]
if key not in FIO_KNOWN_FIELDS_IN_JINJA:
raise errors.Setup.InvalidFlagConfigurationError(
'Unregonzied FIO parameter {0} out of scenario {1}'.format(
key, scenario_string))
result[key] = value

return result


def GenerateJobFileString(filename, scenario_strings,
io_depths, num_jobs, working_set_size,
Expand All @@ -298,26 +408,34 @@ def GenerateJobFileString(filename, scenario_strings,
if 'all' in scenario_strings:
scenarios = six.itervalues(SCENARIOS)
else:
for name in scenario_strings:
if name not in SCENARIOS:
logging.error('Unknown scenario name %s', name)
scenarios = (SCENARIOS[name] for name in scenario_strings)

size_string = str(working_set_size) + 'G' if working_set_size else '100%'
if block_size is not None:
# If we don't make a copy here, this will modify the global
# SCENARIOS variable.
scenarios = [scenario.copy() for scenario in scenarios]
for scenario in scenarios:
scenario['blocksize'] = str(int(block_size.m_as(units.byte))) + 'B'
scenarios = [GetScenarioFromScenarioString(scenario_string)
for scenario_string in scenario_strings]

default_size_string = (str(working_set_size) + 'G'
if working_set_size else '100%')
blocksize_override = (str(int(block_size.m_as(units.byte))) + 'B'
if block_size else None)

for scenario in scenarios:
# per legacy behavior, the block size parameter
# overrides what is defined in the scenario string
if blocksize_override:
scenario['blocksize'] = blocksize_override

# per legacy behavior, the size in the scenario_string overrides
# size defined on the command line
if 'size' not in scenario:
scenario['size'] = default_size_string

job_file_template = jinja2.Template(JOB_FILE_TEMPLATE,
undefined=jinja2.StrictUndefined)

# the template creates a cross product of all
# (scenario X io_depths X num_jobs)
# which allows a single run to produce all of these metrics
return str(job_file_template.render(
runtime=runtime,
filename=filename,
size=size_string,
scenarios=scenarios,
iodepths=io_depths,
numjobs=num_jobs,
Expand Down
39 changes: 39 additions & 0 deletions tests/linux_benchmarks/fio_benchmark_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,45 @@ def testCustomBlocksize(self):
self.assertEqual(fio_benchmark.SCENARIOS['sequential_write']['blocksize'],
orig_blocksize)

def testParseGenerateScenario(self):
expected_jobfile = """
[global]
ioengine=libaio
invalidate=1
direct=1
runtime=600
time_based
filename=/test/filename
do_verify=0
verify_fatal=0
group_reporting=1
randrepeat=0
[seq_64M_read_10TB-io-depth-1-num-jobs-1]
stonewall
rw=read
blocksize=64M
iodepth=1
size=10TB
numjobs=1
[rand_16k_readwrite_5TB_rwmixread-65-io-depth-1-num-jobs-1]
stonewall
rw=randrw
rwmixread=65
blocksize=16k
iodepth=1
size=5TB
numjobs=1"""

self.assertEqual(
fio_benchmark.GenerateJobFileString(
self.filename,
['seq_64M_read_10TB', 'rand_16k_readwrite_5TB_rwmixread-65'],
[1], [1],
None, None, 600, ['randrepeat=0']),
expected_jobfile)


class TestProcessedJobFileString(pkb_common_test_case.PkbCommonTestCase):

Expand Down

0 comments on commit 3d67bdf

Please sign in to comment.