[Profiler] Add API for Dynamic Activity Toggling [2/n] (#133035)
Summary: During PT2 there are many GPU/CPU events that are unnecessary to profile between steps. To remedy this, we can add an API that takes in a list of activities and a flag indicating whether to toggle those activities on or off. For this diff we are adding the profiler API and propagating it down to Kineto (and, in the future, the collection.cpp logic). Subsequent diffs will add CPU toggling and end-to-end testing.

Test Plan: Tested by toggling backward gpu traces off and got following trace: https://www.internalfb.com/intern/perfdoctor/trace_view?filepath=tree/traces/dynocli/devvm2185.cco0.facebook.com/rank-0.Jul_31_13_40_55.3251726.pt.trace.json.gz&bucket=gpu_traces
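For illustration, a minimal sketch of the intended usage of the new API (the module and input below are placeholders, not part of this diff):

import torch
from torch.profiler import profile, ProfilerActivity

model = torch.nn.Linear(64, 64).cuda()  # placeholder module
inp = torch.randn(8, 64, device="cuda")

with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA]) as prof:
    out = model(inp)
    # Stop collecting CUDA events for the backward pass...
    prof.toggle_collection_dynamic(False, [ProfilerActivity.CUDA])
    out.sum().backward()
    # ...then resume CUDA collection for subsequent work.
    prof.toggle_collection_dynamic(True, [ProfilerActivity.CUDA])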

Reviewed By: aaronenyeshi

Differential Revision: D60541767

Pull Request resolved: pytorch/pytorch#133035
Approved by: https://github.com/aaronenyeshi
sraikund16 authored and pytorchmergebot committed Aug 9, 2024
1 parent b0b4723 commit d2ecdcb
Showing 11 changed files with 72 additions and 2 deletions.
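For orientation, the new API threads through the stack as follows (all within this diff): torch.profiler.profile.toggle_collection_dynamic (torch/profiler/profiler.py) calls torch.autograd.profiler.profile.toggle_collection_dynamic (torch/autograd/profiler.py), which calls the torch._C._autograd._toggle_collection_dynamic binding (torch/csrc/autograd/init.cpp), which dispatches to toggleCollectionDynamic (torch/csrc/autograd/profiler_kineto.cpp), then to the Kineto shim (torch/csrc/profiler/kineto_shim.cpp), and finally to libkineto::api().activityProfiler().toggleCollectionDynamic.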
4 changes: 4 additions & 0 deletions torch/_C/_autograd.pyi
@@ -94,6 +94,10 @@ def _prepare_profiler(
config: ProfilerConfig,
activities: set[ProfilerActivity],
) -> None: ...
def _toggle_collection_dynamic(
enable: bool,
activities: set[ProfilerActivity],
) -> None: ...
def _disable_profiler() -> _ProfilerResult: ...
def _profiler_enabled() -> bool: ...
def _add_metadata_json(key: str, value: str) -> None: ...
1 change: 1 addition & 0 deletions torch/autograd/__init__.py
@@ -571,6 +571,7 @@ def variable(*args, **kwargs): # noqa: D103
_record_function_with_args_exit,
_set_empty_test_observer,
_supported_activities,
_toggle_collection_dynamic,
DeviceType,
kineto_available,
ProfilerEvent,
11 changes: 10 additions & 1 deletion torch/autograd/profiler.py
@@ -2,7 +2,7 @@
from collections import defaultdict
from dataclasses import dataclass
from time import perf_counter_ns
from typing import Any, Dict, List, Optional
from typing import Any, Dict, Iterable, List, Optional
from warnings import warn

import torch
@@ -16,6 +16,7 @@
_prepare_profiler,
_ProfilerResult,
_supported_activities,
_toggle_collection_dynamic,
DeviceType,
kineto_available,
ProfilerActivity,
@@ -440,6 +441,14 @@ def export_stacks(self, path: str, metric: str = "self_cpu_time_total"):
assert self.with_stack, "export_stacks() requires with_stack=True"
return self.function_events.export_stacks(path, metric)

def toggle_collection_dynamic(
self, enabled: bool, activities: Iterable[ProfilerActivity]
):
"""
Toggles the collection of activities for the current profiler instance.
"""
return _toggle_collection_dynamic(enabled, set(activities))

def key_averages(self, group_by_input_shape=False, group_by_stack_n=0):
self._check_finish()
assert self.function_events is not None, "Expected profiling results"
4 changes: 4 additions & 0 deletions torch/csrc/autograd/init.cpp
@@ -311,6 +311,10 @@ PyObject* THPAutograd_initExtension(PyObject* _unused, PyObject* unused) {
"_prepare_profiler",
prepareProfiler,
py::call_guard<py::gil_scoped_release>());
m.def(
"_toggle_collection_dynamic",
toggleCollectionDynamic,
py::call_guard<py::gil_scoped_release>());
m.def("_add_metadata_json", addMetadataJson); // Only if `USE_KINETO` is set
m.def("_kineto_step", profilerStep); // Only if `USE_KINETO` is set
m.def("kineto_available", []() { return torch::profiler::kKinetoAvailable; });
16 changes: 16 additions & 0 deletions torch/csrc/autograd/profiler_kineto.cpp
@@ -610,6 +610,22 @@ void prepareProfiler(
}
}

void toggleCollectionDynamic(
const bool enable,
const std::set<torch::profiler::impl::ActivityType>& activities) {
// TODO: CPU toggling should be done in this file to interface with collection,
// similar to the enableProfiler call; GPU toggling is called in impl::kineto as is.
for (auto act : activities) {
if (act != torch::autograd::profiler::ActivityType::CUDA) {
LOG(WARNING)
<< "Dynamic toggle is only supported for GPU activity, skipping toggling of "
<< actToString(act);
continue;
}
torch::profiler::impl::kineto::toggleCollectionDynamic(enable);
}
}

void enableProfilerWithEventPostProcess(
const torch::profiler::impl::ProfilerConfig& config,
const std::set<torch::profiler::impl::ActivityType>& activities,
4 changes: 4 additions & 0 deletions torch/csrc/autograd/profiler_kineto.h
@@ -180,6 +180,10 @@ TORCH_API void prepareProfiler(
const torch::profiler::impl::ProfilerConfig& config,
const std::set<torch::profiler::impl::ActivityType>& activities);

TORCH_API void toggleCollectionDynamic(
const bool enable,
const std::set<torch::profiler::impl::ActivityType>& activities);

/**
* When a C++ thread really has no control over how the profiler was enabled,
* for example, by some unreachable Python code, it can call these functions
10 changes: 10 additions & 0 deletions torch/csrc/profiler/kineto_shim.cpp
@@ -275,6 +275,16 @@ void prepareTrace(
#endif // USE_KINETO
}

void toggleCollectionDynamic(const bool enable) {
#ifdef USE_KINETO
// TODO: We may want to consider adding another input arg for this function
// if we want to support turning off certain devices and keeping others on.
// For now, we keep it simple and have it turn off all tracing of "CUDA"
// devices.
libkineto::api().activityProfiler().toggleCollectionDynamic(enable);
#endif // USE_KINETO
}

void startTrace() {
#ifdef USE_KINETO
libkineto::api().activityProfiler().startTrace();
2 changes: 2 additions & 0 deletions torch/csrc/profiler/kineto_shim.h
@@ -117,6 +117,8 @@ void prepareTrace(
const bool cpuOnly,
const ActivitySet& activities,
const torch::profiler::impl::ExperimentalConfig& config);

void toggleCollectionDynamic(const bool enable);
void startTrace();
ActivityTraceWrapper stopTrace();
void pushCorrelationId(uint64_t correlation_id);
6 changes: 6 additions & 0 deletions torch/csrc/profiler/orchestration/observer.h
@@ -21,6 +21,12 @@ enum class C10_API_ENUM ActivityType {
NUM_KINETO_ACTIVITIES, // must be the last one
};

inline std::string actToString(ActivityType t) {
const std::string ActivityTypeNames[] = {
"CPU", "XPU", "CUDA", "MTIA", "PrivateUse1"};
return ActivityTypeNames[static_cast<int>(t)];
}

enum class C10_API_ENUM ProfilerState {
Disabled = 0,
CPU, // CPU-only profiling
14 changes: 14 additions & 0 deletions torch/profiler/profiler.py
@@ -239,6 +239,20 @@ def export_stacks(self, path: str, metric: str = "self_cpu_time_total"):
assert self.profiler
return self.profiler.export_stacks(path, metric)

def toggle_collection_dynamic(
self, enable: bool, activities: Iterable[ProfilerActivity]
):
"""Toggle collection of activities on/off
Args:
activities (iterable): list of activity groups (CPU, CUDA) to use in profiling, supported values:
``torch.profiler.ProfilerActivity.CPU``, ``torch.profiler.ProfilerActivity.CUDA``,
``torch.profiler.ProfilerActivity.XPU``.
"""
if not self.profiler:
return
self.profiler.toggle_collection_dynamic(enable, activities)

def key_averages(
self, group_by_input_shape: bool = False, group_by_stack_n: int = 0
):
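Note: as implemented in profiler_kineto.cpp above, only CUDA toggling currently takes effect; other activities are skipped with a warning. A sketch of that behavior:

prof.toggle_collection_dynamic(False, [torch.profiler.ProfilerActivity.CPU])
# logs: Dynamic toggle is only supported for GPU activity, skipping toggling of CPU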
