Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#13609: Uplift dram and l1 allocators to use dram/l1 specific alignment #13762

Open
wants to merge 29 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
27ff243
#13609: Uplift dram and l1 allocators to use dram/l1 specific alignment
abhullar-tt Oct 11, 2024
21e4632
#13609: Update memcpy to device to handle 16B aligned writes
abhullar-tt Oct 16, 2024
f48416c
#12549: Fix BH unaligned read issue for tiled interleaved transpose HC
sjameelTT Oct 30, 2024
92d0d5f
Merge branch 'main' into abhullar/diff-aligns
llongTT Dec 16, 2024
479f4da
#13609: remove the change to genfiles.cpp/hpp
llongTT Dec 16, 2024
1c4ea2d
#13609: also remove the deprecated call of jit_build_genfiles_bank_to…
llongTT Dec 16, 2024
7d8ae9a
#13609: fix the build failure due to allocator_alignment change
llongTT Dec 16, 2024
86af491
Merge branch 'main' into abhullar/diff-aligns
llongTT Dec 16, 2024
fff8bbb
Merge branch 'main' into abhullar/diff-aligns
llongTT Dec 16, 2024
9590f84
Merge branch 'main' into abhullar/diff-aligns
llongTT Dec 17, 2024
45be757
#13609: enforce the alignment to the max of input/output to allow noc…
llongTT Dec 17, 2024
c9cbdff
#13609: fix the failed test_sharded tests, using keep_l1_aligned flag
llongTT Dec 18, 2024
353945b
#13609: take care of sharded padding failure due to DRAM/L1 alignmen…
llongTT Dec 18, 2024
5e0bdda
#13609: stick to the usage of keep_l1_aligned = True for now
llongTT Dec 18, 2024
628e010
#13609: switch to i2s/s2i call explicitly to keep l1 aligned
llongTT Dec 19, 2024
740938e
Merge branch 'main' into abhullar/diff-aligns
llongTT Dec 19, 2024
53c9f09
Merge branch 'main' into abhullar/diff-aligns
llongTT Dec 19, 2024
49aef72
Merge branch 'main' into abhullar/diff-aligns
llongTT Dec 20, 2024
d8a7c8d
Merge branch 'main' into abhullar/diff-aligns
llongTT Dec 20, 2024
0174c05
Add allocator api to get alignment based on all buffer types
abhullar-tt Dec 20, 2024
6f16c44
Merge branch 'main' into abhullar/diff-aligns
llongTT Dec 26, 2024
7c46541
#13609: Temporarily skip the failed tests to see if more tests fail
llongTT Dec 26, 2024
de04fc0
#13609: skip more tests
llongTT Dec 30, 2024
939caa5
#13609: Update to address the group norm unit test issue
llongTT Jan 2, 2025
af1451f
#13609: enable group norm tests
llongTT Jan 2, 2025
4b1ab24
#13609: fix of test fold issue, working on GS now
llongTT Jan 3, 2025
774dc43
fix the segmentation fault due to the hugepage address alignment chec…
llongTT Jan 3, 2025
fead9bc
#13609: enable test_permute_5d_blocked as the memory issue has been f…
llongTT Jan 3, 2025
ffbf4a4
#13609: explicitely pack l1 for group norm unit test of test_group_no…
llongTT Jan 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add allocator api to get alignment based on all buffer types
  • Loading branch information
abhullar-tt committed Dec 20, 2024
commit 0174c05646a922cde60001ae6dce5d409b06d283
6 changes: 4 additions & 2 deletions tt_metal/hw/inc/dataflow_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -1108,7 +1108,8 @@ struct InterleavedPow2AddrGen {
const uint32_t bank_base_address;
const uint32_t log_base_2_of_page_size; // WARNING: This struct is used for optimized get_noc_addr in which case
// you know that bank_unit_size is a power of 2
const uint32_t log_base_2_of_allocator_alignment = interleaved_addr_gen::get_log_base2_of_allocator_alignment<DRAM>();
static constexpr uint32_t log_base_2_of_allocator_alignment =
interleaved_addr_gen::get_log_base2_of_allocator_alignment<DRAM>();
const uint32_t aligned_log_base_2_of_page_size = this->log_base_2_of_page_size > log_base_2_of_allocator_alignment
? this->log_base_2_of_page_size
: log_base_2_of_allocator_alignment;
Expand Down Expand Up @@ -1224,7 +1225,8 @@ template <bool DRAM>
struct InterleavedPow2AddrGenFast {
uint32_t bank_base_address; // Base address for the whole tensor.
const uint32_t log_base_2_of_page_size; // Num bytes in bank unit.
const uint32_t log_base_2_of_allocator_alignment = interleaved_addr_gen::get_log_base2_of_allocator_alignment<DRAM>();
static constexpr uint32_t log_base_2_of_allocator_alignment =
interleaved_addr_gen::get_log_base2_of_allocator_alignment<DRAM>();
const uint32_t aligned_log_base_2_of_page_size = this->log_base_2_of_page_size > log_base_2_of_allocator_alignment
? this->log_base_2_of_page_size
: log_base_2_of_allocator_alignment;
Expand Down
12 changes: 12 additions & 0 deletions tt_metal/impl/allocator/allocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,18 @@ const std::vector<uint32_t>& bank_ids_from_logical_core(
return allocator.logical_core_to_bank_ids.at(buffer_type).at(logical_core);
}

uint32_t get_alignment(const Allocator& alloator, const BufferType& buffer_type) {
switch (buffer_type) {
case BufferType::DRAM:
case BufferType::TRACE: return alloator.config.dram_alignment;
case BufferType::L1:
case BufferType::L1_SMALL: return alloator.config.l1_alignment;
default: {
TT_THROW("Allocator does not support buffer ");
}
}
}

Statistics get_statistics(const Allocator& allocator, const BufferType& buffer_type) {
Statistics stats;
switch (buffer_type) {
Expand Down
2 changes: 2 additions & 0 deletions tt_metal/impl/allocator/allocator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ const std::vector<uint32_t>& bank_ids_from_dram_channel(const Allocator& allocat
const std::vector<uint32_t>& bank_ids_from_logical_core(
const Allocator& allocator, BufferType buffer_type, const CoreCoord& logical_core);

// Returns the alignment the allocator is configured to use for `buffer_type`:
// the DRAM alignment for DRAM and TRACE buffers, the L1 alignment for L1 and
// L1_SMALL buffers. Other buffer types are rejected with an error.
uint32_t get_alignment(const Allocator& allocator, const BufferType& buffer_type);

Statistics get_statistics(const Allocator& allocator, const BufferType& buffer_type);

void dump_memory_blocks(const Allocator& allocator, const BufferType& buffer_type, std::ofstream& out);
Expand Down
6 changes: 3 additions & 3 deletions tt_metal/impl/device/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3388,12 +3388,12 @@ allocator::Statistics Device::get_memory_allocation_statistics(const BufferType

uint32_t Device::get_allocator_alignment(const BufferType &buffer_type) const {
const auto& allocator = this->get_initialized_allocator();
return (buffer_type == BufferType::DRAM) ? this->allocator_->config.dram_alignment : this->allocator_->config.l1_alignment;
return allocator::get_alignment(*allocator, buffer_type);
}

uint32_t Device::get_allocator_alignment(SubDeviceId sub_device_id, const BufferType &buffer_type) const {
uint32_t Device::get_allocator_alignment(const BufferType &buffer_type, SubDeviceId sub_device_id) const {
const auto& allocator = this->get_initialized_allocator(sub_device_id);
return (buffer_type == BufferType::DRAM) ? this->allocator_->config.dram_alignment : this->allocator_->config.l1_alignment;
return allocator::get_alignment(*allocator, buffer_type);
}

size_t Device::get_l1_small_size() const {
Expand Down
2 changes: 1 addition & 1 deletion tt_metal/impl/device/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ class Device {
allocator::Statistics get_memory_allocation_statistics(const BufferType &buffer_type, SubDeviceId sub_device_id) const;

uint32_t get_allocator_alignment(const BufferType &buffer_type) const;
uint32_t get_allocator_alignment(SubDeviceId sub_device_id, const BufferType &buffer_type) const;
uint32_t get_allocator_alignment(const BufferType &buffer_type, SubDeviceId sub_device_id) const;

size_t get_l1_small_size() const;
size_t get_l1_small_size(SubDeviceId sub_device_id) const;
Expand Down
Loading