-
Notifications
You must be signed in to change notification settings - Fork 1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Avoid LDGSTS routing by changing default copy to be universalcopy #1674
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
# PyCache files | ||
__pycache__/ | ||
cutlass_library.egg-info/ | ||
cutlass_library.egg-info/ | ||
build/ | ||
.vscode/ | ||
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -485,7 +485,7 @@ int main(int argc, char const **args) { | |
// Tiled copy from Smem to Registers | ||
// Note : CuTe will vectorize this copy if the tiling + swizzling above were right | ||
using TiledCopyS2R = TiledCopy< | ||
Copy_Atom<DefaultCopy, ElementAcc>, | ||
Copy_Atom<AutoVectorizingCopyWithAssumedAlignment<128>, ElementAcc>, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Since the DefaultCopy used to be |
||
Layout< Shape<_128,_16>, | ||
Stride<_16,_1>>, | ||
TileShapeS2R>; | ||
|
@@ -496,9 +496,9 @@ int main(int argc, char const **args) { | |
cutlass::gemm::TagToStrideC_t<LayoutD>, | ||
cutlass::epilogue::thread::LinearCombination<int32_t, 1, int32_t, int32_t>, | ||
SmemLayout, | ||
Copy_Atom<DefaultCopy, ElementAcc>, | ||
Copy_Atom<AutoVectorizingCopyWithAssumedAlignment<128>, ElementAcc>, | ||
TiledCopyS2R, | ||
Copy_Atom<DefaultCopy, ElementOutput>>>; | ||
Copy_Atom<AutoVectorizingCopyWithAssumedAlignment<128>, ElementOutput>>>; | ||
|
||
// | ||
// Assembling the GemmKernel | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -109,7 +109,7 @@ template <class T, class GmemLayout, class RmemTiler> | |
void | ||
test_copy_vectorization(GmemLayout gmem_layout, RmemTiler rmem_tiler) | ||
{ | ||
test_copy_vectorization<T>(DefaultCopy{}, gmem_layout, rmem_tiler); | ||
test_copy_vectorization<T>(AutoVectorizingCopyWithAssumedAlignment<128>{}, gmem_layout, rmem_tiler); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Since the DefaultCopy used to be |
||
} | ||
|
||
TEST(SM70_CuTe_Volta, SimpleVec) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm hoping this is okay to add? I've a habit of just doing
git add .
for small repos, and it gets annoying when I accidentally commit my build directory.