Skip to content

[SYCL][NVPTX][AMDGCN] Move devicelib cmath to header #18706

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 42 commits into from
Jul 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
b139fa4
[SYCL][NVPTX][AMDGCN] Move devicelib cmath to header
npmiller May 27, 2025
e942076
[SYCL] Fixup attribute handling
npmiller May 29, 2025
d192f33
[SYCL] Use hasAttr instead of hasExplicitAttr
npmiller May 29, 2025
a82cebc
[SYCL] Update fallback header
npmiller May 29, 2025
b3574fb
[SYCL] Remove sycl-libdevice-cmath.cpp test
npmiller May 29, 2025
3d2aa44
[SYCL] Add missing abs
npmiller May 29, 2025
8ce1d93
[SYCL] Fix overloadble requirement for sycl_device_only
npmiller Jun 9, 2025
d3b2988
[SYCL] Add device only docs
npmiller Jun 9, 2025
350aec6
[SYCL] Add initial test for sycl-device-only
npmiller Jun 9, 2025
2c119ae
[SYCL] Add diagnostic for host side sycl_device_only
npmiller Jun 10, 2025
605eed6
[SYCL] Block sycl_device_only emission on host side
npmiller Jun 11, 2025
172e7f7
Revert "[SYCL] Add diagnostic for host side sycl_device_only"
npmiller Jun 11, 2025
bb9fc66
[SYCL] Cleanup documentation and comments
npmiller Jun 11, 2025
06474cb
[SYCL] Fix attribute emission handling
npmiller Jun 12, 2025
59c6edf
[SYCL] Fix formatting
npmiller Jun 12, 2025
1a6e0f5
[SYCL] Rename variable
npmiller Jun 13, 2025
5df06cf
[SYCL] More fallback header improvements
npmiller Jun 13, 2025
e0cd399
[SYCL] Add nearbyint and rint to devicelib tests
npmiller Jun 13, 2025
8affa7a
[SYCL] Fix formatting
npmiller Jun 13, 2025
8d9733d
[SYCL] Add SYCL_EXTERNAL to neabyint and rint
npmiller Jun 13, 2025
ec84f57
[SYCL][E2E] Remove nearbyint from test and stl wrapper
npmiller Jun 16, 2025
9db7bde
[SYCL] Don't leak macros from cmath-fallback.h
npmiller Jun 18, 2025
d8a273b
[SYCL] Fix if/else/return formatting
npmiller Jun 25, 2025
f53840b
[SYCL] Cleanup use of isSYCL() in SemaOverload
npmiller Jun 25, 2025
8477da7
[SYCL] Add missing copyright notices
npmiller Jun 27, 2025
34d26f4
[SYCL] Re-use DDI in CodeGenModule
npmiller Jun 27, 2025
4a576c4
[SYCL] Cleanup and complete fallback header
npmiller Jul 1, 2025
728d557
[SYCL] Add cmath-fallback header test
npmiller Jul 1, 2025
6ac4ba1
[SYCL] Fix formatting
npmiller Jul 1, 2025
d9713d5
[SYCL] Fixup promotion overloads
npmiller Jul 2, 2025
56d0e33
[SYCL] Skip sycl.hpp in lit test
npmiller Jul 2, 2025
459ed03
[SYCL] Fixup SFINAE
npmiller Jul 2, 2025
a84f1e3
[SYCL] Minor cleanups
npmiller Jul 3, 2025
b8bfac4
Merge branch 'sycl' into rip-libdevice
npmiller Jul 3, 2025
2bdd6a5
[SYCL] Rename cmath header
npmiller Jul 11, 2025
ea6a3c5
[SYCL] Switch attribute from GNU to Clang
npmiller Jul 11, 2025
0a1508f
Merge branch 'sycl' into rip-libdevice
npmiller Jul 11, 2025
fba7042
[SYCL] Update copyright header
npmiller Jul 14, 2025
5d13b1d
Mark __sycl_cmath_wrapper_impl.hpp as expected to fail self-contained…
bader Jul 14, 2025
ef1e406
Make __sycl_cmath_wrapper_impl.hpp more upstream-able.
bader Jul 14, 2025
be69cdd
Update the header guard macro name.
bader Jul 14, 2025
6d52c97
[SYCL] Make sycl_device and sycl_device_only incompatible
npmiller Jul 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions clang/include/clang/Basic/Attr.td
Original file line number Diff line number Diff line change
Expand Up @@ -1607,6 +1607,14 @@ def SYCLDevice : InheritableAttr {
let Documentation = [SYCLDeviceDocs];
}

def SYCLDeviceOnly : InheritableAttr {
let Spellings = [Clang<"sycl_device_only">];
let Subjects = SubjectList<[Function]>;
let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost];
let Documentation = [SYCLDeviceOnlyDocs];
}
def : MutualExclusions<[SYCLDevice, SYCLDeviceOnly]>;

def SYCLGlobalVar : InheritableAttr {
let Spellings = [GNU<"sycl_global_var">];
let Subjects = SubjectList<[GlobalStorageNonLocalVar], ErrorDiag>;
Expand Down
14 changes: 14 additions & 0 deletions clang/include/clang/Basic/AttrDocs.td
Original file line number Diff line number Diff line change
Expand Up @@ -4518,6 +4518,20 @@ implicitly inherit this attribute.
}];
}

def SYCLDeviceOnlyDocs : Documentation {
let Category = DocCatFunction;
let Heading = "sycl_device_only";
let Content = [{
This attribute can only be applied to functions and indicates that the function
is only available for the device. It allows functions marked with it to
overload existing functions without the attribute, in which case the overload
with the attribute will be used on the device side and the overload without
will be used on the host side. Note: as opposed to ``sycl_device`` this does
not mark the function as being exported, both attributes are incompatible and
can't be used together.
}];
}

def RISCVInterruptDocs : Documentation {
let Category = DocCatFunction;
let Heading = "interrupt (RISC-V)";
Expand Down
7 changes: 7 additions & 0 deletions clang/lib/AST/Decl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3729,6 +3729,13 @@ unsigned FunctionDecl::getBuiltinID(bool ConsiderWrapperFunctions) const {
!(BuiltinID == Builtin::BIprintf || BuiltinID == Builtin::BImalloc))
return 0;

// SYCL doesn't have a device-side standard library. SYCLDeviceOnlyAttr may
// be used to provide device-side definitions of standard functions, so
// anything with that attribute shouldn't be treated as a builtin.
if (Context.getLangOpts().isSYCL() && hasAttr<SYCLDeviceOnlyAttr>()) {
return 0;
}

// As AMDGCN implementation of OpenMP does not have a device-side standard
// library, none of the predefined library functions except printf and malloc
// should be treated as a builtin i.e. 0 should be returned for them.
Expand Down
7 changes: 2 additions & 5 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2782,10 +2782,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
GenerateIntrinsics =
ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt;
}
bool IsSYCLDeviceWithoutIntrinsics =
getLangOpts().SYCLIsDevice &&
(getTarget().getTriple().isNVPTX() || getTarget().getTriple().isAMDGCN());
if (GenerateIntrinsics && !IsSYCLDeviceWithoutIntrinsics) {
if (GenerateIntrinsics) {
switch (BuiltinIDIfNoAsmLabel) {
case Builtin::BIacos:
case Builtin::BIacosf:
Expand Down Expand Up @@ -3885,7 +3882,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_modf:
case Builtin::BI__builtin_modff:
case Builtin::BI__builtin_modfl:
if (Builder.getIsFPConstrained() || IsSYCLDeviceWithoutIntrinsics)
if (Builder.getIsFPConstrained())
break; // TODO: Emit constrained modf intrinsic once one exists.
return RValue::get(emitModfBuiltin(*this, E, Intrinsic::modf));
case Builtin::BI__builtin_isgreater:
Expand Down
34 changes: 34 additions & 0 deletions clang/lib/CodeGen/CodeGenModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4323,6 +4323,12 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
}
}

// Don't emit 'sycl_device_only' function in SYCL host compilation.
if (LangOpts.SYCLIsHost && isa<FunctionDecl>(Global) &&
Global->hasAttr<SYCLDeviceOnlyAttr>()) {
return;
}

if (LangOpts.OpenMP) {
// If this is OpenMP, check if it is legal to emit this global normally.
if (OpenMPRuntime && OpenMPRuntime->emitTargetGlobal(GD))
Expand Down Expand Up @@ -4412,6 +4418,34 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
}
}

// When using SYCLDeviceOnlyAttr, there can be two functions with the same
// mangling, the host function and the device overload. So when compiling for
// device we need to make sure we're selecting the SYCLDeviceOnlyAttr
// overload and dropping the host overload.
if (LangOpts.SYCLIsDevice) {
StringRef MangledName = getMangledName(GD);
auto DDI = DeferredDecls.find(MangledName);
Comment on lines +4426 to +4427
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems like a potentially expensive thing to do for every function to be emitted.

The changes to overload resolution should already ensure that the right declaration is placed on the deferred decls list due to ODR-use. I assume that the only other way that a function gets on this list is if it is a non-inline function defined in the translation unit. But those functions shouldn't be emitted for a SYCL device unless ODR-used anyway. So what are the scenarios that require checking for the "wrong" decl being added to the deferred decls list?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a very good point. I ran into this while trying to be thorough in the sycl-device-only.cpp lit test.

But in that lit test we're compiling with -cc1 -fsycl-is-device, which I believe will skip the regular SYCL host/device split stage that would get rid of the extra overload.

So I think you're correct that this case should never happen in regular SYCL compilation, but it's very easy to trigger with -fsycl-is-device, but it's probably fair to say that this type of device only code is invalid. I'll look into removing this condition block and adding a diagnostic instead.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps one possibility is a function that is declared SYCL_EXTERNAL and defined in the translation unit? I assume those functions are emitted for the device target regardless of ODR-use. Something like the following would then be a problem (and should thus be diagnosed as an error to prevent a symbol conflict).

SYCL_EXTERNAL void f() {}
__attribute__((sycl_device_only)) void f() {}

Related question: Is it ok to declare a function SYCL_EXTERNAL and later define it as sycl_device_only? This is probably worth a test.

SYCL_EXTERNAL void g();
__attribute__((sycl_device_only)) void g() {}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SYCL_EXTERNAL void f() {}
__attribute__((sycl_device_only)) void f() {}

That does trigger the issue as well, but would get caught in the diagnsotic I'm looking at.

SYCL_EXTERNAL void g();
__attribute__((sycl_device_only)) void g() {}

I believe this should be fine, however without this condition block, I just found out that this:

SYCL_EXTERNAL void g();
void g() {};
__attribute__((sycl_device_only)) void g() {}

ends up selecting the "host" g in device code which is quite strange.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After some more investigation, it seems to me that this might still be the least disruptive solution.

Apparently the host overloads are still around when we reach CodeGen, even if they're not being called from the kernels. So we need to handle that here one way or another. It differs from regular overloading because we're overloading on an attribute so both functions have identical mangling. While at the AST level we're calling the right overload, when we reach CodeGen it mostly relies on mangling to connect the functions to their uses, which means that we lose the information of which overload was meant to be called and emitting either one can work.

So without this special handling CodeGen will just emit one or the other overload based on which one it processes first.

For example in my test sample, without this special handling, this:

void f() {}
__attribute__((sycl_device_only)) void f() {}

Ends up emitting the sycl_device_only overload, but this:

__attribute__((sycl_device_only)) void f() {}
void f() {}

Ends up emitting the "host" overload.

Adding sycl_device in the mix just makes the problem more obvious because it forces the compiler to try to actually emit both overloads which results in an error, but even without it the issue is there.

I'm sure there might be other, potentially cleaner ways of handling this, but I suspect they might require larger changes and be less contained. I'm also not seeing any obvious compilation performance issues, so I think we should be okay to go with this, what do you think?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for that analysis @npmiller! I went to open an issue to follow up on this, but then I saw that we already do effectively the same thing for SYCL-CUDA

// For SYCL compilation of CUDA sources,
if (LangOpts.isSYCL() && LangOpts.CUDA && !LangOpts.CUDAIsDevice) {
// in case of SYCL-CUDA-host,
if (LangOpts.SYCLIsHost) {
if (Global->hasAttr<CUDAHostAttr>()) {
// remove already present __device__ function.
auto DDI = DeferredDecls.find(MangledName);
if (DDI != DeferredDecls.end())
DeferredDecls.erase(DDI);
} else if (Global->hasAttr<CUDADeviceAttr>()) {
// do not insert a __device__ function if a __host__ one is present.
auto DDI = DeferredDecls.find(MangledName);
if (DDI != DeferredDecls.end())
return;
}
}
// in case of SYCL-CUDA-device,
if (LangOpts.SYCLIsDevice) {
if (Global->hasAttr<CUDADeviceAttr>()) {
// remove already present __host__ function.
auto DDI = DeferredDecls.find(MangledName);
if (DDI != DeferredDecls.end())
DeferredDecls.erase(DDI);
} else if (Global->hasAttr<CUDAHostAttr>()) {
// do not insert a __host__ function if a __device__ one is present.
auto DDI = DeferredDecls.find(MangledName);
if (DDI != DeferredDecls.end())
return;
}
}
}

Based on that, I'm inclined to not worry about this; particularly since I don't see a strategy to do better. However, two things I'd like to check first to ensure there aren't additional issues:

  1. Based on your analysis, I think Manglings, MangledDeclNames, and DeferredDecls will remain consistent in that lookup for a mangled name in each will return a consistent set of declarations for the right entity. Does that sound right?
  2. I wonder if it is possible for the host definition to be emitted before the EmitGlobal() is called for the device-only definition. Can you try the following test case to ensure a symbol conflict isn't reported and that both g() and h() return 2?
int f() { return 1; }
[[clang::sycl_device]] int g() { return f(); }
[[clang::sycl_device_only]] int f() { return 2; }
[[clang::sycl_device]] int h() { return f(); }

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To be fair the SYCL for CUDA support doing something similar but it's pretty hacky itself and it's also guarded behind a flag so it's a bit more contained than this patch (I believe without the sycl-cuda compat flag LangOpts.isSYCL() && LangOpts.CUDA would never be true, even when compiling SYCL for Nvidia targets).

I think a possible option to do better would be to hook the attribute into the function multiversioning support, I only noticed this recently but it seems pretty similar to what we're trying to do here. It might be a fair bit of work though as it looks like it has a lot of special handling in both Sema and CodeGen.

  1. At a glance I think this is right, but I'll have another look at the code tomorrow and circle back.
  2. That test case works fine (compiled with -fsycl -fsycl-device-only -Xclang -disable-llvm-passes -S -emit-llvm):
; Function Attrs: convergent mustprogress norecurse nounwind                                                                                                                                  
define dso_local spir_func noundef i32 @_Z1gv() #0 !srcloc !6 {                                                                                                                               
entry:                                                                                                                                                                                        
  %retval = alloca i32, align 4                                                                                                                                                               
  %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4)                                                                                                                              
  %call = call spir_func noundef i32 @_Z1fv() #2                                                                                                                                              
  ret i32 %call                                                                                                                                                                               
}                                                                                                                                                                                             
                                                                                                                                                                                              
; Function Attrs: convergent mustprogress norecurse nounwind                                                                                                                                  
define linkonce_odr dso_local spir_func noundef i32 @_Z1fv() #1 !srcloc !7 {                                                                                                                  
entry:                                                                                                                                                                                        
  %retval = alloca i32, align 4                                                                                                                                                               
  %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4)                                                                                                                              
  ret i32 2                                                                                                                                                                                   
}                                                                                                                                                                                             
                                                                                                                                                                                              
; Function Attrs: convergent mustprogress norecurse nounwind                                                                                                                                  
define dso_local spir_func noundef i32 @_Z1hv() #0 !srcloc !8 {                                                                                                                               
entry:                                                                                                                                                                                        
  %retval = alloca i32, align 4                                                                                                                                                               
  %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4)                                                                                                                              
  %call = call spir_func noundef i32 @_Z1fv() #2                                                                                                                                              
  ret i32 %call                                                                                                                                                                               
} 

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function multiversioning depends on symbol mangling to differentiate the target functions with selection of which function to call decided at run-time. Unfortunately, I don't think it will help with this case.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Having another look at (1) I think it still sounds right, and since multi-versioning doesn't apply I think we should be good with this solution, thanks for the reviews and discussion!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good. Thanks for following up!

// If we have an existing declaration with the same mangling for this
// symbol it may be a SYCLDeviceOnlyAttr case.
if (DDI != DeferredDecls.end()) {
auto *PreviousGlobal = cast<ValueDecl>(DDI->second.getDecl());
// If the host declaration was already processed, replace it with the
// device only declaration.
if (!PreviousGlobal->hasAttr<SYCLDeviceOnlyAttr>() &&
Global->hasAttr<SYCLDeviceOnlyAttr>()) {
DeferredDecls[MangledName] = GD;
return;
}

// If the device only declaration was already processed, skip the
// host declaration.
if (PreviousGlobal->hasAttr<SYCLDeviceOnlyAttr>() &&
!Global->hasAttr<SYCLDeviceOnlyAttr>()) {
return;
}
}
}

// clang::ParseAST ensures that we emit the SYCL devices at the end, so
// anything that is a device (or indirectly called) will be handled later.
if (LangOpts.SYCLIsDevice && MustBeEmitted(Global)) {
Expand Down
20 changes: 20 additions & 0 deletions clang/lib/Sema/SemaDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1486,6 +1486,17 @@ void Sema::ActOnExitFunctionContext() {
static bool AllowOverloadingOfFunction(const LookupResult &Previous,
ASTContext &Context,
const FunctionDecl *New) {
// SYCLDeviceOnlyAttr allows device side overloads of SYCL function, but it
// is incompatible with SYCLDeviceAttr, so don't allow overloads when both
// attributes are present.
if (Context.getLangOpts().isSYCL() &&
Previous.getResultKind() == LookupResultKind::Found &&
((New->hasAttr<SYCLDeviceOnlyAttr>() &&
Previous.getFoundDecl()->hasAttr<SYCLDeviceAttr>()) ||
(New->hasAttr<SYCLDeviceAttr>() &&
Previous.getFoundDecl()->hasAttr<SYCLDeviceOnlyAttr>())))
return false;

if (Context.getLangOpts().CPlusPlus || New->hasAttr<OverloadableAttr>())
return true;

Expand Down Expand Up @@ -3702,6 +3713,11 @@ bool Sema::MergeFunctionDecl(FunctionDecl *New, NamedDecl *&OldD, Scope *S,
return true;
}

// Never merge SYCLDeviceOnlyAttr functions in their host variant
if (getLangOpts().isSYCL() &&
Old->hasAttr<SYCLDeviceOnlyAttr>() != New->hasAttr<SYCLDeviceOnlyAttr>())
return false;

diag::kind PrevDiag;
SourceLocation OldLocation;
std::tie(PrevDiag, OldLocation) =
Expand Down Expand Up @@ -7354,6 +7370,10 @@ static bool isIncompleteDeclExternC(Sema &S, const T *D) {
if (S.getLangOpts().CUDA && (D->template hasAttr<CUDADeviceAttr>() ||
D->template hasAttr<CUDAHostAttr>()))
return false;

// So does SYCL's device_only attribute.
if (S.getLangOpts().isSYCL() && D->template hasAttr<SYCLDeviceOnlyAttr>())
return false;
}
return D->isExternC();
}
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/Sema/SemaDeclAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7224,6 +7224,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
case ParsedAttr::AT_SYCLDevice:
S.SYCL().handleSYCLDeviceAttr(D, AL);
break;
case ParsedAttr::AT_SYCLDeviceOnly:
handleSimpleAttribute<SYCLDeviceOnlyAttr>(S, D, AL);
break;
case ParsedAttr::AT_SYCLScope:
S.SYCL().handleSYCLScopeAttr(D, AL);
break;
Expand Down
35 changes: 35 additions & 0 deletions clang/lib/Sema/SemaOverload.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1629,6 +1629,23 @@ static bool IsOverloadOrOverrideImpl(Sema &SemaRef, FunctionDecl *New,
}
}

// Allow overloads with SYCLDeviceOnlyAttr
if (SemaRef.getLangOpts().isSYCL() && (Old->hasAttr<SYCLDeviceOnlyAttr>() !=
New->hasAttr<SYCLDeviceOnlyAttr>())) {
// SYCLDeviceOnlyAttr and SYCLDeviceAttr functions can't overload
if (((New->hasAttr<SYCLDeviceOnlyAttr>() &&
Old->hasAttr<SYCLDeviceAttr>()) ||
(New->hasAttr<SYCLDeviceAttr>() &&
Old->hasAttr<SYCLDeviceOnlyAttr>()))) {
SemaRef.Diag(New->getLocation(), diag::err_redefinition)
<< New->getDeclName();
SemaRef.notePreviousDefinition(Old, New->getLocation());
return false;
}

return true;
}

// The signatures match; this is not an overload.
return false;
}
Expand Down Expand Up @@ -11020,6 +11037,15 @@ bool clang::isBetterOverloadCandidate(
S.CUDA().IdentifyPreference(Caller, Cand2.Function);
}

// In SYCL device compilation mode prefer the overload with the
// SYCLDeviceOnly attribute.
if (S.getLangOpts().SYCLIsDevice && Cand1.Function && Cand2.Function) {
if (Cand1.Function->hasAttr<SYCLDeviceOnlyAttr>() !=
Cand2.Function->hasAttr<SYCLDeviceOnlyAttr>()) {
return Cand1.Function->hasAttr<SYCLDeviceOnlyAttr>();
}
}

// General member function overloading is handled above, so this only handles
// constructors with address spaces.
// This only handles address spaces since C++ has no other
Expand Down Expand Up @@ -11374,6 +11400,15 @@ OverloadingResult OverloadCandidateSet::BestViableFunctionImpl(
if (S.getLangOpts().CUDA)
CudaExcludeWrongSideCandidates(S, Candidates);

// In SYCL host compilation remove candidates marked SYCLDeviceOnly.
if (S.getLangOpts().SYCLIsHost) {
auto IsDeviceCand = [&](const OverloadCandidate *Cand) {
return Cand->Viable && Cand->Function &&
Cand->Function->hasAttr<SYCLDeviceOnlyAttr>();
};
llvm::erase_if(Candidates, IsDeviceCand);
}

Best = end();
for (auto *Cand : Candidates) {
Cand->Best = false;
Expand Down
37 changes: 37 additions & 0 deletions clang/test/CodeGenSYCL/sycl-device-only.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECKD
// RUN: %clang_cc1 -fsycl-is-host -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECKH
// Test code generation for sycl_device_only attribute.

// Verify that the device overload is used on device.
//
// CHECK-LABEL: _Z3fooi
// CHECKH: %add = add nsw i32 %0, 10
// CHECKD: %add = add nsw i32 %0, 20
int foo(int a) { return a + 10; }
__attribute__((sycl_device_only)) int foo(int a) { return a + 20; }

// Use a `sycl_device` function as entry point
__attribute__((sycl_device)) int bar(int b) { return foo(b); }

// Verify that the order of declaration doesn't change the behavior.
//
// CHECK-LABEL: _Z3fooswapi
// CHECKH: %add = add nsw i32 %0, 10
// CHECKD: %add = add nsw i32 %0, 20
__attribute__((sycl_device_only)) int fooswap(int a) { return a + 20; }
int fooswap(int a) { return a + 10; }

// Use a `sycl_device` function as entry point.
__attribute__((sycl_device)) int barswap(int b) { return fooswap(b); }

// Verify that in extern C the attribute enables mangling.
extern "C" {
// CHECK-LABEL: _Z3fooci
// CHECKH: %add = add nsw i32 %0, 10
// CHECKD: %add = add nsw i32 %0, 20
int fooc(int a) { return a + 10; }
__attribute__((sycl_device_only)) int fooc(int a) { return a + 20; }

// Use a `sycl_device` function as entry point.
__attribute__((sycl_device)) int barc(int b) { return fooc(b); }
}
Loading
Loading