@@ -14176,14 +14176,17 @@
   dispatch:
     CUDA: _scaled_dot_product_flash_attention_backward_cuda
 
-- func: _scaled_dot_product_efficient_attention(Tensor query, Tensor key, Tensor value, bool compute_log_sumexp, bool is_causal=False, *, float? scale=None) -> (Tensor output, Tensor log_sumexp)
+- func: _scaled_dot_product_efficient_attention(Tensor query, Tensor key, Tensor value, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> (Tensor output, Tensor log_sumexp, Tensor philox_seed, Tensor philox_offset)
   dispatch:
     CUDA: _scaled_dot_product_efficient_attention_cuda
     NestedTensorCUDA: _scaled_dot_product_efficient_attention_nestedtensor_cuda
+  tags: nondeterministic_seeded
 
-- func: _scaled_dot_product_efficient_attention_backward(Tensor grad_out_, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, bool is_causal=False, bool chunk_grad_outputs=False, *, float? scale=None) -> (Tensor, Tensor, Tensor)
+- func: _scaled_dot_product_efficient_attention_backward(Tensor grad_out_, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, float dropout_p, bool is_causal=False, *, float? scale=None) -> (Tensor, Tensor, Tensor)
+  device_check: NoCheck
   dispatch:
     CUDA: _scaled_dot_product_efficient_attention_backward_cuda
+  tags: nondeterministic_seeded
 
 # THIS FUNCTION iS DEPRECATED AND SHOULD BE REMOVED
 - func: _chunk_grad_outputs_efficient_attention(Tensor query, Tensor key, Tensor value, bool is_causal=False) -> bool
@@ -14203,13 +14206,13 @@
     CUDA: _flash_attention_backward
 
 # Returns ouput, logsumexp if compute_logsumexp
-- func: _efficient_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, int? max_seqlen_q, float dropout_p, int custom_mask_type, bool compute_log_sumexp=False, *, float? scale=None, Tensor? causal_diagonal=None, Tensor? seqlen_k=None) -> (Tensor output, Tensor logsumexp)
+- func: _efficient_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, int? max_seqlen_q, float dropout_p, int custom_mask_type, bool compute_log_sumexp=False, *, float? scale=None, Tensor? causal_diagonal=None, Tensor? seqlen_k=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset)
   variants: function
   dispatch:
     CUDA: _efficient_attention_forward
   tags: nondeterministic_seeded
 
-- func: _efficient_attention_backward(Tensor grad_out_, Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor out, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, int max_seqlen_k, int max_seqlen_q, Tensor logsumexp, float dropout_p, Tensor rng_seed, Tensor rng_offset, int custom_mask_type, *, float? scale=None, int? num_splits_key=None) -> (Tensor, Tensor, Tensor, Tensor)
+- func: _efficient_attention_backward(Tensor grad_out_, Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor out, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, int max_seqlen_k, int max_seqlen_q, Tensor logsumexp, float dropout_p, Tensor philox_seed, Tensor philox_offset, int custom_mask_type, *, float? scale=None, int? num_splits_key=None) -> (Tensor, Tensor, Tensor, Tensor)
   device_check: NoCheck
   variants: function
   dispatch:
@@ -14219,7 +14222,13 @@
   variants: function
   dispatch:
     CUDA: triton_scaled_dot_attention
+  tags: nondeterministic_seeded
   autogen: _triton_scaled_dot_attention.out
+
+- func: _fill_mem_eff_dropout_mask_(Tensor(a!) self, float dropout_p, int seed, int offset) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CUDA: _fill_mem_eff_dropout_mask_
   tags: nondeterministic_seeded
 
 - func: _triton_multi_head_attention(Tensor query, Tensor key, Tensor value, int embed_dim, int num_head, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None) -> Tensor
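For context, a minimal sketch of how the updated schemas are exercised from Python, assuming a CUDA build of PyTorch that ships this revision of native_functions.yaml. The shapes, dtype, seed/offset values, and the direct use of torch.ops.aten are illustrative only; the supported entry point remains torch.nn.functional.scaled_dot_product_attention.

import torch

# Illustrative shapes: (batch, num_heads, seq_len, head_dim) in fp16 on CUDA.
q, k, v = (torch.randn(2, 4, 128, 64, device="cuda", dtype=torch.float16) for _ in range(3))

# Per the updated schema above, the efficient-attention forward now accepts
# dropout_p and returns the philox seed/offset tensors alongside output and
# logsumexp, so the backward can replay the same dropout mask.
out, logsumexp, philox_seed, philox_offset = torch.ops.aten._scaled_dot_product_efficient_attention(
    q, k, v, compute_log_sumexp=True, dropout_p=0.1, is_causal=False
)

# The new _fill_mem_eff_dropout_mask_ op fills a preallocated tensor in place
# with the dropout mask implied by a given seed/offset pair, which is mainly
# useful for testing dropout determinism. The mask shape here is an assumption.
mask = torch.empty(2, 4, 128, 128, device="cuda")
torch.ops.aten._fill_mem_eff_dropout_mask_(mask, 0.1, 42, 0)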