@@ -708,3 +708,55 @@ if test x"$Ac_cachevar" = x"yes"; then
708
708
fi
709
709
undefine ( [ Ac_cachevar] ) dnl
710
710
] ) # PGAC_AVX512_POPCNT_INTRINSICS
711
+
# PGAC_SVE_POPCNT_INTRINSICS
# --------------------------
# Check if the compiler supports the SVE popcount instructions using the
# svptrue_b64, svdup_u64, svcntb, svld1_u64, svld1_u8, svadd_u64_x,
# svcnt_u64_x, svcnt_u8_x, svaddv_u64, svaddv_u8, svwhilelt_b8_s32,
# svand_n_u64_x, and svand_n_u8_x intrinsic functions.
#
# This is a compile-and-link probe only; the test program is never executed.
# The outcome is cached in pgac_cv_sve_popcnt_intrinsics.
#
# If the intrinsics are supported, sets pgac_sve_popcnt_intrinsics to "yes".
# Otherwise that variable is left untouched.
#
# Note: no whitespace may appear between a macro name and its opening
# parenthesis, or m4 expands the macro without arguments.
AC_DEFUN([PGAC_SVE_POPCNT_INTRINSICS],
[define([Ac_cachevar], [AS_TR_SH([pgac_cv_sve_popcnt_intrinsics])])dnl
AC_CACHE_CHECK([for svcnt_x], [Ac_cachevar],
[AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <arm_sve.h>

	char buf[128];

	/*
	 * Enable SVE for this one function only, when the compiler offers the
	 * target attribute, so no extra command-line flags are required.
	 */
	#if defined(__has_attribute) && __has_attribute (target)
	__attribute__((target("arch=armv8-a+sve")))
	#endif
	static int popcount_test(void)
	{
		svbool_t	pred = svptrue_b64();
		svuint8_t	vec8;
		svuint64_t	accum1 = svdup_u64(0),
					accum2 = svdup_u64(0),
					vec64;
		char	   *p = buf;
		uint64_t	popcnt,
					mask = 0x5555555555555555;

		/* first 64-bit lane accumulator: masked load, count, add */
		vec64 = svand_n_u64_x(pred, svld1_u64(pred, (const uint64_t *) p), mask);
		accum1 = svadd_u64_x(pred, accum1, svcnt_u64_x(pred, vec64));
		p += svcntb();

		/* second 64-bit lane accumulator, same sequence */
		vec64 = svand_n_u64_x(pred, svld1_u64(pred, (const uint64_t *) p), mask);
		accum2 = svadd_u64_x(pred, accum2, svcnt_u64_x(pred, vec64));
		p += svcntb();

		/* horizontal sum of both accumulators */
		popcnt = svaddv_u64(pred, svadd_u64_x(pred, accum1, accum2));

		/* byte-wise variants, using a while-less-than predicate */
		pred = svwhilelt_b8_s32(0, sizeof(buf));
		vec8 = svand_n_u8_x(pred, svld1_u8(pred, (const uint8_t *) p), 0x55);
		return (int) (popcnt + svaddv_u8(pred, svcnt_u8_x(pred, vec8)));
	}]],
  [return popcount_test();])],
  [Ac_cachevar=yes],
  [Ac_cachevar=no])])
if test x"$Ac_cachevar" = x"yes"; then
  pgac_sve_popcnt_intrinsics=yes
fi
undefine([Ac_cachevar])dnl
])# PGAC_SVE_POPCNT_INTRINSICS
0 commit comments