Skip to content

Commit

Permalink
Merge pull request FFTW#112 from alexeicolin/PR--armv7-pmccntr-counte…
Browse files Browse the repository at this point in the history
…r-and-docs

Pr  armv7 pmccntr counter and docs
  • Loading branch information
matteo-frigo authored Nov 1, 2017
2 parents b5ccc55 + 2be183c commit 1b64d92
Show file tree
Hide file tree
Showing 3 changed files with 117 additions and 7 deletions.
93 changes: 93 additions & 0 deletions README-perfcnt.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
Performance Counters
====================

FFTW measures execution time in the planning stage, optionally taking advantage
of hardware performance counters. This document describes the supported
counters and additional steps needed to enable each on different architectures.

See `./configure --help` for flags for enabling each supported counter.
See [kernel/cycle.h](kernel/cycle.h) for the code that accesses the counters.

ARMv7-A (armv7a)
================

`CNTVCT`: Virtual Count Register in VMSA
--------------------------------------

A 64-bit counter, part of the Virtual Memory System Architecture (VMSA).
See Section B4.1.34 in the ARM Architecture Reference Manual ARMv7-A/ARMv7-R.

For access from user mode, requires `CNTKCTL.PL0VCTEN == 1`, which must
be set in kernel mode on each CPU:

#define CNTKCTL_PL0VCTEN 0x2 /* B4.1.26 in ARM Architecture Reference Manual */
uint32_t r;
asm volatile("mrc p15, 0, %0, c14, c1, 0" : "=r"(r)); /* read */
r |= CNTKCTL_PL0VCTEN;
asm volatile("mcr p15, 0, %0, c14, c1, 0" :: "r"(r)); /* write */

Kernel module source, *which can be patched with the above code*, is available at:
https://github.com/thoughtpolice/enable_arm_pmu

`PMCCNTR`: Performance Monitors Cycle Count Register in VMSA
----------------------------------------------------------

A 32-bit counter, part of the Virtual Memory System Architecture (VMSA).
See Section B4.1.113 in the ARM Architecture Reference Manual ARMv7-A/ARMv7-R.

For access from user mode, requires user-mode access to PMU to be enabled
(`PMUSERENR.EN == 1`), which must be done from kernel mode on each CPU:

#define PERF_DEF_OPTS (1 | 16)
/* enable user-mode access to counters */
asm volatile("mcr p15, 0, %0, c9, c14, 0" :: "r"(1));
/* Program PMU and enable all counters */
asm volatile("mcr p15, 0, %0, c9, c12, 0" :: "r"(PERF_DEF_OPTS));
asm volatile("mcr p15, 0, %0, c9, c12, 1" :: "r"(0x8000000f));

Kernel module source with the above code is available at:
[GitHub thoughtpolice/enable\_arm\_pmu](https://github.com/thoughtpolice/enable_arm_pmu)

More information:
http://neocontra.blogspot.com/2013/05/user-mode-performance-counters-for.html

ARMv8-A (aarch64)
=================

`CNTVCT_EL0`: Counter-timer Virtual Count Register
------------------------------------------------

A 64-bit counter, part of Generic Registers.
Section D8.5.17 in ARM Architecture Reference Manual ARMv8-A

For user-mode access, requires `CNTKCTL_EL1.EL0VCTEN == 1`, which
must be set from kernel mode for each CPU:

#define CNTKCTL_EL0VCTEN 0x2
uint32_t r;
asm volatile("mrs %0, CNTKCTL_EL1" : "=r"(r)); /* read */
r |= CNTKCTL_EL0VCTEN;
asm volatile("msr CNTKCTL_EL1, %0" :: "r"(r)); /* write */

*WARNING*: Above code was not tested.

`PMCCNTR_EL0`: Performance Monitors Cycle Count Register
------------------------------------------------------

A 64-bit counter, part of Performance Monitors.
Section D8.4.2 in ARM Architecture Reference Manual ARMv8-A

For access from user mode, requires user-mode access to PMU (`PMUSERENR_EL0.EN
== 1`), which must be set from kernel mode for each CPU:

#define PERF_DEF_OPTS (1 | 16)
/* enable user-mode access to counters */
asm volatile("msr PMUSERENR_EL0, %0" :: "r"(1));
/* Program PMU and enable all counters */
asm volatile("msr PMCR_EL0, %0" :: "r"(PERF_DEF_OPTS));
asm volatile("msr PMCNTENSET_EL0, %0" :: "r"(0x8000000f));
asm volatile("msr PMCCFILTR_EL0, %0" :: "r"(0));

Kernel module source with the above code is available at:
[GitHub rdolbeau/enable\_arm\_pmu](https://github.com/rdolbeau/enable_arm_pmu)
or in [Pull Request #2 at thoughtpolice/enable\_arm\_pmu](https://github.com/thoughtpolice/enable_arm_pmu/pull/2)
15 changes: 10 additions & 5 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -200,21 +200,26 @@ if test "$have_neon" = "yes"; then
fi
AM_CONDITIONAL(HAVE_NEON, test "$have_neon" = "yes")

AC_ARG_ENABLE(armv8cyclecounter, [AC_HELP_STRING([--enable-armv8cyclecounter],[enable the cycle counter on ARMv8 via the PMCCNTR_EL0 register. Requires enabling in kernel mode, see <https://github.com/rdolbeau/enable_arm_pmu>])], have_armv8cyclecounter=$enableval)
if test "$have_armv8cyclecounter"x = "yes"x; then
AC_DEFINE(HAVE_ARMV8CC,1,[Define if you have enabled the cycle counter on ARMv8])
dnl ARMv8 PMCCNTR_EL0 cycle counter: --enable-armv8-pmccntr-el0 defines
dnl HAVE_ARMV8_PMCCNTR_EL0.  The help text names README-perfcnt.md, the file
dnl that documents the kernel-mode setup this counter needs.
AC_ARG_ENABLE(armv8-pmccntr-el0, [AC_HELP_STRING([--enable-armv8-pmccntr-el0],[enable the cycle counter on ARMv8 via the PMCCNTR_EL0 register (see README-perfcnt.md for details and mandatory instructions)])], have_armv8pmccntrel0=$enableval)
if test "$have_armv8pmccntrel0"x = "yes"x; then
        AC_DEFINE(HAVE_ARMV8_PMCCNTR_EL0,1,[Define if you have enabled the PMCCNTR_EL0 cycle counter on ARMv8])
fi

AC_ARG_ENABLE(armv8-cntvct-el0, [AC_HELP_STRING([--enable-armv8-cntvct-el0],[enable the cycle counter on ARMv8 via the CNTVCT_EL0 register])], have_armv8cntvctel0=$enableval)
dnl ARMv8 CNTVCT_EL0 counter: --enable-armv8-cntvct-el0 defines
dnl HAVE_ARMV8_CNTVCT_EL0.  The help text names README-perfcnt.md, the file
dnl that documents the kernel-mode setup this counter needs.
AC_ARG_ENABLE(armv8-cntvct-el0, [AC_HELP_STRING([--enable-armv8-cntvct-el0],[enable the cycle counter on ARMv8 via the CNTVCT_EL0 register (see README-perfcnt.md for details and mandatory instructions)])], have_armv8cntvctel0=$enableval)
if test "$have_armv8cntvctel0"x = "yes"x; then
        AC_DEFINE(HAVE_ARMV8_CNTVCT_EL0,1,[Define if you have enabled the CNTVCT_EL0 cycle counter on ARMv8])
fi

AC_ARG_ENABLE(armv7a-cntvct, [AC_HELP_STRING([--enable-armv7a-cntvct],[enable the cycle counter on Armv7a via the CNTVCT register])], have_armv7acntvct=$enableval)
dnl ARMv7-A CNTVCT counter: --enable-armv7a-cntvct defines HAVE_ARMV7A_CNTVCT.
dnl The help text names README-perfcnt.md, the file that documents the
dnl kernel-mode setup this counter needs.
AC_ARG_ENABLE(armv7a-cntvct, [AC_HELP_STRING([--enable-armv7a-cntvct],[enable the cycle counter on Armv7a via the CNTVCT register (see README-perfcnt.md for details and mandatory instructions)])], have_armv7acntvct=$enableval)
if test "$have_armv7acntvct"x = "yes"x; then
        AC_DEFINE(HAVE_ARMV7A_CNTVCT,1,[Define if you have enabled the CNTVCT cycle counter on ARMv7a])
fi

dnl ARMv7-A PMCCNTR cycle counter: --enable-armv7a-pmccntr defines
dnl HAVE_ARMV7A_PMCCNTR.  The help text names README-perfcnt.md, the file
dnl that documents the kernel-mode setup this counter needs.
AC_ARG_ENABLE(armv7a-pmccntr, [AC_HELP_STRING([--enable-armv7a-pmccntr],[enable the cycle counter on Armv7a via the PMCCNTR register (see README-perfcnt.md for details and mandatory instructions)])], have_armv7apmccntr=$enableval)
if test "$have_armv7apmccntr"x = "yes"x; then
        AC_DEFINE(HAVE_ARMV7A_PMCCNTR,1,[Define if you have enabled the PMCCNTR cycle counter on ARMv7a])
fi

AC_ARG_ENABLE(generic-simd128, [AC_HELP_STRING([--enable-generic-simd128],[enable generic (gcc) 128-bit SIMD optimizations])], have_generic_simd128=$enableval, have_generic_simd128=no)
if test "$have_generic_simd128" = "yes"; then
AC_DEFINE(HAVE_GENERIC_SIMD128,1,[Define to enable generic (gcc) 128-bit SIMD optimizations.])
Expand Down
16 changes: 14 additions & 2 deletions kernel/cycle.h
Original file line number Diff line number Diff line change
Expand Up @@ -527,7 +527,19 @@ INLINE_ELAPSED(inline)
#define HAVE_TICK_COUNTER
#endif

#if defined(__aarch64__) && defined(HAVE_ARMV8_CNTVCT_EL0) && !defined(HAVE_ARMV8CC)
/*
 * ARMv7-A: PMCCNTR (Performance Monitors Cycle Count Register), read with
 * "mrc p15, 0, <Rt>, c9, c13, 0".  Selected by HAVE_ARMV7A_PMCCNTR
 * (configure --enable-armv7a-pmccntr).
 *
 * The register is 32 bits wide; its value is zero-extended into the 64-bit
 * ticks type, so the returned count wraps around after 2^32 counts.
 *
 * User-mode reads require PMUSERENR.EN == 1, which has to be set from kernel
 * mode on each CPU beforehand.
 *
 * The !defined(HAVE_TICK_COUNTER) test prevents redefining ticks/getticks
 * when an earlier counter in this file has already been selected.
 * __asm__/__volatile__ are used instead of asm/volatile so the block also
 * compiles in strict ISO C modes (-std=c99, -std=c11).
 */
#if defined(HAVE_ARMV7A_PMCCNTR) && !defined(HAVE_TICK_COUNTER)
typedef uint64_t ticks;
static inline ticks getticks(void)
{
     uint32_t r;

     /* read PMCCNTR (CP15 c9, c13, 0) into r */
     __asm__ __volatile__("mrc p15, 0, %0, c9, c13, 0" : "=r"(r));
     return r; /* zero-extended to 64 bits */
}
INLINE_ELAPSED(inline)
#define HAVE_TICK_COUNTER
#endif

#if defined(__aarch64__) && defined(HAVE_ARMV8_CNTVCT_EL0) && !defined(HAVE_ARMV8_PMCCNTR_EL0)
typedef uint64_t ticks;
static inline ticks getticks(void)
{
Expand All @@ -539,7 +551,7 @@ INLINE_ELAPSED(inline)
#define HAVE_TICK_COUNTER
#endif

#if defined(__aarch64__) && defined(HAVE_ARMV8CC)
#if defined(__aarch64__) && defined(HAVE_ARMV8_PMCCNTR_EL0)
typedef uint64_t ticks;
static inline ticks getticks(void)
{
Expand Down

0 comments on commit 1b64d92

Please sign in to comment.