Skip to content

Commit

Permalink
xen/PMU: Initialization code for Xen PMU
Browse files Browse the repository at this point in the history
Map shared data structure that will hold CPU registers, VPMU context,
V/PCPU IDs of the CPU interrupted by PMU interrupt. Hypervisor fills
this information in its handler and passes it to the guest for further
processing.

Set up PMU VIRQ.

Now that perf infrastructure will assume that PMU is available on a PV
guest we need to be careful and make sure that accesses via RDPMC
instruction don't cause fatal traps by the hypervisor. Provide a nop
RDPMC handler.

For the same reason avoid issuing a warning on a write to APIC's LVTPC.

Both of these will be made functional in later patches.

Signed-off-by: Boris Ostrovsky <[email protected]>
Reviewed-by: David Vrabel <[email protected]>
Signed-off-by: David Vrabel <[email protected]>
  • Loading branch information
Boris Ostrovsky authored and David Vrabel committed Aug 20, 2015
1 parent 5f14154 commit 65d0cf0
Show file tree
Hide file tree
Showing 10 changed files with 398 additions and 9 deletions.
123 changes: 123 additions & 0 deletions arch/x86/include/asm/xen/interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,129 @@ struct vcpu_guest_context {
#endif
};
DEFINE_GUEST_HANDLE_STRUCT(vcpu_guest_context);

/* AMD PMU registers and structures */
struct xen_pmu_amd_ctxt {
/*
* Offsets to counter and control MSRs (relative to xen_pmu_arch.c.amd).
* For PV(H) guests these fields are RO.
*/
uint32_t counters;
uint32_t ctrls;

/* Counter MSRs */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
uint64_t regs[];
#elif defined(__GNUC__)
uint64_t regs[0];
#endif
};

/* Intel PMU registers and structures */
struct xen_pmu_cntr_pair {
uint64_t counter;
uint64_t control;
};

struct xen_pmu_intel_ctxt {
/*
* Offsets to fixed and architectural counter MSRs (relative to
* xen_pmu_arch.c.intel).
* For PV(H) guests these fields are RO.
*/
uint32_t fixed_counters;
uint32_t arch_counters;

/* PMU registers */
uint64_t global_ctrl;
uint64_t global_ovf_ctrl;
uint64_t global_status;
uint64_t fixed_ctrl;
uint64_t ds_area;
uint64_t pebs_enable;
uint64_t debugctl;

/* Fixed and architectural counter MSRs */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
uint64_t regs[];
#elif defined(__GNUC__)
uint64_t regs[0];
#endif
};

/* Sampled domain's registers */
struct xen_pmu_regs {
uint64_t ip;
uint64_t sp;
uint64_t flags;
uint16_t cs;
uint16_t ss;
uint8_t cpl;
uint8_t pad[3];
};

/* PMU flags */
#define PMU_CACHED (1<<0) /* PMU MSRs are cached in the context */
#define PMU_SAMPLE_USER (1<<1) /* Sample is from user or kernel mode */
#define PMU_SAMPLE_REAL (1<<2) /* Sample is from realmode */
#define PMU_SAMPLE_PV (1<<3) /* Sample from a PV guest */

/*
* Architecture-specific information describing state of the processor at
* the time of PMU interrupt.
* Fields of this structure marked as RW for guest should only be written by
* the guest when PMU_CACHED bit in pmu_flags is set (which is done by the
* hypervisor during PMU interrupt). Hypervisor will read updated data in
* XENPMU_flush hypercall and clear PMU_CACHED bit.
*/
struct xen_pmu_arch {
union {
/*
* Processor's registers at the time of interrupt.
* WO for hypervisor, RO for guests.
*/
struct xen_pmu_regs regs;
/*
* Padding for adding new registers to xen_pmu_regs in
* the future
*/
#define XENPMU_REGS_PAD_SZ 64
uint8_t pad[XENPMU_REGS_PAD_SZ];
} r;

/* WO for hypervisor, RO for guest */
uint64_t pmu_flags;

/*
* APIC LVTPC register.
* RW for both hypervisor and guest.
* Only APIC_LVT_MASKED bit is loaded by the hypervisor into hardware
* during XENPMU_flush or XENPMU_lvtpc_set.
*/
union {
uint32_t lapic_lvtpc;
uint64_t pad;
} l;

/*
* Vendor-specific PMU registers.
* RW for both hypervisor and guest (see exceptions above).
* Guest's updates to this field are verified and then loaded by the
* hypervisor into hardware during XENPMU_flush
*/
union {
struct xen_pmu_amd_ctxt amd;
struct xen_pmu_intel_ctxt intel;

/*
* Padding for contexts (fixed parts only, does not include
* MSR banks that are specified by offsets)
*/
#define XENPMU_CTXT_PAD_SZ 128
uint8_t pad[XENPMU_CTXT_PAD_SZ];
} c;
};

#endif /* !__ASSEMBLY__ */

/*
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/xen/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ CFLAGS_mmu.o := $(nostackp)
obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \
time.o xen-asm.o xen-asm_$(BITS).o \
grant-table.o suspend.o platform-pci-unplug.o \
p2m.o apic.o
p2m.o apic.o pmu.o

obj-$(CONFIG_EVENT_TRACING) += trace.o

Expand Down
3 changes: 3 additions & 0 deletions arch/x86/xen/apic.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ static u32 xen_apic_read(u32 reg)

static void xen_apic_write(u32 reg, u32 val)
{
if (reg == APIC_LVTPC)
return;

/* Warn to see if there's any stray references */
WARN(1,"register: %x, value: %x\n", reg, val);
}
Expand Down
12 changes: 11 additions & 1 deletion arch/x86/xen/enlighten.c
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@
#include "mmu.h"
#include "smp.h"
#include "multicalls.h"
#include "pmu.h"

EXPORT_SYMBOL_GPL(hypercall_page);

Expand Down Expand Up @@ -1082,6 +1083,11 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
return ret;
}

unsigned long long xen_read_pmc(int counter)
{
return 0;
}

void xen_setup_shared_info(void)
{
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
Expand Down Expand Up @@ -1216,7 +1222,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.write_msr = xen_write_msr_safe,

.read_tsc = native_read_tsc,
.read_pmc = native_read_pmc,
.read_pmc = xen_read_pmc,

.read_tscp = native_read_tscp,

Expand Down Expand Up @@ -1267,6 +1273,10 @@ static const struct pv_apic_ops xen_apic_ops __initconst = {
static void xen_reboot(int reason)
{
struct sched_shutdown r = { .reason = reason };
int cpu;

for_each_online_cpu(cpu)
xen_pmu_finish(cpu);

if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
BUG();
Expand Down
170 changes: 170 additions & 0 deletions arch/x86/xen/pmu.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
#include <linux/types.h>
#include <linux/interrupt.h>

#include <asm/xen/hypercall.h>
#include <xen/page.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>
#include <xen/interface/xenpmu.h>

#include "xen-ops.h"
#include "pmu.h"

/* x86_pmu.handle_irq definition */
#include "../kernel/cpu/perf_event.h"


/* Shared page between hypervisor and domain */
static DEFINE_PER_CPU(struct xen_pmu_data *, xenpmu_shared);
#define get_xenpmu_data() per_cpu(xenpmu_shared, smp_processor_id())

/* perf callbacks */
static int xen_is_in_guest(void)
{
const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

if (!xenpmu_data) {
pr_warn_once("%s: pmudata not initialized\n", __func__);
return 0;
}

if (!xen_initial_domain() || (xenpmu_data->domain_id >= DOMID_SELF))
return 0;

return 1;
}

static int xen_is_user_mode(void)
{
const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

if (!xenpmu_data) {
pr_warn_once("%s: pmudata not initialized\n", __func__);
return 0;
}

if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_PV)
return (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_USER);
else
return !!(xenpmu_data->pmu.r.regs.cpl & 3);
}

static unsigned long xen_get_guest_ip(void)
{
const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

if (!xenpmu_data) {
pr_warn_once("%s: pmudata not initialized\n", __func__);
return 0;
}

return xenpmu_data->pmu.r.regs.ip;
}

static struct perf_guest_info_callbacks xen_guest_cbs = {
.is_in_guest = xen_is_in_guest,
.is_user_mode = xen_is_user_mode,
.get_guest_ip = xen_get_guest_ip,
};

/* Convert registers from Xen's format to Linux' */
static void xen_convert_regs(const struct xen_pmu_regs *xen_regs,
struct pt_regs *regs, uint64_t pmu_flags)
{
regs->ip = xen_regs->ip;
regs->cs = xen_regs->cs;
regs->sp = xen_regs->sp;

if (pmu_flags & PMU_SAMPLE_PV) {
if (pmu_flags & PMU_SAMPLE_USER)
regs->cs |= 3;
else
regs->cs &= ~3;
} else {
if (xen_regs->cpl)
regs->cs |= 3;
else
regs->cs &= ~3;
}
}

irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id)
{
int ret = IRQ_NONE;
struct pt_regs regs;
const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

if (!xenpmu_data) {
pr_warn_once("%s: pmudata not initialized\n", __func__);
return ret;
}

xen_convert_regs(&xenpmu_data->pmu.r.regs, &regs,
xenpmu_data->pmu.pmu_flags);
if (x86_pmu.handle_irq(&regs))
ret = IRQ_HANDLED;

return ret;
}

bool is_xen_pmu(int cpu)
{
return (per_cpu(xenpmu_shared, cpu) != NULL);
}

void xen_pmu_init(int cpu)
{
int err;
struct xen_pmu_params xp;
unsigned long pfn;
struct xen_pmu_data *xenpmu_data;

BUILD_BUG_ON(sizeof(struct xen_pmu_data) > PAGE_SIZE);

if (xen_hvm_domain())
return;

xenpmu_data = (struct xen_pmu_data *)get_zeroed_page(GFP_KERNEL);
if (!xenpmu_data) {
pr_err("VPMU init: No memory\n");
return;
}
pfn = virt_to_pfn(xenpmu_data);

xp.val = pfn_to_mfn(pfn);
xp.vcpu = cpu;
xp.version.maj = XENPMU_VER_MAJ;
xp.version.min = XENPMU_VER_MIN;
err = HYPERVISOR_xenpmu_op(XENPMU_init, &xp);
if (err)
goto fail;

per_cpu(xenpmu_shared, cpu) = xenpmu_data;

if (cpu == 0)
perf_register_guest_info_callbacks(&xen_guest_cbs);

return;

fail:
pr_warn_once("Could not initialize VPMU for cpu %d, error %d\n",
cpu, err);
free_pages((unsigned long)xenpmu_data, 0);
}

void xen_pmu_finish(int cpu)
{
struct xen_pmu_params xp;

if (xen_hvm_domain())
return;

xp.vcpu = cpu;
xp.version.maj = XENPMU_VER_MAJ;
xp.version.min = XENPMU_VER_MIN;

(void)HYPERVISOR_xenpmu_op(XENPMU_finish, &xp);

free_pages((unsigned long)per_cpu(xenpmu_shared, cpu), 0);
per_cpu(xenpmu_shared, cpu) = NULL;
}
11 changes: 11 additions & 0 deletions arch/x86/xen/pmu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#ifndef __XEN_PMU_H
#define __XEN_PMU_H

#include <xen/interface/xenpmu.h>

irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id);
void xen_pmu_init(int cpu);
void xen_pmu_finish(int cpu);
bool is_xen_pmu(int cpu);

#endif /* __XEN_PMU_H */
Loading

0 comments on commit 65d0cf0

Please sign in to comment.