Skip to content

Commit

Permalink
x86/xen/time: setup vcpu 0 time info page
Browse files Browse the repository at this point in the history
In order to support pvclock vdso on Xen we need to set up the time
info page for vcpu 0 and register the page with Xen using the
VCPUOP_register_vcpu_time_memory_area hypercall. This hypercall
will also forcefully update the pvti which will set some of the
necessary flags for vdso. Afterwards we check if it supports the
PVCLOCK_TSC_STABLE_BIT flag which is mandatory for having
vdso/vsyscall support. And if so, it will set the cpu 0 pvti that
will be later on used when mapping the vdso image.

The xen headers are also updated to include the new hypercall for
registering the secondary vcpu_time_info struct.

Signed-off-by: Joao Martins <[email protected]>
Reviewed-by: Juergen Gross <[email protected]>
Reviewed-by: Boris Ostrovsky <[email protected]>
Signed-off-by: Boris Ostrovsky <[email protected]>
  • Loading branch information
jpemartins authored and Boris Ostrovsky committed Nov 8, 2017
1 parent b888808 commit 2229f70
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 1 deletion.
4 changes: 4 additions & 0 deletions arch/x86/xen/suspend.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@

/*
 * Pre-suspend work: first hand the secondary time info area over to
 * xen_save_time_memory_area(), then run the PV-specific pre-suspend step
 * when xen_pv_domain() reports a PV domain.
 */
void xen_arch_pre_suspend(void)
{
	xen_save_time_memory_area();

	/* Nothing further to do unless this is a PV domain. */
	if (!xen_pv_domain())
		return;

	xen_pv_pre_suspend();
}
Expand All @@ -26,6 +28,8 @@ void xen_arch_post_suspend(int cancelled)
xen_pv_post_suspend(cancelled);
else
xen_hvm_post_suspend(cancelled);

xen_restore_time_memory_area();
}

static void xen_vcpu_notify_restore(void *data)
Expand Down
90 changes: 89 additions & 1 deletion arch/x86/xen/time.c
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,92 @@ static const struct pv_time_ops xen_time_ops __initconst = {
.steal_clock = xen_steal_clock,
};

/*
 * Page holding the secondary time info for vcpu 0. Stays NULL unless
 * xen_setup_vsyscall_time_info() managed to register the page with Xen and
 * found PVCLOCK_TSC_STABLE_BIT set in it; the suspend/resume helpers below
 * treat NULL as "nothing to save or restore".
 */
static struct pvclock_vsyscall_time_info *xen_clock __read_mostly;

/*
 * Detach the secondary vcpu_time_info area of vcpu 0 (called from
 * xen_arch_pre_suspend()).
 *
 * Does nothing when no secondary area was set up (xen_clock is NULL).
 * Otherwise VCPUOP_register_vcpu_time_memory_area is issued for vcpu 0 with
 * a NULL address (presumably this unregisters the area on the Xen side).
 * On success the page is zeroed; on failure the page is left untouched and
 * a notice carrying the hypercall's error code is logged.
 */
void xen_save_time_memory_area(void)
{
	struct vcpu_register_time_memory_area t;
	int ret;

	if (!xen_clock)
		return;

	t.addr.v = NULL;

	ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t);
	if (ret != 0)
		/* Terminate the message so the log line is complete. */
		pr_notice("Cannot save secondary vcpu_time_info (err %d)\n",
			  ret);
	else
		clear_page(xen_clock);
}

/*
 * Re-register the secondary vcpu_time_info area of vcpu 0 (called at the
 * end of xen_arch_post_suspend()).
 *
 * Does nothing when no secondary area was set up (xen_clock is NULL).
 * Otherwise the pvti inside the xen_clock page is handed to Xen again via
 * VCPUOP_register_vcpu_time_memory_area. A failure is only logged.
 */
void xen_restore_time_memory_area(void)
{
	struct vcpu_register_time_memory_area t;
	int ret;

	if (!xen_clock)
		return;

	t.addr.v = &xen_clock->pvti;

	ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t);

	/*
	 * We don't disable VCLOCK_PVCLOCK entirely if it fails to register the
	 * secondary time info with Xen or if we migrated to a host without the
	 * necessary flags. In both of these cases the process either sees a
	 * zeroed out pvti or sees no PVCLOCK_TSC_STABLE_BIT bit set. Userspace
	 * checks the latter and if 0, it discards the data in pvti and falls
	 * back to a system call for a reliable timestamp.
	 */
	if (ret != 0)
		/* Terminate the message so the log line is complete. */
		pr_notice("Cannot restore secondary vcpu_time_info (err %d)\n",
			  ret);
}

static void xen_setup_vsyscall_time_info(void)
{
struct vcpu_register_time_memory_area t;
struct pvclock_vsyscall_time_info *ti;
int ret;

ti = (struct pvclock_vsyscall_time_info *)get_zeroed_page(GFP_KERNEL);
if (!ti)
return;

t.addr.v = &ti->pvti;

ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t);
if (ret) {
pr_notice("xen: VCLOCK_PVCLOCK not supported (err %d)\n", ret);
free_page((unsigned long)ti);
return;
}

/*
* If primary time info had this bit set, secondary should too since
* it's the same data on both just different memory regions. But we
* still check it in case hypervisor is buggy.
*/
if (!(ti->pvti.flags & PVCLOCK_TSC_STABLE_BIT)) {
t.addr.v = NULL;
ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area,
0, &t);
if (!ret)
free_page((unsigned long)ti);

pr_notice("xen: VCLOCK_PVCLOCK not supported (tsc unstable)\n");
return;
}

xen_clock = ti;
pvclock_set_pvti_cpu0_va(xen_clock);

xen_clocksource.archdata.vclock_mode = VCLOCK_PVCLOCK;
}

static void __init xen_time_init(void)
{
struct pvclock_vcpu_time_info *pvti;
Expand Down Expand Up @@ -401,8 +487,10 @@ static void __init xen_time_init(void)
* bit is supported hence speeding up Xen clocksource.
*/
pvti = &__this_cpu_read(xen_vcpu)->time;
if (pvti->flags & PVCLOCK_TSC_STABLE_BIT)
if (pvti->flags & PVCLOCK_TSC_STABLE_BIT) {
pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
xen_setup_vsyscall_time_info();
}

xen_setup_runstate_info(cpu);
xen_setup_timer(cpu);
Expand Down
2 changes: 2 additions & 0 deletions arch/x86/xen/xen-ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ void xen_setup_runstate_info(int cpu);
void xen_teardown_timer(int cpu);
u64 xen_clocksource_read(void);
void xen_setup_cpu_clockevents(void);
/* Called from xen_arch_pre_suspend(), before the PV pre-suspend step. */
void xen_save_time_memory_area(void);
/* Called at the end of xen_arch_post_suspend(). */
void xen_restore_time_memory_area(void);
void __init xen_init_time_ops(void);
void __init xen_hvm_init_time_ops(void);

Expand Down
42 changes: 42 additions & 0 deletions include/xen/interface/vcpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,4 +178,46 @@ DEFINE_GUEST_HANDLE_STRUCT(vcpu_register_vcpu_info);

/* Send an NMI to the specified VCPU. @extra_arg == NULL. */
#define VCPUOP_send_nmi 11

/*
 * Get the physical ID information for a pinned vcpu's underlying physical
 * processor. The physical ID information is architecture-specific.
 * On x86: id[31:0]=apic_id, id[63:32]=acpi_id.
 * This command returns -EINVAL if it is not a valid operation for this VCPU.
 */
#define VCPUOP_get_physid 12 /* arg == vcpu_get_physid_t */
struct vcpu_get_physid {
uint64_t phys_id;
};
DEFINE_GUEST_HANDLE_STRUCT(vcpu_get_physid);
/* Low 32 bits of a phys_id value, truncated to uint32_t (x86 apic_id). */
#define xen_vcpu_physid_to_x86_apicid(physid) ((uint32_t)(physid))
/* Bits 63:32 of a phys_id value, truncated to uint32_t (x86 acpi_id). */
#define xen_vcpu_physid_to_x86_acpiid(physid) ((uint32_t)((physid) >> 32))

/*
 * Register a memory location to get a secondary copy of the vcpu time
 * parameters. The master copy still exists as part of the vcpu shared
 * memory area, and this secondary copy is updated whenever the master copy
 * is updated (and using the same versioning scheme for synchronisation).
 *
 * The intent is that this copy may be mapped (RO) into userspace so
 * that usermode can compute system time using the time info and the
 * tsc. Usermode will see an array of vcpu_time_info structures, one
 * for each vcpu, and choose the right one by an existing mechanism
 * which allows it to get the current vcpu number (such as via a
 * segment limit). It can then apply the normal algorithm to compute
 * system time from the tsc.
 *
 * @extra_arg == pointer to vcpu_register_time_memory_area structure.
 */
#define VCPUOP_register_vcpu_time_memory_area 13
DEFINE_GUEST_HANDLE_STRUCT(vcpu_time_info);
struct vcpu_register_time_memory_area {
/* Three views of the same address of the secondary time info. */
union {
/* Guest handle form. */
GUEST_HANDLE(vcpu_time_info) h;
/* Plain pointer form. */
struct pvclock_vcpu_time_info *v;
/* NOTE(review): presumably fixes the union at 64 bits -- confirm. */
uint64_t p;
} addr;
};
DEFINE_GUEST_HANDLE_STRUCT(vcpu_register_time_memory_area);

#endif /* __XEN_PUBLIC_VCPU_H__ */

0 comments on commit 2229f70

Please sign in to comment.