Skip to content

Commit

Permalink
amd64: stop using top of the thread's kernel stack for FPU user save area
Browse files Browse the repository at this point in the history
Instead, do one more allocation at thread creation time.  This frees
a lot of space on the stack.

Also do not use alloca() for temporary storage in the signal delivery
function sendsig() and the signal return syscall sys_sigreturn().  This
saves an equal amount of space, again at the cost of one more allocation
at thread creation time.

A useful experiment now would be to reduce KSTACK_PAGES.

Reviewed by:	jhb, markj
Tested by:	pho
Sponsored by:	The FreeBSD Foundation
MFC after:	1 week
Differential revision:	https://reviews.freebsd.org/D31954
  • Loading branch information
kostikbel committed Sep 21, 2021
1 parent 0f68294 commit df8dd60
Show file tree
Hide file tree
Showing 7 changed files with 23 additions and 29 deletions.
4 changes: 2 additions & 2 deletions sys/amd64/amd64/exec_machdep.c
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)

if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) {
xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu);
xfpusave = __builtin_alloca(xfpusave_len);
xfpusave = (char *)td->td_md.md_fpu_scratch;
} else {
xfpusave_len = 0;
xfpusave = NULL;
Expand Down Expand Up @@ -674,7 +674,7 @@ set_mcontext(struct thread *td, mcontext_t *mcp)
if (mcp->mc_xfpustate_len > cpu_max_ext_state_size -
sizeof(struct savefpu))
return (EINVAL);
xfpustate = __builtin_alloca(mcp->mc_xfpustate_len);
xfpustate = (char *)td->td_md.md_fpu_scratch;
ret = copyin((void *)mcp->mc_xfpustate, xfpustate,
mcp->mc_xfpustate_len);
if (ret != 0)
Expand Down
2 changes: 2 additions & 0 deletions sys/amd64/amd64/fpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,8 @@ fpuinitstate(void *arg __unused)
xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO);
}

cpu_thread_alloc(&thread0);

saveintr = intr_disable();
stop_emulating();

Expand Down
14 changes: 0 additions & 14 deletions sys/amd64/amd64/machdep.c
Original file line number Diff line number Diff line change
Expand Up @@ -1258,7 +1258,6 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
caddr_t kmdp;
int gsel_tss, x;
struct pcpu *pc;
struct xstate_hdr *xhdr;
uint64_t cr3, rsp0;
pml4_entry_t *pml4e;
pdp_entry_t *pdpe;
Expand Down Expand Up @@ -1564,19 +1563,6 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
msgbufinit(msgbufp, msgbufsize);
fpuinit();

/*
* Reinitialize thread0's stack base now that the xsave area size is
* known. Set up thread0's pcb save area after fpuinit calculated fpu
* save area size. Zero out the extended state header in fpu save area.
*/
set_top_of_stack_td(&thread0);
thread0.td_pcb->pcb_save = get_pcb_user_save_td(&thread0);
bzero(thread0.td_pcb->pcb_save, cpu_max_ext_state_size);
if (use_xsave) {
xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) +
1);
xhdr->xstate_bv = xsave_mask;
}
/* make an initial tss so cpu can get interrupt stack on syscall! */
rsp0 = thread0.td_md.md_stack_base;
/* Ensure the stack is aligned to 16 bytes */
Expand Down
22 changes: 13 additions & 9 deletions sys/amd64/amd64/vm_machdep.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,19 +90,17 @@ void
set_top_of_stack_td(struct thread *td)
{
td->td_md.md_stack_base = td->td_kstack +
td->td_kstack_pages * PAGE_SIZE -
roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN);
td->td_kstack_pages * PAGE_SIZE;
}

struct savefpu *
get_pcb_user_save_td(struct thread *td)
{
vm_offset_t p;

p = td->td_md.md_stack_base;
KASSERT((p % XSAVE_AREA_ALIGN) == 0,
("Unaligned pcb_user_save area ptr %#lx td %p", p, td));
return ((struct savefpu *)p);
KASSERT(((vm_offset_t)td->td_md.md_usr_fpu_save %
XSAVE_AREA_ALIGN) == 0,
("Unaligned pcb_user_save area ptr %p td %p",
td->td_md.md_usr_fpu_save, td));
return (td->td_md.md_usr_fpu_save);
}

struct pcb *
Expand Down Expand Up @@ -393,6 +391,8 @@ cpu_thread_alloc(struct thread *td)
set_top_of_stack_td(td);
td->td_pcb = pcb = get_pcb_td(td);
td->td_frame = (struct trapframe *)td->td_md.md_stack_base - 1;
td->td_md.md_usr_fpu_save = fpu_save_area_alloc();
td->td_md.md_fpu_scratch = fpu_save_area_alloc();
pcb->pcb_save = get_pcb_user_save_pcb(pcb);
if (use_xsave) {
xhdr = (struct xstate_hdr *)(pcb->pcb_save + 1);
Expand All @@ -404,8 +404,12 @@ cpu_thread_alloc(struct thread *td)
/*
 * Tear down the per-thread state attached to td.
 *
 * Calls cpu_thread_clean() first, then hands the two FPU buffers referenced
 * from td->td_md (md_usr_fpu_save and md_fpu_scratch, the ones
 * cpu_thread_alloc() obtains from fpu_save_area_alloc()) back through
 * fpu_save_area_free().  Each pointer is set to NULL right after its buffer
 * is freed so td_md does not keep a stale reference.
 */
void
cpu_thread_free(struct thread *td)
{

cpu_thread_clean(td);

fpu_save_area_free(td->td_md.md_usr_fpu_save);
td->td_md.md_usr_fpu_save = NULL;
fpu_save_area_free(td->td_md.md_fpu_scratch);
td->td_md.md_fpu_scratch = NULL;
}

bool
Expand Down
6 changes: 3 additions & 3 deletions sys/amd64/ia32/ia32_signal.c
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ ia32_set_mcontext(struct thread *td, struct ia32_mcontext *mcp)
if (mcp->mc_xfpustate_len > cpu_max_ext_state_size -
sizeof(struct savefpu))
return (EINVAL);
xfpustate = __builtin_alloca(mcp->mc_xfpustate_len);
xfpustate = (char *)td->td_md.md_fpu_scratch;
ret = copyin(PTRIN(mcp->mc_xfpustate), xfpustate,
mcp->mc_xfpustate_len);
if (ret != 0)
Expand Down Expand Up @@ -579,7 +579,7 @@ ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)

if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) {
xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu);
xfpusave = __builtin_alloca(xfpusave_len);
xfpusave = (char *)td->td_md.md_fpu_scratch;
} else {
xfpusave_len = 0;
xfpusave = NULL;
Expand Down Expand Up @@ -882,7 +882,7 @@ freebsd32_sigreturn(td, uap)
td->td_proc->p_pid, td->td_name, xfpustate_len);
return (EINVAL);
}
xfpustate = __builtin_alloca(xfpustate_len);
xfpustate = (char *)td->td_md.md_fpu_scratch;
error = copyin(PTRIN(ucp->uc_mcontext.mc_xfpustate),
xfpustate, xfpustate_len);
if (error != 0) {
Expand Down
2 changes: 2 additions & 0 deletions sys/amd64/include/proc.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ struct mdthread {
int md_efirt_dis_pf; /* (k) */
struct pcb md_pcb;
vm_offset_t md_stack_base;
struct savefpu *md_usr_fpu_save;
struct savefpu *md_fpu_scratch;
};

struct mdproc {
Expand Down
2 changes: 1 addition & 1 deletion sys/kern/kern_thread.c
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ _Static_assert(offsetof(struct thread, td_pflags) == 0x110,
"struct thread KBI td_pflags");
_Static_assert(offsetof(struct thread, td_frame) == 0x4a8,
"struct thread KBI td_frame");
_Static_assert(offsetof(struct thread, td_emuldata) == 0x6b0,
_Static_assert(offsetof(struct thread, td_emuldata) == 0x6c0,
"struct thread KBI td_emuldata");
_Static_assert(offsetof(struct proc, p_flag) == 0xb8,
"struct proc KBI p_flag");
Expand Down

0 comments on commit df8dd60

Please sign in to comment.