Skip to content

Commit

Permalink
Support "CLS": safely defining arbitrary CPU-local variables (theseus…
Browse files Browse the repository at this point in the history
…-os#1028)

* This commit allows any crate to safely define a CPU-local variable
  of its choice without needing to include it in a hardcoded struct
  containing all CPU-local variables.
  * Previously, the hardcoded approach was used, via the
    `PerCpuData` struct, but this was inflexible and required
    some unsafety to define and access a CPU-local variable.

* Now, CPU-local variables, or CLS (Cpu-Local Storage), are declared
  using the `#[cpu_local]` procedural macro, which is implemented
  in the `cls_macros` crate.
  * This macro declares CLS variables as `#[thread_local]`s (TLS), but places them
    in a custom link section `.cls`, allowing us to differentiate them
    from standard, real TLS variables.
  * These special CLS variables are not accessed using the
    standard TLS instructions, but rather we implement special accessors
    for them in order to ensure they use the arch-specific base registers
    for CPU-locals, not thread-locals.
    * CPU-local variables are accessed as an offset from the `%gs`
      segment register on x86_64, and the `TPIDR_EL1` register on aarch64.
  * The linker scripts have been changed to place the `.cls` section
    adjacent to the TLS sections, either before or after depending
    on the target architecture.
  * Then, in an extra build step, we run the `elf_cls` tool on the
    ELF object files outputted by the compiler in order to fixup the
    CLS sections' flags, symbol types, and symbol values.
    For example, we use an OS-specific ELF section/symbol type
    for CLS variables in order to properly identify them at runtime.
  * Separate control flows have been added to `mod_mgmt`
    to handle loading & linking CLS sections at runtime.

* See the crate-level documentation of `cls` for more info.

* The logic previously in `tls_initializer` is now re-used for CLS,
  because this new CLS implementation uses an augmented version
  of traditional TLS.
  * Thus, `tls_initializer` has been moved into the new
    `local_storage_initializer` crate, which offers a generic
    implementation with two specific instantiations: TLS and CLS.

* The new `cls_allocator` crate handles allocations of CLS variables
  and reloading the CLS regions and CLS base register as necessary.

Signed-off-by: Klimenty Tsoutsman <[email protected]>
Co-authored-by: Kevin Boos <[email protected]>
  • Loading branch information
tsoutsman and kevinaboos authored Sep 1, 2023
1 parent 27429e5 commit b1a0a14
Show file tree
Hide file tree
Showing 42 changed files with 1,532 additions and 772 deletions.
84 changes: 30 additions & 54 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,10 @@ else
$(error Error: unsupported option "debug=$(debug)". Options are 'full', 'none', or 'base')
endif

## Sixth, fix up CPU local sections.
@echo -e "Parsing CPU local sections"
@cargo r --release --manifest-path $(ROOT_DIR)/tools/elf_cls/Cargo.toml -- $(ARCH) --dir $(OBJECT_FILES_BUILD_DIR)

#############################
### end of "build" target ###
#############################
Expand Down Expand Up @@ -349,6 +353,8 @@ endif
## This builds the nano_core binary itself, which is the fully-linked code that first runs right after the bootloader
$(nano_core_binary): cargo $(nano_core_static_lib) $(linker_script)
$(CROSS)ld -n -T $(linker_script) -o $(nano_core_binary) $(compiled_nano_core_asm) $(nano_core_static_lib)
## Fix up CLS sections.
cargo r --release --manifest-path $(ROOT_DIR)/tools/elf_cls/Cargo.toml -- $(ARCH) --file $(nano_core_binary)
## Dump readelf output for verification. See pull request #542 for more details:
## @RUSTFLAGS="" cargo run --release --manifest-path $(ROOT_DIR)/tools/demangle_readelf_file/Cargo.toml \
## <($(CROSS)readelf -s -W $(nano_core_binary) | sed '/OBJECT LOCAL .* str\./d;/NOTYPE LOCAL /d;/FILE LOCAL /d;/SECTION LOCAL /d;') \
Expand Down
5 changes: 5 additions & 0 deletions cfg/partial_linking_combine_sections.ld
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@ SECTIONS {
*(.gcc_except_table .gcc_except_table.*)
}

.cls :
{
*(.cls .cls.*)
}

.tdata :
{
*(.tdata .tdata.*)
Expand Down
2 changes: 1 addition & 1 deletion kernel/ap_start/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ interrupts = { path = "../interrupts" }
scheduler = { path = "../scheduler" }
spawn = { path = "../spawn" }
kernel_config = { path = "../kernel_config" }
cls_allocator = { path = "../cls_allocator" }
cpu = { path = "../cpu" }
per_cpu = { path = "../per_cpu" }
no_drop = { path = "../no_drop" }
early_tls = { path = "../early_tls" }

Expand Down
2 changes: 1 addition & 1 deletion kernel/ap_start/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ pub fn kstart_ap(

// Now that the Local APIC has been initialized for this CPU, we can initialize the
// per-CPU storage, tasking, and create the idle task for this CPU.
per_cpu::init(cpu_id).unwrap();
cls_allocator::reload_current_cpu();
let bootstrap_task = spawn::init(kernel_mmi_ref.clone(), cpu_id, this_ap_stack).unwrap();

// The PAT must be initialized explicitly on every CPU,
Expand Down
2 changes: 1 addition & 1 deletion kernel/captain/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ multicore_bringup = { path = "../multicore_bringup" }
device_manager = { path = "../device_manager" }
early_printer = { path = "../early_printer" }
tlb_shootdown = { path = "../tlb_shootdown" }
cls_allocator = { path = "../cls_allocator" }
kernel_config = { path = "../kernel_config" }
interrupts = { path = "../interrupts" }
scheduler = { path = "../scheduler" }
Expand All @@ -28,7 +29,6 @@ spawn = { path = "../spawn" }
stack = { path = "../stack" }
task = { path = "../task" }
cpu = { path = "../cpu" }
per_cpu = { path = "../per_cpu" }
first_application = { path = "../first_application" }

[target.'cfg(target_arch = "x86_64")'.dependencies]
Expand Down
2 changes: 1 addition & 1 deletion kernel/captain/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ pub fn init(

// get BSP's CPU ID
let bsp_id = cpu::bootstrap_cpu().ok_or("captain::init(): couldn't get ID of bootstrap CPU!")?;
per_cpu::init(bsp_id)?;
cls_allocator::reload_current_cpu();

// Initialize the scheduler and create the initial `Task`,
// which is bootstrapped from this current execution context.
Expand Down
36 changes: 23 additions & 13 deletions kernel/cls/cls_macros/src/int.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
use proc_macro2::TokenStream;
use quote::{quote, ToTokens};
use syn::{LitInt, Type};
use syn::{Ident, Type};

pub(crate) fn int_functions(ty: Type, offset: LitInt) -> Option<TokenStream> {
let ((x64_asm_width, x64_reg_class), (aarch64_reg_modifier, aarch64_instr_width)) =
use crate::cls_offset_expr;

pub(crate) fn int_functions(ty: &Type, name: &Ident) -> Option<TokenStream> {
let ((x86_64_asm_width, x86_64_reg_class), (aarch64_reg_modifier, aarch64_instr_width)) =
match ty.to_token_stream().to_string().as_ref() {
"u8" => (("byte", quote! { reg_byte }), (":w", "b")),
"u16" => (("word", quote! { reg }), (":w", "w")),
Expand All @@ -13,20 +15,22 @@ pub(crate) fn int_functions(ty: Type, offset: LitInt) -> Option<TokenStream> {
return None;
}
};
let x64_width_modifier = format!("{x64_asm_width} ptr ");
let x64_cls_location = format!("gs:[{offset}]");
let x86_64_width_modifier = format!("{x86_64_asm_width} ptr ");
let offset_expr = cls_offset_expr(name);

Some(quote! {
#[inline]
pub fn load(&self) -> #ty {
let offset = #offset_expr;
#[cfg(target_arch = "x86_64")]
{
let ret;
unsafe {
::core::arch::asm!(
::core::concat!("mov {}, ", #x64_cls_location),
out(#x64_reg_class) ret,
options(preserves_flags, nostack),
::core::concat!("mov {ret}, gs:[{offset}]"),
ret = out(#x86_64_reg_class) ret,
offset = in(reg) offset,
options(readonly, preserves_flags, nostack),
)
};
ret
Expand All @@ -39,10 +43,11 @@ pub(crate) fn int_functions(ty: Type, offset: LitInt) -> Option<TokenStream> {
"2:",
// Load value.
"mrs {tp_1}, tpidr_el1",
"add {ptr}, {tp_1}, {offset}",
concat!(
"ldr", #aarch64_instr_width,
" {ret", #aarch64_reg_modifier,"},",
" [{tp_1},#", stringify!(#offset), "]",
" [{ptr}]",
),

// Make sure task wasn't migrated between mrs and ldr.
Expand All @@ -51,10 +56,12 @@ pub(crate) fn int_functions(ty: Type, offset: LitInt) -> Option<TokenStream> {
"b.ne 2b",

tp_1 = out(reg) _,
ptr = out(reg) _,
offset = in(reg) offset,
ret = out(reg) ret,
tp_2 = out(reg) _,

options(nostack),
options(readonly, nostack),
)
};
ret
Expand All @@ -63,12 +70,14 @@ pub(crate) fn int_functions(ty: Type, offset: LitInt) -> Option<TokenStream> {

#[inline]
pub fn fetch_add(&self, mut operand: #ty) -> #ty {
let offset = #offset_expr;
#[cfg(target_arch = "x86_64")]
{
unsafe {
::core::arch::asm!(
::core::concat!("xadd ", #x64_width_modifier, #x64_cls_location, ", {}"),
inout(#x64_reg_class) operand,
::core::concat!("xadd ", #x86_64_width_modifier, "gs:[{offset}], {operand}"),
offset = in(reg) offset,
operand = inout(#x86_64_reg_class) operand,
options(nostack),
)
};
Expand All @@ -82,7 +91,7 @@ pub(crate) fn int_functions(ty: Type, offset: LitInt) -> Option<TokenStream> {
"2:",
// Load value.
"mrs {tp_1}, tpidr_el1",
concat!("add {ptr}, {tp_1}, ", stringify!(#offset)),
"add {ptr}, {tp_1}, {offset}",
concat!("ldxr", #aarch64_instr_width, " {value", #aarch64_reg_modifier,"}, [{ptr}]"),

// Make sure task wasn't migrated between mrs and ldxr.
Expand All @@ -99,6 +108,7 @@ pub(crate) fn int_functions(ty: Type, offset: LitInt) -> Option<TokenStream> {

tp_1 = out(reg) ret,
ptr = out(reg) _,
offset = in(reg) offset,
value = out(reg) ret,
tp_2 = out(reg) _,
operand = in(reg) operand,
Expand Down
Loading

0 comments on commit b1a0a14

Please sign in to comment.