Skip to content

Commit 02ed486

Browse files
x86_64: more optimized versions of mem{cpy,set}
Signed-off-by: Anhad Singh <[email protected]>
1 parent a2bad6d commit 02ed486

File tree

3 files changed

+87
-4
lines changed

3 files changed

+87
-4
lines changed
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
// Copyright (C) 2021-2023 The Aero Project Developers.
2+
//
3+
// This file is part of The Aero Project.
4+
//
5+
// Aero is free software: you can redistribute it and/or modify
6+
// it under the terms of the GNU General Public License as published by
7+
// the Free Software Foundation, either version 3 of the License, or
8+
// (at your option) any later version.
9+
//
10+
// Aero is distributed in the hope that it will be useful,
11+
// but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
// GNU General Public License for more details.
14+
//
15+
// You should have received a copy of the GNU General Public License
16+
// along with Aero. If not, see <https://www.gnu.org/licenses/>.
17+
18+
/// Copies `len` bytes from `src` to `dest` and returns `dest`.
///
/// The first `len / 8` quadwords are copied with `rep movsq`; the remaining
/// `len % 8` bytes are copied with `rep movsb`.
///
/// The function is `#[naked]`: its body is exactly the assembly below, which
/// reads the arguments directly from the registers listed in the comment and
/// ends with its own `ret`.
///
/// # Safety
///
/// NOTE(review): nothing in the assembly validates the pointers, so the caller
/// presumably has to guarantee the usual `memcpy` contract (`src` readable and
/// `dest` writable for `len` bytes, regions not overlapping) — confirm.
/// NOTE(review): the string instructions are used without `cld`, so this
/// assumes the direction flag is clear on entry — confirm against the ABI in use.
#[no_mangle]
19+
#[naked]
20+
unsafe extern "C" fn memcpy_movsq(dest: *mut u8, src: *const u8, len: usize) -> *mut u8 {
21+
// Registers used:
22+
//
23+
// %rdi = argument 1, `dest`
24+
// %rsi = argument 2, `src`
25+
// %rdx = argument 3, `len`
26+
asm!(
27+
// Save the return value (`dest`) in rax before `rep movs*` modifies rdi.
28+
"mov rax, rdi",
29+
// Copy in 8 byte chunks: rcx = len / 8 is the `rep movsq` iteration count.
30+
"mov rcx, rdx",
31+
"shr rcx, 3",
32+
"rep movsq",
33+
// Copy the rest: rcx = len % 8 trailing bytes, one byte per `rep movsb` iteration.
34+
"mov rcx, rdx",
35+
"and rcx, 0x7",
36+
"rep movsb",
37+
"ret",
38+
options(noreturn)
39+
);
40+
}
41+
42+
/// Fills `len` bytes starting at `dest` with the low 8 bits of `byte` and
/// returns `dest`.
///
/// The fill byte is replicated into all eight bytes of rax, the first
/// `len / 8` quadwords are written with `rep stosq`, and the remaining
/// `len % 8` bytes are written with `rep stosb`.
///
/// The function is `#[naked]`: its body is exactly the assembly below, which
/// reads the arguments directly from the registers listed in the comment and
/// ends with its own `ret`.
///
/// # Safety
///
/// NOTE(review): nothing in the assembly validates `dest`, so the caller
/// presumably has to guarantee it is writable for `len` bytes — confirm.
/// NOTE(review): the string instructions are used without `cld`, so this
/// assumes the direction flag is clear on entry — confirm against the ABI in use.
#[no_mangle]
43+
#[naked]
44+
unsafe extern "C" fn memset_stosq(dest: *mut u8, byte: i32, len: usize) -> *mut u8 {
45+
// Registers used:
46+
//
47+
// %rdi = argument 1, `dest`
48+
// %rsi = argument 2, `byte`
49+
// %rdx = argument 3, `len`
50+
asm!(
51+
// Save the return value (`dest`) in r11; rax is needed for the fill pattern.
52+
"mov r11, rdi",
53+
// Create an 8-byte copy of the pattern: zero-extend the low byte of `byte` and
54+
// multiply by 0x0101010101010101. `mul` writes rdx:rax, so `len` is parked in
55+
// rcx across the multiply and moved back into rdx afterwards.
"mov rcx, rdx",
55+
"movzx rax, sil",
56+
"mov r10, 0x0101010101010101",
57+
"mul r10",
58+
"mov rdx, rcx",
59+
// Store in 8 byte chunks: rcx still holds `len`, so rcx = len / 8 quadwords.
60+
"shr rcx, 3",
61+
"rep stosq",
62+
// Store the rest: rcx = len % 8 trailing bytes, written from al by `rep stosb`.
63+
"mov rcx, rdx",
64+
"and rcx, 0x7",
65+
"rep stosb",
66+
// Restore the return value.
67+
"mov rax, r11",
68+
"ret",
69+
options(noreturn)
70+
)
71+
}
72+
73+
// FIXME(andypython): pick the best implementation for the current CPU using indirect functions.
74+
75+
/// Unmangled, C-ABI `memcpy` symbol: forwards `dest`, `src` and `len` to
/// `memcpy_movsq` and returns its result.
///
/// NOTE(review): this function is not declared `unsafe` even though it takes
/// raw pointers, and it performs no checks before forwarding them; callers are
/// presumably expected to uphold the usual `memcpy` contract — confirm.
#[no_mangle]
76+
extern "C" fn memcpy(dest: *mut u8, src: *const u8, len: usize) -> *mut u8 {
77+
unsafe { memcpy_movsq(dest, src, len) }
78+
}
79+
80+
/// Unmangled, C-ABI `memset` symbol: forwards `dest`, `byte` and `len` to
/// `memset_stosq` and returns its result.
///
/// NOTE(review): this function is not declared `unsafe` even though it takes a
/// raw pointer, and it performs no checks before forwarding it; callers are
/// presumably expected to guarantee `dest` is writable for `len` bytes — confirm.
#[no_mangle]
81+
extern "C" fn memset(dest: *mut u8, byte: i32, len: usize) -> *mut u8 {
82+
unsafe { memset_stosq(dest, byte, len) }
83+
}

src/aero_kernel/src/arch/x86_64/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ pub mod controlregs;
2020
pub mod gdt;
2121
pub mod interrupts;
2222
pub mod io;
23+
pub mod mem;
2324
pub mod signals;
2425
pub mod syscall;
2526
pub mod task;
@@ -32,7 +33,6 @@ use core::sync::atomic::Ordering;
3233
use crate::acpi::aml;
3334
use crate::{acpi, cmdline};
3435

35-
use crate::mem;
3636
use crate::mem::paging;
3737
use crate::mem::paging::VirtAddr;
3838

@@ -136,7 +136,7 @@ extern "C" fn arch_aero_main() -> ! {
136136
paging::init(memmap).unwrap();
137137
log::info!("loaded paging");
138138

139-
mem::alloc::init_heap();
139+
crate::mem::alloc::init_heap();
140140
log::info!("loaded heap");
141141

142142
// SMP initialization.

src/aero_kernel/src/arch/x86_64/user_copy.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,10 @@ unsafe extern "C" fn copy_to_from_user(
4646
) -> bool {
4747
// Registers used:
4848
//
49-
// %rax = argument 1, `dest`
49+
// %rdi = argument 1, `dest`
5050
// %rsi = argument 2, `src`
5151
// %rdx = argument 3, `size`
52-
// %rcx = argument 4, `fault_resume`
52+
// %rcx = argument 4, `fault_resume` (copied to %r10)
5353
asm!(
5454
// Copy `fault_resume` out of %rcx because %rcx will be used as the counter by `rep movsb` later.
5555
"mov r10, rcx",

0 commit comments

Comments
 (0)