|
| 1 | +// Copyright (C) 2021-2023 The Aero Project Developers. |
| 2 | +// |
| 3 | +// This file is part of The Aero Project. |
| 4 | +// |
| 5 | +// Aero is free software: you can redistribute it and/or modify |
| 6 | +// it under the terms of the GNU General Public License as published by |
| 7 | +// the Free Software Foundation, either version 3 of the License, or |
| 8 | +// (at your option) any later version. |
| 9 | +// |
| 10 | +// Aero is distributed in the hope that it will be useful, |
| 11 | +// but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 13 | +// GNU General Public License for more details. |
| 14 | +// |
| 15 | +// You should have received a copy of the GNU General Public License |
| 16 | +// along with Aero. If not, see <https://www.gnu.org/licenses/>. |
| 17 | + |
/// Copies `len` bytes from `src` to `dest` with the x86-64 string-move
/// instructions and returns the original `dest` pointer.
///
/// The function is `#[naked]`: its body is a single `asm!` block that ends
/// with its own `ret`. The first `len / 8` quadwords are copied with
/// `rep movsq`, and the remaining `len % 8` bytes with `rep movsb`.
///
/// # Safety
///
/// NOTE(review): presumably `dest` must be valid for `len` bytes of writes
/// and `src` for `len` bytes of reads, as for C `memcpy` — confirm against
/// the callers. The code also assumes the direction flag is clear on entry
/// so that the copy runs forward — TODO confirm.
#[no_mangle]
#[naked]
unsafe extern "C" fn memcpy_movsq(dest: *mut u8, src: *const u8, len: usize) -> *mut u8 {
    // Registers used:
    //
    // %rdi = argument 1, `dest`
    // %rsi = argument 2, `src`
    // %rdx = argument 3, `len`
    asm!(
        // Save the return value: `rep movs*` advances rdi, so the original
        // `dest` is captured in rax before any copying happens.
        "mov rax, rdi",
        // Copy in 8 byte chunks: rcx = len / 8 is the quadword count.
        "mov rcx, rdx",
        "shr rcx, 3",
        "rep movsq",
        // Copy the rest: rcx = len % 8 trailing bytes. rsi/rdi already point
        // just past the quadwords copied above.
        "mov rcx, rdx",
        "and rcx, 0x7",
        "rep movsb",
        "ret",
        // Control leaves through the `ret` above and never falls out of the
        // asm block.
        options(noreturn)
    );
}
| 41 | + |
/// Fills `len` bytes starting at `dest` with the low 8 bits of `byte` using
/// the x86-64 string-store instructions and returns the original `dest`
/// pointer.
///
/// The function is `#[naked]`: its body is a single `asm!` block that ends
/// with its own `ret`. The fill byte is replicated into all eight bytes of
/// rax, then `len / 8` quadwords are stored with `rep stosq` and the
/// remaining `len % 8` bytes with `rep stosb`.
///
/// # Safety
///
/// NOTE(review): presumably `dest` must be valid for `len` bytes of writes,
/// as for C `memset` — confirm against the callers. The code also assumes
/// the direction flag is clear on entry so that the fill runs forward —
/// TODO confirm.
#[no_mangle]
#[naked]
unsafe extern "C" fn memset_stosq(dest: *mut u8, byte: i32, len: usize) -> *mut u8 {
    // Registers used:
    //
    // %rdi = argument 1, `dest`
    // %rsi = argument 2, `byte`
    // %rdx = argument 3, `len`
    asm!(
        // Save the return value. rax is needed for the fill pattern and
        // `rep stos*` advances rdi, so `dest` is parked in r11.
        "mov r11, rdi",
        // Create an 8-byte copy of the pattern.
        //
        // `mul` writes its 128-bit product to rdx:rax and therefore clobbers
        // rdx, so `len` is stashed in rcx first and restored afterwards.
        "mov rcx, rdx",
        // rax = low byte of `byte`, zero-extended.
        "movzx rax, sil",
        // Multiplying by 0x0101...01 replicates that byte into every byte of
        // rax.
        "mov r10, 0x0101010101010101",
        "mul r10",
        "mov rdx, rcx",
        // Fill in 8 byte chunks: rcx (still holding `len`) becomes len / 8.
        "shr rcx, 3",
        "rep stosq",
        // Fill the rest: rcx = len % 8 trailing bytes, stored from al.
        "mov rcx, rdx",
        "and rcx, 0x7",
        "rep stosb",
        // Restore the return value.
        "mov rax, r11",
        "ret",
        // Control leaves through the `ret` above and never falls out of the
        // asm block.
        options(noreturn)
    )
}
| 72 | + |
| 73 | +// FIXME(andypython): pick the best implementation for the current CPU using indirect functions. |
| 74 | + |
/// Unmangled `memcpy` symbol with the C ABI; forwards its arguments
/// unchanged to `memcpy_movsq` and returns that function's result.
///
/// NOTE(review): this is declared as a safe `fn` even though it passes raw
/// pointers straight to an `unsafe` routine, so nothing here checks them —
/// confirm that this is intentional.
#[no_mangle]
extern "C" fn memcpy(dest: *mut u8, src: *const u8, len: usize) -> *mut u8 {
    // Currently hard-wired to the `rep movsq` implementation.
    unsafe { memcpy_movsq(dest, src, len) }
}
| 79 | + |
/// Unmangled `memset` symbol with the C ABI; forwards its arguments
/// unchanged to `memset_stosq` and returns that function's result.
///
/// NOTE(review): this is declared as a safe `fn` even though it passes a raw
/// pointer straight to an `unsafe` routine, so nothing here checks it —
/// confirm that this is intentional.
#[no_mangle]
extern "C" fn memset(dest: *mut u8, byte: i32, len: usize) -> *mut u8 {
    // Currently hard-wired to the `rep stosq` implementation.
    unsafe { memset_stosq(dest, byte, len) }
}
0 commit comments