Skip to content

Commit

Permalink
most of the way there on precomputation.
Browse files Browse the repository at this point in the history
  • Loading branch information
pkivolowitz committed Apr 13, 2024
1 parent 32fe3b3 commit 054a5de
Show file tree
Hide file tree
Showing 5 changed files with 406 additions and 0 deletions.
7 changes: 7 additions & 0 deletions section_3/precomputation/.vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"cSpell.words": [
"ifact",
"pfact",
"rfact"
]
}
85 changes: 85 additions & 0 deletions section_3/precomputation/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,88 @@ Certainly, for the purposes of this demonstration, it is not necessary
to implement both iterative and recursive methods. We do so for fun and
for any lessons the reader can glean.

## C Driver

[Here](./main.c), you will find a version written in C. We will
repurpose `main()` to drive versions in assembly language.

## Iterative

```c
/* Computes n! by multiplying an accumulator by 1, 2, ..., n in turn.
   Any n below 1 yields 1 because the loop body never runs. */
long Iterative(long n) {
    long retval = 1;
    long i = 1;

    while (i <= n) {
        retval *= i;
        i++;
    }
    return retval;
}
```
First, notice that this algorithm's work increases linearly with the
parameter n. Therefore this algorithm is O(n).
We translated this function into assembly language to produce the code
provided below. This code is *condensed*. To see the original code with
comments, please see [here](./asm.S).
```asm
// Iterative factorial, the assembly counterpart of Iterative().
//   In:  x0 = n
//   Out: x0 = n! (1 whenever n is below 1, since the loop body never runs)
//   Uses x1 as the counter i and x2 as a copy of n.
ifact:
        mov     x2, x0              // x2 = n, freeing x0 for the result
        mov     x0, 1               // retval = 1
        mov     x1, 1               // i = 1
        // The loop below is five instructions (20 bytes) long and its
        // number of passes grows with n, so the work is O(n).
1:      cmp     x1, x2              // is i <= n ?
        b.gt    2f                  //   no  - finished
        mul     x0, x0, x1          //   yes - retval *= i
        add     x1, x1, 1           // i++
        b       1b
2:      ret
```

Remember, the above code is *condensed*. You will note that the code that
performs the calculation is 5 instructions (or 20 bytes) long. This
isn't much but, again, the algorithm runs in O(n) time.

## Recursive

```c
/* Computes n! recursively as n * (n - 1)!, with every n <= 1 mapping to 1. */
long Recursive(long n) {
    if (n <= 1) {
        return 1;
    }
    return n * Recursive(n - 1);
}
```
The code below is *condensed*. The original code, with comments, can be
found [here](./asm.S).
```asm
// Recursive factorial, the assembly counterpart of Recursive().
//   In:  x0 = n
//   Out: x0 = n! (1 whenever n <= 1)
// PUSH_P / POP_P / PUSH_R / POP_R are the stack macros from
// apple-linux-convergence.S.
rfact:
        PUSH_P  x29, x30            // save frame pointer and return address
        mov     x29, sp
        cmp     x0, 1
        b.le    1f                  // n <= 1 is the base case
        // n is more than 1, so recursion is needed.
        PUSH_R  x0                  // keep the current n across the call
        sub     x0, x0, 1           // argument for the call is n - 1
        bl      rfact
        POP_R   x1                  // x1 = the n saved above
        mul     x0, x0, x1          // recursive result times n
        b       2f
1:      mov     x0, 1               // force the result to 1 - n may be below 1
2:      POP_P   x29, x30
        ret
```
156 changes: 156 additions & 0 deletions section_3/precomputation/apple-linux-convergence.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
/* Macros to permit the "same" assembly language to build on ARM64
Linux systems as well as Apple Silicon systems.
See the fuller documentation at:
https://github.com/pkivolowitz/asm_book/blob/main/macros/README.md
Perry Kivolowitz
A Gentle Introduction to Assembly Language
*/

/* GLD_PTR xreg, label
   Loads xreg by way of a global symbol.
     Apple: addresses the GOT entry of _label (adrp for the page, then
            the page offset) and loads xreg from that entry.
     Other: loads the address of label with the ldr = pseudo-instruction
            and then loads xreg from that address.
   NOTE(review): the Apple branch performs one load (from the GOT entry)
   while the other branch loads the address and then dereferences it.
   These look like different levels of indirection - confirm which
   result callers expect on each platform.
*/
.macro GLD_PTR xreg, label
#if defined(__APPLE__)
adrp \xreg, _\label@GOTPAGE
ldr \xreg, [\xreg, _\label@GOTPAGEOFF]
#else
ldr \xreg, =\label
ldr \xreg, [\xreg]
#endif
.endm

/* GLD_ADDR xreg, label
   Leaves the address of the global symbol label in xreg.
     Apple: _label is reached through the global offset table. The
            @GOTPAGE / @GOTPAGEOFF operands locate the GOT entry, not
            the symbol itself, so the entry has to be loaded (ldr) to
            obtain the address of the symbol. Adding the page offset
            instead would, at best, produce the address of the GOT
            entry.
     Other: the address comes from the ldr = pseudo-instruction.
   Only xreg is written.
*/
.macro GLD_ADDR xreg, label
#if defined(__APPLE__)
        adrp    \xreg, _\label@GOTPAGE
        ldr     \xreg, [\xreg, _\label@GOTPAGEOFF]  // xreg = address held in the GOT entry
#else
        ldr     \xreg, =\label
#endif
.endm

// LLD_ADDR xreg, label
// Leaves the address of label in xreg without going through the GOT
// and without prepending an underscore to the name.
//   Apple: adrp yields the page, then the offset within the page is added.
//   Other: the address comes from the ldr = pseudo-instruction.
.macro LLD_ADDR xreg, label
#ifdef __APPLE__
        adrp    \xreg, \label@PAGE
        add     \xreg, \xreg, \label@PAGEOFF
#else
        ldr     \xreg, =\label
#endif
.endm

// LLD_DBL xreg, dreg, label
// Loads dreg from the memory at label. xreg is overwritten with the
// address of label along the way. The address is formed exactly as
// LLD_ADDR forms it, so that macro is reused here.
.macro LLD_DBL xreg, dreg, label
        LLD_ADDR \xreg, \label      // xreg = address of label
        ldur    \dreg, [\xreg]      // dreg = value stored at that address
.endm

// LLD_FLT xreg, sreg, label
// Loads sreg from the memory at label. xreg is overwritten with the
// address of label along the way. The address is formed exactly as
// LLD_ADDR forms it, so that macro is reused here.
.macro LLD_FLT xreg, sreg, label
        LLD_ADDR \xreg, \label      // xreg = address of label
        ldur    \sreg, [\xreg]      // sreg = value stored at that address
.endm

// GLABEL label
// Marks label as a global symbol. On Apple the symbol name carries a
// leading underscore, elsewhere it is used as written.
.macro GLABEL label
#ifdef __APPLE__
        .global _\label
#else
        .global \label
#endif
.endm

// MAIN
// Emits the label for the program entry function: _main on Apple,
// main elsewhere. It defines the label only and does not export it.
.macro MAIN
#ifdef __APPLE__
_main:
#else
main:
#endif
.endm

/* ERRNO_ADDR
   The address of the externally defined errno is obtained quite
   differently on Apple and Linux: each platform has its own function
   to call. This macro calls the right one and leaves the address of
   errno in x0. Because it expands to a bl, the link register (x30) is
   overwritten.
*/
.macro ERRNO_ADDR
#ifdef __APPLE__
        bl      ___error
#else
        bl      __errno_location
#endif
.endm

// CRT label
// Calls (bl) the function named label, prepending the underscore that
// Apple symbol names carry. As with any bl, x30 is overwritten.
.macro CRT label
#ifdef __APPLE__
        bl      _\label
#else
        bl      \label
#endif
.endm

// START_PROC
// Place immediately after the starting label of a function. Emits
// the .cfi_startproc directive; pair it with END_PROC.
.macro START_PROC
        .cfi_startproc
.endm

// END_PROC
// Place after the return of a function. Emits the .cfi_endproc
// directive that closes the region opened by START_PROC.
.macro END_PROC
        .cfi_endproc
.endm

// PUSH_P a, b
// Pushes a pair of registers: sp is first lowered by 16, then a is
// stored at [sp] and b at [sp + 8]. Undo with POP_P.
.macro PUSH_P a, b
        stp     \a, \b, [sp, -16]!
.endm

// PUSH_R a
// Pushes one register: sp is first lowered by 16, then a is stored at
// [sp]. A full 16 bytes is used even for a single register, matching
// the 16 byte step of the other stack macros. Undo with POP_R.
.macro PUSH_R a
        str     \a, [sp, -16]!
.endm

// POP_P a, b
// Pops a pair of registers pushed by PUSH_P: a is loaded from [sp],
// b from [sp + 8], and sp is then raised by 16.
.macro POP_P a, b
        ldp     \a, \b, [sp], 16
.endm

// POP_R a
// Pops one register pushed by PUSH_R: a is loaded from [sp] and sp is
// then raised by 16.
.macro POP_R a
        ldr     \a, [sp], 16
.endm

/* MIN src_a, src_b, dest
   The smaller of src_a and src_b is put into dest. The macro performs
   the cmp itself, so no flag-setting instruction is needed beforehand;
   the condition flags are overwritten as a result. The comparison is
   signed (LT). This macro makes it easy to remember which register
   does what in the csel.
   Thank you to u/TNorthover for nudge to add the cmp.
*/

.macro MIN src_a, src_b, dest
        cmp     \src_a, \src_b
        csel    \dest, \src_a, \src_b, lt   // dest = src_a when src_a < src_b, else src_b
.endm

/* MAX src_a, src_b, dest
   The larger of src_a and src_b is put into dest. The macro performs
   the cmp itself, so no flag-setting instruction is needed beforehand;
   the condition flags are overwritten as a result. The comparison is
   signed (GT). This macro makes it easy to remember which register
   does what in the csel.
   Thank you to u/TNorthover for nudge to add the cmp.
*/

.macro MAX src_a, src_b, dest
        cmp     \src_a, \src_b
        csel    \dest, \src_a, \src_b, gt   // dest = src_a when src_a > src_b, else src_b
.endm

// AASCIZ label, string
// Defines label as a NUL-terminated string holding the given text.
// The string starts on a 4 byte boundary (.p2align 2). Pass the text
// without surrounding quotes if the quotes are to come from the macro.
.macro AASCIZ label, string
        .p2align 2
\label:
        .asciz  "\string"
.endm

// MOD src_a, src_b, dest, scratch
// dest = src_a - (src_a / src_b) * src_b, using signed division, which
// is the remainder of src_a divided by src_b.
// scratch receives the quotient and must be a different register from
// src_a and src_b, because both are read again after scratch has been
// written. dest may be any register, including one of the sources.
.macro MOD src_a, src_b, dest, scratch
        sdiv    \scratch, \src_a, \src_b            // scratch = src_a / src_b
        msub    \dest, \scratch, \src_b, \src_a     // dest = src_a - scratch * src_b
.endm
Loading

0 comments on commit 054a5de

Please sign in to comment.