forked from abeluck/android-ffmpeg
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add some ARM optimizations from libav
- Loading branch information
Showing
3 changed files
with
282 additions
and
0 deletions.
There are no files selected for viewing
226 changes: 226 additions & 0 deletions
226
ARM_generate_position_independent_code_to_access_data_symbols.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,226 @@ | ||
commit 62634158b7cd39ad1e330a87153a97bf3dc6f8de | ||
Author: Mans Rullgard <[email protected]> | ||
Date: Fri Jun 29 13:35:08 2012 +0100 | ||
|
||
ARM: generate position independent code to access data symbols | ||
|
||
This creates proper position independent code when accessing | ||
data symbols if CONFIG_PIC is set. | ||
|
||
References to external symbols should now use the movrelx macro. | ||
Some additional code changes are required since this macro may | ||
need a register to hold the GOT pointer. | ||
|
||
Signed-off-by: Mans Rullgard <[email protected]> | ||
|
||
diff --git a/libavcodec/arm/ac3dsp_armv6.S b/libavcodec/arm/ac3dsp_armv6.S | ||
index 7e2f40e..f6f297a 100644 | ||
--- a/libavcodec/arm/ac3dsp_armv6.S | ||
+++ b/libavcodec/arm/ac3dsp_armv6.S | ||
@@ -26,8 +26,8 @@ function ff_ac3_bit_alloc_calc_bap_armv6, export=1 | ||
beq 4f | ||
push {r4-r11,lr} | ||
add r5, sp, #40 | ||
- movrel r4, X(ff_ac3_bin_to_band_tab) | ||
- movrel lr, X(ff_ac3_band_start_tab) | ||
+ movrelx r4, X(ff_ac3_bin_to_band_tab), r11 | ||
+ movrelx lr, X(ff_ac3_band_start_tab) | ||
ldm r5, {r5-r7} | ||
ldrb r4, [r4, r2] | ||
add r1, r1, r2, lsl #1 @ psd + start | ||
diff --git a/libavcodec/arm/fft_fixed_neon.S b/libavcodec/arm/fft_fixed_neon.S | ||
index 4d891ba..faddc00 100644 | ||
--- a/libavcodec/arm/fft_fixed_neon.S | ||
+++ b/libavcodec/arm/fft_fixed_neon.S | ||
@@ -214,7 +214,7 @@ function fft\n\()_neon | ||
bl fft\n4\()_neon | ||
mov r0, r4 | ||
pop {r4, lr} | ||
- movrel r1, X(ff_cos_\n\()_fixed) | ||
+ movrelx r1, X(ff_cos_\n\()_fixed) | ||
mov r2, #\n4/2 | ||
b fft_pass_neon | ||
endfunc | ||
diff --git a/libavcodec/arm/fft_neon.S b/libavcodec/arm/fft_neon.S | ||
index aa06e6d..c4d8918 100644 | ||
--- a/libavcodec/arm/fft_neon.S | ||
+++ b/libavcodec/arm/fft_neon.S | ||
@@ -143,7 +143,7 @@ function fft16_neon | ||
vswp d29, d30 @ q14{r12,i12,i14,r15} q15{r13,i13,i15,r14} | ||
vadd.f32 q0, q12, q13 @ {t1,t2,t5,t6} | ||
vadd.f32 q1, q14, q15 @ {t1a,t2a,t5a,t6a} | ||
- movrel r2, X(ff_cos_16) | ||
+ movrelx r2, X(ff_cos_16) | ||
vsub.f32 q13, q12, q13 @ {t3,t4,t7,t8} | ||
vrev64.32 d1, d1 | ||
vsub.f32 q15, q14, q15 @ {t3a,t4a,t7a,t8a} | ||
@@ -290,7 +290,7 @@ function fft\n\()_neon | ||
bl fft\n4\()_neon | ||
mov r0, r4 | ||
pop {r4, lr} | ||
- movrel r1, X(ff_cos_\n) | ||
+ movrelx r1, X(ff_cos_\n) | ||
mov r2, #\n4/2 | ||
b fft_pass_neon | ||
endfunc | ||
diff --git a/libavcodec/arm/sbrdsp_neon.S b/libavcodec/arm/sbrdsp_neon.S | ||
index 4b681bf..610397f 100644 | ||
--- a/libavcodec/arm/sbrdsp_neon.S | ||
+++ b/libavcodec/arm/sbrdsp_neon.S | ||
@@ -307,8 +307,8 @@ function ff_sbr_hf_apply_noise_0_neon, export=1 | ||
vmov.i32 d3, #0 | ||
.Lhf_apply_noise_0: | ||
push {r4,lr} | ||
+ movrelx r4, X(ff_sbr_noise_table) | ||
ldr r12, [sp, #12] | ||
- movrel r4, X(ff_sbr_noise_table) | ||
add r3, r3, #1 | ||
bfc r3, #9, #23 | ||
sub r12, r12, #1 | ||
@@ -355,8 +355,8 @@ function ff_sbr_hf_apply_noise_1_neon, export=1 | ||
eor lr, r12, #1<<31 | ||
vmov d3, r12, lr | ||
.Lhf_apply_noise_1: | ||
+ movrelx r4, X(ff_sbr_noise_table) | ||
ldr r12, [sp, #12] | ||
- movrel r4, X(ff_sbr_noise_table) | ||
add r3, r3, #1 | ||
bfc r3, #9, #23 | ||
sub r12, r12, #1 | ||
diff --git a/libavcodec/arm/vp3dsp_neon.S b/libavcodec/arm/vp3dsp_neon.S | ||
index 8d22d00..2a9b25f 100644 | ||
--- a/libavcodec/arm/vp3dsp_neon.S | ||
+++ b/libavcodec/arm/vp3dsp_neon.S | ||
@@ -116,9 +116,8 @@ function vp3_idct_start_neon | ||
vadd.s16 q1, q8, q12 | ||
vsub.s16 q8, q8, q12 | ||
vld1.64 {d28-d31}, [r2,:128]! | ||
-endfunc | ||
|
||
-function vp3_idct_core_neon | ||
+vp3_idct_core_neon: | ||
vmull.s16 q2, d18, xC1S7 // (ip[1] * C1) << 16 | ||
vmull.s16 q3, d19, xC1S7 | ||
vmull.s16 q4, d2, xC4S4 // ((ip[0] + ip[4]) * C4) << 16 | ||
diff --git a/libavcodec/arm/vp8_armv6.S b/libavcodec/arm/vp8_armv6.S | ||
index 1fa6d15..1b668bc 100644 | ||
--- a/libavcodec/arm/vp8_armv6.S | ||
+++ b/libavcodec/arm/vp8_armv6.S | ||
@@ -65,7 +65,7 @@ T orrcs \cw, \cw, \t1 | ||
|
||
function ff_decode_block_coeffs_armv6, export=1 | ||
push {r0,r1,r4-r11,lr} | ||
- movrel lr, X(ff_vp56_norm_shift) | ||
+ movrelx lr, X(ff_vp56_norm_shift) | ||
ldrd r4, r5, [sp, #44] @ token_prob, qmul | ||
cmp r3, #0 | ||
ldr r11, [r5] | ||
@@ -206,7 +206,7 @@ A orrcs r8, r8, r10, lsl r6 | ||
mov r9, #8 | ||
it ge | ||
addge r12, r12, #1 | ||
- movrel r4, X(ff_vp8_dct_cat_prob) | ||
+ movrelx r4, X(ff_vp8_dct_cat_prob), r1 | ||
lsl r9, r9, r12 | ||
ldr r4, [r4, r12, lsl #2] | ||
add r12, r9, #3 | ||
diff --git a/libavcodec/arm/asm.S b/libavcodec/arm/asm.S | ||
index 6038a63..1508180 100644 | ||
--- a/libavcodec/arm/asm.S | ||
+++ b/libavcodec/arm/asm.S | ||
@@ -62,7 +62,14 @@ ELF .eabi_attribute 25, \val | ||
.endm | ||
|
||
.macro function name, export=0 | ||
+ .set .Lpic_idx, 0 | ||
+ .set .Lpic_gp, 0 | ||
.macro endfunc | ||
+ .if .Lpic_idx | ||
+ .altmacro | ||
+ put_pic %(.Lpic_idx - 1) | ||
+ .noaltmacro | ||
+ .endif | ||
ELF .size \name, . - \name | ||
.endfunc | ||
.purgem endfunc | ||
@@ -106,8 +113,44 @@ ELF .size \name, . - \name | ||
#endif | ||
.endm | ||
|
||
+.macro put_pic num | ||
+ put_pic_\num | ||
+.endm | ||
+ | ||
+.macro do_def_pic num, val, label | ||
+ .macro put_pic_\num | ||
+ .if \num | ||
+ .altmacro | ||
+ put_pic %(\num - 1) | ||
+ .noaltmacro | ||
+ .endif | ||
+\label: .word \val | ||
+ .purgem put_pic_\num | ||
+ .endm | ||
+.endm | ||
+ | ||
+.macro def_pic val, label | ||
+ .altmacro | ||
+ do_def_pic %.Lpic_idx, \val, \label | ||
+ .noaltmacro | ||
+ .set .Lpic_idx, .Lpic_idx + 1 | ||
+.endm | ||
+ | ||
+.macro ldpic rd, val, indir=0 | ||
+ ldr \rd, .Lpicoff\@ | ||
+.Lpic\@: | ||
+ .if \indir | ||
+ ldr \rd, [pc, \rd] | ||
+ .else | ||
+ add \rd, pc, \rd | ||
+ .endif | ||
+ def_pic \val - (.Lpic\@ + (8 >> CONFIG_THUMB)), .Lpicoff\@ | ||
+.endm | ||
+ | ||
.macro movrel rd, val | ||
-#if HAVE_ARMV6T2 && !CONFIG_PIC && !defined(__APPLE__) | ||
+#if CONFIG_PIC | ||
+ ldpic \rd, \val | ||
+#elif HAVE_ARMV6T2 && !defined(__APPLE__) | ||
movw \rd, #:lower16:\val | ||
movt \rd, #:upper16:\val | ||
#else | ||
@@ -115,6 +158,34 @@ ELF .size \name, . - \name | ||
#endif | ||
.endm | ||
|
||
+.macro movrelx rd, val, gp | ||
+#if CONFIG_PIC && defined(__ELF__) | ||
+ .ifnb \gp | ||
+ .if .Lpic_gp | ||
+ .unreq gp | ||
+ .endif | ||
+ gp .req \gp | ||
+ ldpic gp, _GLOBAL_OFFSET_TABLE_ | ||
+ .elseif !.Lpic_gp | ||
+ gp .req r12 | ||
+ ldpic gp, _GLOBAL_OFFSET_TABLE_ | ||
+ .endif | ||
+ .set .Lpic_gp, 1 | ||
+ ldr \rd, .Lpicoff\@ | ||
+ ldr \rd, [gp, \rd] | ||
+ def_pic \val(GOT), .Lpicoff\@ | ||
+#elif CONFIG_PIC && defined(__APPLE__) | ||
+ ldpic \rd, .Lpic\@, indir=1 | ||
+ .non_lazy_symbol_pointer | ||
+.Lpic\@: | ||
+ .indirect_symbol \val | ||
+ .word 0 | ||
+ .text | ||
+#else | ||
+ movrel \rd, \val | ||
+#endif | ||
+.endm | ||
+ | ||
.macro ldr_pre rt, rn, rm:vararg | ||
A ldr \rt, [\rn, \rm]! | ||
T add \rn, \rn, \rm |
52 changes: 52 additions & 0 deletions
52
ARM_intmath_use_native-size_return_types_for_clipping_functions.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
commit 87fa05a0dae629cdad390adaa1054db3f4ecc3c7 | ||
Author: Mans Rullgard <[email protected]> | ||
Date: Sat Aug 11 04:08:15 2012 +0100 | ||
|
||
ARM: intmath: use native-size return types for clipping functions | ||
|
||
This avoids having the compiler redundantly mask the values to | ||
the smaller size. | ||
|
||
Signed-off-by: Mans Rullgard <[email protected]> | ||
|
||
diff --git a/libavutil/arm/intmath.h b/libavutil/arm/intmath.h | ||
index d5a343c..88e9c26 100644 | ||
--- a/libavutil/arm/intmath.h | ||
+++ b/libavutil/arm/intmath.h | ||
@@ -44,7 +44,7 @@ static av_always_inline av_const int FASTDIV(int a, int b) | ||
} | ||
|
||
#define av_clip_uint8 av_clip_uint8_arm | ||
-static av_always_inline av_const uint8_t av_clip_uint8_arm(int a) | ||
+static av_always_inline av_const unsigned av_clip_uint8_arm(int a) | ||
{ | ||
unsigned x; | ||
__asm__ ("usat %0, #8, %1" : "=r"(x) : "r"(a)); | ||
@@ -52,15 +52,15 @@ static av_always_inline av_const uint8_t av_clip_uint8_arm(int a) | ||
} | ||
|
||
#define av_clip_int8 av_clip_int8_arm | ||
-static av_always_inline av_const uint8_t av_clip_int8_arm(int a) | ||
+static av_always_inline av_const int av_clip_int8_arm(int a) | ||
{ | ||
- unsigned x; | ||
+ int x; | ||
__asm__ ("ssat %0, #8, %1" : "=r"(x) : "r"(a)); | ||
return x; | ||
} | ||
|
||
#define av_clip_uint16 av_clip_uint16_arm | ||
-static av_always_inline av_const uint16_t av_clip_uint16_arm(int a) | ||
+static av_always_inline av_const unsigned av_clip_uint16_arm(int a) | ||
{ | ||
unsigned x; | ||
__asm__ ("usat %0, #16, %1" : "=r"(x) : "r"(a)); | ||
@@ -68,7 +68,7 @@ static av_always_inline av_const uint16_t av_clip_uint16_arm(int a) | ||
} | ||
|
||
#define av_clip_int16 av_clip_int16_arm | ||
-static av_always_inline av_const int16_t av_clip_int16_arm(int a) | ||
+static av_always_inline av_const int av_clip_int16_arm(int a) | ||
{ | ||
int x; | ||
__asm__ ("ssat %0, #16, %1" : "=r"(x) : "r"(a)); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters