Skip to content

Commit

Permalink
Simplify Neon MSE helper function params/return values
Browse files Browse the repository at this point in the history
Simplify the parameters and return values of the Neon MSE helper
functions for both standard and high bitdepth - avoiding unused
return values.

Change-Id: I6f9208f9ce890fbe58346d9c7d9d701f28f2f90f
  • Loading branch information
jwright-arm committed Aug 31, 2023
1 parent 6da1bd0 commit 7ee16bc
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 70 deletions.
86 changes: 38 additions & 48 deletions vpx_dsp/arm/highbd_variance_neon.c
Original file line number Diff line number Diff line change
Expand Up @@ -357,8 +357,7 @@ HIGHBD_GET_VAR(16)
static INLINE uint32_t highbd_mse_wxh_neon(const uint16_t *src_ptr,
int src_stride,
const uint16_t *ref_ptr,
int ref_stride, int w, int h,
unsigned int *sse) {
int ref_stride, int w, int h) {
uint32x4_t sse_u32[2] = { vdupq_n_u32(0), vdupq_n_u32(0) };

int i = h;
Expand All @@ -382,17 +381,15 @@ static INLINE uint32_t highbd_mse_wxh_neon(const uint16_t *src_ptr,
ref_ptr += ref_stride;
} while (--i != 0);

*sse = horizontal_add_uint32x4(vaddq_u32(sse_u32[0], sse_u32[1]));
return *sse;
return horizontal_add_uint32x4(vaddq_u32(sse_u32[0], sse_u32[1]));
}

#if defined(__ARM_FEATURE_DOTPROD)

static INLINE uint32_t highbd_mse8_8xh_neon(const uint16_t *src_ptr,
int src_stride,
const uint16_t *ref_ptr,
int ref_stride, int h,
unsigned int *sse) {
int ref_stride, int h) {
uint32x4_t sse_u32 = vdupq_n_u32(0);

int i = h / 2;
Expand All @@ -416,15 +413,13 @@ static INLINE uint32_t highbd_mse8_8xh_neon(const uint16_t *src_ptr,
sse_u32 = vdotq_u32(sse_u32, diff, diff);
} while (--i != 0);

*sse = horizontal_add_uint32x4(sse_u32);
return *sse;
return horizontal_add_uint32x4(sse_u32);
}

static INLINE uint32_t highbd_mse8_16xh_neon(const uint16_t *src_ptr,
int src_stride,
const uint16_t *ref_ptr,
int ref_stride, int h,
unsigned int *sse) {
int ref_stride, int h) {
uint32x4_t sse_u32 = vdupq_n_u32(0);

int i = h;
Expand All @@ -447,60 +442,55 @@ static INLINE uint32_t highbd_mse8_16xh_neon(const uint16_t *src_ptr,
ref_ptr += ref_stride;
} while (--i != 0);

*sse = horizontal_add_uint32x4(sse_u32);
return *sse;
return horizontal_add_uint32x4(sse_u32);
}

#else // !defined(__ARM_FEATURE_DOTPROD)

static INLINE uint32_t highbd_mse8_8xh_neon(const uint16_t *src_ptr,
int src_stride,
const uint16_t *ref_ptr,
int ref_stride, int h,
unsigned int *sse) {
return highbd_mse_wxh_neon(src_ptr, src_stride, ref_ptr, ref_stride, 8, h,
sse);
int ref_stride, int h) {
return highbd_mse_wxh_neon(src_ptr, src_stride, ref_ptr, ref_stride, 8, h);
}

static INLINE uint32_t highbd_mse8_16xh_neon(const uint16_t *src_ptr,
int src_stride,
const uint16_t *ref_ptr,
int ref_stride, int h,
unsigned int *sse) {
return highbd_mse_wxh_neon(src_ptr, src_stride, ref_ptr, ref_stride, 16, h,
sse);
int ref_stride, int h) {
return highbd_mse_wxh_neon(src_ptr, src_stride, ref_ptr, ref_stride, 16, h);
}

#endif // defined(__ARM_FEATURE_DOTPROD)

#define HIGHBD_MSE_WXH_NEON(w, h) \
uint32_t vpx_highbd_8_mse##w##x##h##_neon( \
const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \
int ref_stride, uint32_t *sse) { \
uint16_t *src = CONVERT_TO_SHORTPTR(src_ptr); \
uint16_t *ref = CONVERT_TO_SHORTPTR(ref_ptr); \
highbd_mse8_##w##xh_neon(src, src_stride, ref, ref_stride, h, sse); \
return *sse; \
} \
\
uint32_t vpx_highbd_10_mse##w##x##h##_neon( \
const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \
int ref_stride, uint32_t *sse) { \
uint16_t *src = CONVERT_TO_SHORTPTR(src_ptr); \
uint16_t *ref = CONVERT_TO_SHORTPTR(ref_ptr); \
highbd_mse_wxh_neon(src, src_stride, ref, ref_stride, w, h, sse); \
*sse = ROUND_POWER_OF_TWO(*sse, 4); \
return *sse; \
} \
\
uint32_t vpx_highbd_12_mse##w##x##h##_neon( \
const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \
int ref_stride, uint32_t *sse) { \
uint16_t *src = CONVERT_TO_SHORTPTR(src_ptr); \
uint16_t *ref = CONVERT_TO_SHORTPTR(ref_ptr); \
highbd_mse_wxh_neon(src, src_stride, ref, ref_stride, w, h, sse); \
*sse = ROUND_POWER_OF_TWO(*sse, 8); \
return *sse; \
// Defines the three high bitdepth MSE entry points for a w x h block:
// vpx_highbd_8_mse<w>x<h>_neon, vpx_highbd_10_mse<w>x<h>_neon and
// vpx_highbd_12_mse<w>x<h>_neon (w and h are pasted into the names).
// Each one converts src_ptr/ref_ptr with CONVERT_TO_SHORTPTR, stores the
// helper's return value in *sse and returns that same value.
//  - The 8-bit variant calls highbd_mse8_<w>xh_neon, so a helper with that
//    pasted name must exist for the chosen w.
//  - The 10- and 12-bit variants call highbd_mse_wxh_neon and then apply
//    ROUND_POWER_OF_TWO(*sse, 4) and ROUND_POWER_OF_TWO(*sse, 8)
//    respectively.
//    NOTE(review): presumably a rounding right shift that rescales the sum
//    to 8-bit precision - confirm against the macro's definition.
#define HIGHBD_MSE_WXH_NEON(w, h) \
uint32_t vpx_highbd_8_mse##w##x##h##_neon( \
const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \
int ref_stride, uint32_t *sse) { \
uint16_t *src = CONVERT_TO_SHORTPTR(src_ptr); \
uint16_t *ref = CONVERT_TO_SHORTPTR(ref_ptr); \
*sse = highbd_mse8_##w##xh_neon(src, src_stride, ref, ref_stride, h); \
return *sse; \
} \
\
uint32_t vpx_highbd_10_mse##w##x##h##_neon( \
const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \
int ref_stride, uint32_t *sse) { \
uint16_t *src = CONVERT_TO_SHORTPTR(src_ptr); \
uint16_t *ref = CONVERT_TO_SHORTPTR(ref_ptr); \
*sse = highbd_mse_wxh_neon(src, src_stride, ref, ref_stride, w, h); \
*sse = ROUND_POWER_OF_TWO(*sse, 4); \
return *sse; \
} \
\
uint32_t vpx_highbd_12_mse##w##x##h##_neon( \
const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \
int ref_stride, uint32_t *sse) { \
uint16_t *src = CONVERT_TO_SHORTPTR(src_ptr); \
uint16_t *ref = CONVERT_TO_SHORTPTR(ref_ptr); \
*sse = highbd_mse_wxh_neon(src, src_stride, ref, ref_stride, w, h); \
*sse = ROUND_POWER_OF_TWO(*sse, 8); \
return *sse; \
}

HIGHBD_MSE_WXH_NEON(16, 16)
Expand Down
36 changes: 14 additions & 22 deletions vpx_dsp/arm/variance_neon.c
Original file line number Diff line number Diff line change
Expand Up @@ -374,8 +374,7 @@ VARIANCE_WXH_NEON(64, 64, 12)
static INLINE unsigned int vpx_mse8xh_neon(const unsigned char *src_ptr,
int src_stride,
const unsigned char *ref_ptr,
int ref_stride, int h,
unsigned int *sse) {
int ref_stride, int h) {
uint32x2_t sse_u32[2] = { vdup_n_u32(0), vdup_n_u32(0) };

int i = h / 2;
Expand All @@ -398,15 +397,13 @@ static INLINE unsigned int vpx_mse8xh_neon(const unsigned char *src_ptr,
sse_u32[1] = vdot_u32(sse_u32[1], diff1, diff1);
} while (--i != 0);

*sse = horizontal_add_uint32x2(vadd_u32(sse_u32[0], sse_u32[1]));
return *sse;
return horizontal_add_uint32x2(vadd_u32(sse_u32[0], sse_u32[1]));
}

static INLINE unsigned int vpx_mse16xh_neon(const unsigned char *src_ptr,
int src_stride,
const unsigned char *ref_ptr,
int ref_stride, int h,
unsigned int *sse) {
int ref_stride, int h) {
uint32x4_t sse_u32[2] = { vdupq_n_u32(0), vdupq_n_u32(0) };

int i = h / 2;
Expand All @@ -429,8 +426,7 @@ static INLINE unsigned int vpx_mse16xh_neon(const unsigned char *src_ptr,
sse_u32[1] = vdotq_u32(sse_u32[1], diff1, diff1);
} while (--i != 0);

*sse = horizontal_add_uint32x4(vaddq_u32(sse_u32[0], sse_u32[1]));
return *sse;
return horizontal_add_uint32x4(vaddq_u32(sse_u32[0], sse_u32[1]));
}

unsigned int vpx_get4x4sse_cs_neon(const unsigned char *src_ptr, int src_stride,
Expand All @@ -451,8 +447,7 @@ unsigned int vpx_get4x4sse_cs_neon(const unsigned char *src_ptr, int src_stride,
static INLINE unsigned int vpx_mse8xh_neon(const unsigned char *src_ptr,
int src_stride,
const unsigned char *ref_ptr,
int ref_stride, int h,
unsigned int *sse) {
int ref_stride, int h) {
uint32x4_t sse_u32[2] = { vdupq_n_u32(0), vdupq_n_u32(0) };

int i = h / 2;
Expand All @@ -478,15 +473,13 @@ static INLINE unsigned int vpx_mse8xh_neon(const unsigned char *src_ptr,
sse_u32[1] = vpadalq_u16(sse_u32[1], sse1);
} while (--i != 0);

*sse = horizontal_add_uint32x4(vaddq_u32(sse_u32[0], sse_u32[1]));
return *sse;
return horizontal_add_uint32x4(vaddq_u32(sse_u32[0], sse_u32[1]));
}

static INLINE unsigned int vpx_mse16xh_neon(const unsigned char *src_ptr,
int src_stride,
const unsigned char *ref_ptr,
int ref_stride, int h,
unsigned int *sse) {
int ref_stride, int h) {
uint32x4_t sse_u32[2] = { vdupq_n_u32(0), vdupq_n_u32(0) };

int i = h;
Expand All @@ -507,8 +500,7 @@ static INLINE unsigned int vpx_mse16xh_neon(const unsigned char *src_ptr,
sse_u32[1] = vpadalq_u16(sse_u32[1], sse1);
} while (--i != 0);

*sse = horizontal_add_uint32x4(vaddq_u32(sse_u32[0], sse_u32[1]));
return *sse;
return horizontal_add_uint32x4(vaddq_u32(sse_u32[0], sse_u32[1]));
}

unsigned int vpx_get4x4sse_cs_neon(const unsigned char *src_ptr, int src_stride,
Expand Down Expand Up @@ -538,12 +530,12 @@ unsigned int vpx_get4x4sse_cs_neon(const unsigned char *src_ptr, int src_stride,

#endif // defined(__ARM_FEATURE_DOTPROD)

#define VPX_MSE_WXH_NEON(w, h) \
unsigned int vpx_mse##w##x##h##_neon( \
const unsigned char *src_ptr, int src_stride, \
const unsigned char *ref_ptr, int ref_stride, unsigned int *sse) { \
return vpx_mse##w##xh_neon(src_ptr, src_stride, ref_ptr, ref_stride, h, \
sse); \
// Defines the public entry point vpx_mse<w>x<h>_neon (w and h are pasted
// into the name). It forwards to the width-specific helper
// vpx_mse<w>xh_neon with the block height h, writes the helper's return
// value to *sse and returns that same value. The helper no longer takes the
// sse pointer, so the store to *sse happens only here.
#define VPX_MSE_WXH_NEON(w, h) \
unsigned int vpx_mse##w##x##h##_neon( \
const unsigned char *src_ptr, int src_stride, \
const unsigned char *ref_ptr, int ref_stride, unsigned int *sse) { \
*sse = vpx_mse##w##xh_neon(src_ptr, src_stride, ref_ptr, ref_stride, h); \
return *sse; \
}

VPX_MSE_WXH_NEON(8, 8)
Expand Down

0 comments on commit 7ee16bc

Please sign in to comment.