Skip to content

Commit

Permalink
fix avx align error
Browse files (browse the repository at this point in the history)
  • Loading branch information
songkuangshi committed Mar 22, 2019
1 parent e29a674 commit d4595c8
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 12 deletions.
20 changes: 10 additions & 10 deletions LightCTR/common/avx.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ inline void avx_vecAdd(const float* x, const float* y, float* res, size_t len) {
if (len > 7) {
for (; len > 7; len -= 8) {
__m256 t = _mm256_add_ps(_mm256_loadu_ps(x), _mm256_loadu_ps(y));
- _mm256_store_ps(res, t);
+ _mm256_storeu_ps(res, t);
x += 8;
y += 8;
res += 8;
Expand All @@ -42,7 +42,7 @@ inline void avx_vecAdd(const float* x, const float const_delta, float* res, size
if (len > 7) {
for (; len > 7; len -= 8) {
__m256 t = _mm256_add_ps(_mm256_loadu_ps(x), delta);
- _mm256_store_ps(res, t);
+ _mm256_storeu_ps(res, t);
x += 8;
res += 8;
}
Expand All @@ -62,7 +62,7 @@ inline void avx_vecScalerAdd(const float* x, const float* y, float* res,
for (; len > 7; len -= 8) {
__m256 t = _mm256_add_ps(_mm256_loadu_ps(x),
_mm256_mul_ps(_mm256_loadu_ps(y), _scalar));
- _mm256_store_ps(res, t);
+ _mm256_storeu_ps(res, t);
x += 8;
y += 8;
res += 8;
Expand All @@ -83,7 +83,7 @@ inline void avx_vecScalerAdd(const float* x, const float* y, float* res,
__m256 t = _mm256_add_ps(_mm256_loadu_ps(x),
_mm256_mul_ps(_mm256_loadu_ps(y),
_mm256_loadu_ps(y_scalar)));
- _mm256_store_ps(res, t);
+ _mm256_storeu_ps(res, t);
x += 8;
y += 8;
y_scalar += 8;
Expand Down Expand Up @@ -162,7 +162,7 @@ inline void avx_vecSqrt(const float* x, float *res, size_t len) {
if (len > 7) {
for (; len > 7; len -= 8) {
__m256 t = _mm256_sqrt_ps(_mm256_loadu_ps(x));
- _mm256_store_ps(res, t);
+ _mm256_storeu_ps(res, t);
x += 8;
res += 8;
}
Expand All @@ -178,7 +178,7 @@ inline void avx_vecRsqrt(const float* x, float *res, size_t len) {
if (len > 7) {
for (; len > 7; len -= 8) {
__m256 t = _mm256_rsqrt_ps(_mm256_loadu_ps(x));
- _mm256_store_ps(res, t);
+ _mm256_storeu_ps(res, t);
x += 8;
res += 8;
}
Expand All @@ -194,7 +194,7 @@ inline void avx_vecRcp(const float* x, float *res, size_t len) {
if (len > 7) {
for (; len > 7; len -= 8) {
__m256 t = _mm256_rcp_ps(_mm256_loadu_ps(x));
- _mm256_store_ps(res, t);
+ _mm256_storeu_ps(res, t);
x += 8;
res += 8;
}
Expand All @@ -211,7 +211,7 @@ inline void avx_vecScale(const float* x, float *res, size_t len, const float sca
if (len > 7) {
for (; len > 7; len -= 8) {
__m256 t = _mm256_mul_ps(_mm256_loadu_ps(x), _scalar);
- _mm256_store_ps(res, t);
+ _mm256_storeu_ps(res, t);
x += 8;
res += 8;
}
Expand All @@ -227,7 +227,7 @@ inline void avx_vecScale(const float* x, float *res, size_t len, const float* sc
if (len > 7) {
for (; len > 7; len -= 8) {
__m256 t = _mm256_mul_ps(_mm256_loadu_ps(x), _mm256_loadu_ps(scalar));
- _mm256_store_ps(res, t);
+ _mm256_storeu_ps(res, t);
x += 8;
scalar += 8;
res += 8;
Expand All @@ -245,7 +245,7 @@ inline void avx_vecDiv(const float* x, const float* y, float* res, size_t len) {
if (len > 7) {
for (; len > 7; len -= 8) {
__m256 t = _mm256_div_ps(_mm256_loadu_ps(x), _mm256_loadu_ps(y));
- _mm256_store_ps(res, t);
+ _mm256_storeu_ps(res, t);
x += 8;
y += 8;
res += 8;
Expand Down
2 changes: 1 addition & 1 deletion LightCTR/common/time.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ inline time_t __must_inline__ get_now_s(void) {

inline time_t __must_inline__ gettickspan(uint64_t old_tick = get_now_ms()) {
update_tv();
- int64_t cur_tick = get_now_ms();
+ uint64_t cur_tick = get_now_ms();
if (old_tick > cur_tick) {
return 0;
}
Expand Down
2 changes: 1 addition & 1 deletion build_ring.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ wait
echo
echo
echo "[Build Success]"
- echo "Please copy different BIN file to corresponding machine, DON'T forget expert LightCTR_PS_NUM, LightCTR_WORKER_NUM and LightCTR_MASTER_ADDR, run Master first"
+ echo "Please copy different BIN file to corresponding machine, DON'T forget expert LightCTR_WORKER_NUM and LightCTR_MASTER_ADDR, run Master first"
echo
echo "[or] Press any key to run clunster on standalone mode"
read -n 1
Expand Down

0 comments on commit d4595c8

Please sign in to comment.