Skip to content

Commit

Permalink
Add part of MMX and modify the class
Browse files Browse the repository at this point in the history
Add:
MMX::rgb2yuv
Class rgb:: r16, g16, b16
Class yuv:: y16, u16, v16
  • Loading branch information
mimicji committed Dec 23, 2016
1 parent 675ec68 commit 92c051d
Show file tree
Hide file tree
Showing 7 changed files with 185 additions and 32 deletions.
28 changes: 10 additions & 18 deletions alpha_blend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,35 +60,27 @@ inline void image_overlay_MMX(__m64* dst, __m64* src1, __m64* src2, const uint16
_mm_empty();
}

uint16_t dst16[WIDTH * HEIGHT];

inline void blend_one_color_MMX(uint8_t* dst8, uint16_t* src16, const uint8_t alpha) {
int i;
inline void blend_one_color_MMX(int16_t* dst16, uint16_t* src16, const uint8_t alpha) {
alpha_blend_MMX((__m64*)dst16, (__m64*)src16, alpha, WIDTH * HEIGHT / 4);
for (i = 0; i < WIDTH * HEIGHT; i++) {
dst8[i] = (uint8_t)dst16[i];
}
}

inline void overlay_one_color_MMX(uint8_t* dst8, uint16_t* src16_1, uint16_t* src16_2, const uint8_t alpha) {
int i;
inline void overlay_one_color_MMX(int16_t* dst16, uint16_t* src16_1, uint16_t* src16_2, const uint8_t alpha) {
image_overlay_MMX((__m64*)dst16, (__m64*)src16_1, (__m64*)src16_2, alpha, WIDTH * HEIGHT / 4);
for (i = 0; i < WIDTH * HEIGHT; i++) {
dst8[i] = (uint8_t)dst16[i];
}
}
}

/// Alpha-blends src_rgb into dst_rgb with MMX, one colour plane at a time.
///
/// @param dst_rgb  Image whose 16-bit planes (r16/g16/b16) receive the result.
/// @param src_rgb  Image whose 16-bit planes are the blend input.
/// @param alpha    Blend factor, forwarded to the per-plane helper.
///
/// Only the 16-bit planes are passed as destinations; the 8-bit planes
/// (r_ptr/g_ptr/b_ptr) are not touched here, so the output is not directly
/// comparable with the Non_Simd implementation (per the original author's note).
void MMX::alpha_blend(const RGB* dst_rgb, const RGB* src_rgb, const uint8_t alpha) {
    // The helper takes its source as uint16_t*, while RGB stores int16_t planes.
    blend_one_color_MMX(dst_rgb->r16, reinterpret_cast<uint16_t*>(src_rgb->r16), alpha);
    blend_one_color_MMX(dst_rgb->g16, reinterpret_cast<uint16_t*>(src_rgb->g16), alpha);
    blend_one_color_MMX(dst_rgb->b16, reinterpret_cast<uint16_t*>(src_rgb->b16), alpha);
}

/// Overlays src_rgb_1 and src_rgb_2 into dst_rgb with MMX, one colour plane at a time.
///
/// @param dst_rgb    Image whose 16-bit planes (r16/g16/b16) receive the result.
/// @param src_rgb_1  First input image (16-bit planes are read).
/// @param src_rgb_2  Second input image (16-bit planes are read).
/// @param alpha      Blend factor, forwarded to the per-plane helper.
///
/// Only the 16-bit planes are passed as destinations; the 8-bit planes
/// (r_ptr/g_ptr/b_ptr) are not touched here, so the output is not directly
/// comparable with the Non_Simd implementation (per the original author's note).
void MMX::image_overlay(const RGB* dst_rgb, const RGB* src_rgb_1, const RGB* src_rgb_2, const uint8_t alpha) {
    // The helper takes its sources as uint16_t*, while RGB stores int16_t planes.
    overlay_one_color_MMX(dst_rgb->r16,
                          reinterpret_cast<uint16_t*>(src_rgb_1->r16),
                          reinterpret_cast<uint16_t*>(src_rgb_2->r16),
                          alpha);
    overlay_one_color_MMX(dst_rgb->g16,
                          reinterpret_cast<uint16_t*>(src_rgb_1->g16),
                          reinterpret_cast<uint16_t*>(src_rgb_2->g16),
                          alpha);
    overlay_one_color_MMX(dst_rgb->b16,
                          reinterpret_cast<uint16_t*>(src_rgb_1->b16),
                          reinterpret_cast<uint16_t*>(src_rgb_2->b16),
                          alpha);
}

void AVX::alpha_blend(const RGB* dst_rgb, const RGB* src_rgb, const uint8_t alpha) {
Expand Down
2 changes: 1 addition & 1 deletion main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ int main() {
clock_t recuit_time = clock();
//MMX::alpha_blend(dst_rgb, tmp_rgb, alpha);
MMX::image_overlay(dst_rgb, tmp_rgb, tmp_rgb2, alpha);
rgb2yuv(dst_yuv, dst_rgb);
MMX::rgb2yuv(dst_yuv, dst_rgb);
total_time += clock() - recuit_time;
dst_yuv->write(fout);
dst_rgb->clean();
Expand Down
6 changes: 3 additions & 3 deletions rgb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@

void RGB::extend() const {
for (int i = 0; i<width * height; i++) {
this->r16[i] = (uint16_t)this->r_ptr[i];
this->g16[i] = (uint16_t)this->g_ptr[i];
this->b16[i] = (uint16_t)this->b_ptr[i];
this->r16[i] = (int16_t)this->r_ptr[i];
this->g16[i] = (int16_t)this->g_ptr[i];
this->b16[i] = (int16_t)this->b_ptr[i];
}
}

Expand Down
12 changes: 6 additions & 6 deletions rgb.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,19 @@ class RGB {
uint8_t* r_ptr;
uint8_t* g_ptr;
uint8_t* b_ptr;
uint16_t* r16;
uint16_t* g16;
uint16_t* b16;
int16_t* r16;
int16_t* g16;
int16_t* b16;

/// Creates an image of _width x _height pixels.
///
/// Allocates three 8-bit planes (r_ptr/g_ptr/b_ptr) and three 16-bit planes
/// (r16/g16/b16), each holding width * height samples, then calls clean().
/// Every plane is allocated exactly once, so nothing is leaked by a second
/// assignment to the same pointer.
RGB(int32_t _width, int32_t _height) {
    width = _width;
    height = _height;

    // 8-bit planes.
    r_ptr = new uint8_t[width * height];
    g_ptr = new uint8_t[width * height];
    b_ptr = new uint8_t[width * height];

    // 16-bit planes; signed, matching what the MMX routines are handed.
    r16 = new int16_t[width * height];
    g16 = new int16_t[width * height];
    b16 = new int16_t[width * height];

    clean();
}

Expand Down
124 changes: 123 additions & 1 deletion rgb2yuv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,130 @@ void Non_Simd::rgb2yuv(const YUV* dst_yuv, const RGB* src_rgb) {
}
}

// File-scope scratch planes used by MMX::rgb2yuv: before its U and V passes it
// copies every second sample of every second row of r16/g16/b16 into these.
// Each buffer holds WIDTH * HEIGHT / 4 samples.
int16_t tmp_r[WIDTH * HEIGHT / 4];
int16_t tmp_g[WIDTH * HEIGHT / 4];
int16_t tmp_b[WIDTH * HEIGHT / 4];
// RGB -> YUV weights in Q16 fixed point (weight * 65536, truncated toward zero),
// ordered { R, G, B }. They are meant to be used with a "multiply, keep the high
// 16 bits" operation, which yields sample * weight.
//
// The casts are explicit because double -> int16_t inside a braced initializer
// is a narrowing conversion, which C++11 and later reject.
//
// RGB_Y's G entry stores 0.004129 rather than the full 0.504129: the full weight
// would be about 33038 in Q16, which does not fit in int16_t. MMX::rgb2yuv adds
// the missing 0.5 * G separately as G >> 1.
//
// NOTE(review): RGB_U's weights match the Cr row of the usual BT.601 matrix and
// RGB_V's match the Cb row -- confirm against Non_Simd::rgb2yuv whether the two
// planes are intentionally swapped.
static const int16_t RGB_Y[3] = {
    static_cast<int16_t>(0.256788 * (1 << 16)),
    static_cast<int16_t>(0.004129 * (1 << 16)),
    static_cast<int16_t>(0.097906 * (1 << 16))
};
static const int16_t RGB_U[3] = {
    static_cast<int16_t>(0.439216 * (1 << 16)),
    static_cast<int16_t>(-0.367788 * (1 << 16)),
    static_cast<int16_t>(-0.071427 * (1 << 16))
};
static const int16_t RGB_V[3] = {
    static_cast<int16_t>(-0.148223 * (1 << 16)),
    static_cast<int16_t>(-0.290993 * (1 << 16)),
    static_cast<int16_t>(0.439216 * (1 << 16))
};
void MMX::rgb2yuv(const YUV* dst_yuv, const RGB* src_rgb) {
// TODO

int i, j, k;
__m64 tmp;


_mm_empty();

// RGB to Y Channel
__m64* dst = (__m64*) dst_yuv->y16;
__m64* src_r = (__m64*)src_rgb->r16;
__m64* src_g = (__m64*)src_rgb->g16;
__m64* src_b = (__m64*)src_rgb->b16;
const __m64 R_Y = _mm_set_pi16(RGB_Y[0], RGB_Y[0], RGB_Y[0], RGB_Y[0]);
const __m64 G_Y = _mm_set_pi16(RGB_Y[1], RGB_Y[1], RGB_Y[1], RGB_Y[1]);
const __m64 B_Y = _mm_set_pi16(RGB_Y[2], RGB_Y[2], RGB_Y[2], RGB_Y[2]);
const __m64 OFFSET_16 = _mm_set_pi16(16, 16, 16, 16);
for (i = 0; i < dst_yuv->height * dst_yuv->width / 4; i++) {
// R Channel to Y Channel
*dst = _m_pmulhw(*src_r, R_Y); // Y = R * 0.256788

// G Channel to Y Channel
tmp = _m_pmulhw(*src_g, G_Y);
*dst = _m_paddsw(tmp, *dst); // Y += G * 0.004129
tmp = _m_psrlwi(*src_g, 1);
*dst = _m_paddsw(tmp, *dst); // Y += G >> 1;

// B Channel to Y Channel
tmp = _m_pmulhw(*src_b, B_Y);
*dst = _m_paddsw(tmp, *dst); // Y += B * 0.097906

// Add offset
*dst = _m_paddsw(*dst, OFFSET_16); // Y += 16

// increase iterators
dst++;
src_r++;
src_g++;
src_b++;
}
// End of RGB to Y Channel



for (i = 0, k = 0; i < dst_yuv->height; i += 2) {
for (j = 0; j < dst_yuv->width; j += 2, ++k) {
tmp_r[k] = src_rgb->r16[i * dst_yuv->width + j];
tmp_g[k] = src_rgb->g16[i * dst_yuv->width + j];
tmp_b[k] = src_rgb->b16[i * dst_yuv->width + j];
}
}

// RGB to U Channel
dst = (__m64*) dst_yuv->u16;
src_r = (__m64*) tmp_r;
src_g = (__m64*) tmp_g;
src_b = (__m64*) tmp_b;
const __m64 R_U = _mm_set_pi16(RGB_U[0], RGB_U[0], RGB_U[0], RGB_U[0]);
const __m64 G_U = _mm_set_pi16(RGB_U[1], RGB_U[1], RGB_U[1], RGB_U[1]);
const __m64 B_U = _mm_set_pi16(RGB_U[2], RGB_U[2], RGB_U[2], RGB_U[2]);
const __m64 OFFSET_128 = _mm_set_pi16(128, 128, 128, 128);
for (i = 0; i < WIDTH * HEIGHT / 16; i++) {
// R Channel to U Channel
*dst = _m_pmulhw(*src_r, R_U); // U = R * 0.439216

// G Channel to U Channel
tmp = _m_pmulhw(*src_g, G_U);
*dst = _m_paddsw(tmp, *dst); // U += G * (-0.367788)

// B Channel to U Channel
tmp = _m_pmulhw(*src_b, B_U);
*dst = _m_paddsw(tmp, *dst); // U += B * (-0.071427)

// Add offset
*dst = _m_paddsw(*dst, OFFSET_128); // U += 128

// increase iterators
dst++;
src_r++;
src_g++;
src_b++;
}
// End of RGB to U Channel

// RGB to V Channel
dst = (__m64*) dst_yuv->v16;
src_r = (__m64*) tmp_r;
src_g = (__m64*) tmp_g;
src_b = (__m64*) tmp_b;
const __m64 R_V = _mm_set_pi16(RGB_V[0], RGB_V[0], RGB_V[0], RGB_V[0]);
const __m64 G_V = _mm_set_pi16(RGB_V[1], RGB_V[1], RGB_V[1], RGB_V[1]);
const __m64 B_V = _mm_set_pi16(RGB_V[2], RGB_V[2], RGB_V[2], RGB_V[2]);
for (i = 0; i < WIDTH * HEIGHT / 16; i++) {
// R Channel to V Channel
*dst = _m_pmulhw(*src_r, R_V); // V = R * (-0.148223)

// G Channel to V Channel
tmp = _m_pmulhw(*src_g, G_V);
*dst = _m_paddsw(tmp, *dst); // V += G * (-0.290993)

// B Channel to V Channel
tmp = _m_pmulhw(*src_b, B_V);
*dst = _m_paddsw(tmp, *dst); // V += B * (0.439216)

// Add offset
*dst = _m_paddsw(*dst, OFFSET_128); // V += 128

// increase iterators
dst++;
src_r++;
src_g++;
src_b++;
}
// End of RGB to V Channel

_mm_empty();
dst_yuv->s16_to_u8();
// End of function
}

void AVX::rgb2yuv(const YUV* dst_yuv, const RGB* src_rgb) {
Expand Down
30 changes: 30 additions & 0 deletions yuv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,36 @@

#include "yuv.h"

void YUV::u8_to_s16() const {
for (int i = 0; i < width * height; i++) {
this->y16[i] = (int16_t)this->y_ptr[i];
this->u16[i] = (int16_t)this->u_ptr[i];
this->v16[i] = (int16_t)this->v_ptr[i];
}
}

namespace {
    /// Saturates a signed 16-bit sample to the 0..255 range of an 8-bit plane.
    ///
    /// @param input  Sample to clamp.
    /// @return 0 for negative input, 255 for input above 255, otherwise input.
    ///
    /// Declared with plain `inline` so it builds on any compiler; the function
    /// is small enough that optimizers inline it without a vendor-specific hint.
    inline uint8_t format(int16_t input) {
        if (input < 0) {
            return static_cast<uint8_t>(0);
        }
        if (input > 255) {
            return static_cast<uint8_t>(255);
        }
        return static_cast<uint8_t>(input);
    }
}

void YUV::s16_to_u8() const {
for (int i = 0; i < width * height; i++) {
this->y_ptr[i] = format(this->y16[i]);
}
for (int i = 0; i < width * height / 4; i++) {
this->u_ptr[i] = format(this->u16[i]);
this->v_ptr[i] = format(this->v16[i]);
}
}

int YUV::read_file(const char* file_name) const {
FILE * fp;
if ((fp = fopen(file_name, "rb")) == NULL) return -1;
Expand Down
15 changes: 12 additions & 3 deletions yuv.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,28 +18,37 @@ class YUV {
uint8_t* y_ptr;
uint8_t* u_ptr;
uint8_t* v_ptr;
int16_t* y16;
int16_t* u16;
int16_t* v16;

/// Creates a YUV image of _width x _height pixels.
///
/// Allocates an 8-bit and a 16-bit buffer for each plane: the Y buffers hold
/// width * height samples, the U and V buffers width * height / 4 samples each.
/// clean() is called once all six buffers exist.
YUV(int32_t _width, int32_t _height) {
    width = _width;
    height = _height;

    const int32_t luma_samples = width * height;
    const int32_t chroma_samples = width * height / 4;

    // 8-bit planes.
    y_ptr = new uint8_t[luma_samples];
    u_ptr = new uint8_t[chroma_samples];
    v_ptr = new uint8_t[chroma_samples];

    // 16-bit working planes.
    y16 = new int16_t[luma_samples];
    u16 = new int16_t[chroma_samples];
    v16 = new int16_t[chroma_samples];

    clean();
}

/// Releases the three 8-bit planes and the three 16-bit planes allocated by
/// the constructor.
///
/// The destructor frees only memory this object owns. It deliberately does not
/// close stdout: that stream belongs to the process, and closing it here would
/// break all later output as soon as any YUV object is destroyed.
~YUV() {
    delete[] y_ptr;
    delete[] u_ptr;
    delete[] v_ptr;
    delete[] y16;
    delete[] u16;
    delete[] v16;
}

void u8_to_s16() const;
void s16_to_u8() const;
int read_file(const char* file_name) const;
void write(FILE* fp) const;
void clean() const;
int32_t getWidth() const;
int32_t getHeight() const;

};

};

0 comments on commit 92c051d

Please sign in to comment.