Skip to content

Commit

Permalink
Fix unaligned accesses in bitstream writer
Browse files Browse the repository at this point in the history
Fixes x264 on CPUs with no unaligned access support (e.g. SPARC).
Improves performance marginally on CPUs with penalties for unaligned stores (e.g. some x86).
  • Loading branch information
yuvi authored and Fiona Glaser committed Aug 21, 2009
1 parent 77c46eb commit 1a072a3
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 17 deletions.
26 changes: 9 additions & 17 deletions common/bs.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,21 +73,22 @@ extern vlc_large_t x264_level_token[7][LEVEL_TABLE_SIZE];

/* Set up bitstream writer `s` to write into the i_data bytes starting at p_data.
 * p_data need not be aligned: the write pointer is rounded down to an aligned
 * address and the bytes in front of p_data are treated as already written.
 *
 * NOTE(review): this listing is a diff rendered without +/- markers, so some
 * statements appear twice in a row -- presumably the removed form followed by
 * its replacement. Confirm against the repository before compiling. */
static inline void bs_init( bs_t *s, void *p_data, int i_data )
{
/* Misalignment of p_data in bytes: first form is relative to WORD_SIZE,
 * second form is relative to a fixed 4-byte boundary. */
int offset = ((intptr_t)p_data & (WORD_SIZE-1));
int offset = ((intptr_t)p_data & 3);
/* Both the write pointer and the recorded start sit on the aligned address
 * at or below p_data; the end pointer is measured from p_data itself. */
s->p = s->p_start = (uint8_t*)p_data - offset;
s->p_end = (uint8_t*)p_data + i_data;
/* Earlier form: free-bit count and a full-word load through intptr_t*. */
s->i_left = offset ? 8*offset : (WORD_SIZE*8);
s->cur_bits = endian_fix( *(intptr_t*)s->p );
/* Later form: the `offset` bytes in front of p_data count as used, so the
 * free-bit count is the rest of the word. */
s->i_left = (WORD_SIZE - offset)*8;
/* Load the aligned 32-bit word and shift out the (4-offset) bytes at and after
 * p_data, leaving only the bytes that precede p_data in cur_bits.
 * (endian_fix32 is defined outside this view -- presumably a byte-order
 * conversion; verify.)
 * NOTE(review): for offset == 0 the shift count is 32, which is undefined if
 * cur_bits is only 32 bits wide -- confirm the type of cur_bits. */
s->cur_bits = endian_fix32(*(uint32_t *)(s->p));
s->cur_bits >>= (4-offset)*8;
}
/* Report how many bits have been written through `s` so far.
 * The total is the bits consumed in the current word (a full word minus the
 * i_left bits still free) plus eight bits for every byte between p_start and
 * the write pointer. */
static inline int bs_pos( bs_t *s )
{
    return (int)( (WORD_SIZE*8) - s->i_left + 8 * (s->p - s->p_start) );
}

/* Write the rest of cur_bits to the bitstream; results in a bitstream no longer 32/64-bit aligned. */
/* Write the rest of cur_bits to the bitstream; results in a bitstream no longer 32-bit aligned. */
/* NOTE(review): this listing is a diff rendered without +/- markers; the two
 * header comments above and the two store statements below are presumably the
 * removed form followed by its replacement. Confirm against the repository. */
static inline void bs_flush( bs_t *s )
{
/* Earlier form: left-justify the pending bits and store a full intptr_t. */
*(intptr_t*)s->p = endian_fix( s->cur_bits << s->i_left );
/* Later form: store exactly 32 bits; `&31` keeps the shift count in 0..31
 * even when i_left is 32 or larger. */
*(uint32_t*)s->p = endian_fix32( s->cur_bits << (s->i_left&31) );
/* Advance past only the bytes that held pending bits, then mark the whole
 * word as free again. */
s->p += WORD_SIZE - s->i_left / 8;
s->i_left = WORD_SIZE*8;
}
Expand Down Expand Up @@ -151,21 +152,12 @@ static inline void bs_write1( bs_t *s, uint32_t i_bit )

/* Pad the bitstream with 0 bits up to the next byte boundary, then flush.
 * NOTE(review): this listing is a diff rendered without +/- markers; the
 * if-block and the bs_write() call are presumably the removed and the
 * replacement form of the same padding step. Confirm against the repository. */
static inline void bs_align_0( bs_t *s )
{
/* Earlier form: shift in (i_left & 7) zero bits by hand and round i_left
 * down to a multiple of 8. */
if( s->i_left&7 )
{
s->cur_bits <<= s->i_left&7;
s->i_left &= ~7;
}
/* Later form: hand the same bit count to bs_write() with value 0
 * (bs_write is defined outside this view -- presumably (writer, count, bits)). */
bs_write( s, s->i_left&7, 0 );
bs_flush( s );
}
/* Pad the bitstream with 1 bits up to the next byte boundary, then flush.
 * NOTE(review): this listing is a diff rendered without +/- markers; the
 * if-block and the bs_write() call are presumably the removed and the
 * replacement form of the same padding step. Confirm against the repository. */
static inline void bs_align_1( bs_t *s )
{
/* Earlier form: make room for (i_left & 7) bits, set all of them to 1, and
 * round i_left down to a multiple of 8. */
if( s->i_left&7 )
{
s->cur_bits <<= s->i_left&7;
s->cur_bits |= (1 << (s->i_left&7)) - 1;
s->i_left &= ~7;
}
/* Later form: (1 << n) - 1 is a run of n one bits, passed to bs_write() with
 * n = i_left & 7 (bs_write is defined outside this view -- presumably
 * (writer, count, bits)). */
bs_write( s, s->i_left&7, (1 << (s->i_left&7)) - 1 );
bs_flush( s );
}

Expand Down Expand Up @@ -245,7 +237,7 @@ static inline void bs_write_te( bs_t *s, int x, int val )
/* Emit the trailing bits: a single 1 bit followed by padding.
 * NOTE(review): this listing is a diff rendered without +/- markers; the
 * bs_flush() and bs_write() calls are presumably the removed and the
 * replacement form of the padding step. In the later form this function no
 * longer flushes, so the caller has to call bs_flush() itself. Confirm against
 * the repository. */
static inline void bs_rbsp_trailing( bs_t *s )
{
bs_write1( s, 1 );
/* Earlier form: flush the pending bits straight away. */
bs_flush( s );
/* Later form: write (i_left & 7) zero bits to reach a byte boundary
 * (bs_write is defined outside this view -- presumably (writer, count, bits)). */
bs_write( s, s->i_left&7, 0 );
}

static inline int bs_size_ue( unsigned int val )
Expand Down
4 changes: 4 additions & 0 deletions encoder/cavlc.c
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,7 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
#if !RDO_SKIP_BS
if( i_mb_type == I_PCM )
{
uint8_t *p_start = s->p_start;
bs_write_ue( s, i_mb_i_offset + 25 );
i_mb_pos_tex = bs_pos( s );
h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
Expand All @@ -313,6 +314,9 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
memcpy( s->p + i*8, h->mb.pic.p_fenc[2] + i*FENC_STRIDE, 8 );
s->p += 64;

bs_init( s, s->p, s->p_end - s->p );
s->p_start = p_start;

/* if PCM is chosen, we need to store reconstructed frame data */
h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.p_fenc[0], FENC_STRIDE, 16 );
h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fenc[1], FENC_STRIDE, 8 );
Expand Down
2 changes: 2 additions & 0 deletions encoder/encoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -981,6 +981,7 @@ int x264_encoder_headers( x264_t *h, x264_nal_t **pp_nal, int *pi_nal )
x264_nal_start( h, NAL_PPS, NAL_PRIORITY_HIGHEST );
x264_pps_write( &h->out.bs, h->pps );
x264_nal_end( h );
bs_flush( &h->out.bs );
}
/* now set output*/
*pi_nal = h->out.i_nal;
Expand Down Expand Up @@ -1374,6 +1375,7 @@ static int x264_slice_write( x264_t *h )
bs_write_ue( &h->out.bs, i_skip ); /* last skip run */
/* rbsp_slice_trailing_bits */
bs_rbsp_trailing( &h->out.bs );
bs_flush( &h->out.bs );
}

x264_nal_end( h );
Expand Down

0 comments on commit 1a072a3

Please sign in to comment.