#include <stdio.h>
/*
 * Operands read by the YUV422ToRGB32_MMX macro.  The macro names each of
 * these objects as the memory source operand of an MMX instruction.
 * No initializer is visible in this file for any of them.
 *
 * NOTE(review): an MMX instruction whose destination is an mm register
 * reads a full 64-bit quantity from its memory source, but every object
 * here except csMMX_128_w is declared as a 16-bit integer.  Presumably
 * they are meant to be 64-bit values holding the constant replicated in
 * every byte/word lane - confirm the intended types and values.
 */
__int16 csMMX_16_b; /* psubusb operand applied to the Y bytes; the macro comments it as "Y -= 16" */
__int64 csMMX_128_w[2]; /* psubsw operand applied to the U and V words; commented as "u/v -= 128" */
__int16 csMMX_0x00FF_w; /* pand mask used to keep the low byte of each word of Y */
__int16 csMMX_U_green_w; /* pmulhw multiplier: U contribution to green */
__int16 csMMX_V_green_w; /* pmulhw multiplier: V contribution to green */
__int16 csMMX_U_blue_w; /* pmulhw multiplier: U contribution to blue */
__int16 csMMX_V_red_w; /* pmulhw multiplier: V contribution to red */
__int16 csMMX_Y_coeff_w; /* pmulhw multiplier applied to the luma words */
#define YUV422ToRGB32_MMX(out_RGB_reg,WriteCode) \
/*input : mm0 = Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ \
/* mm1 = 00 u3 00 u2 00 u1 00 u0 */ \
/* mm2 = 00 v3 00 v2 00 v1 00 v0 */ \
/*output : [out_RGB_reg -- out_RGB_reg+8*4] */ \
\
__asm psubusb mm0, csMMX_16_b /* mm0 : Y -= 16 */ \
__asm psubsw mm1, csMMX_128_w /* mm1 : u -= 128 */ \
__asm movq mm7, mm0 \
__asm psubsw mm2, csMMX_128_w /* mm2 : v -= 128 */ \
__asm pand mm0, csMMX_0x00FF_w /* mm0 = 00 Y6 00 Y4 00 Y2 00 Y0 */ \
__asm psllw mm1, 3 /* mm1 : u *= 8 */ \
__asm psllw mm2, 3 /* mm2 : v *= 8 */ \
__asm psrlw mm7, 8 /* mm7 = 00 Y7 00 Y5 00 Y3 00 Y1 */ \
__asm movq mm3, mm1 \
__asm movq mm4, mm2 \
\
__asm pmulhw mm1, csMMX_U_green_w /* mm1 = u * U_green */ \
__asm psllw mm0, 3 /* y*=8 */ \
__asm pmulhw mm2, csMMX_V_green_w /* mm2 = v * V_green */ \
__asm psllw mm7, 3 /* y*=8 */ \
__asm pmulhw mm3, csMMX_U_blue_w \
__asm paddsw mm1, mm2 \
__asm pmulhw mm4, csMMX_V_red_w \
__asm movq mm2, mm3 \
__asm pmulhw mm0, csMMX_Y_coeff_w \
__asm movq mm6, mm4 \
__asm pmulhw mm7, csMMX_Y_coeff_w \
__asm movq mm5, mm1 \
__asm paddsw mm3, mm0 /* mm3 = B6 B4 B2 B0 */ \
__asm paddsw mm2, mm7 /* mm2 = B7 B5 B3 B1 */ \
__asm paddsw mm4, mm0 /* mm4 = R6 R4 R2 R0 */ \
__asm paddsw mm6, mm7 /* mm6 = R7 R5 R3 R1 */ \
__asm paddsw mm1, mm0 /* mm1 = G6 G4 G2 G0 */ \
__asm paddsw mm5, mm7 /* mm5 = G7 G5 G3 G1 */ \
\
__asm packuswb mm3, mm4 /* mm3 = R6 R4 R2 R0 B6 B4 B2 B0 to [0-255] */ \
__asm packuswb mm2, mm6 /* mm2 = R7 R5 R3 R1 B7 B5 B3 B1 to [0-255] */ \
__asm packuswb mm5, mm1 /* mm5 = G6 G4 G2 G0 G7 G5 G3 G1 to [0-255] */ \
__asm movq mm4, mm3 \
__asm punpcklbw mm3, mm2 /* mm3 = B7 B6 B5 B4 B3 B2 B1 B0 */ \
__asm punpckldq mm1, mm5 /* mm1 = G7 G5 G3 G1 xx xx xx xx */ \
__asm punpckhbw mm4, mm2 /* mm4 = R7 R6 R5 R4 R3 R2 R1 R0 */ \
__asm punpckhbw mm5, mm1 /* mm5 = G7 G6 G5 G4 G3 G2 G1 G0 */ \
\
/*out*/ \
__asm pcmpeqb mm2, mm2 /* mm2 = FF FF FF FF FF FF FF FF */ \
\
__asm movq mm0, mm3 \
__asm movq mm7, mm4 \
__asm punpcklbw mm0, mm5 /* mm0 = G3 B3 G2 B2 G1 B1 G0 B0 */ \
__asm punpcklbw mm7, mm2 /* mm7 = FF R3 FF R2 FF R1 FF R0 */ \
__asm movq mm1, mm0 \
__asm movq mm6, mm3 \
__asm punpcklwd mm0, mm7 /* mm0 = FF R1 G1 B1 FF R0 G0 B0 */ \
__asm punpckhwd mm1, mm7 /* mm1 = FF R3 G3 B3 FF R2 G2 B2 */ \
__asm WriteCode[out_RGB_reg], mm0 \
__asm movq mm7, mm4 \
__asm punpckhbw mm6, mm5 /* mm6 = G7 B7 G6 B6 G5 B5 G4 B4 */ \
__asm WriteCode[out_RGB_reg + 8], mm1 \
__asm punpckhbw mm7, mm2 /* mm7 = FF R7 FF R6 FF R5 FF R4 */ \
__asm movq mm0, mm6 \
__asm punpcklwd mm6, mm7 /* mm6 = FF R5 G5 B5 FF R4 G4 B4 */ \
__asm punpckhwd mm0, mm7 /* mm0 = FF R7 G7 B7 FF R6 G6 B6 */ \
__asm WriteCode[out_RGB_reg + 8 * 2], mm6 \
__asm WriteCode[out_RGB_reg + 8 * 3], mm0
int main(void)
{
int x;
YUV422ToRGB32_MMX(x, movq);
return 0;
}