1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
|
Index: libtheora-1.0beta3/lib/enc/x86_32/dct_decode_mmx.c
===================================================================
--- libtheora-1.0beta3.orig/lib/enc/x86_32/dct_decode_mmx.c
+++ libtheora-1.0beta3/lib/enc/x86_32/dct_decode_mmx.c
@@ -24,12 +24,6 @@
static const __attribute__((aligned(8),used)) ogg_int64_t V3= 0x0003000300030003LL;
static const __attribute__((aligned(8),used)) ogg_int64_t V804= 0x0804080408040804LL;
-#if defined(__APPLE__) || defined(__CYGWIN__) || defined (__WIN32__)
-#define MANGLE(x) "_"#x
-#else
-#define MANGLE(x) #x
-#endif
-
static void FilterHoriz__mmx(unsigned char * PixelPtr,
ogg_int32_t LineLength,
ogg_int16_t *BoundingValuePtr){
@@ -57,9 +51,9 @@ static void FilterHoriz__mmx(unsigned ch
"psubw %%mm3,%%mm1\n" /* mm1 = pix[0]-pix[3] mm1 - mm3 */ \
"movq %%mm0,%%mm7\n" /* mm7 = pix[2]*/ \
"psubw %%mm5,%%mm0\n" /* mm0 = pix[2]-pix[1] mm0 - mm5*/ \
- "PMULLW "MANGLE(V3)",%%mm0\n" /* *3 */ \
+ "PMULLW %3,%%mm0\n" /* *3 */ \
"paddw %%mm0,%%mm1\n" /* mm1 has f[0] ... f[4]*/ \
- "paddw "MANGLE(V804)",%%mm1\n"/* add 4 */ /* add 256 after shift */ \
+ "paddw %4,%%mm1\n"/* add 4 */ /* add 256 after shift */ \
"psraw $3,%%mm1\n" /* >>3 */ \
" pextrw $0,%%mm1,%%esi\n" /* In MM1 we have 4 f coefs (16bits) */ \
" pextrw $1,%%mm1,%%edi\n" /* now perform MM4 = *(_bv+ f) */ \
@@ -86,7 +80,7 @@ static void FilterHoriz__mmx(unsigned ch
" shrl $16,%%edi\n" \
" movw %%di,1(%0,%%esi)\n" \
: \
- : "r" (PixelPtr), "r" (LineLength), "r" (BoundingValuePtr-256) \
+ : "r" (PixelPtr), "r" (LineLength), "r" (BoundingValuePtr-256), "m" (V3), "m" (V804) \
: "esi", "edi" , "memory" \
);
@@ -125,12 +119,12 @@ static void FilterVert__mmx(unsigned cha
"psubw %%mm5,%%mm3\n"
"psubw %%mm4,%%mm2\n"
/* mm3:mm2 = (pix[ystride*2]-pix[ystride]); */
- "PMULLW "MANGLE(V3)",%%mm3\n" /* *3 */
- "PMULLW "MANGLE(V3)",%%mm2\n" /* *3 */
+ "PMULLW %3,%%mm3\n" /* *3 */
+ "PMULLW %3,%%mm2\n" /* *3 */
"paddw %%mm7,%%mm3\n" /* highpart */
"paddw %%mm6,%%mm2\n" /* lowpart of pix[0]-pix[ystride*3]+3*(pix[ystride*2]-pix[ystride]); */
- "paddw "MANGLE(V804)",%%mm3\n" /* add 4 */ /* add 256 after shift */
- "paddw "MANGLE(V804)",%%mm2\n" /* add 4 */ /* add 256 after shift */
+ "paddw %4,%%mm3\n" /* add 4 */ /* add 256 after shift */
+ "paddw %4,%%mm2\n" /* add 4 */ /* add 256 after shift */
"psraw $3,%%mm3\n" /* >>3 f coefs high */
"psraw $3,%%mm2\n" /* >>3 f coefs low */
@@ -167,7 +161,7 @@ static void FilterVert__mmx(unsigned cha
"movq %%mm4,(%0,%1)\n" /* pix[ystride]= */
"emms\n"
:
- : "r" (PixelPtr-2*LineLength), "r" (LineLength), "r" (BoundingValuePtr-256)
+ : "r" (PixelPtr-2*LineLength), "r" (LineLength), "r" (BoundingValuePtr-256), "m" (V3), "m" (V804)
: "esi", "edi" , "memory"
);
}
|