mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-10-09 19:35:51 +00:00
Bug 498770 - Enable optimized Theora code in Windows builds - r=kinetik rs=roc
This commit is contained in:
parent
47d0907360
commit
fd39c3ad32
@ -3,6 +3,7 @@ source distribution using the update.sh script. The changes made were
|
|||||||
those applied by update.sh, the addition/update of Makefile.in files
|
those applied by update.sh, the addition/update of Makefile.in files
|
||||||
for the Mozilla build system and the patch in bug below.
|
for the Mozilla build system and the patch in bug below.
|
||||||
|
|
||||||
|
bug498770.patch - Enable optimized theora code in windows build
|
||||||
Bug 455357 - WinCE LibTheora Pre-defined Macro usage in local variable
|
Bug 455357 - WinCE LibTheora Pre-defined Macro usage in local variable
|
||||||
455357_wince_local_variable_macro_clash_patch
|
455357_wince_local_variable_macro_clash_patch
|
||||||
This patch is needed for building WinCE / WinMobile because the
|
This patch is needed for building WinCE / WinMobile because the
|
||||||
|
97
media/libtheora/bug498770.patch
Normal file
97
media/libtheora/bug498770.patch
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
Index: lib/dec/x86_vc/mmxfrag.c
|
||||||
|
===================================================================
|
||||||
|
--- lib/dec/x86_vc/mmxfrag.c (revision 16142)
|
||||||
|
+++ lib/dec/x86_vc/mmxfrag.c (working copy)
|
||||||
|
@@ -27,12 +27,14 @@
|
||||||
|
|
||||||
|
void oc_frag_recon_intra_mmx(unsigned char *_dst,int _dst_ystride,
|
||||||
|
const ogg_int16_t *_residue){
|
||||||
|
+ int _save_ebx;
|
||||||
|
/* ---------------------------------------------------------------------
|
||||||
|
This function does the inter reconstruction step with 8 iterations
|
||||||
|
unrolled. The iteration for each instruction is noted by the #id in the
|
||||||
|
comments (in case you want to reconstruct it)
|
||||||
|
--------------------------------------------------------------------- */
|
||||||
|
_asm{
|
||||||
|
+ mov [_save_ebx], ebx
|
||||||
|
mov edi, [_residue] /* load residue ptr */
|
||||||
|
mov eax, 0x00800080 /* generate constant */
|
||||||
|
mov ebx, [_dst_ystride] /* load dst-stride */
|
||||||
|
@@ -93,6 +95,7 @@
|
||||||
|
packuswb mm3, mm4 /* #8 pack to byte */
|
||||||
|
movq [edx + ecx*2], mm1 /* #7 write row */
|
||||||
|
movq [edx + eax], mm3 /* #8 write row */
|
||||||
|
+ mov ebx, [_save_ebx]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -100,6 +103,7 @@
|
||||||
|
|
||||||
|
void oc_frag_recon_inter_mmx (unsigned char *_dst, int _dst_ystride,
|
||||||
|
const unsigned char *_src, int _src_ystride, const ogg_int16_t *_residue){
|
||||||
|
+ int _save_ebx;
|
||||||
|
/* ---------------------------------------------------------------------
|
||||||
|
This function does the inter reconstruction step with two iterations
|
||||||
|
running in parallel to hide some load-latencies and break the dependency
|
||||||
|
@@ -107,6 +111,7 @@
|
||||||
|
comments (in case you want to reconstruct it)
|
||||||
|
--------------------------------------------------------------------- */
|
||||||
|
_asm{
|
||||||
|
+ mov [_save_ebx], ebx
|
||||||
|
pxor mm0, mm0 /* generate constant 0 */
|
||||||
|
mov esi, [_src]
|
||||||
|
mov edi, [_residue]
|
||||||
|
@@ -143,6 +148,7 @@
|
||||||
|
movq [edx + ebx], mm7 /* #2 write row */
|
||||||
|
lea edx, [edx+ebx*2] /* dst += stride * 2 */
|
||||||
|
jne nextchunk
|
||||||
|
+ mov ebx, [_save_ebx]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -150,6 +156,7 @@
|
||||||
|
void oc_frag_recon_inter2_mmx(unsigned char *_dst, int _dst_ystride,
|
||||||
|
const unsigned char *_src1, int _src1_ystride, const unsigned char *_src2,
|
||||||
|
int _src2_ystride,const ogg_int16_t *_residue){
|
||||||
|
+ int _save_ebx;
|
||||||
|
/* ---------------------------------------------------------------------
|
||||||
|
This function does the inter2 reconstruction step.The building of the
|
||||||
|
average is done with a bit-twiddeling trick to avoid excessive register
|
||||||
|
@@ -166,6 +173,7 @@
|
||||||
|
using the pavgb instruction let me know and I'll do the 3dnow codepath.
|
||||||
|
--------------------------------------------------------------------- */
|
||||||
|
_asm{
|
||||||
|
+ mov [_save_ebx], ebx
|
||||||
|
mov eax, 0xfefefefe
|
||||||
|
mov esi, [_src1]
|
||||||
|
mov edi, [_src2]
|
||||||
|
@@ -204,6 +212,7 @@
|
||||||
|
packuswb mm2, mm3 /* pack and saturate */
|
||||||
|
movq [edx], mm2 /* write row */
|
||||||
|
jne nextrow
|
||||||
|
+ mov ebx, [_save_ebx]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Index: lib/dec/x86_vc/mmxloopfilter.c
|
||||||
|
===================================================================
|
||||||
|
--- lib/dec/x86_vc/mmxloopfilter.c (revision 16142)
|
||||||
|
+++ lib/dec/x86_vc/mmxloopfilter.c (working copy)
|
||||||
|
@@ -38,7 +38,7 @@
|
||||||
|
_asm {
|
||||||
|
mov eax, [_pix]
|
||||||
|
mov edx, [_ystride]
|
||||||
|
- mov ebx, [_ll]
|
||||||
|
+ mov ecx, [_ll]
|
||||||
|
|
||||||
|
/* _pix -= ystride */
|
||||||
|
sub eax, edx
|
||||||
|
@@ -104,7 +104,7 @@
|
||||||
|
/*Free up mm5.*/
|
||||||
|
packuswb mm4, mm5
|
||||||
|
/*mm0=L L L L*/
|
||||||
|
- movq mm0, [ebx]
|
||||||
|
+ movq mm0, [ecx]
|
||||||
|
/*if(R_i<-2L||R_i>2L)R_i=0:*/
|
||||||
|
movq mm5, mm2
|
||||||
|
pxor mm6, mm6
|
@ -48,12 +48,10 @@ FORCE_STATIC_LIB= 1
|
|||||||
DEFINES += -DTHEORA_DISABLE_ENCODE
|
DEFINES += -DTHEORA_DISABLE_ENCODE
|
||||||
|
|
||||||
ifeq ($(findstring 86,$(OS_TEST)), 86)
|
ifeq ($(findstring 86,$(OS_TEST)), 86)
|
||||||
ifneq ($(OS_ARCH),WINNT)
|
|
||||||
ifneq ($(OS_ARCH),SunOS)
|
ifneq ($(OS_ARCH),SunOS)
|
||||||
DEFINES += -DOC_X86ASM -DUSE_ASM
|
DEFINES += -DOC_X86ASM -DUSE_ASM
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
endif
|
|
||||||
|
|
||||||
VPATH := $(srcdir) $(srcdir)/dec
|
VPATH := $(srcdir) $(srcdir)/dec
|
||||||
|
|
||||||
@ -75,6 +73,17 @@ CSRCS = \
|
|||||||
$(NULL)
|
$(NULL)
|
||||||
|
|
||||||
ifeq ($(findstring 86,$(OS_TEST)), 86)
|
ifeq ($(findstring 86,$(OS_TEST)), 86)
|
||||||
|
ifeq ($(OS_ARCH),WINNT)
|
||||||
|
VPATH += $(srcdir)/dec/x86_vc
|
||||||
|
|
||||||
|
CSRCS += \
|
||||||
|
mmxfrag.c \
|
||||||
|
mmxloopfilter.c \
|
||||||
|
x86state.c \
|
||||||
|
mmxstate.c \
|
||||||
|
mmxidct.c \
|
||||||
|
$(NULL)
|
||||||
|
else
|
||||||
VPATH += $(srcdir)/dec/x86
|
VPATH += $(srcdir)/dec/x86
|
||||||
|
|
||||||
CSRCS += \
|
CSRCS += \
|
||||||
@ -84,6 +93,7 @@ CSRCS += \
|
|||||||
mmxidct.c \
|
mmxidct.c \
|
||||||
$(NULL)
|
$(NULL)
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
include $(topsrcdir)/config/rules.mk
|
include $(topsrcdir)/config/rules.mk
|
||||||
|
|
||||||
|
@ -27,12 +27,14 @@
|
|||||||
|
|
||||||
void oc_frag_recon_intra_mmx(unsigned char *_dst,int _dst_ystride,
|
void oc_frag_recon_intra_mmx(unsigned char *_dst,int _dst_ystride,
|
||||||
const ogg_int16_t *_residue){
|
const ogg_int16_t *_residue){
|
||||||
|
int _save_ebx;
|
||||||
/* ---------------------------------------------------------------------
|
/* ---------------------------------------------------------------------
|
||||||
This function does the inter reconstruction step with 8 iterations
|
This function does the inter reconstruction step with 8 iterations
|
||||||
unrolled. The iteration for each instruction is noted by the #id in the
|
unrolled. The iteration for each instruction is noted by the #id in the
|
||||||
comments (in case you want to reconstruct it)
|
comments (in case you want to reconstruct it)
|
||||||
--------------------------------------------------------------------- */
|
--------------------------------------------------------------------- */
|
||||||
_asm{
|
_asm{
|
||||||
|
mov [_save_ebx], ebx
|
||||||
mov edi, [_residue] /* load residue ptr */
|
mov edi, [_residue] /* load residue ptr */
|
||||||
mov eax, 0x00800080 /* generate constant */
|
mov eax, 0x00800080 /* generate constant */
|
||||||
mov ebx, [_dst_ystride] /* load dst-stride */
|
mov ebx, [_dst_ystride] /* load dst-stride */
|
||||||
@ -93,6 +95,7 @@ void oc_frag_recon_intra_mmx(unsigned char *_dst,int _dst_ystride,
|
|||||||
packuswb mm3, mm4 /* #8 pack to byte */
|
packuswb mm3, mm4 /* #8 pack to byte */
|
||||||
movq [edx + ecx*2], mm1 /* #7 write row */
|
movq [edx + ecx*2], mm1 /* #7 write row */
|
||||||
movq [edx + eax], mm3 /* #8 write row */
|
movq [edx + eax], mm3 /* #8 write row */
|
||||||
|
mov ebx, [_save_ebx]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -100,6 +103,7 @@ void oc_frag_recon_intra_mmx(unsigned char *_dst,int _dst_ystride,
|
|||||||
|
|
||||||
void oc_frag_recon_inter_mmx (unsigned char *_dst, int _dst_ystride,
|
void oc_frag_recon_inter_mmx (unsigned char *_dst, int _dst_ystride,
|
||||||
const unsigned char *_src, int _src_ystride, const ogg_int16_t *_residue){
|
const unsigned char *_src, int _src_ystride, const ogg_int16_t *_residue){
|
||||||
|
int _save_ebx;
|
||||||
/* ---------------------------------------------------------------------
|
/* ---------------------------------------------------------------------
|
||||||
This function does the inter reconstruction step with two iterations
|
This function does the inter reconstruction step with two iterations
|
||||||
running in parallel to hide some load-latencies and break the dependency
|
running in parallel to hide some load-latencies and break the dependency
|
||||||
@ -107,6 +111,7 @@ void oc_frag_recon_inter_mmx (unsigned char *_dst, int _dst_ystride,
|
|||||||
comments (in case you want to reconstruct it)
|
comments (in case you want to reconstruct it)
|
||||||
--------------------------------------------------------------------- */
|
--------------------------------------------------------------------- */
|
||||||
_asm{
|
_asm{
|
||||||
|
mov [_save_ebx], ebx
|
||||||
pxor mm0, mm0 /* generate constant 0 */
|
pxor mm0, mm0 /* generate constant 0 */
|
||||||
mov esi, [_src]
|
mov esi, [_src]
|
||||||
mov edi, [_residue]
|
mov edi, [_residue]
|
||||||
@ -143,6 +148,7 @@ nextchunk:
|
|||||||
movq [edx + ebx], mm7 /* #2 write row */
|
movq [edx + ebx], mm7 /* #2 write row */
|
||||||
lea edx, [edx+ebx*2] /* dst += stride * 2 */
|
lea edx, [edx+ebx*2] /* dst += stride * 2 */
|
||||||
jne nextchunk
|
jne nextchunk
|
||||||
|
mov ebx, [_save_ebx]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -150,6 +156,7 @@ nextchunk:
|
|||||||
void oc_frag_recon_inter2_mmx(unsigned char *_dst, int _dst_ystride,
|
void oc_frag_recon_inter2_mmx(unsigned char *_dst, int _dst_ystride,
|
||||||
const unsigned char *_src1, int _src1_ystride, const unsigned char *_src2,
|
const unsigned char *_src1, int _src1_ystride, const unsigned char *_src2,
|
||||||
int _src2_ystride,const ogg_int16_t *_residue){
|
int _src2_ystride,const ogg_int16_t *_residue){
|
||||||
|
int _save_ebx;
|
||||||
/* ---------------------------------------------------------------------
|
/* ---------------------------------------------------------------------
|
||||||
This function does the inter2 reconstruction step.The building of the
|
This function does the inter2 reconstruction step.The building of the
|
||||||
average is done with a bit-twiddeling trick to avoid excessive register
|
average is done with a bit-twiddeling trick to avoid excessive register
|
||||||
@ -166,6 +173,7 @@ void oc_frag_recon_inter2_mmx(unsigned char *_dst, int _dst_ystride,
|
|||||||
using the pavgb instruction let me know and I'll do the 3dnow codepath.
|
using the pavgb instruction let me know and I'll do the 3dnow codepath.
|
||||||
--------------------------------------------------------------------- */
|
--------------------------------------------------------------------- */
|
||||||
_asm{
|
_asm{
|
||||||
|
mov [_save_ebx], ebx
|
||||||
mov eax, 0xfefefefe
|
mov eax, 0xfefefefe
|
||||||
mov esi, [_src1]
|
mov esi, [_src1]
|
||||||
mov edi, [_src2]
|
mov edi, [_src2]
|
||||||
@ -204,6 +212,7 @@ nextrow:
|
|||||||
packuswb mm2, mm3 /* pack and saturate */
|
packuswb mm2, mm3 /* pack and saturate */
|
||||||
movq [edx], mm2 /* write row */
|
movq [edx], mm2 /* write row */
|
||||||
jne nextrow
|
jne nextrow
|
||||||
|
mov ebx, [_save_ebx]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -38,7 +38,7 @@ static void loop_filter_v(unsigned char *_pix,int _ystride,
|
|||||||
_asm {
|
_asm {
|
||||||
mov eax, [_pix]
|
mov eax, [_pix]
|
||||||
mov edx, [_ystride]
|
mov edx, [_ystride]
|
||||||
mov ebx, [_ll]
|
mov ecx, [_ll]
|
||||||
|
|
||||||
/* _pix -= ystride */
|
/* _pix -= ystride */
|
||||||
sub eax, edx
|
sub eax, edx
|
||||||
@ -104,7 +104,7 @@ static void loop_filter_v(unsigned char *_pix,int _ystride,
|
|||||||
/*Free up mm5.*/
|
/*Free up mm5.*/
|
||||||
packuswb mm4, mm5
|
packuswb mm4, mm5
|
||||||
/*mm0=L L L L*/
|
/*mm0=L L L L*/
|
||||||
movq mm0, [ebx]
|
movq mm0, [ecx]
|
||||||
/*if(R_i<-2L||R_i>2L)R_i=0:*/
|
/*if(R_i<-2L||R_i>2L)R_i=0:*/
|
||||||
movq mm5, mm2
|
movq mm5, mm2
|
||||||
pxor mm6, mm6
|
pxor mm6, mm6
|
||||||
|
@ -53,3 +53,4 @@ cp $1/include/theora/theora.h ./include/theora/theora.h
|
|||||||
cp $1/include/theora/theoradec.h ./include/theora/theoradec.h
|
cp $1/include/theora/theoradec.h ./include/theora/theoradec.h
|
||||||
cp $1/include/theora/codec.h ./include/theora/codec.h
|
cp $1/include/theora/codec.h ./include/theora/codec.h
|
||||||
patch -p3 <455357_wince_local_variable_macro_clash_patch
|
patch -p3 <455357_wince_local_variable_macro_clash_patch
|
||||||
|
patch -p0 <bug498770.patch
|
||||||
|
Loading…
Reference in New Issue
Block a user