Changes of Revision 9

libx264.changes Changed
x
 
1
@@ -1,4 +1,9 @@
2
 -------------------------------------------------------------------
3
+Sun Mar  1 09:33:42 UTC 2015 - i@margueirte.su
4
+
5
+- update version 20141218
6
+
7
+-------------------------------------------------------------------
8
 Wed Nov  5 12:33:30 UTC 2014 - i@margueirte.su
9
 
10
 - update version 20141104
11
libx264.spec Changed
19
 
1
@@ -1,7 +1,7 @@
2
 #
3
 # spec file for package libx264
4
 #
5
-# Copyright (c) 2014 SUSE LINUX Products GmbH, Nuernberg, Germany.
6
+# Copyright (c) 2015 SUSE LINUX GmbH, Nuernberg, Germany.
7
 #
8
 # All modifications and additions to the file contributed by third parties
9
 # remain the property of their copyright owners, unless otherwise agreed
10
@@ -17,7 +17,7 @@
11
 
12
 
13
 %define soname  142
14
-%define svn     20141104
15
+%define svn     20141218
16
 Name:           libx264
17
 Version:        0.%{soname}svn%{svn}
18
 Release:        0
19
x264-use-shared-library.patch Changed
26
 
1
@@ -1,17 +1,17 @@
2
-Index: x264-snapshot-20130723-2245/Makefile
3
+Index: x264-snapshot-20141218-2245/Makefile
4
 ===================================================================
5
---- x264-snapshot-20130723-2245.orig/Makefile
6
-+++ x264-snapshot-20130723-2245/Makefile
7
-@@ -171,6 +171,7 @@ $(LIBX264): $(GENERATED) .depend $(OBJS)
8
+--- x264-snapshot-20141218-2245.orig/Makefile
9
++++ x264-snapshot-20141218-2245/Makefile
10
+@@ -176,6 +176,7 @@ $(LIBX264): $(GENERATED) .depend $(OBJS)
11
  
12
  $(SONAME): $(GENERATED) .depend $(OBJS) $(OBJASM) $(OBJSO)
13
    $(LD)$@ $(OBJS) $(OBJASM) $(OBJSO) $(SOFLAGS) $(LDFLAGS)
14
 +  ln -s $(SONAME) libx264.so
15
  
16
  ifneq ($(EXE),)
17
- .PHONY: x264 checkasm
18
-@@ -178,8 +179,8 @@ x264: x264$(EXE)
19
- checkasm: checkasm$(EXE)
20
+ .PHONY: x264 checkasm example
21
+@@ -184,8 +185,8 @@ checkasm: checkasm$(EXE)
22
+ example: example$(EXE)
23
  endif
24
  
25
 -x264$(EXE): $(GENERATED) .depend $(OBJCLI) $(CLI_LIBX264)
26
x264-snapshot-20141104-2245.tar.bz2/Makefile -> x264-snapshot-20141218-2245.tar.bz2/Makefile Changed
42
 
1
@@ -36,6 +36,8 @@
2
 
3
 OBJCHK = tools/checkasm.o
4
 
5
+OBJEXAMPLE = example.o
6
+
7
 CONFIG := $(shell cat config.h)
8
 
9
 # GPL-only files
10
@@ -176,9 +178,10 @@
11
    $(LD)$@ $(OBJS) $(OBJASM) $(OBJSO) $(SOFLAGS) $(LDFLAGS)
12
 
13
 ifneq ($(EXE),)
14
-.PHONY: x264 checkasm
15
+.PHONY: x264 checkasm example
16
 x264: x264$(EXE)
17
 checkasm: checkasm$(EXE)
18
+example: example$(EXE)
19
 endif
20
 
21
 x264$(EXE): $(GENERATED) .depend $(OBJCLI) $(CLI_LIBX264)
22
@@ -187,7 +190,10 @@
23
 checkasm$(EXE): $(GENERATED) .depend $(OBJCHK) $(LIBX264)
24
    $(LD)$@ $(OBJCHK) $(LIBX264) $(LDFLAGS)
25
 
26
-$(OBJS) $(OBJASM) $(OBJSO) $(OBJCLI) $(OBJCHK): .depend
27
+example$(EXE): $(GENERATED) .depend $(OBJEXAMPLE) $(LIBX264)
28
+   $(LD)$@ $(OBJEXAMPLE) $(LIBX264) $(LDFLAGS)
29
+
30
+$(OBJS) $(OBJASM) $(OBJSO) $(OBJCLI) $(OBJCHK) $(OBJEXAMPLE): .depend
31
 
32
 %.o: %.asm common/x86/x86inc.asm common/x86/x86util.asm
33
    $(AS) $(ASFLAGS) -o $@ $<
34
@@ -254,6 +260,7 @@
35
 clean:
36
    rm -f $(OBJS) $(OBJASM) $(OBJCLI) $(OBJSO) $(SONAME) *.a *.lib *.exp *.pdb x264 x264.exe .depend TAGS
37
    rm -f checkasm checkasm.exe $(OBJCHK) $(GENERATED) x264_lookahead.clbin
38
+   rm -f example example.exe $(OBJEXAMPLE)
39
    rm -f $(SRC2:%.c=%.gcda) $(SRC2:%.c=%.gcno) *.dyn pgopti.dpi pgopti.dpi.lock *.pgd *.pgc
40
 
41
 distclean: clean
42
x264-snapshot-20141104-2245.tar.bz2/common/common.c -> x264-snapshot-20141218-2245.tar.bz2/common/common.c Changed
62
 
1
@@ -517,7 +517,7 @@
2
 static int parse_enum( const char *arg, const char * const *names, int *dst )
3
 {
4
     for( int i = 0; names[i]; i++ )
5
-        if( !strcmp( arg, names[i] ) )
6
+        if( !strcasecmp( arg, names[i] ) )
7
         {
8
             *dst = i;
9
             return 0;
10
@@ -540,12 +540,12 @@
11
 static int x264_atobool( const char *str, int *b_error )
12
 {
13
     if( !strcmp(str, "1") ||
14
-        !strcmp(str, "true") ||
15
-        !strcmp(str, "yes") )
16
+        !strcasecmp(str, "true") ||
17
+        !strcasecmp(str, "yes") )
18
         return 1;
19
     if( !strcmp(str, "0") ||
20
-        !strcmp(str, "false") ||
21
-        !strcmp(str, "no") )
22
+        !strcasecmp(str, "false") ||
23
+        !strcasecmp(str, "no") )
24
         return 0;
25
     *b_error = 1;
26
     return 0;
27
@@ -614,7 +614,7 @@
28
     OPT("asm")
29
     {
30
         p->cpu = isdigit(value[0]) ? atoi(value) :
31
-                 !strcmp(value, "auto") || atobool(value) ? x264_cpu_detect() : 0;
32
+                 !strcasecmp(value, "auto") || atobool(value) ? x264_cpu_detect() : 0;
33
         if( b_error )
34
         {
35
             char *buf = strdup(value);
36
@@ -635,14 +635,14 @@
37
     }
38
     OPT("threads")
39
     {
40
-        if( !strcmp(value, "auto") )
41
+        if( !strcasecmp(value, "auto") )
42
             p->i_threads = X264_THREADS_AUTO;
43
         else
44
             p->i_threads = atoi(value);
45
     }
46
     OPT("lookahead-threads")
47
     {
48
-        if( !strcmp(value, "auto") )
49
+        if( !strcasecmp(value, "auto") )
50
             p->i_lookahead_threads = X264_THREADS_AUTO;
51
         else
52
             p->i_lookahead_threads = atoi(value);
53
@@ -651,7 +651,7 @@
54
         p->b_sliced_threads = atobool(value);
55
     OPT("sync-lookahead")
56
     {
57
-        if( !strcmp(value, "auto") )
58
+        if( !strcasecmp(value, "auto") )
59
             p->i_sync_lookahead = X264_SYNC_LOOKAHEAD_AUTO;
60
         else
61
             p->i_sync_lookahead = atoi(value);
62
x264-snapshot-20141104-2245.tar.bz2/common/dct.c -> x264-snapshot-20141218-2245.tar.bz2/common/dct.c Changed
17
 
1
@@ -611,7 +611,6 @@
2
     {
3
         dctf->sub4x4_dct    = x264_sub4x4_dct_mmx;
4
         dctf->add4x4_idct   = x264_add4x4_idct_mmx;
5
-        dctf->dct4x4dc      = x264_dct4x4dc_mmx;
6
         dctf->idct4x4dc     = x264_idct4x4dc_mmx;
7
         dctf->sub8x8_dct_dc = x264_sub8x8_dct_dc_mmx2;
8
 
9
@@ -630,6 +629,7 @@
10
 
11
     if( cpu&X264_CPU_MMX2 )
12
     {
13
+        dctf->dct4x4dc         = x264_dct4x4dc_mmx2;
14
         dctf->add8x8_idct_dc   = x264_add8x8_idct_dc_mmx2;
15
         dctf->add16x16_idct_dc = x264_add16x16_idct_dc_mmx2;
16
     }
17
x264-snapshot-20141104-2245.tar.bz2/common/osdep.h -> x264-snapshot-20141218-2245.tar.bz2/common/osdep.h Changed
10
 
1
@@ -142,8 +142,6 @@
2
 #define ALIGNED_ARRAY_N ALIGNED_ARRAY_16
3
 #endif
4
 
5
-#define UNINIT(x) x=x
6
-
7
 #if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
8
 #define UNUSED __attribute__((unused))
9
 #define ALWAYS_INLINE __attribute__((always_inline)) inline
10
x264-snapshot-20141104-2245.tar.bz2/common/pixel.c -> x264-snapshot-20141218-2245.tar.bz2/common/pixel.c Changed
9
 
1
@@ -1040,6 +1040,7 @@
2
         INIT2_NAME( sad_aligned, sad, _avx2 );
3
         INIT2( sad_x3, _avx2 );
4
         INIT2( sad_x4, _avx2 );
5
+        pixf->var[PIXEL_16x16] = x264_pixel_var_16x16_avx2;
6
         pixf->vsad = x264_pixel_vsad_avx2;
7
         pixf->ssd_nv12_core = x264_pixel_ssd_nv12_core_avx2;
8
         pixf->intra_sad_x3_8x8 = x264_intra_sad_x3_8x8_avx2;
9
x264-snapshot-20141104-2245.tar.bz2/common/quant.c -> x264-snapshot-20141218-2245.tar.bz2/common/quant.c Changed
19
 
1
@@ -558,8 +558,6 @@
2
     if( cpu&X264_CPU_MMX )
3
     {
4
 #if ARCH_X86
5
-        pf->quant_4x4 = x264_quant_4x4_mmx;
6
-        pf->quant_8x8 = x264_quant_8x8_mmx;
7
         pf->dequant_4x4 = x264_dequant_4x4_mmx;
8
         pf->dequant_4x4_dc = x264_dequant_4x4dc_mmx2;
9
         pf->dequant_8x8 = x264_dequant_8x8_mmx;
10
@@ -576,6 +574,8 @@
11
     {
12
         pf->quant_2x2_dc = x264_quant_2x2_dc_mmx2;
13
 #if ARCH_X86
14
+        pf->quant_4x4 = x264_quant_4x4_mmx2;
15
+        pf->quant_8x8 = x264_quant_8x8_mmx2;
16
         pf->quant_4x4_dc = x264_quant_4x4_dc_mmx2;
17
         pf->decimate_score15 = x264_decimate_score15_mmx2;
18
         pf->decimate_score16 = x264_decimate_score16_mmx2;
19
x264-snapshot-20141104-2245.tar.bz2/common/x86/dct-a.asm -> x264-snapshot-20141218-2245.tar.bz2/common/x86/dct-a.asm Changed
10
 
1
@@ -143,7 +143,7 @@
2
 DCT4x4_DC
3
 %else
4
 
5
-INIT_MMX mmx
6
+INIT_MMX mmx2
7
 cglobal dct4x4dc, 1,1
8
     movq   m3, [r0+24]
9
     movq   m2, [r0+16]
10
x264-snapshot-20141104-2245.tar.bz2/common/x86/dct.h -> x264-snapshot-20141218-2245.tar.bz2/common/x86/dct.h Changed
10
 
1
@@ -70,7 +70,7 @@
2
 void x264_add16x16_idct_dc_avx  ( pixel   *p_dst, dctcoef dct    [16] );
3
 void x264_add16x16_idct_dc_avx2 ( uint8_t *p_dst, int16_t dct    [16] );
4
 
5
-void x264_dct4x4dc_mmx       ( int16_t d[16] );
6
+void x264_dct4x4dc_mmx2      ( int16_t d[16] );
7
 void x264_dct4x4dc_sse2      ( int32_t d[16] );
8
 void x264_dct4x4dc_avx       ( int32_t d[16] );
9
 void x264_idct4x4dc_mmx      ( int16_t d[16] );
10
x264-snapshot-20141104-2245.tar.bz2/common/x86/pixel-a.asm -> x264-snapshot-20141218-2245.tar.bz2/common/x86/pixel-a.asm Changed
86
 
1
@@ -727,15 +727,11 @@
2
 %endmacro
3
 
4
 %macro VAR_END 2
5
-%if HIGH_BIT_DEPTH
6
-%if mmsize == 8 && %1*%2 == 256
7
+%if HIGH_BIT_DEPTH && mmsize == 8 && %1*%2 == 256
8
     HADDUW  m5, m2
9
 %else
10
     HADDW   m5, m2
11
 %endif
12
-%else ; !HIGH_BIT_DEPTH
13
-    HADDW   m5, m2
14
-%endif ; HIGH_BIT_DEPTH
15
     HADDD   m6, m1
16
 %if ARCH_X86_64
17
     punpckldq m5, m6
18
@@ -772,20 +768,17 @@
19
     mova      m4, [r0+%1+mmsize]
20
 %else ; !HIGH_BIT_DEPTH
21
     mova      m0, [r0]
22
-    punpckhbw m1, m0, m7
23
     mova      m3, [r0+%1]
24
-    mova      m4, m3
25
+    punpckhbw m1, m0, m7
26
     punpcklbw m0, m7
27
+    punpckhbw m4, m3, m7
28
+    punpcklbw m3, m7
29
 %endif ; HIGH_BIT_DEPTH
30
 %ifidn %1, r1
31
     lea       r0, [r0+%1*2]
32
 %else
33
     add       r0, r1
34
 %endif
35
-%if HIGH_BIT_DEPTH == 0
36
-    punpcklbw m3, m7
37
-    punpckhbw m4, m7
38
-%endif ; !HIGH_BIT_DEPTH
39
     VAR_CORE
40
     dec r2d
41
     jg .loop
42
@@ -900,17 +893,26 @@
43
 VAR
44
 INIT_XMM xop
45
 VAR
46
+%endif ; !HIGH_BIT_DEPTH
47
 
48
 INIT_YMM avx2
49
 cglobal pixel_var_16x16, 2,4,7
50
+    FIX_STRIDES r1
51
     VAR_START 0
52
     mov      r2d, 4
53
     lea       r3, [r1*3]
54
 .loop:
55
+%if HIGH_BIT_DEPTH
56
+    mova      m0, [r0]
57
+    mova      m3, [r0+r1]
58
+    mova      m1, [r0+r1*2]
59
+    mova      m4, [r0+r3]
60
+%else
61
     pmovzxbw  m0, [r0]
62
     pmovzxbw  m3, [r0+r1]
63
     pmovzxbw  m1, [r0+r1*2]
64
     pmovzxbw  m4, [r0+r3]
65
+%endif
66
     lea       r0, [r0+r1*4]
67
     VAR_CORE
68
     dec r2d
69
@@ -929,7 +931,6 @@
70
     movd   edx, xm6
71
 %endif
72
     RET
73
-%endif ; !HIGH_BIT_DEPTH
74
 
75
 %macro VAR2_END 3
76
     HADDW   %2, xm1
77
@@ -1600,7 +1601,7 @@
78
 %macro SATDS_SSE2 0
79
 %define vertical ((notcpuflag(ssse3) || cpuflag(atom)) || HIGH_BIT_DEPTH)
80
 
81
-%if vertical==0 || HIGH_BIT_DEPTH
82
+%if cpuflag(ssse3) && (vertical==0 || HIGH_BIT_DEPTH)
83
 cglobal pixel_satd_4x4, 4, 6, 6
84
     SATD_START_MMX
85
     mova m4, [hmul_4p]
86
x264-snapshot-20141104-2245.tar.bz2/common/x86/predict-c.c -> x264-snapshot-20141218-2245.tar.bz2/common/x86/predict-c.c Changed
19
 
1
@@ -65,12 +65,17 @@
2
     H += i * ( src[j+i - FDEC_STRIDE ]  - src[j-i - FDEC_STRIDE ] );\
3
     V += i * ( src[(j+i)*FDEC_STRIDE -1] - src[(j-i)*FDEC_STRIDE -1] );
4
 
5
+#if HAVE_X86_INLINE_ASM
6
+#if HIGH_BIT_DEPTH
7
 ALIGNED_16( static const int16_t pw_12345678[8] ) = {1,2,3,4,5,6,7,8};
8
 ALIGNED_16( static const int16_t pw_m87654321[8] ) = {-8,-7,-6,-5,-4,-3,-2,-1};
9
 ALIGNED_16( static const int16_t pw_m32101234[8] ) = {-3,-2,-1,0,1,2,3,4};
10
+#else // !HIGH_BIT_DEPTH
11
 ALIGNED_8( static const int8_t pb_12345678[8] ) = {1,2,3,4,5,6,7,8};
12
 ALIGNED_8( static const int8_t pb_m87654321[8] ) = {-8,-7,-6,-5,-4,-3,-2,-1};
13
 ALIGNED_8( static const int8_t pb_m32101234[8] ) = {-3,-2,-1,0,1,2,3,4};
14
+#endif // HIGH_BIT_DEPTH
15
+#endif // HAVE_X86_INLINE_ASM
16
 
17
 #define PREDICT_16x16_P_CORE\
18
     int H = 0;\
19
x264-snapshot-20141104-2245.tar.bz2/common/x86/quant-a.asm -> x264-snapshot-20141218-2245.tar.bz2/common/x86/quant-a.asm Changed
10
 
1
@@ -453,7 +453,7 @@
2
 QUANT_DC quant_2x2_dc, 1
3
 %if ARCH_X86_64 == 0 ; not needed because sse2 is faster
4
 QUANT_DC quant_4x4_dc, 4
5
-INIT_MMX mmx
6
+INIT_MMX mmx2
7
 QUANT_AC quant_4x4, 4
8
 QUANT_AC quant_8x8, 16
9
 %endif
10
x264-snapshot-20141104-2245.tar.bz2/common/x86/quant.h -> x264-snapshot-20141218-2245.tar.bz2/common/x86/quant.h Changed
12
 
1
@@ -30,8 +30,8 @@
2
 
3
 int x264_quant_2x2_dc_mmx2( dctcoef dct[4], int mf, int bias );
4
 int x264_quant_4x4_dc_mmx2( dctcoef dct[16], int mf, int bias );
5
-int x264_quant_4x4_mmx( dctcoef dct[16], udctcoef mf[16], udctcoef bias[16] );
6
-int x264_quant_8x8_mmx( dctcoef dct[64], udctcoef mf[64], udctcoef bias[64] );
7
+int x264_quant_4x4_mmx2( dctcoef dct[16], udctcoef mf[16], udctcoef bias[16] );
8
+int x264_quant_8x8_mmx2( dctcoef dct[64], udctcoef mf[64], udctcoef bias[64] );
9
 int x264_quant_2x2_dc_sse2( dctcoef dct[16], int mf, int bias );
10
 int x264_quant_4x4_dc_sse2( dctcoef dct[16], int mf, int bias );
11
 int x264_quant_4x4_sse2( dctcoef dct[16], udctcoef mf[16], udctcoef bias[16] );
12
x264-snapshot-20141104-2245.tar.bz2/common/x86/x86inc.asm -> x264-snapshot-20141218-2245.tar.bz2/common/x86/x86inc.asm Changed
201
 
1
@@ -1044,15 +1044,16 @@
2
 %endmacro
3
 
4
 ;%1 == instruction
5
-;%2 == 1 if float, 0 if int
6
-;%3 == 1 if non-destructive or 4-operand (xmm, xmm, xmm, imm), 0 otherwise
7
-;%4 == 1 if commutative (i.e. doesn't matter which src arg is which), 0 if not
8
-;%5+: operands
9
-%macro RUN_AVX_INSTR 5-8+
10
-    %ifnum sizeof%6
11
+;%2 == minimal instruction set
12
+;%3 == 1 if float, 0 if int
13
+;%4 == 1 if non-destructive or 4-operand (xmm, xmm, xmm, imm), 0 otherwise
14
+;%5 == 1 if commutative (i.e. doesn't matter which src arg is which), 0 if not
15
+;%6+: operands
16
+%macro RUN_AVX_INSTR 6-9+
17
+    %ifnum sizeof%7
18
+        %assign __sizeofreg sizeof%7
19
+    %elifnum sizeof%6
20
         %assign __sizeofreg sizeof%6
21
-    %elifnum sizeof%5
22
-        %assign __sizeofreg sizeof%5
23
     %else
24
         %assign __sizeofreg mmsize
25
     %endif
26
@@ -1061,325 +1062,333 @@
27
         %xdefine __instr v%1
28
     %else
29
         %xdefine __instr %1
30
-        %if %0 >= 7+%3
31
+        %if %0 >= 8+%4
32
             %assign __emulate_avx 1
33
         %endif
34
     %endif
35
+    %ifnidn %2, fnord
36
+        %ifdef cpuname
37
+            %if notcpuflag(%2)
38
+                %error use of ``%1'' %2 instruction in cpuname function: current_function
39
+            %endif
40
+        %endif
41
+    %endif
42
 
43
     %if __emulate_avx
44
-        %xdefine __src1 %6
45
-        %xdefine __src2 %7
46
-        %ifnidn %5, %6
47
-            %if %0 >= 8
48
-                CHECK_AVX_INSTR_EMU {%1 %5, %6, %7, %8}, %5, %7, %8
49
+        %xdefine __src1 %7
50
+        %xdefine __src2 %8
51
+        %ifnidn %6, %7
52
+            %if %0 >= 9
53
+                CHECK_AVX_INSTR_EMU {%1 %6, %7, %8, %9}, %6, %8, %9
54
             %else
55
-                CHECK_AVX_INSTR_EMU {%1 %5, %6, %7}, %5, %7
56
+                CHECK_AVX_INSTR_EMU {%1 %6, %7, %8}, %6, %8
57
             %endif
58
-            %if %4 && %3 == 0
59
-                %ifnid %7
60
+            %if %5 && %4 == 0
61
+                %ifnid %8
62
                     ; 3-operand AVX instructions with a memory arg can only have it in src2,
63
                     ; whereas SSE emulation prefers to have it in src1 (i.e. the mov).
64
                     ; So, if the instruction is commutative with a memory arg, swap them.
65
-                    %xdefine __src1 %7
66
-                    %xdefine __src2 %6
67
+                    %xdefine __src1 %8
68
+                    %xdefine __src2 %7
69
                 %endif
70
             %endif
71
             %if __sizeofreg == 8
72
-                MOVQ %5, __src1
73
-            %elif %2
74
-                MOVAPS %5, __src1
75
+                MOVQ %6, __src1
76
+            %elif %3
77
+                MOVAPS %6, __src1
78
             %else
79
-                MOVDQA %5, __src1
80
+                MOVDQA %6, __src1
81
             %endif
82
         %endif
83
-        %if %0 >= 8
84
-            %1 %5, __src2, %8
85
+        %if %0 >= 9
86
+            %1 %6, __src2, %9
87
         %else
88
-            %1 %5, __src2
89
+            %1 %6, __src2
90
         %endif
91
-    %elif %0 >= 8
92
-        __instr %5, %6, %7, %8
93
+    %elif %0 >= 9
94
+        __instr %6, %7, %8, %9
95
+    %elif %0 == 8
96
+        __instr %6, %7, %8
97
     %elif %0 == 7
98
-        __instr %5, %6, %7
99
-    %elif %0 == 6
100
-        __instr %5, %6
101
+        __instr %6, %7
102
     %else
103
-        __instr %5
104
+        __instr %6
105
     %endif
106
 %endmacro
107
 
108
 ;%1 == instruction
109
-;%2 == 1 if float, 0 if int
110
-;%3 == 1 if non-destructive or 4-operand (xmm, xmm, xmm, imm), 0 otherwise
111
-;%4 == 1 if commutative (i.e. doesn't matter which src arg is which), 0 if not
112
-%macro AVX_INSTR 1-4 0, 1, 0
113
-    %macro %1 1-9 fnord, fnord, fnord, fnord, %1, %2, %3, %4
114
+;%2 == minimal instruction set
115
+;%3 == 1 if float, 0 if int
116
+;%4 == 1 if non-destructive or 4-operand (xmm, xmm, xmm, imm), 0 otherwise
117
+;%5 == 1 if commutative (i.e. doesn't matter which src arg is which), 0 if not
118
+%macro AVX_INSTR 1-5 fnord, 0, 1, 0
119
+    %macro %1 1-10 fnord, fnord, fnord, fnord, %1, %2, %3, %4, %5
120
         %ifidn %2, fnord
121
-            RUN_AVX_INSTR %6, %7, %8, %9, %1
122
+            RUN_AVX_INSTR %6, %7, %8, %9, %10, %1
123
         %elifidn %3, fnord
124
-            RUN_AVX_INSTR %6, %7, %8, %9, %1, %2
125
+            RUN_AVX_INSTR %6, %7, %8, %9, %10, %1, %2
126
         %elifidn %4, fnord
127
-            RUN_AVX_INSTR %6, %7, %8, %9, %1, %2, %3
128
+            RUN_AVX_INSTR %6, %7, %8, %9, %10, %1, %2, %3
129
         %elifidn %5, fnord
130
-            RUN_AVX_INSTR %6, %7, %8, %9, %1, %2, %3, %4
131
+            RUN_AVX_INSTR %6, %7, %8, %9, %10, %1, %2, %3, %4
132
         %else
133
-            RUN_AVX_INSTR %6, %7, %8, %9, %1, %2, %3, %4, %5
134
+            RUN_AVX_INSTR %6, %7, %8, %9, %10, %1, %2, %3, %4, %5
135
         %endif
136
     %endmacro
137
 %endmacro
138
 
139
 ; Instructions with both VEX and non-VEX encodings
140
 ; Non-destructive instructions are written without parameters
141
-AVX_INSTR addpd, 1, 0, 1
142
-AVX_INSTR addps, 1, 0, 1
143
-AVX_INSTR addsd, 1, 0, 1
144
-AVX_INSTR addss, 1, 0, 1
145
-AVX_INSTR addsubpd, 1, 0, 0
146
-AVX_INSTR addsubps, 1, 0, 0
147
-AVX_INSTR aesdec, 0, 0, 0
148
-AVX_INSTR aesdeclast, 0, 0, 0
149
-AVX_INSTR aesenc, 0, 0, 0
150
-AVX_INSTR aesenclast, 0, 0, 0
151
+AVX_INSTR addpd, sse2, 1, 0, 1
152
+AVX_INSTR addps, sse, 1, 0, 1
153
+AVX_INSTR addsd, sse2, 1, 0, 1
154
+AVX_INSTR addss, sse, 1, 0, 1
155
+AVX_INSTR addsubpd, sse3, 1, 0, 0
156
+AVX_INSTR addsubps, sse3, 1, 0, 0
157
+AVX_INSTR aesdec, fnord, 0, 0, 0
158
+AVX_INSTR aesdeclast, fnord, 0, 0, 0
159
+AVX_INSTR aesenc, fnord, 0, 0, 0
160
+AVX_INSTR aesenclast, fnord, 0, 0, 0
161
 AVX_INSTR aesimc
162
 AVX_INSTR aeskeygenassist
163
-AVX_INSTR andnpd, 1, 0, 0
164
-AVX_INSTR andnps, 1, 0, 0
165
-AVX_INSTR andpd, 1, 0, 1
166
-AVX_INSTR andps, 1, 0, 1
167
-AVX_INSTR blendpd, 1, 0, 0
168
-AVX_INSTR blendps, 1, 0, 0
169
-AVX_INSTR blendvpd, 1, 0, 0
170
-AVX_INSTR blendvps, 1, 0, 0
171
-AVX_INSTR cmppd, 1, 1, 0
172
-AVX_INSTR cmpps, 1, 1, 0
173
-AVX_INSTR cmpsd, 1, 1, 0
174
-AVX_INSTR cmpss, 1, 1, 0
175
-AVX_INSTR comisd
176
-AVX_INSTR comiss
177
-AVX_INSTR cvtdq2pd
178
-AVX_INSTR cvtdq2ps
179
-AVX_INSTR cvtpd2dq
180
-AVX_INSTR cvtpd2ps
181
-AVX_INSTR cvtps2dq
182
-AVX_INSTR cvtps2pd
183
-AVX_INSTR cvtsd2si
184
-AVX_INSTR cvtsd2ss
185
-AVX_INSTR cvtsi2sd
186
-AVX_INSTR cvtsi2ss
187
-AVX_INSTR cvtss2sd
188
-AVX_INSTR cvtss2si
189
-AVX_INSTR cvttpd2dq
190
-AVX_INSTR cvttps2dq
191
-AVX_INSTR cvttsd2si
192
-AVX_INSTR cvttss2si
193
-AVX_INSTR divpd, 1, 0, 0
194
-AVX_INSTR divps, 1, 0, 0
195
-AVX_INSTR divsd, 1, 0, 0
196
-AVX_INSTR divss, 1, 0, 0
197
-AVX_INSTR dppd, 1, 1, 0
198
-AVX_INSTR dpps, 1, 1, 0
199
-AVX_INSTR extractps
200
-AVX_INSTR haddpd, 1, 0, 0
201
x264-snapshot-20141104-2245.tar.bz2/encoder/cavlc.c -> x264-snapshot-20141218-2245.tar.bz2/encoder/cavlc.c Changed
17
 
1
@@ -289,6 +289,7 @@
2
                 x264_cavlc_block_residual( h, DCT_LUMA_4x4, i4+i8*4+p*16, h->dct.luma4x4[i4+i8*4+p*16] );
3
 }
4
 
5
+#if RDO_SKIP_BS
6
 static ALWAYS_INLINE void x264_cavlc_partition_luma_residual( x264_t *h, int i8, int p )
7
 {
8
     if( h->mb.b_transform_8x8 && h->mb.cache.non_zero_count[x264_scan8[i8*4]] )
9
@@ -299,6 +300,7 @@
10
         for( int i4 = 0; i4 < 4; i4++ )
11
             x264_cavlc_block_residual( h, DCT_LUMA_4x4, i4+i8*4+p*16, h->dct.luma4x4[i4+i8*4+p*16] );
12
 }
13
+#endif
14
 
15
 static void x264_cavlc_mb_header_i( x264_t *h, int i_mb_type, int i_mb_i_offset, int chroma )
16
 {
17
x264-snapshot-20141104-2245.tar.bz2/encoder/ratecontrol.c -> x264-snapshot-20141218-2245.tar.bz2/encoder/ratecontrol.c Changed
117
 
1
@@ -2191,6 +2191,8 @@
2
 
3
     if( rcc->b_vbv && rcc->last_satd > 0 )
4
     {
5
+        double fenc_cpb_duration = (double)h->fenc->i_cpb_duration *
6
+                                   h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale;
7
         /* Lookahead VBV: raise the quantizer as necessary such that no frames in
8
          * the lookahead overflow and such that the buffer is in a reasonable state
9
          * by the end of the lookahead. */
10
@@ -2206,6 +2208,7 @@
11
                 double buffer_fill_cur = rcc->buffer_fill - cur_bits;
12
                 double target_fill;
13
                 double total_duration = 0;
14
+                double last_duration = fenc_cpb_duration;
15
                 frame_q[0] = h->sh.i_type == SLICE_TYPE_I ? q * h->param.rc.f_ip_factor : q;
16
                 frame_q[1] = frame_q[0] * h->param.rc.f_pb_factor;
17
                 frame_q[2] = frame_q[0] / h->param.rc.f_ip_factor;
18
@@ -2213,8 +2216,8 @@
19
                 /* Loop over the planned future frames. */
20
                 for( int j = 0; buffer_fill_cur >= 0 && buffer_fill_cur <= rcc->buffer_size; j++ )
21
                 {
22
-                    total_duration += h->fenc->f_planned_cpb_duration[j];
23
-                    buffer_fill_cur += rcc->vbv_max_rate * h->fenc->f_planned_cpb_duration[j];
24
+                    total_duration += last_duration;
25
+                    buffer_fill_cur += rcc->vbv_max_rate * last_duration;
26
                     int i_type = h->fenc->i_planned_type[j];
27
                     int i_satd = h->fenc->i_planned_satd[j];
28
                     if( i_type == X264_TYPE_AUTO )
29
@@ -2222,6 +2225,7 @@
30
                     i_type = IS_X264_TYPE_I( i_type ) ? SLICE_TYPE_I : IS_X264_TYPE_B( i_type ) ? SLICE_TYPE_B : SLICE_TYPE_P;
31
                     cur_bits = predict_size( &rcc->pred[i_type], frame_q[i_type], i_satd );
32
                     buffer_fill_cur -= cur_bits;
33
+                    last_duration = h->fenc->f_planned_cpb_duration[j];
34
                 }
35
                 /* Try to get to get the buffer at least 50% filled, but don't set an impossible goal. */
36
                 target_fill = X264_MIN( rcc->buffer_fill + total_duration * rcc->vbv_max_rate * 0.5, rcc->buffer_size * 0.5 );
37
@@ -2255,45 +2259,44 @@
38
             /* Now a hard threshold to make sure the frame fits in VBV.
39
              * This one is mostly for I-frames. */
40
             double bits = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd );
41
-            double qf = 1.0;
42
             /* For small VBVs, allow the frame to use up the entire VBV. */
43
             double max_fill_factor = h->param.rc.i_vbv_buffer_size >= 5*h->param.rc.i_vbv_max_bitrate / rcc->fps ? 2 : 1;
44
             /* For single-frame VBVs, request that the frame use up the entire VBV. */
45
             double min_fill_factor = rcc->single_frame_vbv ? 1 : 2;
46
 
47
             if( bits > rcc->buffer_fill/max_fill_factor )
48
-                qf = x264_clip3f( rcc->buffer_fill/(max_fill_factor*bits), 0.2, 1.0 );
49
-            q /= qf;
50
-            bits *= qf;
51
+            {
52
+                double qf = x264_clip3f( rcc->buffer_fill/(max_fill_factor*bits), 0.2, 1.0 );
53
+                q /= qf;
54
+                bits *= qf;
55
+            }
56
             if( bits < rcc->buffer_rate/min_fill_factor )
57
-                q *= bits*min_fill_factor/rcc->buffer_rate;
58
+            {
59
+                double qf = x264_clip3f( bits*min_fill_factor/rcc->buffer_rate, 0.001, 1.0 );
60
+                q *= qf;
61
+            }
62
             q = X264_MAX( q0, q );
63
         }
64
 
65
-        /* Apply MinCR restrictions */
66
-        double bits = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd );
67
-        if( bits > rcc->frame_size_maximum )
68
-            q *= bits / rcc->frame_size_maximum;
69
-        bits = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd );
70
-
71
         /* Check B-frame complexity, and use up any bits that would
72
          * overflow before the next P-frame. */
73
         if( h->sh.i_type == SLICE_TYPE_P && !rcc->single_frame_vbv )
74
         {
75
             int nb = rcc->bframes;
76
+            double bits = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd );
77
             double pbbits = bits;
78
             double bbits = predict_size( rcc->pred_b_from_p, q * h->param.rc.f_pb_factor, rcc->last_satd );
79
             double space;
80
             double bframe_cpb_duration = 0;
81
             double minigop_cpb_duration;
82
             for( int i = 0; i < nb; i++ )
83
-                bframe_cpb_duration += h->fenc->f_planned_cpb_duration[1+i];
84
+                bframe_cpb_duration += h->fenc->f_planned_cpb_duration[i];
85
 
86
             if( bbits * nb > bframe_cpb_duration * rcc->vbv_max_rate )
87
                 nb = 0;
88
             pbbits += nb * bbits;
89
 
90
-            minigop_cpb_duration = bframe_cpb_duration + h->fenc->f_planned_cpb_duration[0];
91
+            minigop_cpb_duration = bframe_cpb_duration + fenc_cpb_duration;
92
             space = rcc->buffer_fill + minigop_cpb_duration*rcc->vbv_max_rate - rcc->buffer_size;
93
             if( pbbits < space )
94
             {
95
@@ -2302,6 +2305,12 @@
96
             q = X264_MAX( q0/2, q );
97
         }
98
 
99
+        /* Apply MinCR and buffer fill restrictions */
100
+        double bits = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd );
101
+        double frame_size_maximum = X264_MIN( rcc->frame_size_maximum, X264_MAX( rcc->buffer_fill, 0.001 ) );
102
+        if( bits > frame_size_maximum )
103
+            q *= bits / frame_size_maximum;
104
+
105
         if( !rcc->b_vbv_min_rate )
106
             q = X264_MAX( q0, q );
107
     }
108
@@ -2326,7 +2335,7 @@
109
 {
110
     float q;
111
     x264_ratecontrol_t *rcc = h->rc;
112
-    ratecontrol_entry_t UNINIT(rce);
113
+    ratecontrol_entry_t rce = {0};
114
     int pict_type = h->sh.i_type;
115
     int64_t total_bits = 8*(h->stat.i_frame_size[SLICE_TYPE_I]
116
                           + h->stat.i_frame_size[SLICE_TYPE_P]
117
x264-snapshot-20141104-2245.tar.bz2/encoder/rdo.c -> x264-snapshot-20141218-2245.tar.bz2/encoder/rdo.c Changed
10
 
1
@@ -186,7 +186,7 @@
2
     h->mb.b_transform_8x8 = b_transform_bak;
3
     h->mb.i_type = type_bak;
4
 
5
-    return i_ssd + i_bits;
6
+    return X264_MIN( i_ssd + i_bits, COST_MAX );
7
 }
8
 
9
 /* partition RD functions use 8 bits more precision to avoid large rounding errors at low QPs */
10
x264-snapshot-20141104-2245.tar.bz2/encoder/set.c -> x264-snapshot-20141218-2245.tar.bz2/encoder/set.c Changed
51
 
1
@@ -543,7 +543,8 @@
2
 void x264_sei_recovery_point_write( x264_t *h, bs_t *s, int recovery_frame_cnt )
3
 {
4
     bs_t q;
5
-    uint8_t tmp_buf[100];
6
+    ALIGNED_4( uint8_t tmp_buf[100] );
7
+    M32( tmp_buf ) = 0; // shut up gcc
8
     bs_init( &q, tmp_buf, 100 );
9
 
10
     bs_realign( &q );
11
@@ -595,7 +596,8 @@
12
 {
13
     x264_sps_t *sps = h->sps;
14
     bs_t q;
15
-    uint8_t tmp_buf[100];
16
+    ALIGNED_4( uint8_t tmp_buf[100] );
17
+    M32( tmp_buf ) = 0; // shut up gcc
18
     bs_init( &q, tmp_buf, 100 );
19
 
20
     bs_realign( &q );
21
@@ -617,7 +619,8 @@
22
 {
23
     x264_sps_t *sps = h->sps;
24
     bs_t q;
25
-    uint8_t tmp_buf[100];
26
+    ALIGNED_4( uint8_t tmp_buf[100] );
27
+    M32( tmp_buf ) = 0; // shut up gcc
28
     bs_init( &q, tmp_buf, 100 );
29
 
30
     bs_realign( &q );
31
@@ -648,7 +651,8 @@
32
 {
33
     int quincunx_sampling_flag = h->param.i_frame_packing == 0;
34
     bs_t q;
35
-    uint8_t tmp_buf[100];
36
+    ALIGNED_4( uint8_t tmp_buf[100] );
37
+    M32( tmp_buf ) = 0; // shut up gcc
38
     bs_init( &q, tmp_buf, 100 );
39
 
40
     bs_realign( &q );
41
@@ -701,7 +705,8 @@
42
 {
43
     x264_slice_header_t *sh = &h->sh_backup;
44
     bs_t q;
45
-    uint8_t tmp_buf[100];
46
+    ALIGNED_4( uint8_t tmp_buf[100] );
47
+    M32( tmp_buf ) = 0; // shut up gcc
48
     bs_init( &q, tmp_buf, 100 );
49
 
50
     bs_realign( &q );
51
x264-snapshot-20141104-2245.tar.bz2/encoder/slicetype.c -> x264-snapshot-20141218-2245.tar.bz2/encoder/slicetype.c Changed
17
 
1
@@ -1853,14 +1853,11 @@
2
         if( i )
3
         {
4
             x264_calculate_durations( h, h->lookahead->next.list[i], h->lookahead->next.list[i-1], &h->i_cpb_delay, &h->i_coded_fields );
5
-            h->lookahead->next.list[0]->f_planned_cpb_duration[i-1] = (double)h->lookahead->next.list[i-1]->i_cpb_duration *
6
+            h->lookahead->next.list[0]->f_planned_cpb_duration[i-1] = (double)h->lookahead->next.list[i]->i_cpb_duration *
7
                                                                       h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale;
8
         }
9
         else
10
             x264_calculate_durations( h, h->lookahead->next.list[i], NULL, &h->i_cpb_delay, &h->i_coded_fields );
11
-
12
-        h->lookahead->next.list[0]->f_planned_cpb_duration[i] = (double)h->lookahead->next.list[i]->i_cpb_duration *
13
-                                                                h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale;
14
     }
15
 }
16
 
17
x264-snapshot-20141218-2245.tar.bz2/example.c Added
157
 
1
@@ -0,0 +1,155 @@
2
+/*****************************************************************************
3
+ * example.c: libx264 API usage example
4
+ *****************************************************************************
5
+ * Copyright (C) 2014 x264 project
6
+ *
7
+ * Authors: Anton Mitrofanov <BugMaster@narod.ru>
8
+ *
9
+ * This program is free software; you can redistribute it and/or modify
10
+ * it under the terms of the GNU General Public License as published by
11
+ * the Free Software Foundation; either version 2 of the License, or
12
+ * (at your option) any later version.
13
+ *
14
+ * This program is distributed in the hope that it will be useful,
15
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
+ * GNU General Public License for more details.
18
+ *
19
+ * You should have received a copy of the GNU General Public License
20
+ * along with this program; if not, write to the Free Software
21
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
22
+ *
23
+ * This program is also available under a commercial proprietary license.
24
+ * For more information, contact us at licensing@x264.com.
25
+ *****************************************************************************/
26
+
27
+#ifdef _WIN32
28
+/* The following two defines must be located before the inclusion of any system header files. */
29
+#define WINVER       0x0500
30
+#define _WIN32_WINNT 0x0500
31
+#include <windows.h>
32
+#include <io.h>       /* _setmode() */
33
+#include <fcntl.h>    /* _O_BINARY */
34
+#endif
35
+
36
+#include <stdint.h>
37
+#include <stdio.h>
38
+#include <signal.h>
39
+#include <x264.h>
40
+
41
+/* Ctrl-C handler */
42
+static volatile int b_ctrl_c = 0;
43
+static void sigint_handler( int a )
44
+{
45
+    b_ctrl_c = 1;
46
+}
47
+
48
+#define FAIL_IF_ERROR( cond, ... )\
49
+do\
50
+{\
51
+    if( cond )\
52
+    {\
53
+        fprintf( stderr, __VA_ARGS__ );\
54
+        goto fail;\
55
+    }\
56
+} while( 0 )
57
+
58
+int main( int argc, char **argv )
59
+{
60
+    int width, height;
61
+    x264_param_t param;
62
+    x264_picture_t pic;
63
+    x264_picture_t pic_out;
64
+    x264_t *h;
65
+    int i_frame = 0;
66
+    int i_frame_size;
67
+    x264_nal_t *nal;
68
+    int i_nal;
69
+
70
+#ifdef _WIN32
71
+    _setmode( _fileno( stdin ),  _O_BINARY );
72
+    _setmode( _fileno( stdout ), _O_BINARY );
73
+    _setmode( _fileno( stderr ), _O_BINARY );
74
+#endif
75
+
76
+    /* Control-C handler */
77
+    signal( SIGINT, sigint_handler );
78
+
79
+    FAIL_IF_ERROR( !(argc > 1), "Example usage: example 352x288 <input.yuv >output.h264\n" );
80
+    FAIL_IF_ERROR( 2 != sscanf( argv[1], "%dx%d", &width, &height ), "resolution not specified or incorrect\n" );
81
+
82
+    /* Get default params for preset/tuning */
83
+    if( x264_param_default_preset( &param, "medium", NULL ) < 0 )
84
+        goto fail;
85
+
86
+    /* Configure non-default params */
87
+    param.i_csp = X264_CSP_I420;
88
+    param.i_width  = width;
89
+    param.i_height = height;
90
+    param.b_vfr_input = 0;
91
+    param.b_repeat_headers = 1;
92
+    param.b_annexb = 1;
93
+
94
+    /* Apply profile restrictions. */
95
+    if( x264_param_apply_profile( &param, "high" ) < 0 )
96
+        goto fail;
97
+
98
+    if( x264_picture_alloc( &pic, param.i_csp, param.i_width, param.i_height ) < 0 )
99
+        goto fail;
100
+#undef fail
101
+#define fail fail2
102
+
103
+    h = x264_encoder_open( &param );
104
+    if( !h )
105
+        goto fail;
106
+#undef fail
107
+#define fail fail3
108
+
109
+    /* Encode frames */
110
+    for( ; !b_ctrl_c; i_frame++ )
111
+    {
112
+        /* Read input frame */
113
+        int plane_size = width * height;
114
+        if( fread( pic.img.plane[0], 1, plane_size, stdin ) != plane_size )
115
+            break;
116
+        plane_size = ((width + 1) >> 1) * ((height + 1) >> 1);
117
+        if( fread( pic.img.plane[1], 1, plane_size, stdin ) != plane_size )
118
+            break;
119
+        if( fread( pic.img.plane[2], 1, plane_size, stdin ) != plane_size )
120
+            break;
121
+
122
+        pic.i_pts = i_frame;
123
+        i_frame_size = x264_encoder_encode( h, &nal, &i_nal, &pic, &pic_out );
124
+        if( i_frame_size < 0 )
125
+            goto fail;
126
+        else if( i_frame_size )
127
+        {
128
+            if( !fwrite( nal->p_payload, i_frame_size, 1, stdout ) )
129
+                goto fail;
130
+        }
131
+    }
132
+    /* Flush delayed frames */
133
+    while( !b_ctrl_c && x264_encoder_delayed_frames( h ) )
134
+    {
135
+        i_frame_size = x264_encoder_encode( h, &nal, &i_nal, NULL, &pic_out );
136
+        if( i_frame_size < 0 )
137
+            goto fail;
138
+        else if( i_frame_size )
139
+        {
140
+            if( !fwrite( nal->p_payload, i_frame_size, 1, stdout ) )
141
+                goto fail;
142
+        }
143
+    }
144
+
145
+    x264_encoder_close( h );
146
+    x264_picture_clean( &pic );
147
+    return 0;
148
+
149
+#undef fail
150
+fail3:
151
+    x264_encoder_close( h );
152
+fail2:
153
+    x264_picture_clean( &pic );
154
+fail:
155
+    return -1;
156
+}
157
x264-snapshot-20141104-2245.tar.bz2/tools/checkasm.c -> x264-snapshot-20141218-2245.tar.bz2/tools/checkasm.c Changed
12
 
1
@@ -90,7 +90,9 @@
2
 {
3
     uint32_t a = 0;
4
 #if HAVE_X86_INLINE_ASM
5
-    asm volatile( "rdtsc" : "=a"(a) :: "edx", "memory" );
6
+    asm volatile( "lfence \n"
7
+                  "rdtsc  \n"
8
+                  : "=a"(a) :: "edx", "memory" );
9
 #elif ARCH_PPC
10
     asm volatile( "mftb %0" : "=r"(a) :: "memory" );
11
 #elif ARCH_ARM     // ARMv7 only
12