Changes of Revision 6

libx264.changes Changed
x
 
1
@@ -1,4 +1,9 @@
2
 -------------------------------------------------------------------
3
+Sat Mar 22 17:10:14 UTC 2014 - i@margueirte.su
4
+
5
+- update version 20140321.
6
+
7
+-------------------------------------------------------------------
8
 Tue Nov 19 07:53:08 UTC 2013 - obs@botter.cc
9
 
10
 - add -fno-aggressive-loop-optimizations to extra-cflags in
11
libx264.spec Changed
12
 
1
@@ -14,8 +14,8 @@
2
 # Please submit bugfixes or comments via http://bugs.links2linux.org/
3
 
4
 Name:           libx264
5
-%define soname  135
6
-%define svn     20130723
7
+%define soname  142
8
+%define svn     20140321
9
 Version:        0.%{soname}svn%{svn}
10
 Release:        1
11
 License:        GPL-2.0+
12
x264-snapshot-20130723-2245.tar.bz2/common/display-x11.c Deleted
220
 
1
@@ -1,218 +0,0 @@
2
-/*****************************************************************************
3
- * display-x11.c: x11 interface
4
- *****************************************************************************
5
- * Copyright (C) 2005-2013 x264 project
6
- *
7
- * Authors: Tuukka Toivonen <tuukkat@ee.oulu.fi>
8
- *
9
- * This program is free software; you can redistribute it and/or modify
10
- * it under the terms of the GNU General Public License as published by
11
- * the Free Software Foundation; either version 2 of the License, or
12
- * (at your option) any later version.
13
- *
14
- * This program is distributed in the hope that it will be useful,
15
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
- * GNU General Public License for more details.
18
- *
19
- * You should have received a copy of the GNU General Public License
20
- * along with this program; if not, write to the Free Software
21
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
22
- *
23
- * This program is also available under a commercial proprietary license.
24
- * For more information, contact us at licensing@x264.com.
25
- *****************************************************************************/
26
-
27
-#include <X11/Xlib.h>
28
-#include <X11/Xutil.h>
29
-#include <stdio.h>
30
-#include <stdlib.h>
31
-#include <string.h>
32
-
33
-#include "common.h"
34
-#include "display.h"
35
-
36
-static long event_mask = ConfigureNotify|ExposureMask|KeyPressMask|ButtonPressMask|StructureNotifyMask|ResizeRedirectMask;
37
-
38
-static Display *disp_display = NULL;
39
-static struct disp_window
40
-{
41
-    int init;
42
-    Window window;
43
-} disp_window[10];
44
-
45
-static inline void disp_chkerror( int cond, char *e )
46
-{
47
-    if( !cond )
48
-        return;
49
-    fprintf( stderr, "error: %s\n", e ? e : "?" );
50
-    abort();
51
-}
52
-
53
-static void disp_init_display( void )
54
-{
55
-    Visual *visual;
56
-    int dpy_class;
57
-    int screen;
58
-    int dpy_depth;
59
-
60
-    if( disp_display )
61
-        return;
62
-    memset( &disp_window, 0, sizeof(disp_window) );
63
-    disp_display = XOpenDisplay( "" );
64
-    disp_chkerror( !disp_display, "no display" );
65
-    screen = DefaultScreen( disp_display );
66
-    visual = DefaultVisual( disp_display, screen );
67
-    dpy_class = visual->class;
68
-    dpy_depth = DefaultDepth( disp_display, screen );
69
-    disp_chkerror( !((dpy_class == TrueColor && dpy_depth == 32)
70
-        || (dpy_class == TrueColor && dpy_depth == 24)
71
-        || (dpy_class == TrueColor && dpy_depth == 16)
72
-        || (dpy_class == PseudoColor && dpy_depth == 8)),
73
-        "requires 8 bit PseudoColor or 16/24/32 bit TrueColor display" );
74
-}
75
-
76
-static void disp_init_window( int num, int width, int height, const unsigned char *title )
77
-{
78
-    XSetWindowAttributes xswa;
79
-    XEvent xev;
80
-    int screen = DefaultScreen(disp_display);
81
-    Visual *visual = DefaultVisual (disp_display, screen);
82
-    char buf[200];
83
-    Window window;
84
-
85
-    if( title )
86
-        snprintf( buf, 200, "%s: %i/disp", title, num );
87
-    else
88
-        snprintf( buf, 200, "%i/disp", num );
89
-
90
-    XSizeHints *shint = XAllocSizeHints();
91
-    disp_chkerror( !shint, "memerror" );
92
-    shint->min_width = shint->max_width = shint->width = width;
93
-    shint->min_height = shint->max_height = shint->height = height;
94
-    shint->flags = PSize | PMinSize | PMaxSize;
95
-    disp_chkerror( num < 0 || num >= 10, "bad win num" );
96
-    if( !disp_window[num].init )
97
-    {
98
-        unsigned int mask = 0;
99
-        disp_window[num].init = 1;
100
-        unsigned int bg = WhitePixel( disp_display, screen );
101
-        unsigned int fg = BlackPixel( disp_display, screen );
102
-        int dpy_depth = DefaultDepth( disp_display, screen );
103
-        if( dpy_depth==32 || dpy_depth==24 || dpy_depth==16 )
104
-        {
105
-            mask |= CWColormap;
106
-            xswa.colormap = XCreateColormap( disp_display, DefaultRootWindow( disp_display ), visual, AllocNone );
107
-        }
108
-        xswa.background_pixel = bg;
109
-        xswa.border_pixel = fg;
110
-        xswa.backing_store = Always;
111
-        xswa.backing_planes = -1;
112
-        xswa.bit_gravity = NorthWestGravity;
113
-        mask = CWBackPixel | CWBorderPixel | CWBackingStore | CWBackingPlanes | CWBitGravity;
114
-        window = XCreateWindow( disp_display, DefaultRootWindow( disp_display ),
115
-                                shint->x, shint->y, shint->width, shint->height,
116
-                                1, dpy_depth, InputOutput, visual, mask, &xswa );
117
-        disp_window[num].window = window;
118
-
119
-        XSelectInput( disp_display, window, event_mask );
120
-        XSetStandardProperties( disp_display, window, buf, buf, None, NULL, 0, shint );
121
-        XMapWindow( disp_display, window );
122
-
123
-        do {
124
-            XNextEvent( disp_display, &xev );
125
-        } while( xev.type != MapNotify || xev.xmap.event != window );
126
-    }
127
-    window = disp_window[num].window;
128
-    XSetStandardProperties( disp_display, window, buf, buf, None, NULL, 0, shint );
129
-    XResizeWindow( disp_display, window, width, height );
130
-    XSync( disp_display, 1 );
131
-    XFree( shint );
132
-}
133
-
134
-void disp_sync( void )
135
-{
136
-    XSync( disp_display, 1 );
137
-}
138
-
139
-void disp_setcolor( unsigned char *name )
140
-{
141
-    XColor c_exact, c_nearest;
142
-
143
-    int screen = DefaultScreen( disp_display );
144
-    GC gc = DefaultGC( disp_display, screen );
145
-    Colormap cm = DefaultColormap( disp_display, screen );
146
-    Status st = XAllocNamedColor( disp_display, cm, name, &c_nearest, &c_exact );
147
-    disp_chkerror( st != 1, "XAllocNamedColor error" );
148
-    XSetForeground( disp_display, gc, c_nearest.pixel );
149
-}
150
-
151
-void disp_gray( int num, char *data, int width, int height, int stride, const unsigned char *title )
152
-{
153
-    char dummy;
154
-
155
-    disp_init_display();
156
-    disp_init_window( num, width, height, title );
157
-    int screen = DefaultScreen( disp_display );
158
-    Visual *visual = DefaultVisual( disp_display, screen );
159
-    int dpy_depth = DefaultDepth( disp_display, screen );
160
-    XImage *ximage = XCreateImage( disp_display, visual, dpy_depth, ZPixmap, 0, &dummy, width, height, 8, 0 );
161
-    disp_chkerror( !ximage, "no ximage" );
162
-#if WORDS_BIGENDIAN
163
-    ximage->byte_order = MSBFirst;
164
-    ximage->bitmap_bit_order = MSBFirst;
165
-#else
166
-    ximage->byte_order = LSBFirst;
167
-    ximage->bitmap_bit_order = LSBFirst;
168
-#endif
169
-
170
-    int pixelsize = dpy_depth>8 ? sizeof(int) : sizeof(unsigned char);
171
-    uint8_t *image = malloc( width * height * pixelsize );
172
-    disp_chkerror( !image, "malloc failed" );
173
-    for( int y = 0; y < height; y++ )
174
-        for( int x = 0; x < width; x++ )
175
-            memset( &image[(width*y + x)*pixelsize], data[y*stride+x], pixelsize );
176
-    ximage->data = image;
177
-    GC gc = DefaultGC( disp_display, screen );
178
-
179
-    XPutImage( disp_display, disp_window[num].window, gc, ximage, 0, 0, 0, 0, width, height );
180
-    XPutImage( disp_display, disp_window[num].window, gc, ximage, 0, 0, 0, 0, width, height );
181
-
182
-    XDestroyImage( ximage );
183
-    XSync( disp_display, 1 );
184
-
185
-}
186
-
187
-void disp_gray_zoom(int num, char *data, int width, int height, int stride, const unsigned char *title, int zoom)
188
-{
189
-    unsigned char *dataz = malloc( width*zoom * height*zoom );
190
-    disp_chkerror( !dataz, "malloc" );
191
-    for( int y = 0; y < height; y++ )
192
-        for( int x = 0; x < width; x++ )
193
-            for( int y0 = 0; y0 < zoom; y0++ )
194
-                for( int x0 = 0; x0 < zoom; x0++ )
195
-                    dataz[(y*zoom + y0)*width*zoom + x*zoom + x0] = data[y*stride+x];
196
-    disp_gray( num, dataz, width*zoom, height*zoom, width*zoom, title );
197
-    free( dataz );
198
-}
199
-
200
-void disp_point( int num, int x1, int y1 )
201
-{
202
-    int screen = DefaultScreen( disp_display );
203
-    GC gc = DefaultGC( disp_display, screen );
204
-    XDrawPoint( disp_display, disp_window[num].window, gc, x1, y1 );
205
-}
206
-
207
-void disp_line( int num, int x1, int y1, int x2, int y2 )
208
-{
209
-    int screen = DefaultScreen( disp_display );
210
-    GC gc = DefaultGC( disp_display, screen );
211
-    XDrawLine( disp_display, disp_window[num].window, gc, x1, y1, x2, y2 );
212
-}
213
-
214
-void disp_rect( int num, int x1, int y1, int x2, int y2 )
215
-{
216
-    int screen = DefaultScreen( disp_display );
217
-    GC gc = DefaultGC( disp_display, screen );
218
-    XDrawRectangle( disp_display, disp_window[num].window, gc, x1, y1, x2-x1, y2-y1 );
219
-}
220
x264-snapshot-20130723-2245.tar.bz2/common/display.h Deleted
43
 
1
@@ -1,41 +0,0 @@
2
-/*****************************************************************************
3
- * display.h: x11 visualization interface
4
- *****************************************************************************
5
- * Copyright (C) 2005-2013 x264 project
6
- *
7
- * Authors: Tuukka Toivonen <tuukkat@ee.oulu.fi>
8
- *
9
- * This program is free software; you can redistribute it and/or modify
10
- * it under the terms of the GNU General Public License as published by
11
- * the Free Software Foundation; either version 2 of the License, or
12
- * (at your option) any later version.
13
- *
14
- * This program is distributed in the hope that it will be useful,
15
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
- * GNU General Public License for more details.
18
- *
19
- * You should have received a copy of the GNU General Public License
20
- * along with this program; if not, write to the Free Software
21
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
22
- *
23
- * This program is also available under a commercial proprietary license.
24
- * For more information, contact us at licensing@x264.com.
25
- *****************************************************************************/
26
-
27
-#ifndef X264_DISPLAY_H
28
-#define X264_DISPLAY_H
29
-
30
-void disp_sync(void);
31
-void disp_setcolor(unsigned char *name);
32
-/* Display a region of byte wide memory as a grayscale image.
33
- * num is the window to use for displaying. */
34
-void disp_gray(int num, char *data, int width, int height,
35
-               int stride, const unsigned char *title);
36
-void disp_gray_zoom(int num, char *data, int width, int height,
37
-               int stride, const unsigned char *title, int zoom);
38
-void disp_point(int num, int x1, int y1);
39
-void disp_line(int num, int x1, int y1, int x2, int y2);
40
-void disp_rect(int num, int x1, int y1, int x2, int y2);
41
-
42
-#endif
43
x264-snapshot-20130723-2245.tar.bz2/common/visualize.c Deleted
343
 
1
@@ -1,341 +0,0 @@
2
-/*****************************************************************************
3
- * visualize.c: visualization
4
- *****************************************************************************
5
- * Copyright (C) 2005-2013 x264 project
6
- *
7
- * Authors: Tuukka Toivonen <tuukkat@ee.oulu.fi>
8
- *
9
- * This program is free software; you can redistribute it and/or modify
10
- * it under the terms of the GNU General Public License as published by
11
- * the Free Software Foundation; either version 2 of the License, or
12
- * (at your option) any later version.
13
- *
14
- * This program is distributed in the hope that it will be useful,
15
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
- * GNU General Public License for more details.
18
- *
19
- * You should have received a copy of the GNU General Public License
20
- * along with this program; if not, write to the Free Software
21
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
22
- *
23
- * This program is also available under a commercial proprietary license.
24
- * For more information, contact us at licensing@x264.com.
25
- *****************************************************************************/
26
-
27
-/*
28
- * Some explanation of the symbols used:
29
- * Red/pink: intra block
30
- * Blue: inter block
31
- * Green: skip block
32
- * Yellow: B-block (not visualized properly yet)
33
- *
34
- * Motion vectors have black dot at their target (ie. at the MB center),
35
- * instead of arrowhead. The black dot is enclosed in filled diamond with radius
36
- * depending on reference frame number (one frame back = zero width, normal case).
37
- *
38
- * The intra blocks have generally lines drawn perpendicular
39
- * to the prediction direction, so for example, if there is a pink block
40
- * with horizontal line at the top of it, it is interpolated by assuming
41
- * luma to be vertically constant.
42
- * DC predicted blocks have both horizontal and vertical lines,
43
- * pink blocks with a diagonal line are predicted using the planar function.
44
- */
45
-
46
-#include "common.h"
47
-#include "visualize.h"
48
-#include "display.h"
49
-
50
-typedef struct
51
-{
52
-    int     i_type;
53
-    int     i_partition;
54
-    int     i_sub_partition[4];
55
-    int     i_intra16x16_pred_mode;
56
-    int     intra4x4_pred_mode[4][4];
57
-    int8_t  ref[2][4][4];                  /* [list][y][x] */
58
-    int16_t mv[2][4][4][2];                /* [list][y][x][mvxy] */
59
-} visualize_t;
60
-
61
-/* Return string from stringlist corresponding to the given code */
62
-#define GET_STRING(sl, code) get_string((sl), sizeof(sl)/sizeof(*(sl)), code)
63
-
64
-typedef struct
65
-{
66
-    int code;
67
-    char *string;
68
-} stringlist_t;
69
-
70
-static char *get_string( const stringlist_t *sl, int entries, int code )
71
-{
72
-    for( int i = 0; i < entries; i++ )
73
-        if( sl[i].code == code )
74
-            return sl[i].string;
75
-    return "?";
76
-}
77
-
78
-/* Plot motion vector */
79
-static void mv( int x0, int y0, int16_t dmv[2], int ref, int zoom, char *col )
80
-{
81
-    int dx = dmv[0];
82
-    int dy = dmv[1];
83
-
84
-    dx = (dx * zoom + 2) >> 2;
85
-    dy = (dy * zoom + 2) >> 2;
86
-    disp_line( 0, x0, y0, x0+dx, y0+dy );
87
-    for( int i = 1; i < ref; i++ )
88
-    {
89
-        disp_line( 0, x0  , y0-i, x0+i, y0   );
90
-        disp_line( 0, x0+i, y0  , x0  , y0+i );
91
-        disp_line( 0, x0  , y0+i, x0-i, y0   );
92
-        disp_line( 0, x0-i, y0  , x0  , y0-i );
93
-    }
94
-    disp_setcolor( "black" );
95
-    disp_point( 0, x0, y0 );
96
-    disp_setcolor( col );
97
-}
98
-
99
-int x264_visualize_init( x264_t *h )
100
-{
101
-    CHECKED_MALLOC( h->visualize, h->mb.i_mb_width * h->mb.i_mb_height * sizeof(visualize_t) );
102
-    return 0;
103
-fail:
104
-    return -1;
105
-}
106
-
107
-void x264_visualize_mb( x264_t *h )
108
-{
109
-    visualize_t *v = (visualize_t*)h->visualize + h->mb.i_mb_xy;
110
-
111
-    /* Save all data for the MB that we need for drawing the visualization */
112
-    v->i_type = h->mb.i_type;
113
-    v->i_partition = h->mb.i_partition;
114
-    for( int i = 0; i < 4; i++ )
115
-        v->i_sub_partition[i] = h->mb.i_sub_partition[i];
116
-    for( int y = 0; y < 4; y++ )
117
-        for( int x = 0; x < 4; x++ )
118
-            v->intra4x4_pred_mode[y][x] = h->mb.cache.intra4x4_pred_mode[X264_SCAN8_0+y*8+x];
119
-    for( int l = 0; l < 2; l++ )
120
-        for( int y = 0; y < 4; y++ )
121
-            for( int x = 0; x < 4; x++ )
122
-            {
123
-                for( int i = 0; i < 2; i++ )
124
-                    v->mv[l][y][x][i] = h->mb.cache.mv[l][X264_SCAN8_0+y*8+x][i];
125
-                v->ref[l][y][x] = h->mb.cache.ref[l][X264_SCAN8_0+y*8+x];
126
-            }
127
-    v->i_intra16x16_pred_mode = h->mb.i_intra16x16_pred_mode;
128
-}
129
-
130
-void x264_visualize_close( x264_t *h )
131
-{
132
-    x264_free(h->visualize);
133
-}
134
-
135
-/* Display visualization (block types, MVs) of the encoded frame */
136
-/* FIXME: B-type MBs not handled yet properly */
137
-void x264_visualize_show( x264_t *h )
138
-{
139
-    static const stringlist_t mb_types[] =
140
-    {
141
-        /* Block types marked as NULL will not be drawn */
142
-        { I_4x4   , "red" },
143
-        { I_8x8   , "#ff5640" },
144
-        { I_16x16 , "#ff8060" },
145
-        { I_PCM   , "violet" },
146
-        { P_L0    , "SlateBlue" },
147
-        { P_8x8   , "blue" },
148
-        { P_SKIP  , "green" },
149
-        { B_DIRECT, "yellow" },
150
-        { B_L0_L0 , "yellow" },
151
-        { B_L0_L1 , "yellow" },
152
-        { B_L0_BI , "yellow" },
153
-        { B_L1_L0 , "yellow" },
154
-        { B_L1_L1 , "yellow" },
155
-        { B_L1_BI , "yellow" },
156
-        { B_BI_L0 , "yellow" },
157
-        { B_BI_L1 , "yellow" },
158
-        { B_BI_BI , "yellow" },
159
-        { B_8x8   , "yellow" },
160
-        { B_SKIP  , "yellow" },
161
-    };
162
-
163
-    static const int waitkey = 1;     /* Wait for enter after each frame */
164
-    static const int drawbox = 1;     /* Draw box around each block */
165
-    static const int borders = 0;     /* Display extrapolated borders outside frame */
166
-    static const int zoom = 2;        /* Zoom factor */
167
-
168
-    static const int pad = 32;
169
-    pixel *const frame = h->fdec->plane[0];
170
-    const int width = h->param.i_width;
171
-    const int height = h->param.i_height;
172
-    const int stride = h->fdec->i_stride[0];
173
-
174
-    if( borders )
175
-        disp_gray_zoom( 0, frame - pad*stride - pad, width+2*pad, height+2*pad, stride, "fdec", zoom );
176
-    else
177
-        disp_gray_zoom( 0, frame, width, height, stride, "fdec", zoom );
178
-
179
-    for( int mb_xy = 0; mb_xy < h->mb.i_mb_width * h->mb.i_mb_height; mb_xy++ )
180
-    {
181
-        visualize_t *const v = (visualize_t*)h->visualize + mb_xy;
182
-        const int mb_y = mb_xy / h->mb.i_mb_width;
183
-        const int mb_x = mb_xy % h->mb.i_mb_width;
184
-        char *const col = GET_STRING( mb_types, v->i_type );
185
-        int x = mb_x*16*zoom;
186
-        int y = mb_y*16*zoom;
187
-        int l = 0;
188
-
189
-        if( !col )
190
-            continue;
191
-
192
-        if( borders )
193
-        {
194
-            x += pad*zoom;
195
-            y += pad*zoom;
196
-        }
197
-
198
-        disp_setcolor( col );
199
-        if( drawbox ) disp_rect( 0, x, y, x+16*zoom-1, y+16*zoom-1 );
200
-
201
-        if( v->i_type==P_L0 || v->i_type==P_8x8 || v->i_type==P_SKIP )
202
-        {
203
-            /* Predicted (inter) mode, with motion vector */
204
-            if( v->i_partition == D_16x16 || v->i_type == P_SKIP )
205
-                mv( x+8*zoom, y+8*zoom, v->mv[l][0][0], v->ref[l][0][0], zoom, col );
206
-            else if (v->i_partition == D_16x8)
207
-            {
208
-                if( drawbox ) disp_rect( 0, x, y, x+16*zoom, y+8*zoom );
209
-                mv( x+8*zoom, y+4*zoom, v->mv[l][0][0], v->ref[l][0][0], zoom, col );
210
-                if( drawbox ) disp_rect( 0, x, y+8*zoom, x+16*zoom, y+16*zoom );
211
-                mv( x+8*zoom, y+12*zoom, v->mv[l][2][0], v->ref[l][2][0], zoom, col );
212
-            }
213
-            else if( v->i_partition==D_8x16 )
214
-            {
215
-                if( drawbox ) disp_rect( 0, x,          y, x+8*zoom,  y+16*zoom );
216
-                mv( x+4*zoom, y+8*zoom, v->mv[l][0][0], v->ref[l][0][0], zoom, col );
217
-                if( drawbox ) disp_rect( 0, x+8*zoom,   y, x+16*zoom, y+16*zoom );
218
-                mv( x+12*zoom, y+8*zoom, v->mv[l][0][2], v->ref[l][0][2], zoom, col );
219
-            }
220
-            else if( v->i_partition==D_8x8 )
221
-            {
222
-                for( int i = 0; i < 2; i++ )
223
-                    for( int j = 0; j < 2; j++ )
224
-                    {
225
-                        int sp = v->i_sub_partition[i*2+j];
226
-                        const int x0 = x + j*8*zoom;
227
-                        const int y0 = y + i*8*zoom;
228
-                        l = x264_mb_partition_listX_table[0][sp] ? 0 : 1; /* FIXME: not tested if this works */
229
-                        if( IS_SUB8x8(sp) )
230
-                        {
231
-                            if( drawbox ) disp_rect( 0, x0, y0, x0+8*zoom, y0+8*zoom );
232
-                            mv( x0+4*zoom, y0+4*zoom, v->mv[l][2*i][2*j], v->ref[l][2*i][2*j], zoom, col );
233
-                        }
234
-                        else if( IS_SUB8x4(sp) )
235
-                        {
236
-                            if( drawbox ) disp_rect( 0, x0, y0, x0+8*zoom, y0+4*zoom );
237
-                            if( drawbox ) disp_rect( 0, x0, y0+4*zoom, x0+8*zoom, y0+8*zoom );
238
-                            mv( x0+4*zoom, y0+2*zoom, v->mv[l][2*i][2*j], v->ref[l][2*i][2*j], zoom, col );
239
-                            mv( x0+4*zoom, y0+6*zoom, v->mv[l][2*i+1][2*j], v->ref[l][2*i+1][2*j], zoom, col );
240
-                        }
241
-                        else if( IS_SUB4x8(sp) )
242
-                        {
243
-                            if( drawbox ) disp_rect( 0, x0, y0, x0+4*zoom, y0+8*zoom );
244
-                            if( drawbox ) disp_rect( 0, x0+4*zoom, y0, x0+8*zoom, y0+8*zoom );
245
-                            mv( x0+2*zoom, y0+4*zoom, v->mv[l][2*i][2*j], v->ref[l][2*i][2*j], zoom, col );
246
-                            mv( x0+6*zoom, y0+4*zoom, v->mv[l][2*i][2*j+1], v->ref[l][2*i][2*j+1], zoom, col );
247
-                        }
248
-                        else if( IS_SUB4x4(sp) )
249
-                        {
250
-                            if( drawbox ) disp_rect( 0, x0, y0, x0+4*zoom, y0+4*zoom );
251
-                            if( drawbox ) disp_rect( 0, x0+4*zoom, y0, x0+8*zoom, y0+4*zoom );
252
-                            if( drawbox ) disp_rect( 0, x0, y0+4*zoom, x0+4*zoom, y0+8*zoom );
253
-                            if( drawbox ) disp_rect( 0, x0+4*zoom, y0+4*zoom, x0+8*zoom, y0+8*zoom );
254
-                            mv( x0+2*zoom, y0+2*zoom, v->mv[l][2*i][2*j], v->ref[l][2*i][2*j], zoom, col );
255
-                            mv( x0+6*zoom, y0+2*zoom, v->mv[l][2*i][2*j+1], v->ref[l][2*i][2*j+1], zoom, col );
256
-                            mv( x0+2*zoom, y0+6*zoom, v->mv[l][2*i+1][2*j], v->ref[l][2*i+1][2*j], zoom, col );
257
-                            mv( x0+6*zoom, y0+6*zoom, v->mv[l][2*i+1][2*j+1], v->ref[l][2*i+1][2*j+1], zoom, col );
258
-                        }
259
-                    }
260
-            }
261
-        }
262
-
263
-        if( IS_INTRA(v->i_type) || v->i_type == I_PCM )
264
-        {
265
-            /* Intra coded */
266
-            if( v->i_type == I_16x16 )
267
-            {
268
-                switch (v->i_intra16x16_pred_mode) {
269
-                case I_PRED_16x16_V:
270
-                    disp_line( 0, x+2*zoom, y+2*zoom, x+14*zoom, y+2*zoom );
271
-                    break;
272
-                case I_PRED_16x16_H:
273
-                    disp_line( 0, x+2*zoom, y+2*zoom, x+2*zoom, y+14*zoom );
274
-                    break;
275
-                case I_PRED_16x16_DC:
276
-                case I_PRED_16x16_DC_LEFT:
277
-                case I_PRED_16x16_DC_TOP:
278
-                case I_PRED_16x16_DC_128:
279
-                    disp_line( 0, x+2*zoom, y+2*zoom, x+14*zoom, y+2*zoom );
280
-                    disp_line( 0, x+2*zoom, y+2*zoom, x+2*zoom, y+14*zoom );
281
-                    break;
282
-                case I_PRED_16x16_P:
283
-                    disp_line( 0, x+2*zoom, y+2*zoom, x+8*zoom, y+8*zoom );
284
-                    break;
285
-                }
286
-            }
287
-            if( v->i_type==I_4x4 || v->i_type==I_8x8 )
288
-            {
289
-                const int di = v->i_type == I_8x8 ? 2 : 1;
290
-                const int zoom2 = zoom * di;
291
-                for( int i = 0; i < 4; i += di )
292
-                    for( int j = 0; j < 4; j += di )
293
-                    {
294
-                        const int x0 = x + j*4*zoom;
295
-                        const int y0 = y + i*4*zoom;
296
-                        if( drawbox ) disp_rect( 0, x0, y0, x0+4*zoom2, y0+4*zoom2 );
297
-                        switch( v->intra4x4_pred_mode[i][j] )
298
-                        {
299
-                            case I_PRED_4x4_V:        /* Vertical */
300
-                                disp_line( 0, x0+0*zoom2, y0+1*zoom2, x0+4*zoom2, y0+1*zoom2 );
301
-                                break;
302
-                            case I_PRED_4x4_H:        /* Horizontal */
303
-                                disp_line( 0, x0+1*zoom2, y0+0*zoom2, x0+1*zoom2, y0+4*zoom2 );
304
-                                break;
305
-                            case I_PRED_4x4_DC:        /* DC, average from top and left sides */
306
-                            case I_PRED_4x4_DC_LEFT:
307
-                            case I_PRED_4x4_DC_TOP:
308
-                            case I_PRED_4x4_DC_128:
309
-                                disp_line( 0, x0+1*zoom2, y0+1*zoom2, x0+4*zoom2, y0+1*zoom2 );
310
-                                disp_line( 0, x0+1*zoom2, y0+1*zoom2, x0+1*zoom2, y0+4*zoom2 );
311
-                                break;
312
-                            case I_PRED_4x4_DDL:    /* Topright-bottomleft */
313
-                                disp_line( 0, x0+0*zoom2, y0+0*zoom2, x0+4*zoom2, y0+4*zoom2 );
314
-                                break;
315
-                            case I_PRED_4x4_DDR:    /* Topleft-bottomright */
316
-                                disp_line( 0, x0+0*zoom2, y0+4*zoom2, x0+4*zoom2, y0+0*zoom2 );
317
-                                break;
318
-                            case I_PRED_4x4_VR:        /* Mix of topleft-bottomright and vertical */
319
-                                disp_line( 0, x0+0*zoom2, y0+2*zoom2, x0+4*zoom2, y0+1*zoom2 );
320
-                                break;
321
-                            case I_PRED_4x4_HD:        /* Mix of topleft-bottomright and horizontal */
322
-                                disp_line( 0, x0+2*zoom2, y0+0*zoom2, x0+1*zoom2, y0+4*zoom2 );
323
-                                break;
324
-                            case I_PRED_4x4_VL:        /* Mix of topright-bottomleft and vertical */
325
-                                disp_line( 0, x0+0*zoom2, y0+1*zoom2, x0+4*zoom2, y0+2*zoom2 );
326
-                                break;
327
-                            case I_PRED_4x4_HU:        /* Mix of topright-bottomleft and horizontal */
328
-                                disp_line( 0, x0+1*zoom2, y0+0*zoom2, x0+2*zoom2, y0+4*zoom2 );
329
-                                break;
330
-                        }
331
-                    }
332
-            }
333
-        }
334
-    }
335
-
336
-    disp_sync();
337
-    if( waitkey )
338
-        getchar();
339
-}
340
-/* }}} */
341
-
342
-//EOF
343
x264-snapshot-20130723-2245.tar.bz2/common/visualize.h Deleted
38
 
1
@@ -1,36 +0,0 @@
2
-/*****************************************************************************
3
- * visualize.h: visualization
4
- *****************************************************************************
5
- * Copyright (C) 2005-2013 x264 project
6
- *
7
- * Authors: Tuukka Toivonen <tuukkat@ee.oulu.fi>
8
- *
9
- * This program is free software; you can redistribute it and/or modify
10
- * it under the terms of the GNU General Public License as published by
11
- * the Free Software Foundation; either version 2 of the License, or
12
- * (at your option) any later version.
13
- *
14
- * This program is distributed in the hope that it will be useful,
15
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
- * GNU General Public License for more details.
18
- *
19
- * You should have received a copy of the GNU General Public License
20
- * along with this program; if not, write to the Free Software
21
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
22
- *
23
- * This program is also available under a commercial proprietary license.
24
- * For more information, contact us at licensing@x264.com.
25
- *****************************************************************************/
26
-
27
-#ifndef X264_VISUALIZE_H
28
-#define X264_VISUALIZE_H
29
-
30
-#include "common/common.h"
31
-
32
-int  x264_visualize_init( x264_t *h );
33
-void x264_visualize_mb( x264_t *h );
34
-void x264_visualize_show( x264_t *h );
35
-void x264_visualize_close( x264_t *h );
36
-
37
-#endif
38
x264-snapshot-20130723-2245.tar.bz2/tools/xyuv.c Deleted
794
 
1
@@ -1,792 +0,0 @@
2
-/*****************************************************************************
3
- * xyuv.c: a SDL yuv 420 planer viewer.
4
- *****************************************************************************
5
- * Copyright (C) 2004 Laurent Aimar <fenrir@via.ecp.fr>
6
- *
7
- * This program is free software; you can redistribute it and/or modify
8
- * it under the terms of the GNU General Public License as published by
9
- * the Free Software Foundation; either version 2 of the License, or
10
- * (at your option) any later version.
11
- *
12
- * This program is distributed in the hope that it will be useful,
13
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
- * GNU General Public License for more details.
16
- *
17
- * You should have received a copy of the GNU General Public License
18
- * along with this program; if not, write to the Free Software
19
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
20
- *****************************************************************************/
21
-
22
-#include <stdlib.h>
23
-#include <stdio.h>
24
-#include <string.h>
25
-#include <stdint.h>
26
-
27
-#include <SDL/SDL.h>
28
-
29
-#define YUV_MAX 20
30
-#define SDL_TITLE "xyuv: %s - %d/%d - %.2ffps"
31
-typedef struct
32
-{
33
-    /* globals */
34
-    int     i_width;
35
-    int     i_height;
36
-    int     i_frame_size;
37
-    int     i_frame;
38
-    int     i_frames;
39
-    float   f_fps;
40
-
41
-    float   f_y;
42
-
43
-    int     b_pause;
44
-    int     b_grid;
45
-    int     b_split;
46
-    int     b_diff;
47
-    int     i_join;
48
-
49
-    /* Constructed picture */
50
-    int     i_wall_width;   /* in picture count */
51
-
52
-    /* YUV files */
53
-    int     i_yuv;
54
-    struct
55
-    {
56
-        char    *name;
57
-        FILE    *f;         /* handles */
58
-        int     i_frames;   /* frames count */
59
-
60
-        /* Position in the whole picture */
61
-        int     x, y;
62
-    } yuv[YUV_MAX];
63
-
64
-    /* SDL */
65
-    int i_sdl_width;
66
-    int i_sdl_height;
67
-
68
-    int i_display_width;
69
-    int i_display_height;
70
-    char *title;
71
-
72
-    SDL_Surface *screen;
73
-    SDL_Overlay *overlay;
74
-
75
-    /* */
76
-    uint8_t *pic;
77
-
78
-} xyuv_t;
79
-
80
-xyuv_t xyuv = {
81
-    .i_width = 0,
82
-    .i_height = 0,
83
-    .i_frame  = 1,
84
-    .i_frames = 0,
85
-    .f_fps = 25.0,
86
-    .f_y = 0.0,
87
-    .i_wall_width = 0,
88
-
89
-    .i_yuv = 0,
90
-
91
-    .b_pause = 0,
92
-    .b_split = 0,
93
-    .b_diff = 0,
94
-    .i_join = -1,
95
-
96
-    .title = NULL,
97
-    .pic = NULL,
98
-};
99
-
100
-static void help( void )
101
-{
102
-    fprintf( stderr,
103
-             "Syntax: xyuv [options] file [file2 ...]\n"
104
-             "\n"
105
-             "      --help                  Print this help\n"
106
-             "\n"
107
-             "  -s, --size <WIDTHxHEIGHT>   Set input size\n"
108
-             "  -w, --width <integer>       Set width\n"
109
-             "  -h, --height <integer>      Set height\n"
110
-             "\n"
111
-             "  -S, --split                 Show splited Y/U/V planes\n"
112
-             "  -d, --diff                  Show difference (only 2 files) in split mode\n"
113
-             "  -j, --joint <integer>\n"
114
-             "\n"
115
-             "  -y <float>                  Set Y factor\n"
116
-             "\n"
117
-             "  -g, --grid                  Show a grid (macroblock 16x16)\n"
118
-             "  -W <integer>                Set wall width (in picture count)\n"
119
-             "  -f, --fps <float>           Set fps\n"
120
-             "\n" );
121
-}
122
-
123
-static void xyuv_count_frames( xyuv_t *xyuv );
124
-static void xyuv_detect( int *pi_width, int *pi_height );
125
-static void xyuv_display( xyuv_t *xyuv, int i_frame );
126
-
127
-int main( int argc, char **argv )
128
-{
129
-    int i;
130
-
131
-    /* Parse commande line */
132
-    for( i = 1; i < argc; i++ ) {
133
-        if( !strcasecmp( argv[i], "--help" ) ) {
134
-            help();
135
-            return 0;
136
-        }
137
-        if( !strcmp( argv[i], "-d" ) || !strcasecmp( argv[i], "--diff" ) ) {
138
-            xyuv.b_diff = 1;
139
-        } else if( !strcmp( argv[i], "-S" ) || !strcasecmp( argv[i], "--split" ) ) {
140
-            xyuv.b_split = 1;
141
-        } else if( !strcmp( argv[i], "-f" ) || !strcasecmp( argv[i], "--fps" ) ) {
142
-            if( i >= argc -1 ) goto err_missing_arg;
143
-            xyuv.f_fps = atof( argv[++i] );
144
-        } else if( !strcmp( argv[i], "-h" ) || !strcasecmp( argv[i], "--height" ) ) {
145
-            if( i >= argc -1 ) goto err_missing_arg;
146
-            xyuv.i_height = atoi( argv[++i] );
147
-        } else if( !strcmp( argv[i], "-w" ) || !strcasecmp( argv[i], "--width" ) ) {
148
-            if( i >= argc -1 ) goto err_missing_arg;
149
-            xyuv.i_width = atoi( argv[++i] );
150
-        } else if( !strcmp( argv[i], "-s" ) || !strcasecmp( argv[i], "--size" ) ) {
151
-            char *p;
152
-
153
-            if( i >= argc -1 ) goto err_missing_arg;
154
-
155
-            xyuv.i_width = strtol( argv[++i], &p, 0 );
156
-            p++;
157
-            xyuv.i_height = atoi( p );
158
-        } else if( !strcmp( argv[i], "-W" ) ) {
159
-            if( i >= argc -1 ) goto err_missing_arg;
160
-            xyuv.i_wall_width = atoi( argv[++i] );
161
-        } else if( !strcmp( argv[i], "-y" ) ) {
162
-            if( i >= argc -1 ) goto err_missing_arg;
163
-            xyuv.f_y = atof( argv[++i] );
164
-        } else if( !strcmp( argv[i], "-j" ) || !strcasecmp( argv[i], "--join" ) ) {
165
-            if( i >= argc -1 ) goto err_missing_arg;
166
-            xyuv.i_join = atoi( argv[++i] );
167
-        } else if( !strcmp( argv[i], "-g" ) || !strcasecmp( argv[i], "--grid" ) ) {
168
-            xyuv.b_grid = 1;
169
-        } else {
170
-            FILE *f = fopen( argv[i], "rb" );
171
-            if( !f ) {
172
-                fprintf( stderr, "cannot open YUV %s\n", argv[i] );
173
-            } else {
174
-                xyuv.yuv[xyuv.i_yuv].name = strdup( argv[i] );
175
-                xyuv.yuv[xyuv.i_yuv].f = f;
176
-                xyuv.yuv[xyuv.i_yuv].i_frames = 0;
177
-
178
-                xyuv.i_yuv++;
179
-            }
180
-        }
181
-    }
182
-
183
-    if( xyuv.i_yuv == 0 ) {
184
-        fprintf( stderr, "no file to display\n" );
185
-        return -1;
186
-    }
187
-    if( xyuv.i_width == 0 || xyuv.i_height == 0 ) {
188
-        char *psz = xyuv.yuv[0].name;
189
-        char *num;
190
-        char *x;
191
-        /* See if we find widthxheight in the file name */
192
-        for( ;; ) {
193
-            if( !( x = strchr( psz+1, 'x' ) ) ) {
194
-                break;
195
-            }
196
-            num = x;
197
-            while( num > psz && num[-1] >= '0' && num[-1] <= '9' )
198
-                num--;
199
-
200
-            if( num != x && x[1] >= '0' && x[1] <= '9' ) {
201
-                xyuv.i_width = atoi( num );
202
-                xyuv.i_height = atoi( x+1 );
203
-                break;
204
-            }
205
-            psz = x;
206
-        }
207
-        fprintf( stderr, "file name gives %dx%d\n", xyuv.i_width, xyuv.i_height );
208
-    }
209
-
210
-    if( xyuv.i_width == 0 || xyuv.i_height == 0 ) {
211
-        xyuv_detect( &xyuv.i_width, &xyuv.i_height );
212
-    }
213
-
214
-    if( xyuv.i_width == 0 || xyuv.i_height == 0 ) {
215
-        fprintf( stderr, "invalid or missing frames size\n" );
216
-        return -1;
217
-    }
218
-    if( xyuv.b_diff && xyuv.i_yuv != 2 ) {
219
-        fprintf( stderr, "--diff works only with 2 files\n" );
220
-        return -1;
221
-    }
222
-    if( (xyuv.i_join == 0 || xyuv.i_join >= xyuv.i_width) && xyuv.i_yuv != 2 ) {
223
-        fprintf( stderr, "--join woeks only with two files and range is [1, width-1]\n" );
224
-        return -1;
225
-    }
226
-    if( xyuv.i_join % 2 != 0 ) {
227
-        if( xyuv.i_join + 1 < xyuv.i_width )
228
-            xyuv.i_join++;
229
-        else
230
-            xyuv.i_join--;
231
-    }
232
-
233
-    /* Now check frames */
234
-    fprintf( stderr, "displaying :\n" );
235
-    xyuv.i_frame_size = 3 * xyuv.i_width * xyuv.i_height / 2;
236
-    xyuv_count_frames( &xyuv );
237
-    for( i = 0; i < xyuv.i_yuv; i++ ) {
238
-        fprintf( stderr, " - '%s' : %d frames\n", xyuv.yuv[i].name, xyuv.yuv[i].i_frames );
239
-    }
240
-
241
-    if( xyuv.i_frames == 0 ) {
242
-        fprintf( stderr, "no frames to display\n" );
243
-    }
244
-
245
-    xyuv.pic = malloc( xyuv.i_frame_size );
246
-
247
-    /* calculate SDL view */
248
-    if( xyuv.i_wall_width > xyuv.i_yuv ) {
249
-        xyuv.i_wall_width = xyuv.i_yuv;
250
-    }
251
-    if( xyuv.i_wall_width == 0 ) {
252
-        while( xyuv.i_wall_width < xyuv.i_yuv && xyuv.i_wall_width * xyuv.i_wall_width < xyuv.i_yuv ) {
253
-            xyuv.i_wall_width++;
254
-        }
255
-    }
256
-
257
-    for( i = 0; i < xyuv.i_yuv; i++ ) {
258
-        if( xyuv.b_diff || xyuv.i_join > 0 ) {
259
-            xyuv.yuv[i].x = 0;
260
-            xyuv.yuv[i].y = 0;
261
-        } else if( xyuv.b_split ) {
262
-            xyuv.yuv[i].x = (i%xyuv.i_wall_width) * 3 * xyuv.i_width / 2;
263
-            xyuv.yuv[i].y = (i/xyuv.i_wall_width) * xyuv.i_height;
264
-        } else {
265
-            xyuv.yuv[i].x = (i%xyuv.i_wall_width) * xyuv.i_width;
266
-            xyuv.yuv[i].y = (i/xyuv.i_wall_width) * xyuv.i_height;
267
-        }
268
-    }
269
-    if( xyuv.b_diff ) {
270
-        xyuv.i_sdl_width = 3 * xyuv.i_width / 2;
271
-        xyuv.i_sdl_height= xyuv.i_height;
272
-    } else if( xyuv.i_join > 0 ) {
273
-        xyuv.i_sdl_width = xyuv.i_width;
274
-        xyuv.i_sdl_height= xyuv.i_height;
275
-    } else if( xyuv.b_split ) {
276
-        xyuv.i_sdl_width = xyuv.i_wall_width * 3 * xyuv.i_width / 2;
277
-        xyuv.i_sdl_height= xyuv.i_height * ( ( xyuv.i_yuv  + xyuv.i_wall_width - 1 ) / xyuv.i_wall_width );
278
-    } else {
279
-        xyuv.i_sdl_width = xyuv.i_wall_width * xyuv.i_width;
280
-        xyuv.i_sdl_height= xyuv.i_height * ( ( xyuv.i_yuv  + xyuv.i_wall_width - 1 ) / xyuv.i_wall_width );
281
-    }
282
-    xyuv.i_display_width = xyuv.i_sdl_width;
283
-    xyuv.i_display_height = xyuv.i_sdl_height;
284
-
285
-    /* Open SDL */
286
-    if( SDL_Init( SDL_INIT_EVENTTHREAD|SDL_INIT_NOPARACHUTE|SDL_INIT_VIDEO) ) {
287
-        fprintf( stderr, "cannot init SDL\n" );
288
-        return -1;
289
-    }
290
-
291
-    SDL_EnableKeyRepeat(SDL_DEFAULT_REPEAT_DELAY, 100 );
292
-    SDL_EventState( SDL_KEYUP, SDL_IGNORE );
293
-
294
-    xyuv.screen = SDL_SetVideoMode( xyuv.i_sdl_width, xyuv.i_sdl_height, 0,
295
-                                    SDL_HWSURFACE|SDL_RESIZABLE|
296
-                                    SDL_ASYNCBLIT|SDL_HWACCEL );
297
-    if( xyuv.screen == NULL ) {
298
-        fprintf( stderr, "SDL_SetVideoMode failed\n" );
299
-        return -1;
300
-    }
301
-
302
-    SDL_LockSurface( xyuv.screen );
303
-    xyuv.overlay = SDL_CreateYUVOverlay( xyuv.i_sdl_width, xyuv.i_sdl_height,
304
-                                         SDL_YV12_OVERLAY,
305
-                                         xyuv.screen );
306
-    /* reset with black */
307
-    memset( xyuv.overlay->pixels[0],   0, xyuv.overlay->pitches[0] * xyuv.i_sdl_height );
308
-    memset( xyuv.overlay->pixels[1], 128, xyuv.overlay->pitches[1] * xyuv.i_sdl_height / 2);
309
-    memset( xyuv.overlay->pixels[2], 128, xyuv.overlay->pitches[2] * xyuv.i_sdl_height / 2);
310
-    SDL_UnlockSurface( xyuv.screen );
311
-
312
-    if( xyuv.overlay == NULL ) {
313
-        fprintf( stderr, "recon: SDL_CreateYUVOverlay failed\n" );
314
-        return -1;
315
-    }
316
-
317
-    for( ;; ) {
318
-        SDL_Event event;
319
-        static int b_fullscreen = 0;
320
-        int64_t i_start = SDL_GetTicks();
321
-        int i_wait;
322
-
323
-        if( !xyuv.b_pause ) {
324
-            xyuv_display( &xyuv, xyuv.i_frame );
325
-        }
326
-
327
-        for( ;; ) {
328
-            int b_refresh = 0;
329
-            while( SDL_PollEvent( &event ) )  {
330
-                switch( event.type )
331
-                {
332
-                    case SDL_QUIT:
333
-                        if( b_fullscreen )
334
-                            SDL_WM_ToggleFullScreen( xyuv.screen );
335
-                        exit( 1 );
336
-
337
-                    case SDL_KEYDOWN:
338
-                        switch( event.key.keysym.sym )
339
-                        {
340
-                            case SDLK_q:
341
-                            case SDLK_ESCAPE:
342
-                                if( b_fullscreen )
343
-                                    SDL_WM_ToggleFullScreen( xyuv.screen );
344
-                                exit(1);
345
-
346
-                            case SDLK_f:
347
-                                if( SDL_WM_ToggleFullScreen( xyuv.screen ) )
348
-                                    b_fullscreen = 1 - b_fullscreen;
349
-                                break;
350
-
351
-                            case SDLK_g:
352
-                                if( xyuv.b_grid )
353
-                                    xyuv.b_grid = 0;
354
-                                else
355
-                                    xyuv.b_grid = 1;
356
-                                if( xyuv.b_pause )
357
-                                    b_refresh = 1;
358
-                                break;
359
-
360
-                            case SDLK_SPACE:
361
-                                if( xyuv.b_pause )
362
-                                    xyuv.b_pause = 0;
363
-                                else
364
-                                    xyuv.b_pause = 1;
365
-                                break;
366
-                            case SDLK_LEFT:
367
-                                if( xyuv.i_frame > 1 ) xyuv.i_frame--;
368
-                                b_refresh = 1;
369
-                                break;
370
-
371
-                            case SDLK_RIGHT:
372
-                                if( xyuv.i_frame >= xyuv.i_frames )
373
-                                    xyuv_count_frames( &xyuv );
374
-                                if( xyuv.i_frame < xyuv.i_frames ) xyuv.i_frame++;
375
-                                b_refresh = 1;
376
-                                break;
377
-
378
-                            case SDLK_HOME:
379
-                                xyuv.i_frame = 1;
380
-                                if( xyuv.b_pause )
381
-                                    b_refresh = 1;
382
-                                break;
383
-
384
-                            case SDLK_END:
385
-                                xyuv_count_frames( &xyuv );
386
-                                xyuv.i_frame = xyuv.i_frames;
387
-                                b_refresh = 1;
388
-                                break;
389
-
390
-                            case SDLK_UP:
391
-                                xyuv.i_frame += xyuv.i_frames / 20;
392
-
393
-                                if( xyuv.i_frame >= xyuv.i_frames )
394
-                                    xyuv_count_frames( &xyuv );
395
-
396
-                                if( xyuv.i_frame > xyuv.i_frames )
397
-                                    xyuv.i_frame = xyuv.i_frames;
398
-                                b_refresh = 1;
399
-                                break;
400
-
401
-                            case SDLK_DOWN:
402
-                                xyuv.i_frame -= xyuv.i_frames / 20;
403
-                                if( xyuv.i_frame < 1 )
404
-                                    xyuv.i_frame = 1;
405
-                                b_refresh = 1;
406
-                                break;
407
-
408
-                            case SDLK_PAGEUP:
409
-                                xyuv.i_frame += xyuv.i_frames / 10;
410
-
411
-                                if( xyuv.i_frame >= xyuv.i_frames )
412
-                                    xyuv_count_frames( &xyuv );
413
-
414
-                                if( xyuv.i_frame > xyuv.i_frames )
415
-                                    xyuv.i_frame = xyuv.i_frames;
416
-                                b_refresh = 1;
417
-                                break;
418
-
419
-                            case SDLK_PAGEDOWN:
420
-                                xyuv.i_frame -= xyuv.i_frames / 10;
421
-                                if( xyuv.i_frame < 1 )
422
-                                    xyuv.i_frame = 1;
423
-                                b_refresh = 1;
424
-                                break;
425
-
426
-                            default:
427
-                                break;
428
-                        }
429
-                        break;
430
-                    case SDL_VIDEORESIZE:
431
-                        xyuv.i_display_width = event.resize.w;
432
-                        xyuv.i_display_height = event.resize.h;
433
-                        xyuv.screen = SDL_SetVideoMode( xyuv.i_display_width, xyuv.i_display_height, 0,
434
-                                                        SDL_HWSURFACE|SDL_RESIZABLE|
435
-                                                        SDL_ASYNCBLIT|SDL_HWACCEL );
436
-                        xyuv_display( &xyuv, xyuv.i_frame );
437
-                        break;
438
-
439
-                    default:
440
-                        break;
441
-                }
442
-            }
443
-            if( b_refresh ) {
444
-                xyuv.b_pause = 1;
445
-                xyuv_display( &xyuv, xyuv.i_frame );
446
-            }
447
-            /* wait */
448
-            i_wait = 1000 / xyuv.f_fps - ( SDL_GetTicks() - i_start);
449
-            if( i_wait < 0 )
450
-                break;
451
-            else if( i_wait > 200 )
452
-                SDL_Delay( 200 );
453
-            else {
454
-                SDL_Delay( i_wait );
455
-                break;
456
-            }
457
-        }
458
-        if( !xyuv.b_pause ) {
459
-            /* next frame */
460
-            if( xyuv.i_frame == xyuv.i_frames )
461
-                    xyuv.b_pause = 1;
462
-            else if( xyuv.i_frame < xyuv.i_frames )
463
-                xyuv.i_frame++;
464
-        }
465
-    }
466
-
467
-
468
-    return 0;
469
-
470
-err_missing_arg:
471
-    fprintf( stderr, "missing arg for option=%s\n", argv[i] );
472
-    return -1;
473
-}
474
-
475
-
476
-static void xyuv_display( xyuv_t *xyuv, int i_frame )
477
-{
478
-    SDL_Rect rect;
479
-    int i_picture = 0;
480
-    int i;
481
-
482
-    if( i_frame > xyuv->i_frames )
483
-        return;
484
-
485
-    xyuv->i_frame = i_frame;
486
-
487
-    /* Load and copy pictue data */
488
-    for( i = 0; i < xyuv->i_yuv; i++ ) {
489
-        int i_plane;
490
-
491
-        fprintf( stderr, "yuv[%d] %d/%d\n", i, i_frame, xyuv->yuv[i].i_frames );
492
-        if( i_frame - 1 >= xyuv->yuv[i].i_frames ) {
493
-            xyuv_count_frames( xyuv );
494
-            if( i_frame - 1 >= xyuv->yuv[i].i_frames )
495
-                continue;
496
-        }
497
-        i_picture++;
498
-
499
-        fseek( xyuv->yuv[i].f, (xyuv->i_frame-1) * xyuv->i_frame_size, SEEK_SET );
500
-        fread( xyuv->pic, xyuv->i_frame_size, 1, xyuv->yuv[i].f );
501
-
502
-        SDL_LockYUVOverlay( xyuv->overlay );
503
-
504
-        if( xyuv->b_diff || xyuv->b_split ) {
505
-            /* Reset UV */
506
-            for( i_plane = 1; i_plane < 3; i_plane++ ) {
507
-                memset( xyuv->overlay->pixels[i_plane], 128, xyuv->overlay->pitches[i_plane] * xyuv->overlay->h / 2 );
508
-            }
509
-            /* Show diff in Y plane of overlay */
510
-
511
-            for( i_plane = 0; i_plane < 3; i_plane++ ) {
512
-                int div = i_plane == 0 ? 1 : 2;
513
-                uint8_t *src = xyuv->pic;
514
-                uint8_t *dst = xyuv->overlay->pixels[0] +
515
-                                (xyuv->yuv[i].x + xyuv->yuv[i].y * xyuv->overlay->pitches[0] );
516
-                int j;
517
-                if( i_plane == 1 ) {
518
-                    src +=  5*xyuv->i_width * xyuv->i_height/4;
519
-                    dst += xyuv->i_width;
520
-                } else if( i_plane == 2 ) {
521
-                    src += xyuv->i_width * xyuv->i_height;
522
-                    dst += xyuv->i_width + xyuv->i_height / 2 * xyuv->overlay->pitches[0];
523
-                }
524
-
525
-                for( j = 0; j < xyuv->i_height / div; j++ ) {
526
-                    if( i_picture == 1 || xyuv->b_split ) {
527
-                        memcpy( dst, src, xyuv->i_width / div );
528
-                    } else {
529
-                        int k;
530
-                        for( k = 0; k < xyuv->i_width / div; k++ ) {
531
-                            dst[k] = abs( dst[k] - src[k]);
532
-                        }
533
-                    }
534
-                    src += xyuv->i_width / div;
535
-                    dst += xyuv->overlay->pitches[0];
536
-                }
537
-            }
538
-        } else {
539
-            for( i_plane = 0; i_plane < 3; i_plane++ ) {
540
-                int div = i_plane == 0 ? 1 : 2;
541
-                uint8_t *src = xyuv->pic;
542
-                uint8_t *dst = xyuv->overlay->pixels[i_plane] +
543
-                                ((xyuv->yuv[i].x + xyuv->yuv[i].y * xyuv->overlay->pitches[i_plane] ) / div );
544
-                int w = xyuv->i_width / div;
545
-                int j;
546
-
547
-                if( i_plane == 1 ) {
548
-                    src +=  5*xyuv->i_width * xyuv->i_height/4;
549
-                } else if( i_plane == 2 ) {
550
-                    src += xyuv->i_width * xyuv->i_height;
551
-                }
552
-                if( xyuv->i_join > 0 ) {
553
-                    if( i_picture > 1 ) {
554
-                        src += xyuv->i_join / div;
555
-                        dst += xyuv->i_join / div;
556
-                        w = (xyuv->i_width - xyuv->i_join) /div;
557
-                    } else {
558
-                        w = xyuv->i_join / div;
559
-                    }
560
-                }
561
-
562
-                for( j = 0; j < xyuv->i_height / div; j++ ) {
563
-                    memcpy( dst, src, w );
564
-                    src += xyuv->i_width / div;
565
-                    dst += xyuv->overlay->pitches[i_plane];
566
-                }
567
-            }
568
-        }
569
-
570
-        SDL_UnlockYUVOverlay( xyuv->overlay );
571
-    }
572
-
573
-    if( xyuv->f_y != 0.0 ) {
574
-        uint8_t *pix = xyuv->overlay->pixels[0];
575
-        int j;
576
-
577
-        for( j = 0; j < xyuv->i_sdl_height; j++ ) {
578
-            int k;
579
-            for( k = 0; k < xyuv->i_sdl_width; k++ ) {
580
-                int v= pix[k] * xyuv->f_y;
581
-                if( v > 255 )
582
-                    pix[k] = 255;
583
-                else if( v < 0 )
584
-                    pix[k] = 0;
585
-                else
586
-                    pix[k] = v;
587
-            }
588
-            pix += xyuv->overlay->pitches[0];
589
-        }
590
-    }
591
-    if( xyuv->b_grid ) {
592
-        int x, y;
593
-
594
-        for( y = 0; y < xyuv->i_sdl_height; y += 4 ) {
595
-            uint8_t *p = xyuv->overlay->pixels[0] + y * xyuv->overlay->pitches[0];
596
-            for( x = 0; x < xyuv->i_sdl_width; x += 4 ) {
597
-                if( x%16== 0 || y%16 == 0 )
598
-                    p[x] = 0;
599
-            }
600
-        }
601
-    }
602
-
603
-    /* Update display */
604
-    rect.x = 0;
605
-    rect.y = 0;
606
-    rect.w = xyuv->i_display_width;
607
-    rect.h = xyuv->i_display_height;
608
-    SDL_DisplayYUVOverlay( xyuv->overlay, &rect );
609
-
610
-    /* Display title */
611
-    if( xyuv->title )
612
-        free( xyuv->title );
613
-    asprintf( &xyuv->title, SDL_TITLE, xyuv->yuv[0].name, xyuv->i_frame, xyuv->i_frames, xyuv->f_fps );
614
-    SDL_WM_SetCaption( xyuv->title, "" );
615
-}
616
-
617
-static void xyuv_count_frames( xyuv_t *xyuv )
618
-{
619
-    int i;
620
-
621
-    xyuv->i_frames = 0;
622
-    if( xyuv->i_frame_size <= 0 )
623
-        return;
624
-
625
-    for( i = 0; i < xyuv->i_yuv; i++ ) {
626
-        /* Beurk but avoid using fstat */
627
-        fseek( xyuv->yuv[i].f, 0, SEEK_END );
628
-
629
-        xyuv->yuv[i].i_frames = ftell( xyuv->yuv[i].f ) / xyuv->i_frame_size;
630
-        fprintf( stderr, "count (%d) -> %d\n", i, xyuv->yuv[i].i_frames );
631
-
632
-        fseek( xyuv->yuv[i].f, 0, SEEK_SET );
633
-
634
-        if( xyuv->i_frames < xyuv->yuv[i].i_frames )
635
-            xyuv->i_frames = xyuv->yuv[i].i_frames;
636
-    }
637
-}
638
-
639
-static inline int ssd( int a ) { return a*a; }
640
-
641
-static void xyuv_detect( int *pi_width, int *pi_height )
642
-{
643
-    static const int pi_size[][2] = {
644
-        {128, 96},
645
-        {160,120},
646
-        {320,244},
647
-        {320,288},
648
-
649
-        /* PAL */
650
-        {176,144},  // QCIF
651
-        {352,288},  // CIF
652
-        {352,576},  // 1/2 D1
653
-        {480,576},  // 2/3 D1
654
-        {544,576},
655
-        {640,576},  // VGA
656
-        {704,576},  // D1
657
-        {720,576},  // D1
658
-
659
-        /* NTSC */
660
-        {176,112},  // QCIF
661
-        {320,240},  // MPEG I
662
-        {352,240},  // CIF
663
-        {352,480},  // 1/2 D1
664
-        {480,480},  // 2/3 D1
665
-        {544,480},
666
-        {640,480},  // VGA
667
-        {704,480},  // D1
668
-        {720,480},  // D1
669
-
670
-        /* */
671
-        {0,0},
672
-    };
673
-    int i_max;
674
-    int i_size_max;
675
-    uint8_t *pic;
676
-    int i;
677
-
678
-    *pi_width = 0;
679
-    *pi_height = 0;
680
-
681
-    /* Compute size max */
682
-    for( i_max = 0, i_size_max = 0;
683
-            pi_size[i_max][0] != 0 && pi_size[i_max][1] != 0; i_max++ ) {
684
-        int s = pi_size[i_max][0] * pi_size[i_max][1] * 3 / 2;
685
-
686
-        if( i_size_max < s )
687
-            i_size_max = s;
688
-    }
689
-
690
-    /* Temporary buffer */
691
-    i_size_max *= 3;
692
-    pic = malloc( i_size_max );
693
-
694
-    fprintf( stderr, "guessing size for:\n" );
695
-    for( i = 0; i < xyuv.i_yuv; i++ ) {
696
-        int j;
697
-        int i_read;
698
-        double dbest = 255*255;
699
-        int    i_best = i_max;
700
-        int64_t t;
701
-
702
-        fprintf( stderr, " - %s\n", xyuv.yuv[i].name );
703
-
704
-        i_read = fread( pic, 1, i_size_max, xyuv.yuv[i].f );
705
-        if( i_read < 0 )
706
-            continue;
707
-
708
-        /* Check if file size is at least compatible with one format
709
-         * (if not, ignore file size)*/
710
-        fseek( xyuv.yuv[i].f, 0, SEEK_END );
711
-        t = ftell( xyuv.yuv[i].f );
712
-        fseek( xyuv.yuv[i].f, 0, SEEK_SET );
713
-        for( j = 0; j < i_max; j++ ) {
714
-            const int w = pi_size[j][0];
715
-            const int h = pi_size[j][1];
716
-            const int s = w * h * 3 / 2;
717
-
718
-            if( t % s == 0 )
719
-                break;
720
-        }
721
-        if( j == i_max )
722
-            t = 0;
723
-
724
-
725
-        /* Try all size */
726
-        for( j = 0; j < i_max; j++ ) {
727
-            const int w = pi_size[j][0];
728
-            const int h = pi_size[j][1];
729
-            const int s = w * h * 3 / 2;
730
-            double dd;
731
-
732
-            int x, y, n;
733
-            int64_t d;
734
-
735
-            /* To small */
736
-            if( i_read < 3*s )
737
-                continue;
738
-            /* Check file size */
739
-            if( ( t > 0 && (t % s) != 0  ) ) {
740
-                fprintf( stderr, "  * %dx%d ignored (incompatible file size)\n", w, h );
741
-                continue;
742
-            }
743
-
744
-
745
-            /* We do a simple ssd between 2 consecutives lines */
746
-            d = 0;
747
-            for( n = 0; n < 3; n++ ) {
748
-                uint8_t *p;
749
-
750
-                /* Y */
751
-                p = &pic[n*s];
752
-                for( y = 0; y < h-1; y++ ) {
753
-                    for( x = 0; x < w; x++ )
754
-                        d += ssd( p[x] - p[w+x] );
755
-                    p += w;
756
-                }
757
-
758
-                /* U */
759
-                p = &pic[n*s+w*h];
760
-                for( y = 0; y < h/2-1; y++ ) {
761
-                    for( x = 0; x < w/2; x++ )
762
-                        d += ssd( p[x] - p[(w/2)+x] );
763
-                    p += w/2;
764
-                }
765
-
766
-                /* V */
767
-                p = &pic[n*s+5*w*h/4];
768
-                for( y = 0; y < h/2-1; y++ ) {
769
-                    for( x = 0; x < w/2; x++ )
770
-                        d += ssd( p[x] - p[(w/2)+x] );
771
-                    p += w/2;
772
-                }
773
-            }
774
-            dd = (double)d / (3*w*h*3/2);
775
-            fprintf( stderr, "  * %dx%d d=%f\n", w, h, dd );
776
-
777
-            if( dd < dbest ) {
778
-                i_best = j;
779
-                dbest = dd;
780
-            }
781
-        }
782
-
783
-        fseek( xyuv.yuv[i].f, 0, SEEK_SET );
784
-
785
-        if( i_best < i_max ) {
786
-            fprintf( stderr, "  -> %dx%d\n", pi_size[i_best][0], pi_size[i_best][1] );
787
-            *pi_width = pi_size[i_best][0];
788
-            *pi_height = pi_size[i_best][1];
789
-        }
790
-    }
791
-
792
-    free( pic );
793
-}
794
x264-snapshot-20130723-2245.tar.bz2/AUTHORS -> x264-snapshot-20140321-2245.tar.bz2/AUTHORS Changed
20
 
1
@@ -43,8 +43,8 @@
2
 S: Brittany, France
3
 
4
 N: Henrik Gramner
5
-E: hengar-6 AT student DOT ltu DOT se
6
-D: 4:2:2 chroma subsampling, x86 asm
7
+E: henrik AT gramner DOT com
8
+D: 4:2:2 chroma subsampling, x86 asm, Windows improvements, bugfixes
9
 S: Sweden
10
 
11
 N: Jason Garrett-Glaser
12
@@ -99,7 +99,3 @@
13
 E: radoslaw AT syskin DOT cjb DOT net
14
 D: Cached motion compensation
15
 
16
-N: Tuukka Toivonen
17
-E: tuukkat AT ee DOT oulu DOT fi
18
-D: Visualization
19
-
20
x264-snapshot-20130723-2245.tar.bz2/Makefile -> x264-snapshot-20140321-2245.tar.bz2/Makefile Changed
56
 
1
@@ -69,9 +69,8 @@
2
 SRCCLI += output/mp4.c
3
 endif
4
 
5
-# Visualization sources
6
-ifneq ($(findstring HAVE_VISUALIZE 1, $(CONFIG)),)
7
-SRCS   += common/visualize.c common/display-x11.c
8
+ifneq ($(findstring HAVE_LSMASH 1, $(CONFIG)),)
9
+SRCCLI += output/mp4_lsmash.c
10
 endif
11
 
12
 # MMX/SSE optims
13
@@ -247,29 +246,29 @@
14
    rm -f config.mak x264_config.h config.h config.log x264.pc x264.def
15
 
16
 install-cli: cli
17
-   install -d $(DESTDIR)$(bindir)
18
-   install x264$(EXE) $(DESTDIR)$(bindir)
19
+   $(INSTALL) -d $(DESTDIR)$(bindir)
20
+   $(INSTALL) x264$(EXE) $(DESTDIR)$(bindir)
21
 
22
 install-lib-dev:
23
-   install -d $(DESTDIR)$(includedir)
24
-   install -d $(DESTDIR)$(libdir)
25
-   install -d $(DESTDIR)$(libdir)/pkgconfig
26
-   install -m 644 $(SRCPATH)/x264.h $(DESTDIR)$(includedir)
27
-   install -m 644 x264_config.h $(DESTDIR)$(includedir)
28
-   install -m 644 x264.pc $(DESTDIR)$(libdir)/pkgconfig
29
+   $(INSTALL) -d $(DESTDIR)$(includedir)
30
+   $(INSTALL) -d $(DESTDIR)$(libdir)
31
+   $(INSTALL) -d $(DESTDIR)$(libdir)/pkgconfig
32
+   $(INSTALL) -m 644 $(SRCPATH)/x264.h $(DESTDIR)$(includedir)
33
+   $(INSTALL) -m 644 x264_config.h $(DESTDIR)$(includedir)
34
+   $(INSTALL) -m 644 x264.pc $(DESTDIR)$(libdir)/pkgconfig
35
 
36
 install-lib-static: lib-static install-lib-dev
37
-   install -m 644 $(LIBX264) $(DESTDIR)$(libdir)
38
+   $(INSTALL) -m 644 $(LIBX264) $(DESTDIR)$(libdir)
39
    $(if $(RANLIB), $(RANLIB) $(DESTDIR)$(libdir)/$(LIBX264))
40
 
41
 install-lib-shared: lib-shared install-lib-dev
42
 ifneq ($(IMPLIBNAME),)
43
-   install -d $(DESTDIR)$(bindir)
44
-   install -m 755 $(SONAME) $(DESTDIR)$(bindir)
45
-   install -m 644 $(IMPLIBNAME) $(DESTDIR)$(libdir)
46
+   $(INSTALL) -d $(DESTDIR)$(bindir)
47
+   $(INSTALL) -m 755 $(SONAME) $(DESTDIR)$(bindir)
48
+   $(INSTALL) -m 644 $(IMPLIBNAME) $(DESTDIR)$(libdir)
49
 else ifneq ($(SONAME),)
50
    ln -f -s $(SONAME) $(DESTDIR)$(libdir)/libx264.$(SOSUFFIX)
51
-   install -m 755 $(SONAME) $(DESTDIR)$(libdir)
52
+   $(INSTALL) -m 755 $(SONAME) $(DESTDIR)$(libdir)
53
 endif
54
 
55
 uninstall:
56
x264-snapshot-20130723-2245.tar.bz2/common/arm/asm.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/asm.S Changed
19
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * asm.S: arm utility macros
4
  *****************************************************************************
5
- * Copyright (C) 2008-2013 x264 project
6
+ * Copyright (C) 2008-2014 x264 project
7
  *
8
  * Authors: Mans Rullgard <mans@mansr.com>
9
  *          David Conrad <lessen42@gmail.com>
10
@@ -26,6 +26,8 @@
11
 
12
 #include "config.h"
13
 
14
+.syntax unified
15
+
16
 #ifdef PREFIX
17
 #   define EXTERN_ASM _
18
 #else
19
x264-snapshot-20130723-2245.tar.bz2/common/arm/cpu-a.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/cpu-a.S Changed
28
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * cpu-a.S: arm cpu detection
4
  *****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
  *
8
  * Authors: David Conrad <lessen42@gmail.com>
9
  *
10
@@ -26,7 +26,7 @@
11
 #include "asm.S"
12
 
13
 .fpu neon
14
-.align
15
+.align 2
16
 
17
 // done in gas because .fpu neon overrides the refusal to assemble
18
 // instructions the selected -march/-mcpu doesn't support
19
@@ -95,7 +95,7 @@
20
     sub         r2, r2, r1
21
     cmpgt       r2, #30 << 3    // assume context switch if it took over 30 cycles
22
     addle       r3, r3, r2
23
-    subles      ip, ip, #1
24
+    subsle      ip, ip, #1
25
     bgt         average_loop
26
 
27
     // disable counters if we enabled them
28
x264-snapshot-20130723-2245.tar.bz2/common/arm/dct-a.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/dct-a.S Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /****************************************************************************
3
  * dct-a.S: arm transform and zigzag
4
  *****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
  *
8
  * Authors: David Conrad <lessen42@gmail.com>
9
  *
10
x264-snapshot-20130723-2245.tar.bz2/common/arm/dct.h -> x264-snapshot-20140321-2245.tar.bz2/common/arm/dct.h Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * dct.h: arm transform and zigzag
4
  *****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
  *
8
  * Authors: David Conrad <lessen42@gmail.com>
9
  *
10
x264-snapshot-20130723-2245.tar.bz2/common/arm/deblock-a.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/deblock-a.S Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * deblock.S: arm deblocking
4
  *****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
  *
8
  * Authors: Mans Rullgard <mans@mansr.com>
9
  *
10
x264-snapshot-20130723-2245.tar.bz2/common/arm/mc-a.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/mc-a.S Changed
50
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * mc.S: arm motion compensation
4
  *****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
  *
8
  * Authors: David Conrad <lessen42@gmail.com>
9
  *          Mans Rullgard <mans@mansr.com>
10
@@ -167,7 +167,7 @@
11
     ldr         ip, [sp, #8]
12
     push        {r4-r6,lr}
13
     cmp         ip, #32
14
-    ldrd        r4, [sp, #16]
15
+    ldrd        r4, r5, [sp, #16]
16
     mov         lr, #\h
17
     beq         x264_pixel_avg_w\w\()_neon
18
     rsbs        r6,  ip,  #64
19
@@ -447,7 +447,7 @@
20
 .ifc \type, full
21
     ldr         lr,  [r4, #32]      // denom
22
 .endif
23
-    ldrd        r4,  [r4, #32+4]    // scale, offset
24
+    ldrd        r4,  r5,  [r4, #32+4]    // scale, offset
25
     vdup.16     q0,  r4
26
     vdup.16     q1,  r5
27
 .ifc \type, full
28
@@ -818,8 +818,8 @@
29
 function x264_mc_chroma_neon
30
     push            {r4-r8, lr}
31
     vpush           {d8-d11}
32
-    ldrd            r4, [sp, #56]
33
-    ldrd            r6, [sp, #64]
34
+    ldrd            r4, r5, [sp, #56]
35
+    ldrd            r6, r7, [sp, #64]
36
 
37
     asr             lr, r6, #3
38
     mul             lr, r4, lr
39
@@ -1380,8 +1380,8 @@
40
 function x264_frame_init_lowres_core_neon
41
     push            {r4-r10,lr}
42
     vpush           {d8-d15}
43
-    ldrd            r4,  [sp, #96]
44
-    ldrd            r6,  [sp, #104]
45
+    ldrd            r4,  r5,  [sp, #96]
46
+    ldrd            r6,  r7,  [sp, #104]
47
     ldr             lr,  [sp, #112]
48
     sub             r10, r6,  r7            // dst_stride - width
49
     and             r10, r10, #~15
50
x264-snapshot-20130723-2245.tar.bz2/common/arm/mc-c.c -> x264-snapshot-20140321-2245.tar.bz2/common/arm/mc-c.c Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * mc-c.c: arm motion compensation
4
  *****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
  *
8
  * Authors: David Conrad <lessen42@gmail.com>
9
  *
10
x264-snapshot-20130723-2245.tar.bz2/common/arm/mc.h -> x264-snapshot-20140321-2245.tar.bz2/common/arm/mc.h Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * mc.h: arm motion compensation
4
  *****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
  *
8
  * Authors: David Conrad <lessen42@gmail.com>
9
  *
10
x264-snapshot-20130723-2245.tar.bz2/common/arm/pixel-a.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/pixel-a.S Changed
119
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * pixel.S: arm pixel metrics
4
  *****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
  *
8
  * Authors: David Conrad <lessen42@gmail.com>
9
  *
10
@@ -328,9 +328,9 @@
11
 function x264_pixel_sad_x\x\()_\w\()x\h\()_neon
12
     push        {r6-r7,lr}
13
 .if \x == 3
14
-    ldrd        r6,  [sp, #12]
15
+    ldrd        r6,  r7,  [sp, #12]
16
 .else
17
-    ldrd        r6,  [sp, #16]
18
+    ldrd        r6,  r7,  [sp, #16]
19
     ldr         r12, [sp, #12]
20
 .endif
21
     mov         lr,  #FENC_STRIDE
22
@@ -519,6 +519,38 @@
23
     b               x264_var_end
24
 .endfunc
25
 
26
+function x264_pixel_var_8x16_neon
27
+    vld1.64         {d16}, [r0,:64], r1
28
+    vld1.64         {d18}, [r0,:64], r1
29
+    vmull.u8        q1,  d16, d16
30
+    vmovl.u8        q0,  d16
31
+    vld1.64         {d20}, [r0,:64], r1
32
+    vmull.u8        q2,  d18, d18
33
+    vaddw.u8        q0,  q0,  d18
34
+
35
+    mov             ip,  #12
36
+
37
+    vld1.64         {d22}, [r0,:64], r1
38
+    VAR_SQR_SUM     q1,  q1,   q14,  d20, vpaddl.u16
39
+    vld1.64         {d16}, [r0,:64], r1
40
+    VAR_SQR_SUM     q2,  q2,   q15,  d22, vpaddl.u16
41
+
42
+1:  subs            ip,  ip,  #4
43
+    vld1.64         {d18}, [r0,:64], r1
44
+    VAR_SQR_SUM     q1,  q14,  q12, d16
45
+    vld1.64         {d20}, [r0,:64], r1
46
+    VAR_SQR_SUM     q2,  q15,  q13, d18
47
+    vld1.64         {d22}, [r0,:64], r1
48
+    VAR_SQR_SUM     q1,  q12,  q14, d20
49
+    beq             2f
50
+    vld1.64         {d16}, [r0,:64], r1
51
+    VAR_SQR_SUM     q2,  q13,  q15, d22
52
+    b               1b
53
+2:
54
+    VAR_SQR_SUM     q2,  q13,  q15, d22
55
+    b               x264_var_end
56
+.endfunc
57
+
58
 function x264_pixel_var_16x16_neon
59
     vld1.64         {d16-d17}, [r0,:128], r1
60
     vmull.u8        q12, d16, d16
61
@@ -596,13 +628,56 @@
62
     vadd.s32        d1,  d2,  d3
63
     vpadd.s32       d0,  d0,  d1
64
 
65
-    vmov.32         r0,  r1,  d0
66
+    vmov            r0,  r1,  d0
67
     vst1.32         {d0[1]}, [ip,:32]
68
     mul             r0,  r0,  r0
69
     sub             r0,  r1,  r0,  lsr #6
70
     bx              lr
71
 .endfunc
72
 
73
+function x264_pixel_var2_8x16_neon
74
+    vld1.64         {d16}, [r0,:64], r1
75
+    vld1.64         {d17}, [r2,:64], r3
76
+    vld1.64         {d18}, [r0,:64], r1
77
+    vld1.64         {d19}, [r2,:64], r3
78
+    vsubl.u8        q10, d16, d17
79
+    vsubl.u8        q11, d18, d19
80
+    SQR_ACC         q1,  d20, d21,  vmull.s16
81
+    vld1.64         {d16}, [r0,:64], r1
82
+    vadd.s16        q0,  q10, q11
83
+    vld1.64         {d17}, [r2,:64], r3
84
+    SQR_ACC         q2,  d22, d23,  vmull.s16
85
+    mov             ip,  #14
86
+1:  subs            ip,  ip,  #2
87
+    vld1.64         {d18}, [r0,:64], r1
88
+    vsubl.u8        q10, d16, d17
89
+    vld1.64         {d19}, [r2,:64], r3
90
+    vadd.s16        q0,  q0,  q10
91
+    SQR_ACC         q1,  d20, d21
92
+    vsubl.u8        q11, d18, d19
93
+    beq             2f
94
+    vld1.64         {d16}, [r0,:64], r1
95
+    vadd.s16        q0,  q0,  q11
96
+    vld1.64         {d17}, [r2,:64], r3
97
+    SQR_ACC         q2,  d22, d23
98
+    b               1b
99
+2:
100
+    vadd.s16        q0,  q0,  q11
101
+    SQR_ACC         q2,  d22, d23
102
+
103
+    ldr             ip,  [sp]
104
+    vadd.s16        d0,  d0,  d1
105
+    vadd.s32        q1,  q1,  q2
106
+    vpaddl.s16      d0,  d0
107
+    vadd.s32        d1,  d2,  d3
108
+    vpadd.s32       d0,  d0,  d1
109
+
110
+    vmov            r0,  r1,  d0
111
+    vst1.32         {d0[1]}, [ip,:32]
112
+    mul             r0,  r0,  r0
113
+    sub             r0,  r1,  r0,  lsr #7
114
+    bx              lr
115
+.endfunc
116
 
117
 .macro LOAD_DIFF_8x4 q0 q1 q2 q3
118
     vld1.32     {d1}, [r2], r3
119
x264-snapshot-20130723-2245.tar.bz2/common/arm/pixel.h -> x264-snapshot-20140321-2245.tar.bz2/common/arm/pixel.h Changed
22
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * pixel.h: arm pixel metrics
4
  *****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
  *
8
  * Authors: David Conrad <lessen42@gmail.com>
9
  *
10
@@ -56,8 +56,10 @@
11
 int x264_pixel_sa8d_16x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t );
12
 
13
 uint64_t x264_pixel_var_8x8_neon  ( uint8_t *, intptr_t );
14
+uint64_t x264_pixel_var_8x16_neon ( uint8_t *, intptr_t );
15
 uint64_t x264_pixel_var_16x16_neon( uint8_t *, intptr_t );
16
-int x264_pixel_var2_8x8_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int * );
17
+int x264_pixel_var2_8x8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, int * );
18
+int x264_pixel_var2_8x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int * );
19
 
20
 uint64_t x264_pixel_hadamard_ac_8x8_neon  ( uint8_t *, intptr_t );
21
 uint64_t x264_pixel_hadamard_ac_8x16_neon ( uint8_t *, intptr_t );
22
x264-snapshot-20130723-2245.tar.bz2/common/arm/predict-a.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/predict-a.S Changed
22
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * predict.S: arm intra prediction
4
  *****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
  *
8
  * Authors: David Conrad <lessen42@gmail.com>
9
  *          Mans Rullgard <mans@mansr.com>
10
@@ -181,9 +181,9 @@
11
 
12
 function x264_predict_8x8_dc_neon
13
     mov     ip, #0
14
-    ldrd    r2, [r1, #8]
15
+    ldrd    r2, r3, [r1, #8]
16
     push    {r4-r5,lr}
17
-    ldrd    r4, [r1, #16]
18
+    ldrd    r4, r5, [r1, #16]
19
     lsl     r3, r3, #8
20
     ldrb    lr, [r1, #7]
21
     usad8   r2, r2, ip
22
x264-snapshot-20130723-2245.tar.bz2/common/arm/predict-c.c -> x264-snapshot-20140321-2245.tar.bz2/common/arm/predict-c.c Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * predict.c: arm intra prediction
4
  *****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
  *
8
  * Authors: David Conrad <lessen42@gmail.com>
9
  *
10
x264-snapshot-20130723-2245.tar.bz2/common/arm/predict.h -> x264-snapshot-20140321-2245.tar.bz2/common/arm/predict.h Changed
27
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * predict.h: arm intra prediction
4
  *****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
  *
8
  * Authors: David Conrad <lessen42@gmail.com>
9
  *
10
@@ -26,6 +26,16 @@
11
 #ifndef X264_ARM_PREDICT_H
12
 #define X264_ARM_PREDICT_H
13
 
14
+void x264_predict_8x8_v_neon( pixel *src, pixel edge[36] );
15
+void x264_predict_8x8_h_neon( pixel *src, pixel edge[36] );
16
+void x264_predict_8x8_dc_neon( pixel *src, pixel edge[36] );
17
+void x264_predict_8x8c_dc_neon( pixel *src );
18
+void x264_predict_8x8c_h_neon( pixel *src );
19
+void x264_predict_8x8c_v_neon( pixel *src );
20
+void x264_predict_16x16_v_neon( pixel *src );
21
+void x264_predict_16x16_h_neon( pixel *src );
22
+void x264_predict_16x16_dc_neon( pixel *src );
23
+
24
 void x264_predict_4x4_init_arm( int cpu, x264_predict_t pf[12] );
25
 void x264_predict_8x8_init_arm( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter );
26
 void x264_predict_8x8c_init_arm( int cpu, x264_predict_t pf[7] );
27
x264-snapshot-20130723-2245.tar.bz2/common/arm/quant-a.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/quant-a.S Changed
37
 
1
@@ -1,7 +1,7 @@
2
 /****************************************************************************
3
  * quant.S: arm quantization and level-run
4
  *****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
  *
8
  * Authors: David Conrad <lessen42@gmail.com>
9
  *
10
@@ -312,7 +312,7 @@
11
 
12
 // int coeff_last( int16_t *l )
13
 function x264_coeff_last4_arm
14
-    ldrd        r2,  [r0]
15
+    ldrd        r2,  r3,  [r0]
16
     subs        r0,  r3,  #0
17
     movne       r0,  #2
18
     movne       r2,  r3
19
@@ -341,7 +341,7 @@
20
 
21
     subs        r1,  ip,  r1,  lsr #2
22
     addge       r0,  r1,  #\size - 8
23
-    sublts      r0,  r3,  r0,  lsr #2
24
+    subslt      r0,  r3,  r0,  lsr #2
25
     movlt       r0,  #0
26
     bx          lr
27
 .endfunc
28
@@ -390,7 +390,7 @@
29
 
30
     subs        r1,  ip,  r1
31
     addge       r0,  r1,  #32
32
-    sublts      r0,  ip,  r0
33
+    subslt      r0,  ip,  r0
34
     movlt       r0,  #0
35
     bx          lr
36
 .endfunc
37
x264-snapshot-20130723-2245.tar.bz2/common/arm/quant.h -> x264-snapshot-20140321-2245.tar.bz2/common/arm/quant.h Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * quant.h: arm quantization and level-run
4
  *****************************************************************************
5
- * Copyright (C) 2005-2013 x264 project
6
+ * Copyright (C) 2005-2014 x264 project
7
  *
8
  * Authors: David Conrad <lessen42@gmail.com>
9
  *
10
x264-snapshot-20130723-2245.tar.bz2/common/bitstream.c -> x264-snapshot-20140321-2245.tar.bz2/common/bitstream.c Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * bitstream.c: bitstream writing
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
10
x264-snapshot-20130723-2245.tar.bz2/common/bitstream.h -> x264-snapshot-20140321-2245.tar.bz2/common/bitstream.h Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * bitstream.h: bitstream writing
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Loren Merritt <lorenm@u.washington.edu>
9
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
10
x264-snapshot-20130723-2245.tar.bz2/common/cabac.c -> x264-snapshot-20140321-2245.tar.bz2/common/cabac.c Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * cabac.c: arithmetic coder
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
  *          Loren Merritt <lorenm@u.washington.edu>
10
x264-snapshot-20130723-2245.tar.bz2/common/cabac.h -> x264-snapshot-20140321-2245.tar.bz2/common/cabac.h Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * cabac.h: arithmetic coder
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Loren Merritt <lorenm@u.washington.edu>
9
  *          Laurent Aimar <fenrir@via.ecp.fr>
10
x264-snapshot-20130723-2245.tar.bz2/common/common.c -> x264-snapshot-20140321-2245.tar.bz2/common/common.c Changed
121
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * common.c: misc common functions
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Loren Merritt <lorenm@u.washington.edu>
9
  *          Laurent Aimar <fenrir@via.ecp.fr>
10
@@ -32,6 +32,9 @@
11
 #if HAVE_MALLOC_H
12
 #include <malloc.h>
13
 #endif
14
+#if HAVE_THP
15
+#include <sys/mman.h>
16
+#endif
17
 
18
 const int x264_bit_depth = BIT_DEPTH;
19
 
20
@@ -342,7 +345,7 @@
21
             param->analyse.i_luma_deadzone[1] = 6;
22
             param->rc.f_qcompress = 0.8;
23
         }
24
-        else if( !strncasecmp( s, "stillimage", 5 ) )
25
+        else if( !strncasecmp( s, "stillimage", 10 ) )
26
         {
27
             if( psy_tuning_used++ ) goto psy_failure;
28
             param->i_deblocking_filter_alphac0 = -3;
29
@@ -668,6 +671,8 @@
30
     }
31
     OPT("bluray-compat")
32
         p->b_bluray_compat = atobool(value);
33
+    OPT("avcintra-class")
34
+        p->i_avcintra_class = atoi(value);
35
     OPT("sar")
36
     {
37
         b_error = ( 2 != sscanf( value, "%d:%d", &p->vui.i_sar_width, &p->vui.i_sar_height ) &&
38
@@ -876,10 +881,6 @@
39
     }
40
     OPT("log")
41
         p->i_log_level = atoi(value);
42
-#if HAVE_VISUALIZE
43
-    OPT("visualize")
44
-        p->b_visualize = atobool(value);
45
-#endif
46
     OPT("dump-yuv")
47
         p->psz_dump_yuv = strdup(value);
48
     OPT2("analyse", "partitions")
49
@@ -1031,6 +1032,8 @@
50
         p->b_vfr_input = !atobool(value);
51
     OPT("nal-hrd")
52
         b_error |= parse_enum( value, x264_nal_hrd_names, &p->i_nal_hrd );
53
+    OPT("filler")
54
+        p->rc.b_filler = atobool(value);
55
     OPT("pic-struct")
56
         p->b_pic_struct = atobool(value);
57
     OPT("fake-interlaced")
58
@@ -1099,7 +1102,7 @@
59
             break;
60
     }
61
     fprintf( stderr, "x264 [%s]: ", psz_prefix );
62
-    vfprintf( stderr, psz_fmt, arg );
63
+    x264_vfprintf( stderr, psz_fmt, arg );
64
 }
65
 
66
 /****************************************************************************
67
@@ -1141,7 +1144,7 @@
68
     };
69
 
70
     int csp = i_csp & X264_CSP_MASK;
71
-    if( csp <= X264_CSP_NONE || csp >= X264_CSP_MAX )
72
+    if( csp <= X264_CSP_NONE || csp >= X264_CSP_MAX || csp == X264_CSP_V210 )
73
         return -1;
74
     x264_picture_init( pic );
75
     pic->img.i_csp = i_csp;
76
@@ -1183,7 +1186,25 @@
77
 {
78
     uint8_t *align_buf = NULL;
79
 #if HAVE_MALLOC_H
80
-    align_buf = memalign( NATIVE_ALIGN, i_size );
81
+#if HAVE_THP
82
+#define HUGE_PAGE_SIZE 2*1024*1024
83
+#define HUGE_PAGE_THRESHOLD HUGE_PAGE_SIZE*7/8 /* FIXME: Is this optimal? */
84
+    /* Attempt to allocate huge pages to reduce TLB misses. */
85
+    if( i_size >= HUGE_PAGE_THRESHOLD )
86
+    {
87
+        align_buf = memalign( HUGE_PAGE_SIZE, i_size );
88
+        if( align_buf )
89
+        {
90
+            /* Round up to the next huge page boundary if we are close enough. */
91
+            size_t madv_size = (i_size + HUGE_PAGE_SIZE - HUGE_PAGE_THRESHOLD) & ~(HUGE_PAGE_SIZE-1);
92
+            madvise( align_buf, madv_size, MADV_HUGEPAGE );
93
+        }
94
+    }
95
+    else
96
+#undef HUGE_PAGE_SIZE
97
+#undef HUGE_PAGE_THRESHOLD
98
+#endif
99
+        align_buf = memalign( NATIVE_ALIGN, i_size );
100
 #else
101
     uint8_t *buf = malloc( i_size + (NATIVE_ALIGN-1) + sizeof(void **) );
102
     if( buf )
103
@@ -1246,7 +1267,7 @@
104
     int b_error = 0;
105
     size_t i_size;
106
     char *buf;
107
-    FILE *fh = fopen( filename, "rb" );
108
+    FILE *fh = x264_fopen( filename, "rb" );
109
     if( !fh )
110
         return NULL;
111
     b_error |= fseek( fh, 0, SEEK_END ) < 0;
112
@@ -1383,7 +1404,7 @@
113
         s += sprintf( s, " qp=%d", p->rc.i_qp_constant );
114
 
115
     if( p->rc.i_vbv_buffer_size )
116
-        s += sprintf( s, " nal_hrd=%s", x264_nal_hrd_names[p->i_nal_hrd] );
117
+        s += sprintf( s, " nal_hrd=%s filler=%d", x264_nal_hrd_names[p->i_nal_hrd], p->rc.b_filler );
118
     if( p->crop_rect.i_left | p->crop_rect.i_top | p->crop_rect.i_right | p->crop_rect.i_bottom )
119
         s += sprintf( s, " crop_rect=%u,%u,%u,%u", p->crop_rect.i_left, p->crop_rect.i_top,
120
                                                    p->crop_rect.i_right, p->crop_rect.i_bottom );
121
x264-snapshot-20130723-2245.tar.bz2/common/common.h -> x264-snapshot-20140321-2245.tar.bz2/common/common.h Changed
102
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * common.h: misc common functions
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
  *          Loren Merritt <lorenm@u.washington.edu>
10
@@ -54,6 +54,31 @@
11
     memset( var, 0, size );\
12
 } while( 0 )
13
 
14
+/* Macros for merging multiple allocations into a single large malloc, for improved
15
+ * use with huge pages. */
16
+
17
+/* Needs to be enough to contain any set of buffers that use combined allocations */
18
+#define PREALLOC_BUF_SIZE 1024
19
+
20
+#define PREALLOC_INIT\
21
+    int    prealloc_idx = 0;\
22
+    size_t prealloc_size = 0;\
23
+    uint8_t **preallocs[PREALLOC_BUF_SIZE];
24
+
25
+#define PREALLOC( var, size )\
26
+do {\
27
+    var = (void*)prealloc_size;\
28
+    preallocs[prealloc_idx++] = (uint8_t**)&var;\
29
+    prealloc_size += ALIGN(size, NATIVE_ALIGN);\
30
+} while(0)
31
+
32
+#define PREALLOC_END( ptr )\
33
+do {\
34
+    CHECKED_MALLOC( ptr, prealloc_size );\
35
+    while( prealloc_idx-- )\
36
+        *preallocs[prealloc_idx] += (intptr_t)ptr;\
37
+} while(0)
38
+
39
 #define ARRAY_SIZE(array)  (sizeof(array)/sizeof(array[0]))
40
 
41
 #define X264_BFRAME_MAX 16
42
@@ -84,6 +109,7 @@
43
 
44
 #define NALU_OVERHEAD 5 // startcode + NAL type costs 5 bytes per frame
45
 #define FILLER_OVERHEAD (NALU_OVERHEAD+1)
46
+#define SEI_OVERHEAD (NALU_OVERHEAD - (h->param.b_annexb && !h->param.i_avcintra_class && (h->out.i_nal-1)))
47
 
48
 /****************************************************************************
49
  * Includes
50
@@ -491,6 +517,9 @@
51
     uint8_t *nal_buffer;
52
     int      nal_buffer_size;
53
 
54
+    x264_t          *reconfig_h;
55
+    int             reconfig;
56
+
57
     /**** thread synchronization starts here ****/
58
 
59
     /* frame number/poc */
60
@@ -523,15 +552,15 @@
61
     int             (*dequant4_mf[4])[16];   /* [4][6][16] */
62
     int             (*dequant8_mf[4])[64];   /* [4][6][64] */
63
     /* quantization matrix for trellis, [cqm][qp][coef] */
64
-    int             (*unquant4_mf[4])[16];   /* [4][52][16] */
65
-    int             (*unquant8_mf[4])[64];   /* [4][52][64] */
66
+    int             (*unquant4_mf[4])[16];   /* [4][QP_MAX_SPEC+1][16] */
67
+    int             (*unquant8_mf[4])[64];   /* [4][QP_MAX_SPEC+1][64] */
68
     /* quantization matrix for deadzone */
69
-    udctcoef        (*quant4_mf[4])[16];     /* [4][52][16] */
70
-    udctcoef        (*quant8_mf[4])[64];     /* [4][52][64] */
71
-    udctcoef        (*quant4_bias[4])[16];   /* [4][52][16] */
72
-    udctcoef        (*quant8_bias[4])[64];   /* [4][52][64] */
73
-    udctcoef        (*quant4_bias0[4])[16];  /* [4][52][16] */
74
-    udctcoef        (*quant8_bias0[4])[64];  /* [4][52][64] */
75
+    udctcoef        (*quant4_mf[4])[16];     /* [4][QP_MAX_SPEC+1][16] */
76
+    udctcoef        (*quant8_mf[4])[64];     /* [4][QP_MAX_SPEC+1][64] */
77
+    udctcoef        (*quant4_bias[4])[16];   /* [4][QP_MAX_SPEC+1][16] */
78
+    udctcoef        (*quant8_bias[4])[64];   /* [4][QP_MAX_SPEC+1][64] */
79
+    udctcoef        (*quant4_bias0[4])[16];  /* [4][QP_MAX_SPEC+1][16] */
80
+    udctcoef        (*quant8_bias0[4])[64];  /* [4][QP_MAX_SPEC+1][64] */
81
     udctcoef        (*nr_offset_emergency)[4][64];
82
 
83
     /* mv/ref cost arrays. */
84
@@ -699,6 +728,7 @@
85
          * and won't be copied from one thread to another */
86
 
87
         /* mb table */
88
+        uint8_t *base;                      /* base pointer for all malloced data in this mb */
89
         int8_t  *type;                      /* mb type */
90
         uint8_t *partition;                 /* mb partition */
91
         int8_t  *qp;                        /* mb qp */
92
@@ -937,9 +967,6 @@
93
     x264_deblock_function_t loopf;
94
     x264_bitstream_function_t bsf;
95
 
96
-#if HAVE_VISUALIZE
97
-    struct visualize_t *visualize;
98
-#endif
99
     x264_lookahead_t *lookahead;
100
 
101
 #if HAVE_OPENCL
102
x264-snapshot-20130723-2245.tar.bz2/common/cpu.c -> x264-snapshot-20140321-2245.tar.bz2/common/cpu.c Changed
114
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * cpu.c: cpu detection
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Loren Merritt <lorenm@u.washington.edu>
9
  *          Laurent Aimar <fenrir@via.ecp.fr>
10
@@ -74,7 +74,6 @@
11
 #undef MMX2
12
     {"Cache32",         X264_CPU_CACHELINE_32},
13
     {"Cache64",         X264_CPU_CACHELINE_64},
14
-    {"SSEMisalign",     X264_CPU_SSE_MISALIGN},
15
     {"LZCNT",           X264_CPU_LZCNT},
16
     {"BMI1",            X264_CPU_BMI1},
17
     {"BMI2",            X264_CPU_BMI1|X264_CPU_BMI2},
18
@@ -123,7 +122,7 @@
19
     uint32_t cpu = 0;
20
     uint32_t eax, ebx, ecx, edx;
21
     uint32_t vendor[4] = {0};
22
-    uint32_t max_extended_cap;
23
+    uint32_t max_extended_cap, max_basic_cap;
24
     int cache;
25
 
26
 #if !ARCH_X86_64
27
@@ -132,7 +131,8 @@
28
 #endif
29
 
30
     x264_cpu_cpuid( 0, &eax, vendor+0, vendor+2, vendor+1 );
31
-    if( eax == 0 )
32
+    max_basic_cap = eax;
33
+    if( max_basic_cap == 0 )
34
         return 0;
35
 
36
     x264_cpu_cpuid( 1, &eax, &ebx, &ecx, &edx );
37
@@ -169,15 +169,18 @@
38
         }
39
     }
40
 
41
-    x264_cpu_cpuid( 7, &eax, &ebx, &ecx, &edx );
42
-    /* AVX2 requires OS support, but BMI1/2 don't. */
43
-    if( (cpu&X264_CPU_AVX) && (ebx&0x00000020) )
44
-        cpu |= X264_CPU_AVX2;
45
-    if( ebx&0x00000008 )
46
+    if( max_basic_cap >= 7 )
47
     {
48
-        cpu |= X264_CPU_BMI1;
49
-        if( ebx&0x00000100 )
50
-            cpu |= X264_CPU_BMI2;
51
+        x264_cpu_cpuid( 7, &eax, &ebx, &ecx, &edx );
52
+        /* AVX2 requires OS support, but BMI1/2 don't. */
53
+        if( (cpu&X264_CPU_AVX) && (ebx&0x00000020) )
54
+            cpu |= X264_CPU_AVX2;
55
+        if( ebx&0x00000008 )
56
+        {
57
+            cpu |= X264_CPU_BMI1;
58
+            if( ebx&0x00000100 )
59
+                cpu |= X264_CPU_BMI2;
60
+        }
61
     }
62
 
63
     if( cpu & X264_CPU_SSSE3 )
64
@@ -210,12 +213,6 @@
65
             }
66
         }
67
 
68
-        if( ecx&0x00000080 ) /* Misalign SSE */
69
-        {
70
-            cpu |= X264_CPU_SSE_MISALIGN;
71
-            x264_cpu_mask_misalign_sse();
72
-        }
73
-
74
         if( cpu & X264_CPU_AVX )
75
         {
76
             if( ecx&0x00000800 ) /* XOP */
77
@@ -274,7 +271,7 @@
78
             x264_cpu_cpuid( 0x80000006, &eax, &ebx, &ecx, &edx );
79
             cache = ecx&0xff; // cacheline size
80
         }
81
-        if( !cache )
82
+        if( !cache && max_basic_cap >= 2 )
83
         {
84
             // Cache and TLB Information
85
             static const char cache32_ids[] = { 0x0a, 0x0c, 0x41, 0x42, 0x43, 0x44, 0x45, 0x82, 0x83, 0x84, 0x85, 0 };
86
@@ -307,7 +304,7 @@
87
             x264_log( NULL, X264_LOG_WARNING, "unable to determine cacheline size\n" );
88
     }
89
 
90
-#if BROKEN_STACK_ALIGNMENT
91
+#if STACK_ALIGNMENT < 16
92
     cpu |= X264_CPU_STACK_MOD4;
93
 #endif
94
 
95
@@ -429,6 +426,10 @@
96
     return sysconf( _SC_NPROCESSORS_ONLN );
97
 
98
 #elif SYS_LINUX
99
+#ifdef __ANDROID__
100
+    // Android NDK does not expose sched_getaffinity
101
+    return sysconf( _SC_NPROCESSORS_CONF );
102
+#else
103
     cpu_set_t p_aff;
104
     memset( &p_aff, 0, sizeof(p_aff) );
105
     if( sched_getaffinity( 0, sizeof(p_aff), &p_aff ) )
106
@@ -441,6 +442,7 @@
107
         np += (((uint8_t *)&p_aff)[bit / 8] >> (bit % 8)) & 1;
108
     return np;
109
 #endif
110
+#endif
111
 
112
 #elif SYS_BEOS
113
     system_info info;
114
x264-snapshot-20130723-2245.tar.bz2/common/cpu.h -> x264-snapshot-20140321-2245.tar.bz2/common/cpu.h Changed
29
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * cpu.h: cpu detection
4
  *****************************************************************************
5
- * Copyright (C) 2004-2013 x264 project
6
+ * Copyright (C) 2004-2014 x264 project
7
  *
8
  * Authors: Loren Merritt <lorenm@u.washington.edu>
9
  *
10
@@ -45,7 +45,6 @@
11
 #define x264_emms()
12
 #endif
13
 #define x264_sfence x264_cpu_sfence
14
-void     x264_cpu_mask_misalign_sse( void );
15
 void     x264_safe_intel_cpu_indicator_init( void );
16
 
17
 /* kludge:
18
@@ -58,8 +57,8 @@
19
  * alignment between functions (osdep.h handles manual alignment of arrays
20
  * if it doesn't).
21
  */
22
-#if (ARCH_X86 || HAVE_32B_STACK_ALIGNMENT) && HAVE_MMX
23
-int x264_stack_align( void (*func)(), ... );
24
+#if (ARCH_X86 || STACK_ALIGNMENT > 16) && HAVE_MMX
25
+intptr_t x264_stack_align( void (*func)(), ... );
26
 #define x264_stack_align(func,...) x264_stack_align((void (*)())func, __VA_ARGS__)
27
 #else
28
 #define x264_stack_align(func,...) func(__VA_ARGS__)
29
x264-snapshot-20130723-2245.tar.bz2/common/dct.c -> x264-snapshot-20140321-2245.tar.bz2/common/dct.c Changed
15
 
1
@@ -1,11 +1,11 @@
2
 /*****************************************************************************
3
  * dct.c: transform and zigzag
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Loren Merritt <lorenm@u.washington.edu>
9
  *          Laurent Aimar <fenrir@via.ecp.fr>
10
- *          Henrik Gramner <hengar-6@student.ltu.se>
11
+ *          Henrik Gramner <henrik@gramner.com>
12
  *
13
  * This program is free software; you can redistribute it and/or modify
14
  * it under the terms of the GNU General Public License as published by
15
x264-snapshot-20130723-2245.tar.bz2/common/dct.h -> x264-snapshot-20140321-2245.tar.bz2/common/dct.h Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * dct.h: transform and zigzag
4
  *****************************************************************************
5
- * Copyright (C) 2004-2013 x264 project
6
+ * Copyright (C) 2004-2014 x264 project
7
  *
8
  * Authors: Loren Merritt <lorenm@u.washington.edu>
9
  *
10
x264-snapshot-20130723-2245.tar.bz2/common/deblock.c -> x264-snapshot-20140321-2245.tar.bz2/common/deblock.c Changed
16
 
1
@@ -1,12 +1,12 @@
2
 /*****************************************************************************
3
  * deblock.c: deblocking
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
  *          Loren Merritt <lorenm@u.washington.edu>
10
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
11
- *          Henrik Gramner <hengar-6@student.ltu.se>
12
+ *          Henrik Gramner <henrik@gramner.com>
13
  *
14
  * This program is free software; you can redistribute it and/or modify
15
  * it under the terms of the GNU General Public License as published by
16
x264-snapshot-20130723-2245.tar.bz2/common/frame.c -> x264-snapshot-20140321-2245.tar.bz2/common/frame.c Changed
304
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * frame.c: frame handling
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
  *          Loren Merritt <lorenm@u.washington.edu>
10
@@ -53,6 +53,7 @@
11
         case X264_CSP_NV16:
12
         case X264_CSP_I422:
13
         case X264_CSP_YV16:
14
+        case X264_CSP_V210:
15
             return X264_CSP_NV16;
16
         case X264_CSP_I444:
17
         case X264_CSP_YV24:
18
@@ -86,6 +87,7 @@
19
 #endif
20
 
21
     CHECKED_MALLOCZERO( frame, sizeof(x264_frame_t) );
22
+    PREALLOC_INIT
23
 
24
     /* allocate frame data (+64 for extra data for me) */
25
     i_width  = h->mb.i_mb_width*16;
26
@@ -124,7 +126,7 @@
27
 
28
     for( int i = 0; i < h->param.i_bframe + 2; i++ )
29
         for( int j = 0; j < h->param.i_bframe + 2; j++ )
30
-            CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
31
+            PREALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
32
 
33
     frame->i_poc = -1;
34
     frame->i_type = X264_TYPE_AUTO;
35
@@ -149,13 +151,9 @@
36
     {
37
         int chroma_padv = i_padv >> (i_csp == X264_CSP_NV12);
38
         int chroma_plane_size = (frame->i_stride[1] * (frame->i_lines[1] + 2*chroma_padv));
39
-        CHECKED_MALLOC( frame->buffer[1], chroma_plane_size * sizeof(pixel) );
40
-        frame->plane[1] = frame->buffer[1] + frame->i_stride[1] * chroma_padv + PADH;
41
+        PREALLOC( frame->buffer[1], chroma_plane_size * sizeof(pixel) );
42
         if( PARAM_INTERLACED )
43
-        {
44
-            CHECKED_MALLOC( frame->buffer_fld[1], chroma_plane_size * sizeof(pixel) );
45
-            frame->plane_fld[1] = frame->buffer_fld[1] + frame->i_stride[1] * chroma_padv + PADH;
46
-        }
47
+            PREALLOC( frame->buffer_fld[1], chroma_plane_size * sizeof(pixel) );
48
     }
49
 
50
     /* all 4 luma planes allocated together, since the cacheline split code
51
@@ -167,24 +165,15 @@
52
         if( h->param.analyse.i_subpel_refine && b_fdec )
53
         {
54
             /* FIXME: Don't allocate both buffers in non-adaptive MBAFF. */
55
-            CHECKED_MALLOC( frame->buffer[p], 4*luma_plane_size * sizeof(pixel) );
56
+            PREALLOC( frame->buffer[p], 4*luma_plane_size * sizeof(pixel) );
57
             if( PARAM_INTERLACED )
58
-                CHECKED_MALLOC( frame->buffer_fld[p], 4*luma_plane_size * sizeof(pixel) );
59
-            for( int i = 0; i < 4; i++ )
60
-            {
61
-                frame->filtered[p][i] = frame->buffer[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH;
62
-                frame->filtered_fld[p][i] = frame->buffer_fld[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH;
63
-            }
64
-            frame->plane[p] = frame->filtered[p][0];
65
-            frame->plane_fld[p] = frame->filtered_fld[p][0];
66
+                PREALLOC( frame->buffer_fld[p], 4*luma_plane_size * sizeof(pixel) );
67
         }
68
         else
69
         {
70
-            CHECKED_MALLOC( frame->buffer[p], luma_plane_size * sizeof(pixel) );
71
+            PREALLOC( frame->buffer[p], luma_plane_size * sizeof(pixel) );
72
             if( PARAM_INTERLACED )
73
-                CHECKED_MALLOC( frame->buffer_fld[p], luma_plane_size * sizeof(pixel) );
74
-            frame->filtered[p][0] = frame->plane[p] = frame->buffer[p] + frame->i_stride[p] * i_padv + PADH;
75
-            frame->filtered_fld[p][0] = frame->plane_fld[p] = frame->buffer_fld[p] + frame->i_stride[p] * i_padv + PADH;
76
+                PREALLOC( frame->buffer_fld[p], luma_plane_size * sizeof(pixel) );
77
         }
78
     }
79
 
80
@@ -192,36 +181,30 @@
81
 
82
     if( b_fdec ) /* fdec frame */
83
     {
84
-        CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
85
-        CHECKED_MALLOC( frame->mb_partition, i_mb_count * sizeof(uint8_t));
86
-        CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
87
-        CHECKED_MALLOC( frame->mv16x16, 2*(i_mb_count+1) * sizeof(int16_t) );
88
-        M32( frame->mv16x16[0] ) = 0;
89
-        frame->mv16x16++;
90
-        CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
91
+        PREALLOC( frame->mb_type, i_mb_count * sizeof(int8_t) );
92
+        PREALLOC( frame->mb_partition, i_mb_count * sizeof(uint8_t) );
93
+        PREALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
94
+        PREALLOC( frame->mv16x16, 2*(i_mb_count+1) * sizeof(int16_t) );
95
+        PREALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
96
         if( h->param.i_bframe )
97
         {
98
-            CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
99
-            CHECKED_MALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) );
100
+            PREALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
101
+            PREALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) );
102
         }
103
         else
104
         {
105
             frame->mv[1]  = NULL;
106
             frame->ref[1] = NULL;
107
         }
108
-        CHECKED_MALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) );
109
-        CHECKED_MALLOC( frame->f_row_qp, i_lines/16 * sizeof(float) );
110
-        CHECKED_MALLOC( frame->f_row_qscale, i_lines/16 * sizeof(float) );
111
+        PREALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) );
112
+        PREALLOC( frame->f_row_qp, i_lines/16 * sizeof(float) );
113
+        PREALLOC( frame->f_row_qscale, i_lines/16 * sizeof(float) );
114
         if( h->param.analyse.i_me_method >= X264_ME_ESA )
115
-        {
116
-            CHECKED_MALLOC( frame->buffer[3],
117
-                            frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) << h->frames.b_have_sub8x8_esa );
118
-            frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
119
-        }
120
+            PREALLOC( frame->buffer[3], frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) << h->frames.b_have_sub8x8_esa );
121
         if( PARAM_INTERLACED )
122
-            CHECKED_MALLOC( frame->field, i_mb_count * sizeof(uint8_t) );
123
+            PREALLOC( frame->field, i_mb_count * sizeof(uint8_t) );
124
         if( h->param.analyse.b_mb_info )
125
-            CHECKED_MALLOC( frame->effective_qp, i_mb_count * sizeof(uint8_t) );
126
+            PREALLOC( frame->effective_qp, i_mb_count * sizeof(uint8_t) );
127
     }
128
     else /* fenc frame */
129
     {
130
@@ -229,30 +212,85 @@
131
         {
132
             int luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
133
 
134
-            CHECKED_MALLOC( frame->buffer_lowres[0], 4 * luma_plane_size * sizeof(pixel) );
135
-            for( int i = 0; i < 4; i++ )
136
-                frame->lowres[i] = frame->buffer_lowres[0] + (frame->i_stride_lowres * PADV + PADH) + i * luma_plane_size;
137
+            PREALLOC( frame->buffer_lowres[0], 4 * luma_plane_size * sizeof(pixel) );
138
 
139
             for( int j = 0; j <= !!h->param.i_bframe; j++ )
140
                 for( int i = 0; i <= h->param.i_bframe; i++ )
141
                 {
142
-                    CHECKED_MALLOCZERO( frame->lowres_mvs[j][i], 2*h->mb.i_mb_count*sizeof(int16_t) );
143
-                    CHECKED_MALLOC( frame->lowres_mv_costs[j][i], h->mb.i_mb_count*sizeof(int) );
144
+                    PREALLOC( frame->lowres_mvs[j][i], 2*h->mb.i_mb_count*sizeof(int16_t) );
145
+                    PREALLOC( frame->lowres_mv_costs[j][i], h->mb.i_mb_count*sizeof(int) );
146
                 }
147
-            CHECKED_MALLOC( frame->i_propagate_cost, (i_mb_count+7) * sizeof(uint16_t) );
148
+            PREALLOC( frame->i_propagate_cost, (i_mb_count+7) * sizeof(uint16_t) );
149
             for( int j = 0; j <= h->param.i_bframe+1; j++ )
150
                 for( int i = 0; i <= h->param.i_bframe+1; i++ )
151
-                    CHECKED_MALLOC( frame->lowres_costs[j][i], (i_mb_count+3) * sizeof(uint16_t) );
152
-            frame->i_intra_cost = frame->lowres_costs[0][0];
153
-            memset( frame->i_intra_cost, -1, (i_mb_count+3) * sizeof(uint16_t) );
154
+                    PREALLOC( frame->lowres_costs[j][i], (i_mb_count+3) * sizeof(uint16_t) );
155
+
156
         }
157
         if( h->param.rc.i_aq_mode )
158
         {
159
-            CHECKED_MALLOC( frame->f_qp_offset, h->mb.i_mb_count * sizeof(float) );
160
-            CHECKED_MALLOC( frame->f_qp_offset_aq, h->mb.i_mb_count * sizeof(float) );
161
+            PREALLOC( frame->f_qp_offset, h->mb.i_mb_count * sizeof(float) );
162
+            PREALLOC( frame->f_qp_offset_aq, h->mb.i_mb_count * sizeof(float) );
163
             if( h->frames.b_have_lowres )
164
+                PREALLOC( frame->i_inv_qscale_factor, (h->mb.i_mb_count+3) * sizeof(uint16_t) );
165
+        }
166
+    }
167
+
168
+    PREALLOC_END( frame->base );
169
+
170
+    if( i_csp == X264_CSP_NV12 || i_csp == X264_CSP_NV16 )
171
+    {
172
+        int chroma_padv = i_padv >> (i_csp == X264_CSP_NV12);
173
+        frame->plane[1] = frame->buffer[1] + frame->i_stride[1] * chroma_padv + PADH;
174
+        if( PARAM_INTERLACED )
175
+            frame->plane_fld[1] = frame->buffer_fld[1] + frame->i_stride[1] * chroma_padv + PADH;
176
+    }
177
+
178
+    for( int p = 0; p < luma_plane_count; p++ )
179
+    {
180
+        int luma_plane_size = align_plane_size( frame->i_stride[p] * (frame->i_lines[p] + 2*i_padv), disalign );
181
+        if( h->param.analyse.i_subpel_refine && b_fdec )
182
+        {
183
+            for( int i = 0; i < 4; i++ )
184
+            {
185
+                frame->filtered[p][i] = frame->buffer[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH;
186
+                frame->filtered_fld[p][i] = frame->buffer_fld[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH;
187
+            }
188
+            frame->plane[p] = frame->filtered[p][0];
189
+            frame->plane_fld[p] = frame->filtered_fld[p][0];
190
+        }
191
+        else
192
+        {
193
+            frame->filtered[p][0] = frame->plane[p] = frame->buffer[p] + frame->i_stride[p] * i_padv + PADH;
194
+            frame->filtered_fld[p][0] = frame->plane_fld[p] = frame->buffer_fld[p] + frame->i_stride[p] * i_padv + PADH;
195
+        }
196
+    }
197
+
198
+    if( b_fdec )
199
+    {
200
+        M32( frame->mv16x16[0] ) = 0;
201
+        frame->mv16x16++;
202
+
203
+        if( h->param.analyse.i_me_method >= X264_ME_ESA )
204
+            frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
205
+    }
206
+    else
207
+    {
208
+        if( h->frames.b_have_lowres )
209
+        {
210
+            int luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
211
+            for( int i = 0; i < 4; i++ )
212
+                frame->lowres[i] = frame->buffer_lowres[0] + (frame->i_stride_lowres * PADV + PADH) + i * luma_plane_size;
213
+
214
+            for( int j = 0; j <= !!h->param.i_bframe; j++ )
215
+                for( int i = 0; i <= h->param.i_bframe; i++ )
216
+                    memset( frame->lowres_mvs[j][i], 0, 2*h->mb.i_mb_count*sizeof(int16_t) );
217
+
218
+            frame->i_intra_cost = frame->lowres_costs[0][0];
219
+            memset( frame->i_intra_cost, -1, (i_mb_count+3) * sizeof(uint16_t) );
220
+
221
+            if( h->param.rc.i_aq_mode )
222
                 /* shouldn't really be initialized, just silences a valgrind false-positive in x264_mbtree_propagate_cost_sse2 */
223
-                CHECKED_MALLOCZERO( frame->i_inv_qscale_factor, (h->mb.i_mb_count+3) * sizeof(uint16_t) );
224
+                memset( frame->i_inv_qscale_factor, 0, (h->mb.i_mb_count+3) * sizeof(uint16_t) );
225
         }
226
     }
227
 
228
@@ -278,42 +316,8 @@
229
      * so freeing those pointers would cause a double free later. */
230
     if( !frame->b_duplicate )
231
     {
232
-        for( int i = 0; i < 4; i++ )
233
-        {
234
-            x264_free( frame->buffer[i] );
235
-            x264_free( frame->buffer_fld[i] );
236
-        }
237
-        for( int i = 0; i < 4; i++ )
238
-            x264_free( frame->buffer_lowres[i] );
239
-        for( int i = 0; i < X264_BFRAME_MAX+2; i++ )
240
-            for( int j = 0; j < X264_BFRAME_MAX+2; j++ )
241
-                x264_free( frame->i_row_satds[i][j] );
242
-        for( int j = 0; j < 2; j++ )
243
-            for( int i = 0; i <= X264_BFRAME_MAX; i++ )
244
-            {
245
-                x264_free( frame->lowres_mvs[j][i] );
246
-                x264_free( frame->lowres_mv_costs[j][i] );
247
-            }
248
-        x264_free( frame->i_propagate_cost );
249
-        for( int j = 0; j <= X264_BFRAME_MAX+1; j++ )
250
-            for( int i = 0; i <= X264_BFRAME_MAX+1; i++ )
251
-                x264_free( frame->lowres_costs[j][i] );
252
-        x264_free( frame->f_qp_offset );
253
-        x264_free( frame->f_qp_offset_aq );
254
-        x264_free( frame->i_inv_qscale_factor );
255
-        x264_free( frame->i_row_bits );
256
-        x264_free( frame->f_row_qp );
257
-        x264_free( frame->f_row_qscale );
258
-        x264_free( frame->field );
259
-        x264_free( frame->effective_qp );
260
-        x264_free( frame->mb_type );
261
-        x264_free( frame->mb_partition );
262
-        x264_free( frame->mv[0] );
263
-        x264_free( frame->mv[1] );
264
-        if( frame->mv16x16 )
265
-            x264_free( frame->mv16x16-1 );
266
-        x264_free( frame->ref[0] );
267
-        x264_free( frame->ref[1] );
268
+        x264_free( frame->base );
269
+
270
         if( frame->param && frame->param->param_free )
271
             frame->param->param_free( frame->param );
272
         if( frame->mb_info_free )
273
@@ -377,6 +381,12 @@
274
     }
275
 #endif
276
 
277
+    if( BIT_DEPTH != 10 && i_csp == X264_CSP_V210 )
278
+    {
279
+        x264_log( h, X264_LOG_ERROR, "v210 input is only compatible with bit-depth of 10 bits\n" );
280
+        return -1;
281
+    }
282
+
283
     dst->i_type     = src->i_type;
284
     dst->i_qpplus1  = src->i_qpplus1;
285
     dst->i_pts      = dst->i_reordered_pts = src->i_pts;
286
@@ -389,7 +399,16 @@
287
 
288
     uint8_t *pix[3];
289
     int stride[3];
290
-    if ( i_csp >= X264_CSP_BGR )
291
+    if( i_csp == X264_CSP_V210 )
292
+    {
293
+         stride[0] = src->img.i_stride[0];
294
+         pix[0] = src->img.plane[0];
295
+
296
+         h->mc.plane_copy_deinterleave_v210( dst->plane[0], dst->i_stride[0],
297
+                                             dst->plane[1], dst->i_stride[1],
298
+                                             (uint32_t *)pix[0], stride[0]/sizeof(uint32_t), h->param.i_width, h->param.i_height );
299
+    }
300
+    else if( i_csp >= X264_CSP_BGR )
301
     {
302
          stride[0] = src->img.i_stride[0];
303
          pix[0] = src->img.plane[0];
304
x264-snapshot-20130723-2245.tar.bz2/common/frame.h -> x264-snapshot-20140321-2245.tar.bz2/common/frame.h Changed
18
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * frame.h: frame handling
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
  *          Loren Merritt <lorenm@u.washington.edu>
10
@@ -35,6 +35,7 @@
11
 typedef struct x264_frame
12
 {
13
     /* */
14
+    uint8_t *base;       /* Base pointer for all malloced data in this frame. */
15
     int     i_poc;
16
     int     i_delta_poc[2];
17
     int     i_type;
18
x264-snapshot-20130723-2245.tar.bz2/common/macroblock.c -> x264-snapshot-20140321-2245.tar.bz2/common/macroblock.c Changed
185
 
1
@@ -1,12 +1,12 @@
2
 /*****************************************************************************
3
  * macroblock.c: macroblock common functions
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Jason Garrett-Glaser <darkshikari@gmail.com>
9
  *          Laurent Aimar <fenrir@via.ecp.fr>
10
  *          Loren Merritt <lorenm@u.washington.edu>
11
- *          Henrik Gramner <hengar-6@student.ltu.se>
12
+ *          Henrik Gramner <henrik@gramner.com>
13
  *
14
  * This program is free software; you can redistribute it and/or modify
15
  * it under the terms of the GNU General Public License as published by
16
@@ -256,25 +256,26 @@
17
 
18
     h->mb.b_interlaced = PARAM_INTERLACED;
19
 
20
-    CHECKED_MALLOC( h->mb.qp, i_mb_count * sizeof(int8_t) );
21
-    CHECKED_MALLOC( h->mb.cbp, i_mb_count * sizeof(int16_t) );
22
-    CHECKED_MALLOC( h->mb.mb_transform_size, i_mb_count * sizeof(int8_t) );
23
-    CHECKED_MALLOC( h->mb.slice_table, i_mb_count * sizeof(uint16_t) );
24
-    memset( h->mb.slice_table, -1, i_mb_count * sizeof(uint16_t) );
25
+    PREALLOC_INIT
26
+
27
+    PREALLOC( h->mb.qp, i_mb_count * sizeof(int8_t) );
28
+    PREALLOC( h->mb.cbp, i_mb_count * sizeof(int16_t) );
29
+    PREALLOC( h->mb.mb_transform_size, i_mb_count * sizeof(int8_t) );
30
+    PREALLOC( h->mb.slice_table, i_mb_count * sizeof(uint16_t) );
31
 
32
     /* 0 -> 3 top(4), 4 -> 6 : left(3) */
33
-    CHECKED_MALLOC( h->mb.intra4x4_pred_mode, i_mb_count * 8 * sizeof(int8_t) );
34
+    PREALLOC( h->mb.intra4x4_pred_mode, i_mb_count * 8 * sizeof(int8_t) );
35
 
36
     /* all coeffs */
37
-    CHECKED_MALLOC( h->mb.non_zero_count, i_mb_count * 48 * sizeof(uint8_t) );
38
+    PREALLOC( h->mb.non_zero_count, i_mb_count * 48 * sizeof(uint8_t) );
39
 
40
     if( h->param.b_cabac )
41
     {
42
-        CHECKED_MALLOC( h->mb.skipbp, i_mb_count * sizeof(int8_t) );
43
-        CHECKED_MALLOC( h->mb.chroma_pred_mode, i_mb_count * sizeof(int8_t) );
44
-        CHECKED_MALLOC( h->mb.mvd[0], i_mb_count * sizeof( **h->mb.mvd ) );
45
+        PREALLOC( h->mb.skipbp, i_mb_count * sizeof(int8_t) );
46
+        PREALLOC( h->mb.chroma_pred_mode, i_mb_count * sizeof(int8_t) );
47
+        PREALLOC( h->mb.mvd[0], i_mb_count * sizeof( **h->mb.mvd ) );
48
         if( h->param.i_bframe )
49
-            CHECKED_MALLOC( h->mb.mvd[1], i_mb_count * sizeof( **h->mb.mvd ) );
50
+            PREALLOC( h->mb.mvd[1], i_mb_count * sizeof( **h->mb.mvd ) );
51
     }
52
 
53
     for( int i = 0; i < 2; i++ )
54
@@ -284,11 +285,7 @@
55
             i_refs = X264_MIN(X264_REF_MAX, i_refs + 1 + (BIT_DEPTH == 8)); //smart weights add two duplicate frames, one in >8-bit
56
 
57
         for( int j = !i; j < i_refs; j++ )
58
-        {
59
-            CHECKED_MALLOC( h->mb.mvr[i][j], 2 * (i_mb_count + 1) * sizeof(int16_t) );
60
-            M32( h->mb.mvr[i][j][0] ) = 0;
61
-            h->mb.mvr[i][j]++;
62
-        }
63
+            PREALLOC( h->mb.mvr[i][j], 2 * (i_mb_count + 1) * sizeof(int16_t) );
64
     }
65
 
66
     if( h->param.analyse.i_weighted_pred )
67
@@ -325,7 +322,24 @@
68
         }
69
 
70
         for( int i = 0; i < numweightbuf; i++ )
71
-            CHECKED_MALLOC( h->mb.p_weight_buf[i], luma_plane_size * sizeof(pixel) );
72
+            PREALLOC( h->mb.p_weight_buf[i], luma_plane_size * sizeof(pixel) );
73
+    }
74
+
75
+    PREALLOC_END( h->mb.base );
76
+
77
+    memset( h->mb.slice_table, -1, i_mb_count * sizeof(uint16_t) );
78
+
79
+    for( int i = 0; i < 2; i++ )
80
+    {
81
+        int i_refs = X264_MIN(X264_REF_MAX, (i ? 1 + !!h->param.i_bframe_pyramid : h->param.i_frame_reference) ) << PARAM_INTERLACED;
82
+        if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
83
+            i_refs = X264_MIN(X264_REF_MAX, i_refs + 1 + (BIT_DEPTH == 8)); //smart weights add two duplicate frames, one in >8-bit
84
+
85
+        for( int j = !i; j < i_refs; j++ )
86
+        {
87
+            M32( h->mb.mvr[i][j][0] ) = 0;
88
+            h->mb.mvr[i][j]++;
89
+        }
90
     }
91
 
92
     return 0;
93
@@ -334,26 +348,7 @@
94
 }
95
 void x264_macroblock_cache_free( x264_t *h )
96
 {
97
-    for( int i = 0; i < 2; i++ )
98
-        for( int j = !i; j < X264_REF_MAX*2; j++ )
99
-            if( h->mb.mvr[i][j] )
100
-                x264_free( h->mb.mvr[i][j]-1 );
101
-    for( int i = 0; i < X264_REF_MAX; i++ )
102
-        x264_free( h->mb.p_weight_buf[i] );
103
-
104
-    if( h->param.b_cabac )
105
-    {
106
-        x264_free( h->mb.skipbp );
107
-        x264_free( h->mb.chroma_pred_mode );
108
-        x264_free( h->mb.mvd[0] );
109
-        x264_free( h->mb.mvd[1] );
110
-    }
111
-    x264_free( h->mb.slice_table );
112
-    x264_free( h->mb.intra4x4_pred_mode );
113
-    x264_free( h->mb.non_zero_count );
114
-    x264_free( h->mb.mb_transform_size );
115
-    x264_free( h->mb.cbp );
116
-    x264_free( h->mb.qp );
117
+    x264_free( h->mb.base );
118
 }
119
 
120
 int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
121
@@ -394,7 +389,7 @@
122
             ((me_range*2+24) * sizeof(int16_t) + (me_range+4) * (me_range+1) * 4 * sizeof(mvsad_t));
123
         scratch_size = X264_MAX3( buf_hpel, buf_ssim, buf_tesa );
124
     }
125
-    int buf_mbtree = h->param.rc.b_mb_tree * ((h->mb.i_mb_width+7)&~7) * sizeof(int);
126
+    int buf_mbtree = h->param.rc.b_mb_tree * ((h->mb.i_mb_width+7)&~7) * sizeof(int16_t);
127
     scratch_size = X264_MAX( scratch_size, buf_mbtree );
128
     if( scratch_size )
129
         CHECKED_MALLOC( h->scratch_buffer, scratch_size );
130
@@ -402,7 +397,9 @@
131
         h->scratch_buffer = NULL;
132
 
133
     int buf_lookahead_threads = (h->mb.i_mb_height + (4 + 32) * h->param.i_lookahead_threads) * sizeof(int) * 2;
134
-    CHECKED_MALLOC( h->scratch_buffer2, buf_lookahead_threads );
135
+    int buf_mbtree2 = buf_mbtree * 12; /* size of the internal propagate_list asm buffer */
136
+    scratch_size = X264_MAX( buf_lookahead_threads, buf_mbtree2 );
137
+    CHECKED_MALLOC( h->scratch_buffer2, scratch_size );
138
 
139
     return 0;
140
 fail:
141
@@ -1258,8 +1255,13 @@
142
         }
143
     }
144
 
145
-    if( b_mbaff && mb_x == 0 && !(mb_y&1) && mb_y > 0 )
146
-        h->mb.field_decoding_flag = h->mb.field[h->mb.i_mb_xy - h->mb.i_mb_stride];
147
+    if( b_mbaff && mb_x == 0 && !(mb_y&1) )
148
+    {
149
+        if( h->mb.i_mb_top_xy >= h->sh.i_first_mb )
150
+            h->mb.field_decoding_flag = h->mb.field[h->mb.i_mb_top_xy];
151
+        else
152
+            h->mb.field_decoding_flag = 0;
153
+    }
154
 
155
     /* Check whether skip here would cause decoder to predict interlace mode incorrectly.
156
      * FIXME: It might be better to change the interlace type rather than forcing a skip to be non-skip. */
157
@@ -1267,26 +1269,8 @@
158
     if( b_mbaff )
159
     {
160
         if( MB_INTERLACED != h->mb.field_decoding_flag &&
161
-            h->mb.i_mb_prev_xy >= 0 && IS_SKIP(h->mb.type[h->mb.i_mb_prev_xy]) )
162
+            (mb_y&1) && IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride]) )
163
             h->mb.b_allow_skip = 0;
164
-        if( (mb_y&1) && IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride]) )
165
-        {
166
-            if( h->mb.i_neighbour & MB_LEFT )
167
-            {
168
-                if( h->mb.field[h->mb.i_mb_xy - 1] != MB_INTERLACED )
169
-                    h->mb.b_allow_skip = 0;
170
-            }
171
-            else if( h->mb.i_neighbour & MB_TOP )
172
-            {
173
-                if( h->mb.field[h->mb.i_mb_top_xy] != MB_INTERLACED )
174
-                    h->mb.b_allow_skip = 0;
175
-            }
176
-            else // Frame mb pair is predicted
177
-            {
178
-                if( MB_INTERLACED )
179
-                    h->mb.b_allow_skip = 0;
180
-            }
181
-        }
182
     }
183
 
184
     if( h->param.b_cabac )
185
x264-snapshot-20130723-2245.tar.bz2/common/macroblock.h -> x264-snapshot-20140321-2245.tar.bz2/common/macroblock.h Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * macroblock.h: macroblock common functions
4
  *****************************************************************************
5
- * Copyright (C) 2005-2013 x264 project
6
+ * Copyright (C) 2005-2014 x264 project
7
  *
8
  * Authors: Loren Merritt <lorenm@u.washington.edu>
9
  *          Laurent Aimar <fenrir@via.ecp.fr>
10
x264-snapshot-20130723-2245.tar.bz2/common/mc.c -> x264-snapshot-20140321-2245.tar.bz2/common/mc.c Changed
177
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * mc.c: motion compensation
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
  *          Loren Merritt <lorenm@u.washington.edu>
10
@@ -336,6 +336,34 @@
11
     }
12
 }
13
 
14
+void x264_plane_copy_deinterleave_v210_c( pixel *dsty, intptr_t i_dsty,
15
+                                          pixel *dstc, intptr_t i_dstc,
16
+                                          uint32_t *src, intptr_t i_src, int w, int h )
17
+{
18
+    for( int l = 0; l < h; l++ )
19
+    {
20
+        pixel *dsty0 = dsty;
21
+        pixel *dstc0 = dstc;
22
+        uint32_t *src0 = src;
23
+
24
+        for( int n = 0; n < w; n += 3 )
25
+        {
26
+            *(dstc0++) = *src0 & 0x03FF;
27
+            *(dsty0++) = ( *src0 >> 10 ) & 0x03FF;
28
+            *(dstc0++) = ( *src0 >> 20 ) & 0x03FF;
29
+            src0++;
30
+            *(dsty0++) = *src0 & 0x03FF;
31
+            *(dstc0++) = ( *src0 >> 10 ) & 0x03FF;
32
+            *(dsty0++) = ( *src0 >> 20 ) & 0x03FF;
33
+            src0++;
34
+        }
35
+
36
+        dsty += i_dsty;
37
+        dstc += i_dstc;
38
+        src  += i_src;
39
+    }
40
+}
41
+
42
 static void store_interleave_chroma( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height )
43
 {
44
     for( int y=0; y<height; y++, dst+=i_dst, srcu+=FDEC_STRIDE, srcv+=FDEC_STRIDE )
45
@@ -455,20 +483,97 @@
46
 
47
 /* Estimate the total amount of influence on future quality that could be had if we
48
  * were to improve the reference samples used to inter predict any given macroblock. */
49
-static void mbtree_propagate_cost( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
50
+static void mbtree_propagate_cost( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
51
                                    uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len )
52
 {
53
-    float fps = *fps_factor / 256.f;
54
+    float fps = *fps_factor;
55
     for( int i = 0; i < len; i++ )
56
     {
57
-        float intra_cost       = intra_costs[i] * inv_qscales[i];
58
-        float propagate_amount = propagate_in[i] + intra_cost*fps;
59
-        float propagate_num    = intra_costs[i] - (inter_costs[i] & LOWRES_COST_MASK);
60
-        float propagate_denom  = intra_costs[i];
61
-        dst[i] = (int)(propagate_amount * propagate_num / propagate_denom + 0.5f);
62
+        int intra_cost = intra_costs[i];
63
+        int inter_cost = X264_MIN(intra_costs[i], inter_costs[i] & LOWRES_COST_MASK);
64
+        float propagate_intra  = intra_cost * inv_qscales[i];
65
+        float propagate_amount = propagate_in[i] + propagate_intra*fps;
66
+        float propagate_num    = intra_cost - inter_cost;
67
+        float propagate_denom  = intra_cost;
68
+        dst[i] = X264_MIN((int)(propagate_amount * propagate_num / propagate_denom + 0.5f), 32767);
69
     }
70
 }
71
 
72
+static void mbtree_propagate_list( x264_t *h, uint16_t *ref_costs, int16_t (*mvs)[2],
73
+                                   int16_t *propagate_amount, uint16_t *lowres_costs,
74
+                                   int bipred_weight, int mb_y, int len, int list )
75
+{
76
+    unsigned stride = h->mb.i_mb_stride;
77
+    unsigned width = h->mb.i_mb_width;
78
+    unsigned height = h->mb.i_mb_height;
79
+
80
+    for( unsigned i = 0; i < len; i++ )
81
+    {
82
+#define CLIP_ADD(s,x) (s) = X264_MIN((s)+(x),(1<<15)-1)
83
+        int lists_used = lowres_costs[i]>>LOWRES_COST_SHIFT;
84
+
85
+        if( !(lists_used & (1 << list)) )
86
+            continue;
87
+
88
+        int listamount = propagate_amount[i];
89
+        /* Apply bipred weighting. */
90
+        if( lists_used == 3 )
91
+            listamount = (listamount * bipred_weight + 32) >> 6;
92
+
93
+        /* Early termination for simple case of mv0. */
94
+        if( !M32( mvs[i] ) )
95
+        {
96
+            CLIP_ADD( ref_costs[mb_y*stride + i], listamount );
97
+            continue;
98
+        }
99
+
100
+        int x = mvs[i][0];
101
+        int y = mvs[i][1];
102
+        unsigned mbx = (x>>5)+i;
103
+        unsigned mby = (y>>5)+mb_y;
104
+        unsigned idx0 = mbx + mby * stride;
105
+        unsigned idx2 = idx0 + stride;
106
+        x &= 31;
107
+        y &= 31;
108
+        int idx0weight = (32-y)*(32-x);
109
+        int idx1weight = (32-y)*x;
110
+        int idx2weight = y*(32-x);
111
+        int idx3weight = y*x;
112
+        idx0weight = (idx0weight * listamount + 512) >> 10;
113
+        idx1weight = (idx1weight * listamount + 512) >> 10;
114
+        idx2weight = (idx2weight * listamount + 512) >> 10;
115
+        idx3weight = (idx3weight * listamount + 512) >> 10;
116
+
117
+        if( mbx < width-1 && mby < height-1 )
118
+        {
119
+            CLIP_ADD( ref_costs[idx0+0], idx0weight );
120
+            CLIP_ADD( ref_costs[idx0+1], idx1weight );
121
+            CLIP_ADD( ref_costs[idx2+0], idx2weight );
122
+            CLIP_ADD( ref_costs[idx2+1], idx3weight );
123
+        }
124
+        else
125
+        {
126
+            /* Note: this takes advantage of unsigned representation to
127
+             * catch negative mbx/mby. */
128
+            if( mby < height )
129
+            {
130
+                if( mbx < width )
131
+                    CLIP_ADD( ref_costs[idx0+0], idx0weight );
132
+                if( mbx+1 < width )
133
+                    CLIP_ADD( ref_costs[idx0+1], idx1weight );
134
+            }
135
+            if( mby+1 < height )
136
+            {
137
+                if( mbx < width )
138
+                    CLIP_ADD( ref_costs[idx2+0], idx2weight );
139
+                if( mbx+1 < width )
140
+                    CLIP_ADD( ref_costs[idx2+1], idx3weight );
141
+            }
142
+        }
143
+    }
144
+#undef CLIP_ADD
145
+}
146
+
147
 void x264_mc_init( int cpu, x264_mc_functions_t *pf, int cpu_independent )
148
 {
149
     pf->mc_luma   = mc_luma;
150
@@ -507,6 +612,7 @@
151
     pf->plane_copy_interleave = x264_plane_copy_interleave_c;
152
     pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_c;
153
     pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_c;
154
+    pf->plane_copy_deinterleave_v210 = x264_plane_copy_deinterleave_v210_c;
155
 
156
     pf->hpel_filter = hpel_filter;
157
 
158
@@ -523,6 +629,7 @@
159
     pf->integral_init8v = integral_init8v;
160
 
161
     pf->mbtree_propagate_cost = mbtree_propagate_cost;
162
+    pf->mbtree_propagate_list = mbtree_propagate_list;
163
 
164
 #if HAVE_MMX
165
     x264_mc_init_mmx( cpu, pf );
166
@@ -536,7 +643,10 @@
167
 #endif
168
 
169
     if( cpu_independent )
170
+    {
171
         pf->mbtree_propagate_cost = mbtree_propagate_cost;
172
+        pf->mbtree_propagate_list = mbtree_propagate_list;
173
+    }
174
 }
175
 
176
 void x264_frame_filter( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
177
x264-snapshot-20130723-2245.tar.bz2/common/mc.h -> x264-snapshot-20140321-2245.tar.bz2/common/mc.h Changed
34
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * mc.h: motion compensation
4
  *****************************************************************************
5
- * Copyright (C) 2004-2013 x264 project
6
+ * Copyright (C) 2004-2014 x264 project
7
  *
8
  * Authors: Loren Merritt <lorenm@u.washington.edu>
9
  *
10
@@ -93,6 +93,9 @@
11
                                      pixel *src,  intptr_t i_src, int w, int h );
12
     void (*plane_copy_deinterleave_rgb)( pixel *dsta, intptr_t i_dsta, pixel *dstb, intptr_t i_dstb,
13
                                          pixel *dstc, intptr_t i_dstc, pixel *src,  intptr_t i_src, int pw, int w, int h );
14
+    void (*plane_copy_deinterleave_v210)( pixel *dsty, intptr_t i_dsty,
15
+                                          pixel *dstc, intptr_t i_dstc,
16
+                                          uint32_t *src, intptr_t i_src, int w, int h );
17
     void (*hpel_filter)( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src,
18
                          intptr_t i_stride, int i_width, int i_height, int16_t *buf );
19
 
20
@@ -119,8 +122,12 @@
21
     weight_fn_t *offsetsub;
22
     void (*weight_cache)( x264_t *, x264_weight_t * );
23
 
24
-    void (*mbtree_propagate_cost)( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
25
+    void (*mbtree_propagate_cost)( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
26
                                    uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
27
+
28
+    void (*mbtree_propagate_list)( x264_t *h, uint16_t *ref_costs, int16_t (*mvs)[2],
29
+                                   int16_t *propagate_amount, uint16_t *lowres_costs,
30
+                                   int bipred_weight, int mb_y, int len, int list );
31
 } x264_mc_functions_t;
32
 
33
 void x264_mc_init( int cpu, x264_mc_functions_t *pf, int cpu_independent );
34
x264-snapshot-20130723-2245.tar.bz2/common/mvpred.c -> x264-snapshot-20140321-2245.tar.bz2/common/mvpred.c Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * mvpred.c: motion vector prediction
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Loren Merritt <lorenm@u.washington.edu>
9
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
10
x264-snapshot-20130723-2245.tar.bz2/common/opencl.c -> x264-snapshot-20140321-2245.tar.bz2/common/opencl.c Changed
74
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * opencl.c: OpenCL initialization and kernel compilation
4
  *****************************************************************************
5
- * Copyright (C) 2012-2013 x264 project
6
+ * Copyright (C) 2012-2014 x264 project
7
  *
8
  * Authors: Steve Borho <sborho@multicorewareinc.com>
9
  *          Anton Mitrofanov <BugMaster@narod.ru>
10
@@ -28,7 +28,7 @@
11
 
12
 #ifdef _WIN32
13
 #include <windows.h>
14
-#define ocl_open LoadLibrary( "OpenCL" )
15
+#define ocl_open LoadLibraryW( L"OpenCL" )
16
 #define ocl_close FreeLibrary
17
 #define ocl_address GetProcAddress
18
 #else
19
@@ -119,10 +119,10 @@
20
 
21
 /* Try to load the cached compiled program binary, verify the device context is
22
  * still valid before reuse */
23
-static cl_program x264_opencl_cache_load( x264_t *h, char *dev_name, char *dev_vendor, char *driver_version )
24
+static cl_program x264_opencl_cache_load( x264_t *h, const char *dev_name, const char *dev_vendor, const char *driver_version )
25
 {
26
     /* try to load cached program binary */
27
-    FILE *fp = fopen( h->param.psz_clbin_file, "rb" );
28
+    FILE *fp = x264_fopen( h->param.psz_clbin_file, "rb" );
29
     if( !fp )
30
         return NULL;
31
 
32
@@ -167,9 +167,9 @@
33
 
34
 /* Save the compiled program binary to a file for later reuse.  Device context
35
  * is also saved in the cache file so we do not reuse stale binaries */
36
-static void x264_opencl_cache_save( x264_t *h, cl_program program, char *dev_name, char *dev_vendor, char *driver_version )
37
+static void x264_opencl_cache_save( x264_t *h, cl_program program, const char *dev_name, const char *dev_vendor, const char *driver_version )
38
 {
39
-    FILE *fp = fopen( h->param.psz_clbin_file, "wb" );
40
+    FILE *fp = x264_fopen( h->param.psz_clbin_file, "wb" );
41
     if( !fp )
42
     {
43
         x264_log( h, X264_LOG_INFO, "OpenCL: unable to open clbin file for write\n" );
44
@@ -304,7 +304,7 @@
45
         goto fail;
46
     }
47
 
48
-    FILE *log_file = fopen( "x264_kernel_build_log.txt", "w" );
49
+    FILE *log_file = x264_fopen( "x264_kernel_build_log.txt", "w" );
50
     if( !log_file )
51
     {
52
         x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to create file x264_kernel_build_log.txt\n" );
53
@@ -672,9 +672,9 @@
54
     int ret = 0;
55
 
56
 #ifdef _WIN32
57
-    hDLL = LoadLibrary( "atiadlxx.dll" );
58
+    hDLL = LoadLibraryW( L"atiadlxx.dll" );
59
     if( !hDLL )
60
-        hDLL = LoadLibrary( "atiadlxy.dll" );
61
+        hDLL = LoadLibraryW( L"atiadlxy.dll" );
62
 #else
63
     hDLL = dlopen( "libatiadlxx.so", RTLD_LAZY|RTLD_GLOBAL );
64
 #endif
65
@@ -685,7 +685,7 @@
66
     ADL_Main_Control_Destroy         = (ADL_MAIN_CONTROL_DESTROY)adl_address(hDLL, "ADL_Main_Control_Destroy");
67
     ADL_Adapter_NumberOfAdapters_Get = (ADL_ADAPTER_NUMBEROFADAPTERS_GET)adl_address(hDLL, "ADL_Adapter_NumberOfAdapters_Get");
68
     ADL_PowerXpress_Scheme_Get       = (ADL_POWERXPRESS_SCHEME_GET)adl_address(hDLL, "ADL_PowerXpress_Scheme_Get");
69
-    if( !ADL_Main_Control_Destroy || !ADL_Main_Control_Destroy || !ADL_Adapter_NumberOfAdapters_Get ||
70
+    if( !ADL_Main_Control_Create || !ADL_Main_Control_Destroy || !ADL_Adapter_NumberOfAdapters_Get ||
71
         !ADL_PowerXpress_Scheme_Get )
72
         goto fail1;
73
 
74
x264-snapshot-20130723-2245.tar.bz2/common/opencl.h -> x264-snapshot-20140321-2245.tar.bz2/common/opencl.h Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * opencl.h: OpenCL structures and defines
4
  *****************************************************************************
5
- * Copyright (C) 2012-2013 x264 project
6
+ * Copyright (C) 2012-2014 x264 project
7
  *
8
  * Authors: Steve Borho <sborho@multicorewareinc.com>
9
  *          Anton Mitrofanov <BugMaster@narod.ru>
10
x264-snapshot-20130723-2245.tar.bz2/common/osdep.c -> x264-snapshot-20140321-2245.tar.bz2/common/osdep.c Changed
109
 
1
@@ -1,10 +1,11 @@
2
 /*****************************************************************************
3
  * osdep.c: platform-specific code
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Steven Walters <kemuri9@gmail.com>
9
  *          Laurent Aimar <fenrir@via.ecp.fr>
10
+ *          Henrik Gramner <henrik@gramner.com>
11
  *
12
  * This program is free software; you can redistribute it and/or modify
13
  * it under the terms of the GNU General Public License as published by
14
@@ -26,6 +27,11 @@
15
 
16
 #include "common.h"
17
 
18
+#ifdef _WIN32
19
+#include <windows.h>
20
+#include <io.h>
21
+#endif
22
+
23
 #if SYS_WINDOWS
24
 #include <sys/types.h>
25
 #include <sys/timeb.h>
26
@@ -35,8 +41,6 @@
27
 #include <time.h>
28
 
29
 #if PTW32_STATIC_LIB
30
-#define WIN32_LEAN_AND_MEAN
31
-#include <windows.h>
32
 /* this is a global in pthread-win32 to indicate if it has been initialized or not */
33
 extern int ptw32_processInitialized;
34
 #endif
35
@@ -134,3 +138,73 @@
36
 {}
37
 #endif
38
 #endif
39
+
40
+#ifdef _WIN32
41
+/* Functions for dealing with Unicode on Windows. */
42
+FILE *x264_fopen( const char *filename, const char *mode )
43
+{
44
+    wchar_t filename_utf16[MAX_PATH];
45
+    wchar_t mode_utf16[16];
46
+    if( utf8_to_utf16( filename, filename_utf16 ) && utf8_to_utf16( mode, mode_utf16 ) )
47
+        return _wfopen( filename_utf16, mode_utf16 );
48
+    return NULL;
49
+}
50
+
51
+int x264_rename( const char *oldname, const char *newname )
52
+{
53
+    wchar_t oldname_utf16[MAX_PATH];
54
+    wchar_t newname_utf16[MAX_PATH];
55
+    if( utf8_to_utf16( oldname, oldname_utf16 ) && utf8_to_utf16( newname, newname_utf16 ) )
56
+    {
57
+        /* POSIX says that rename() removes the destination, but Win32 doesn't. */
58
+        _wunlink( newname_utf16 );
59
+        return _wrename( oldname_utf16, newname_utf16 );
60
+    }
61
+    return -1;
62
+}
63
+
64
+int x264_stat( const char *path, x264_struct_stat *buf )
65
+{
66
+    wchar_t path_utf16[MAX_PATH];
67
+    if( utf8_to_utf16( path, path_utf16 ) )
68
+        return _wstati64( path_utf16, buf );
69
+    return -1;
70
+}
71
+
72
+int x264_vfprintf( FILE *stream, const char *format, va_list arg )
73
+{
74
+    HANDLE console = NULL;
75
+    DWORD mode;
76
+
77
+    if( stream == stdout )
78
+        console = GetStdHandle( STD_OUTPUT_HANDLE );
79
+    else if( stream == stderr )
80
+        console = GetStdHandle( STD_ERROR_HANDLE );
81
+
82
+    /* Only attempt to convert to UTF-16 when writing to a non-redirected console screen buffer. */
83
+    if( GetConsoleMode( console, &mode ) )
84
+    {
85
+        char buf[4096];
86
+        wchar_t buf_utf16[4096];
87
+
88
+        int length = vsnprintf( buf, sizeof(buf), format, arg );
89
+        if( length > 0 && length < sizeof(buf) )
90
+        {
91
+            /* WriteConsoleW is the most reliable way to output Unicode to a console. */
92
+            int length_utf16 = MultiByteToWideChar( CP_UTF8, 0, buf, length, buf_utf16, sizeof(buf_utf16)/sizeof(wchar_t) );
93
+            DWORD written;
94
+            WriteConsoleW( console, buf_utf16, length_utf16, &written, NULL );
95
+            return length;
96
+        }
97
+    }
98
+    return vfprintf( stream, format, arg );
99
+}
100
+
101
+int x264_is_pipe( const char *path )
102
+{
103
+    wchar_t path_utf16[MAX_PATH];
104
+    if( utf8_to_utf16( path, path_utf16 ) )
105
+        return WaitNamedPipeW( path_utf16, 0 );
106
+    return 0;
107
+}
108
+#endif
109
x264-snapshot-20130723-2245.tar.bz2/common/osdep.h -> x264-snapshot-20140321-2245.tar.bz2/common/osdep.h Changed
122
 
1
@@ -1,10 +1,11 @@
2
 /*****************************************************************************
3
  * osdep.h: platform-specific code
4
  *****************************************************************************
5
- * Copyright (C) 2007-2013 x264 project
6
+ * Copyright (C) 2007-2014 x264 project
7
  *
8
  * Authors: Loren Merritt <lorenm@u.washington.edu>
9
  *          Laurent Aimar <fenrir@via.ecp.fr>
10
+ *          Henrik Gramner <henrik@gramner.com>
11
  *
12
  * This program is free software; you can redistribute it and/or modify
13
  * it under the terms of the GNU General Public License as published by
14
@@ -32,19 +33,21 @@
15
 #include <stdio.h>
16
 #include <sys/stat.h>
17
 #include <inttypes.h>
18
+#include <stdarg.h>
19
 
20
 #include "config.h"
21
 
22
+#ifdef __INTEL_COMPILER
23
+#include <mathimf.h>
24
+#else
25
+#include <math.h>
26
+#endif
27
+
28
 #if !HAVE_LOG2F
29
 #define log2f(x) (logf(x)/0.693147180559945f)
30
 #define log2(x) (log(x)/0.693147180559945)
31
 #endif
32
 
33
-#ifdef _WIN32
34
-#include <io.h>    // _setmode()
35
-#include <fcntl.h> // _O_BINARY
36
-#endif
37
-
38
 #ifdef __ICL
39
 #define inline __inline
40
 #define strcasecmp _stricmp
41
@@ -54,12 +57,6 @@
42
 #define S_ISREG(x) (((x) & S_IFMT) == S_IFREG)
43
 #endif
44
 
45
-#ifdef __INTEL_COMPILER
46
-#include <mathimf.h>
47
-#else
48
-#include <math.h>
49
-#endif
50
-
51
 #if (defined(__GNUC__) || defined(__INTEL_COMPILER)) && (ARCH_X86 || ARCH_X86_64)
52
 #define HAVE_X86_INLINE_ASM 1
53
 #endif
54
@@ -67,11 +64,29 @@
55
 #if !defined(isfinite) && (SYS_OPENBSD || SYS_SunOS)
56
 #define isfinite finite
57
 #endif
58
+
59
 #ifdef _WIN32
60
-#define rename(src,dst) (unlink(dst), rename(src,dst)) // POSIX says that rename() removes the destination, but win32 doesn't.
61
 #ifndef strtok_r
62
 #define strtok_r(str,delim,save) strtok(str,delim)
63
 #endif
64
+
65
+#define utf8_to_utf16( utf8, utf16 )\
66
+    MultiByteToWideChar( CP_UTF8, MB_ERR_INVALID_CHARS, utf8, -1, utf16, sizeof(utf16)/sizeof(wchar_t) )
67
+FILE *x264_fopen( const char *filename, const char *mode );
68
+int x264_rename( const char *oldname, const char *newname );
69
+#define x264_struct_stat struct _stati64
70
+#define x264_fstat _fstati64
71
+int x264_stat( const char *path, x264_struct_stat *buf );
72
+int x264_vfprintf( FILE *stream, const char *format, va_list arg );
73
+int x264_is_pipe( const char *path );
74
+#else
75
+#define x264_fopen       fopen
76
+#define x264_rename      rename
77
+#define x264_struct_stat struct stat
78
+#define x264_fstat       fstat
79
+#define x264_stat        stat
80
+#define x264_vfprintf    vfprintf
81
+#define x264_is_pipe(x)  0
82
 #endif
83
 
84
 #ifdef __ICL
85
@@ -111,7 +126,7 @@
86
 
87
 #define EXPAND(x) x
88
 
89
-#if HAVE_32B_STACK_ALIGNMENT
90
+#if STACK_ALIGNMENT >= 32
91
 #define ALIGNED_ARRAY_32( type, name, sub1, ... )\
92
     ALIGNED_32( type name sub1 __VA_ARGS__ )
93
 #else
94
@@ -364,19 +379,19 @@
95
 #define x264_lower_thread_priority(p)
96
 #endif
97
 
98
-static inline uint8_t x264_is_regular_file( FILE *filehandle )
99
+static inline int x264_is_regular_file( FILE *filehandle )
100
 {
101
-    struct stat file_stat;
102
-    if( fstat( fileno( filehandle ), &file_stat ) )
103
-        return -1;
104
+    x264_struct_stat file_stat;
105
+    if( x264_fstat( fileno( filehandle ), &file_stat ) )
106
+        return 1;
107
     return S_ISREG( file_stat.st_mode );
108
 }
109
 
110
-static inline uint8_t x264_is_regular_file_path( const char *filename )
111
+static inline int x264_is_regular_file_path( const char *filename )
112
 {
113
-    struct stat file_stat;
114
-    if( stat( filename, &file_stat ) )
115
-        return -1;
116
+    x264_struct_stat file_stat;
117
+    if( x264_stat( filename, &file_stat ) )
118
+        return !x264_is_pipe( filename );
119
     return S_ISREG( file_stat.st_mode );
120
 }
121
 
122
x264-snapshot-20130723-2245.tar.bz2/common/pixel.c -> x264-snapshot-20140321-2245.tar.bz2/common/pixel.c Changed
173
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * pixel.c: pixel metrics
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Loren Merritt <lorenm@u.washington.edu>
9
  *          Laurent Aimar <fenrir@via.ecp.fr>
10
@@ -36,6 +36,7 @@
11
 #endif
12
 #if ARCH_ARM
13
 #   include "arm/pixel.h"
14
+#   include "arm/predict.h"
15
 #endif
16
 #if ARCH_UltraSPARC
17
 #   include "sparc/pixel.h"
18
@@ -532,6 +533,10 @@
19
 INTRA_MBCMP_8x8( sad, _mmx2,  _c )
20
 INTRA_MBCMP_8x8(sa8d, _sse2,  _sse2 )
21
 #endif
22
+#if !HIGH_BIT_DEPTH && HAVE_ARMV6
23
+INTRA_MBCMP_8x8( sad, _neon, _neon )
24
+INTRA_MBCMP_8x8(sa8d, _neon, _neon )
25
+#endif
26
 
27
 #define INTRA_MBCMP( mbcmp, size, pred1, pred2, pred3, chroma, cpu, cpu2 )\
28
 void x264_intra_##mbcmp##_x3_##size##chroma##cpu( pixel *fenc, pixel *fdec, int res[3] )\
29
@@ -555,16 +560,26 @@
30
 
31
 #if HAVE_MMX
32
 #if HIGH_BIT_DEPTH
33
+#define x264_predict_8x8c_v_mmx2 x264_predict_8x8c_v_mmx
34
+#define x264_predict_8x16c_v_mmx2 x264_predict_8x16c_v_c
35
 #define x264_predict_8x8c_v_sse2 x264_predict_8x8c_v_sse
36
 #define x264_predict_8x16c_v_sse2 x264_predict_8x16c_v_sse
37
 #define x264_predict_16x16_v_sse2 x264_predict_16x16_v_sse
38
 INTRA_MBCMP( sad,  4x4,   v, h, dc,  , _mmx2, _c )
39
-INTRA_MBCMP( sad,  8x8,  dc, h,  v, c, _mmx2, _c )
40
+INTRA_MBCMP( sad,  8x8,  dc, h,  v, c, _mmx2, _mmx2 )
41
+INTRA_MBCMP( sad,  8x16, dc, h,  v, c, _mmx2, _mmx2 )
42
+INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _mmx2, _mmx2 )
43
 INTRA_MBCMP( sad, 16x16,  v, h, dc,  , _mmx2, _mmx2 )
44
 INTRA_MBCMP( sad,  8x8,  dc, h,  v, c, _sse2, _sse2 )
45
+INTRA_MBCMP( sad,  8x16, dc, h,  v, c, _sse2, _sse2 )
46
+INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _sse2, _sse2 )
47
 INTRA_MBCMP( sad, 16x16,  v, h, dc,  , _sse2, _sse2 )
48
 INTRA_MBCMP( sad,  8x8,  dc, h,  v, c, _ssse3, _sse2 )
49
+INTRA_MBCMP( sad,  8x16, dc, h,  v, c, _ssse3, _sse2 )
50
+INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _ssse3, _sse2 )
51
 INTRA_MBCMP( sad, 16x16,  v, h, dc,  , _ssse3, _sse2 )
52
+INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _sse4, _sse2 )
53
+INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _avx, _sse2 )
54
 #else
55
 #define x264_predict_8x16c_v_mmx2 x264_predict_8x16c_v_mmx
56
 INTRA_MBCMP( sad,  8x16, dc, h,  v, c, _mmx2, _mmx2 )
57
@@ -577,6 +592,16 @@
58
 INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _xop, _mmx2 )
59
 #endif
60
 #endif
61
+#if !HIGH_BIT_DEPTH && HAVE_ARMV6
62
+INTRA_MBCMP( sad,  4x4,   v, h, dc,  , _neon, _c )
63
+INTRA_MBCMP(satd,  4x4,   v, h, dc,  , _neon, _c )
64
+INTRA_MBCMP( sad,  8x8,  dc, h,  v, c, _neon, _neon )
65
+INTRA_MBCMP(satd,  8x8,  dc, h,  v, c, _neon, _neon )
66
+INTRA_MBCMP( sad,  8x16, dc, h,  v, c, _neon, _c )
67
+INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _neon, _c )
68
+INTRA_MBCMP( sad, 16x16,  v, h, dc,  , _neon, _neon )
69
+INTRA_MBCMP(satd, 16x16,  v, h, dc,  , _neon, _neon )
70
+#endif
71
 
72
 // No C implementation of intra_satd_x9. See checkasm for its behavior,
73
 // or see x264_mb_analyse_intra for the entirely different algorithm we
74
@@ -868,6 +893,8 @@
75
         pixf->intra_sad_x3_8x8    = x264_intra_sad_x3_8x8_mmx2;
76
         pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c_mmx2;
77
         pixf->intra_satd_x3_8x8c  = x264_intra_satd_x3_8x8c_mmx2;
78
+        pixf->intra_sad_x3_8x16c  = x264_intra_sad_x3_8x16c_mmx2;
79
+        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_mmx2;
80
         pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16_mmx2;
81
         pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_mmx2;
82
     }
83
@@ -909,6 +936,8 @@
84
         pixf->asd8 = x264_pixel_asd8_sse2;
85
         pixf->intra_sad_x3_8x8    = x264_intra_sad_x3_8x8_sse2;
86
         pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c_sse2;
87
+        pixf->intra_sad_x3_8x16c  = x264_intra_sad_x3_8x16c_sse2;
88
+        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_sse2;
89
         pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16_sse2;
90
     }
91
     if( cpu&X264_CPU_SSE2_IS_FAST )
92
@@ -948,6 +977,8 @@
93
         pixf->intra_sad_x3_4x4    = x264_intra_sad_x3_4x4_ssse3;
94
         pixf->intra_sad_x3_8x8    = x264_intra_sad_x3_8x8_ssse3;
95
         pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c_ssse3;
96
+        pixf->intra_sad_x3_8x16c  = x264_intra_sad_x3_8x16c_ssse3;
97
+        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_ssse3;
98
         pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16_ssse3;
99
     }
100
     if( cpu&X264_CPU_SSE4 )
101
@@ -963,6 +994,7 @@
102
 #if ARCH_X86_64
103
         pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_sse4;
104
 #endif
105
+        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_sse4;
106
     }
107
     if( cpu&X264_CPU_AVX )
108
     {
109
@@ -985,6 +1017,7 @@
110
 #if ARCH_X86_64
111
         pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_avx;
112
 #endif
113
+        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_avx;
114
     }
115
     if( cpu&X264_CPU_XOP )
116
     {
117
@@ -1119,12 +1152,6 @@
118
                pixf->sad_x4[PIXEL_8x16] = x264_pixel_sad_x4_8x16_cache64_sse2;
119
            }
120
         }
121
-
122
-        if( cpu&X264_CPU_SSE_MISALIGN )
123
-        {
124
-            INIT2( sad_x3, _sse2_misalign );
125
-            INIT2( sad_x4, _sse2_misalign );
126
-        }
127
     }
128
 
129
     if( cpu&X264_CPU_SSE2_IS_FAST && !(cpu&X264_CPU_CACHELINE_64) )
130
@@ -1201,9 +1228,8 @@
131
         }
132
         else
133
         {
134
-            pixf->sad_x4[PIXEL_8x4] = x264_pixel_sad_x4_8x4_ssse3;
135
-            pixf->sad_x4[PIXEL_8x8] = x264_pixel_sad_x4_8x8_ssse3;
136
-            pixf->sad_x4[PIXEL_8x16] = x264_pixel_sad_x4_8x16_ssse3;
137
+            INIT2( sad_x3, _ssse3 );
138
+            INIT5( sad_x4, _ssse3 );
139
         }
140
         if( (cpu&X264_CPU_SLOW_ATOM) || (cpu&X264_CPU_SLOW_SHUFFLE) )
141
         {
142
@@ -1237,6 +1263,8 @@
143
     if( cpu&X264_CPU_AVX )
144
     {
145
         INIT2_NAME( sad_aligned, sad, _sse2 ); /* AVX-capable CPUs doesn't benefit from an aligned version */
146
+        INIT2( sad_x3, _avx );
147
+        INIT2( sad_x4, _avx );
148
         INIT8( satd, _avx );
149
         INIT7( satd_x3, _avx );
150
         INIT7( satd_x4, _avx );
151
@@ -1334,8 +1362,21 @@
152
         pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_neon;
153
         pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_neon;
154
         pixf->var[PIXEL_8x8]    = x264_pixel_var_8x8_neon;
155
+        pixf->var[PIXEL_8x16]   = x264_pixel_var_8x16_neon;
156
         pixf->var[PIXEL_16x16]  = x264_pixel_var_16x16_neon;
157
         pixf->var2[PIXEL_8x8]   = x264_pixel_var2_8x8_neon;
158
+        pixf->var2[PIXEL_8x16]  = x264_pixel_var2_8x16_neon;
159
+
160
+        pixf->intra_sad_x3_4x4    = x264_intra_sad_x3_4x4_neon;
161
+        pixf->intra_satd_x3_4x4   = x264_intra_satd_x3_4x4_neon;
162
+        pixf->intra_sad_x3_8x8    = x264_intra_sad_x3_8x8_neon;
163
+        pixf->intra_sa8d_x3_8x8   = x264_intra_sa8d_x3_8x8_neon;
164
+        pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c_neon;
165
+        pixf->intra_satd_x3_8x8c  = x264_intra_satd_x3_8x8c_neon;
166
+        pixf->intra_sad_x3_8x16c  = x264_intra_sad_x3_8x16c_neon;
167
+        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_neon;
168
+        pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16_neon;
169
+        pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_neon;
170
 
171
         pixf->ssim_4x4x2_core   = x264_pixel_ssim_4x4x2_core_neon;
172
         pixf->ssim_end4         = x264_pixel_ssim_end4_neon;
173
x264-snapshot-20130723-2245.tar.bz2/common/pixel.h -> x264-snapshot-20140321-2245.tar.bz2/common/pixel.h Changed
15
 
1
@@ -1,11 +1,11 @@
2
 /*****************************************************************************
3
  * pixel.c: pixel metrics
4
  *****************************************************************************
5
- * Copyright (C) 2004-2013 x264 project
6
+ * Copyright (C) 2004-2014 x264 project
7
  *
8
  * Authors: Loren Merritt <lorenm@u.washington.edu>
9
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
10
-            Henrik Gramner <hengar-6@student.ltu.se>
11
+            Henrik Gramner <henrik@gramner.com>
12
  *
13
  * This program is free software; you can redistribute it and/or modify
14
  * it under the terms of the GNU General Public License as published by
15
x264-snapshot-20130723-2245.tar.bz2/common/ppc/dct.c -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/dct.c Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * dct.c: ppc transform and zigzag
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Guillaume Poirier <gpoirier@mplayerhq.hu>
9
  *          Eric Petit <eric.petit@lapsus.org>
10
x264-snapshot-20130723-2245.tar.bz2/common/ppc/dct.h -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/dct.h Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * dct.h: ppc transform and zigzag
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Eric Petit <eric.petit@lapsus.org>
9
  *          Guillaume Poirier <gpoirier@mplayerhq.hu>
10
x264-snapshot-20130723-2245.tar.bz2/common/ppc/deblock.c -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/deblock.c Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * deblock.c: ppc deblocking
4
  *****************************************************************************
5
- * Copyright (C) 2007-2013 x264 project
6
+ * Copyright (C) 2007-2014 x264 project
7
  *
8
  * Authors: Guillaume Poirier <gpoirier@mplayerhq.hu>
9
  *
10
x264-snapshot-20130723-2245.tar.bz2/common/ppc/mc.c -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/mc.c Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * mc.c: ppc motion compensation
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Eric Petit <eric.petit@lapsus.org>
9
  *          Guillaume Poirier <gpoirier@mplayerhq.hu>
10
x264-snapshot-20130723-2245.tar.bz2/common/ppc/mc.h -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/mc.h Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * mc.h: ppc motion compensation
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Eric Petit <eric.petit@lapsus.org>
9
  *
10
x264-snapshot-20130723-2245.tar.bz2/common/ppc/pixel.c -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/pixel.c Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * pixel.c: ppc pixel metrics
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Eric Petit <eric.petit@lapsus.org>
9
  *          Guillaume Poirier <gpoirier@mplayerhq.hu>
10
x264-snapshot-20130723-2245.tar.bz2/common/ppc/pixel.h -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/pixel.h Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * pixel.h: ppc pixel metrics
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Eric Petit <eric.petit@lapsus.org>
9
  *
10
x264-snapshot-20130723-2245.tar.bz2/common/ppc/ppccommon.h -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/ppccommon.h Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * ppccommon.h: ppc utility macros
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Eric Petit <eric.petit@lapsus.org>
9
  *
10
x264-snapshot-20130723-2245.tar.bz2/common/ppc/predict.c -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/predict.c Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * predict.c: ppc intra prediction
4
  *****************************************************************************
5
- * Copyright (C) 2007-2013 x264 project
6
+ * Copyright (C) 2007-2014 x264 project
7
  *
8
  * Authors: Guillaume Poirier <gpoirier@mplayerhq.hu>
9
  *
10
x264-snapshot-20130723-2245.tar.bz2/common/ppc/predict.h -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/predict.h Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * predict.h: ppc intra prediction
4
  *****************************************************************************
5
- * Copyright (C) 2007-2013 x264 project
6
+ * Copyright (C) 2007-2014 x264 project
7
  *
8
  * Authors: Guillaume Poirier <gpoirier@mplayerhq.hu>
9
  *
10
x264-snapshot-20130723-2245.tar.bz2/common/ppc/quant.c -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/quant.c Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * quant.c: ppc quantization
4
  *****************************************************************************
5
- * Copyright (C) 2007-2013 x264 project
6
+ * Copyright (C) 2007-2014 x264 project
7
  *
8
  * Authors: Guillaume Poirier <gpoirier@mplayerhq.hu>
9
  *
10
x264-snapshot-20130723-2245.tar.bz2/common/ppc/quant.h -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/quant.h Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * quant.c: ppc quantization
4
  *****************************************************************************
5
- * Copyright (C) 2007-2013 x264 project
6
+ * Copyright (C) 2007-2014 x264 project
7
  *
8
  * Authors: Guillaume Poirier <gpoirier@mplayerhq.hu>
9
  *
10
x264-snapshot-20130723-2245.tar.bz2/common/predict.c -> x264-snapshot-20140321-2245.tar.bz2/common/predict.c Changed
16
 
1
@@ -1,12 +1,12 @@
2
 /*****************************************************************************
3
  * predict.c: intra prediction
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
  *          Loren Merritt <lorenm@u.washington.edu>
10
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
11
- *          Henrik Gramner <hengar-6@student.ltu.se>
12
+ *          Henrik Gramner <henrik@gramner.com>
13
  *
14
  * This program is free software; you can redistribute it and/or modify
15
  * it under the terms of the GNU General Public License as published by
16
x264-snapshot-20130723-2245.tar.bz2/common/predict.h -> x264-snapshot-20140321-2245.tar.bz2/common/predict.h Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * predict.h: intra prediction
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Loren Merritt <lorenm@u.washington.edu>
9
  *          Laurent Aimar <fenrir@via.ecp.fr>
10
x264-snapshot-20130723-2245.tar.bz2/common/quant.c -> x264-snapshot-20140321-2245.tar.bz2/common/quant.c Changed
16
 
1
@@ -1,12 +1,12 @@
2
 /*****************************************************************************
3
  * quant.c: quantization and level-run
4
  *****************************************************************************
5
- * Copyright (C) 2005-2013 x264 project
6
+ * Copyright (C) 2005-2014 x264 project
7
  *
8
  * Authors: Loren Merritt <lorenm@u.washington.edu>
9
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
10
  *          Christian Heine <sennindemokrit@gmx.net>
11
- *          Henrik Gramner <hengar-6@student.ltu.se>
12
+ *          Henrik Gramner <henrik@gramner.com>
13
  *
14
  * This program is free software; you can redistribute it and/or modify
15
  * it under the terms of the GNU General Public License as published by
16
x264-snapshot-20130723-2245.tar.bz2/common/quant.h -> x264-snapshot-20140321-2245.tar.bz2/common/quant.h Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * quant.h: quantization and level-run
4
  *****************************************************************************
5
- * Copyright (C) 2005-2013 x264 project
6
+ * Copyright (C) 2005-2014 x264 project
7
  *
8
  * Authors: Loren Merritt <lorenm@u.washington.edu>
9
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
10
x264-snapshot-20130723-2245.tar.bz2/common/rectangle.c -> x264-snapshot-20140321-2245.tar.bz2/common/rectangle.c Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * rectangle.c: rectangle filling
4
  *****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
  *
8
  * Authors: Jason Garrett-Glaser <darkshikari@gmail.com>
9
  *
10
x264-snapshot-20130723-2245.tar.bz2/common/rectangle.h -> x264-snapshot-20140321-2245.tar.bz2/common/rectangle.h Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * rectangle.h: rectangle filling
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Jason Garrett-Glaser <darkshikari@gmail.com>
9
  *          Loren Merritt <lorenm@u.washington.edu>
10
x264-snapshot-20130723-2245.tar.bz2/common/set.c -> x264-snapshot-20140321-2245.tar.bz2/common/set.c Changed
42
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * set.c: quantization init
4
  *****************************************************************************
5
- * Copyright (C) 2005-2013 x264 project
6
+ * Copyright (C) 2005-2014 x264 project
7
  *
8
  * Authors: Loren Merritt <lorenm@u.washington.edu>
9
  *
10
@@ -105,9 +105,9 @@
11
         }\
12
         else\
13
         {\
14
-            CHECKED_MALLOC( h->  quant##w##_mf[i], (QP_MAX+1)*size*sizeof(udctcoef) );\
15
+            CHECKED_MALLOC( h->  quant##w##_mf[i], (QP_MAX_SPEC+1)*size*sizeof(udctcoef) );\
16
             CHECKED_MALLOC( h->dequant##w##_mf[i],  6*size*sizeof(int) );\
17
-            CHECKED_MALLOC( h->unquant##w##_mf[i], (QP_MAX+1)*size*sizeof(int) );\
18
+            CHECKED_MALLOC( h->unquant##w##_mf[i], (QP_MAX_SPEC+1)*size*sizeof(int) );\
19
         }\
20
         for( j = 0; j < i; j++ )\
21
             if( deadzone[j] == deadzone[i] &&\
22
@@ -120,8 +120,8 @@
23
         }\
24
         else\
25
         {\
26
-            CHECKED_MALLOC( h->quant##w##_bias[i], (QP_MAX+1)*size*sizeof(udctcoef) );\
27
-            CHECKED_MALLOC( h->quant##w##_bias0[i], (QP_MAX+1)*size*sizeof(udctcoef) );\
28
+            CHECKED_MALLOC( h->quant##w##_bias[i], (QP_MAX_SPEC+1)*size*sizeof(udctcoef) );\
29
+            CHECKED_MALLOC( h->quant##w##_bias0[i], (QP_MAX_SPEC+1)*size*sizeof(udctcoef) );\
30
         }\
31
     }
32
 
33
@@ -159,7 +159,7 @@
34
                      quant8_mf[i_list][q][i] = DIV(def_quant8[q][i] * 16, h->pps->scaling_list[4+i_list][i]);
35
             }
36
     }
37
-    for( int q = 0; q < QP_MAX+1; q++ )
38
+    for( int q = 0; q <= QP_MAX_SPEC; q++ )
39
     {
40
         int j;
41
         for( int i_list = 0; i_list < 4; i_list++ )
42
x264-snapshot-20130723-2245.tar.bz2/common/set.h -> x264-snapshot-20140321-2245.tar.bz2/common/set.h Changed
109
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * set.h: quantization init
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Loren Merritt <lorenm@u.washington.edu>
9
  *          Laurent Aimar <fenrir@via.ecp.fr>
10
@@ -248,6 +248,98 @@
11
     x264_cqm_jvt8i, x264_cqm_jvt8p
12
 };
13
 
14
+// 1080i25_avci50, 1080p25_avci50
15
+static const uint8_t x264_cqm_avci50_4ic[16] =
16
+{
17
+    16,22,28,40,
18
+    22,28,40,44,
19
+    28,40,44,48,
20
+    40,44,48,60
21
+};
22
+
23
+//  1080i25_avci50,
24
+static const uint8_t x264_cqm_avci50_1080i_8iy[64] =
25
+{
26
+    16,18,19,21,27,33,81,87,
27
+    18,19,21,24,30,33,81,87,
28
+    19,21,24,27,30,78,84,90,
29
+    21,24,27,30,33,78,84,90,
30
+    24,27,30,33,78,81,84,90,
31
+    24,27,30,33,78,81,84,93,
32
+    27,30,33,78,78,81,87,93,
33
+    30,33,33,78,81,84,87,96
34
+};
35
+
36
+//  1080p25_avci50, 720p25_avci50, 720p50_avci50
37
+static const uint8_t x264_cqm_avci50_p_8iy[64] =
38
+{
39
+    16,18,19,21,24,27,30,33,
40
+    18,19,21,24,27,30,33,78,
41
+    19,21,24,27,30,33,78,81,
42
+    21,24,27,30,33,78,81,84,
43
+    24,27,30,33,78,81,84,87,
44
+    27,30,33,78,81,84,87,90,
45
+    30,33,78,81,84,87,90,93,
46
+    33,78,81,84,87,90,93,96
47
+};
48
+
49
+//  1080i25_avci100, 1080p25_avci100
50
+static const uint8_t x264_cqm_avci100_1080_4ic[16] =
51
+{
52
+    16,20,26,32,
53
+    20,26,32,38,
54
+    26,32,38,44,
55
+    32,38,44,50
56
+};
57
+
58
+// 720p25_avci100, 720p50_avci100
59
+static const uint8_t x264_cqm_avci100_720p_4ic[16] =
60
+{
61
+    16,21,27,34,
62
+    21,27,34,41,
63
+    27,34,41,46,
64
+    34,41,46,54
65
+};
66
+
67
+//  1080i25_avci100,
68
+static const uint8_t x264_cqm_avci100_1080i_8iy[64] =
69
+{
70
+    16,19,20,23,24,26,32,42,
71
+    18,19,22,24,26,32,36,42,
72
+    18,20,23,24,26,32,36,63,
73
+    19,20,23,26,32,36,42,63,
74
+    20,22,24,26,32,36,59,63,
75
+    22,23,24,26,32,36,59,68,
76
+    22,23,24,26,32,42,59,68,
77
+    22,23,24,26,36,42,59,72
78
+};
79
+
80
+// 1080p25_avci100,
81
+static const uint8_t x264_cqm_avci100_1080p_8iy[64] =
82
+{
83
+    16,18,19,20,22,23,24,26,
84
+    18,19,20,22,23,24,26,32,
85
+    19,20,22,23,24,26,32,36,
86
+    20,22,23,24,26,32,36,42,
87
+    22,23,24,26,32,36,42,59,
88
+    23,24,26,32,36,42,59,63,
89
+    24,26,32,36,42,59,63,68,
90
+    26,32,36,42,59,63,68,72
91
+};
92
+
93
+// 720p25_avci100, 720p50_avci100
94
+static const uint8_t x264_cqm_avci100_720p_8iy[64] =
95
+{
96
+    16,18,19,21,22,24,26,32,
97
+    18,19,19,21,22,24,26,32,
98
+    19,19,21,22,22,24,26,32,
99
+    21,21,22,22,23,24,26,34,
100
+    22,22,22,23,24,25,26,34,
101
+    24,24,24,24,25,26,34,36,
102
+    26,26,26,26,26,34,36,38,
103
+    32,32,32,34,34,36,38,42
104
+};
105
+
106
 int  x264_cqm_init( x264_t *h );
107
 void x264_cqm_delete( x264_t *h );
108
 int  x264_cqm_parse_file( x264_t *h, const char *filename );
109
x264-snapshot-20130723-2245.tar.bz2/common/sparc/pixel.asm -> x264-snapshot-20140321-2245.tar.bz2/common/sparc/pixel.asm Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * pixel.asm: sparc pixel metrics
4
  *****************************************************************************
5
- * Copyright (C) 2005-2013 x264 project
6
+ * Copyright (C) 2005-2014 x264 project
7
  *
8
  * Authors: Phil Jensen <philj@csufresno.edu>
9
  *
10
x264-snapshot-20130723-2245.tar.bz2/common/sparc/pixel.h -> x264-snapshot-20140321-2245.tar.bz2/common/sparc/pixel.h Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * pixel.h: sparc pixel metrics
4
  *****************************************************************************
5
- * Copyright (C) 2005-2013 x264 project
6
+ * Copyright (C) 2005-2014 x264 project
7
  *
8
  * Authors: Phil Jensen <philj@csufresno.edu>
9
  *
10
x264-snapshot-20130723-2245.tar.bz2/common/threadpool.c -> x264-snapshot-20140321-2245.tar.bz2/common/threadpool.c Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * threadpool.c: thread pooling
4
  *****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
  *
8
  * Authors: Steven Walters <kemuri9@gmail.com>
9
  *
10
x264-snapshot-20130723-2245.tar.bz2/common/threadpool.h -> x264-snapshot-20140321-2245.tar.bz2/common/threadpool.h Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * threadpool.h: thread pooling
4
  *****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
  *
8
  * Authors: Steven Walters <kemuri9@gmail.com>
9
  *
10
x264-snapshot-20130723-2245.tar.bz2/common/vlc.c -> x264-snapshot-20140321-2245.tar.bz2/common/vlc.c Changed
15
 
1
@@ -1,11 +1,11 @@
2
 /*****************************************************************************
3
  * vlc.c : vlc tables
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
10
- *          Henrik Gramner <hengar-6@student.ltu.se>
11
+ *          Henrik Gramner <henrik@gramner.com>
12
  *
13
  * This program is free software; you can redistribute it and/or modify
14
  * it under the terms of the GNU General Public License as published by
15
x264-snapshot-20130723-2245.tar.bz2/common/win32thread.c -> x264-snapshot-20140321-2245.tar.bz2/common/win32thread.c Changed
28
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * win32thread.c: windows threading
4
  *****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
  *
8
  * Authors: Steven Walters <kemuri9@gmail.com>
9
  *          Pegasys Inc. <http://www.pegasys-inc.com>
10
@@ -261,7 +261,7 @@
11
 int x264_win32_threading_init( void )
12
 {
13
     /* find function pointers to API functions, if they exist */
14
-    HANDLE kernel_dll = GetModuleHandle( TEXT( "kernel32.dll" ) );
15
+    HANDLE kernel_dll = GetModuleHandleW( L"kernel32.dll" );
16
     thread_control.cond_init = (void*)GetProcAddress( kernel_dll, "InitializeConditionVariable" );
17
     if( thread_control.cond_init )
18
     {
19
@@ -288,7 +288,7 @@
20
      * On platforms that support processor grouping, use GetThreadGroupAffinity to get the current thread's affinity instead. */
21
 #if ARCH_X86_64
22
     /* find function pointers to API functions specific to x86_64 platforms, if they exist */
23
-    HANDLE kernel_dll = GetModuleHandle( TEXT( "kernel32.dll" ) );
24
+    HANDLE kernel_dll = GetModuleHandleW( L"kernel32.dll" );
25
     BOOL (*get_thread_affinity)( HANDLE thread, x264_group_affinity_t *group_affinity ) = (void*)GetProcAddress( kernel_dll, "GetThreadGroupAffinity" );
26
     if( get_thread_affinity )
27
     {
28
x264-snapshot-20130723-2245.tar.bz2/common/win32thread.h -> x264-snapshot-20140321-2245.tar.bz2/common/win32thread.h Changed
18
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * win32thread.h: windows threading
4
  *****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
  *
8
  * Authors: Steven Walters <kemuri9@gmail.com>
9
  *
10
@@ -26,7 +26,6 @@
11
 #ifndef X264_WIN32THREAD_H
12
 #define X264_WIN32THREAD_H
13
 
14
-#define WIN32_LEAN_AND_MEAN
15
 #include <windows.h>
16
 /* the following macro is used within x264 */
17
 #undef ERROR
18
x264-snapshot-20130723-2245.tar.bz2/common/x86/bitstream-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/bitstream-a.asm Changed
10
 
1
@@ -1,7 +1,7 @@
2
 ;*****************************************************************************
3
 ;* bitstream-a.asm: x86 bitstream functions
4
 ;*****************************************************************************
5
-;* Copyright (C) 2010-2013 x264 project
6
+;* Copyright (C) 2010-2014 x264 project
7
 ;*
8
 ;* Authors: Jason Garrett-Glaser <darkshikari@gmail.com>
9
 ;*          Henrik Gramner <henrik@gramner.com>
10
x264-snapshot-20130723-2245.tar.bz2/common/x86/cabac-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/cabac-a.asm Changed
10
 
1
@@ -1,7 +1,7 @@
2
 ;*****************************************************************************
3
 ;* cabac-a.asm: x86 cabac
4
 ;*****************************************************************************
5
-;* Copyright (C) 2008-2013 x264 project
6
+;* Copyright (C) 2008-2014 x264 project
7
 ;*
8
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
9
 ;*          Jason Garrett-Glaser <darkshikari@gmail.com>
10
x264-snapshot-20130723-2245.tar.bz2/common/x86/const-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/const-a.asm Changed
18
 
1
@@ -1,7 +1,7 @@
2
 ;*****************************************************************************
3
 ;* const-a.asm: x86 global constants
4
 ;*****************************************************************************
5
-;* Copyright (C) 2010-2013 x264 project
6
+;* Copyright (C) 2010-2014 x264 project
7
 ;*
8
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
9
 ;*          Jason Garrett-Glaser <darkshikari@gmail.com>
10
@@ -36,6 +36,7 @@
11
 const pw_512,      times 16 dw 512
12
 const pw_00ff,     times 16 dw 0x00ff
13
 const pw_pixel_max,times 16 dw ((1 << BIT_DEPTH)-1)
14
+const pw_0to15,    dw 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
15
 const pd_1,        times 8 dd 1
16
 const deinterleave_shufd, dd 0,4,1,5,2,6,3,7
17
 const pb_unpackbd1, times 2 db 0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3
18
x264-snapshot-20130723-2245.tar.bz2/common/x86/cpu-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/cpu-a.asm Changed
28
 
1
@@ -1,7 +1,7 @@
2
 ;*****************************************************************************
3
 ;* cpu-a.asm: x86 cpu utilities
4
 ;*****************************************************************************
5
-;* Copyright (C) 2003-2013 x264 project
6
+;* Copyright (C) 2003-2014 x264 project
7
 ;*
8
 ;* Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
 ;*          Loren Merritt <lorenm@u.washington.edu>
10
@@ -146,17 +146,6 @@
11
     sfence
12
     ret
13
 
14
-;-----------------------------------------------------------------------------
15
-; void cpu_mask_misalign_sse( void )
16
-;-----------------------------------------------------------------------------
17
-cglobal cpu_mask_misalign_sse
18
-    sub   rsp, 4
19
-    stmxcsr [rsp]
20
-    or dword [rsp], 1<<17
21
-    ldmxcsr [rsp]
22
-    add   rsp, 4
23
-    ret
24
-
25
 cextern intel_cpu_indicator_init
26
 
27
 ;-----------------------------------------------------------------------------
28
x264-snapshot-20130723-2245.tar.bz2/common/x86/dct-32.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/dct-32.asm Changed
10
 
1
@@ -1,7 +1,7 @@
2
 ;*****************************************************************************
3
 ;* dct-32.asm: x86_32 transform and zigzag
4
 ;*****************************************************************************
5
-;* Copyright (C) 2003-2013 x264 project
6
+;* Copyright (C) 2003-2014 x264 project
7
 ;*
8
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
9
 ;*          Holger Lubitz <holger@lubitz.org>
10
x264-snapshot-20130723-2245.tar.bz2/common/x86/dct-64.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/dct-64.asm Changed
10
 
1
@@ -1,7 +1,7 @@
2
 ;*****************************************************************************
3
 ;* dct-64.asm: x86_64 transform and zigzag
4
 ;*****************************************************************************
5
-;* Copyright (C) 2003-2013 x264 project
6
+;* Copyright (C) 2003-2014 x264 project
7
 ;*
8
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
9
 ;*          Holger Lubitz <holger@lubitz.org>
10
x264-snapshot-20130723-2245.tar.bz2/common/x86/dct-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/dct-a.asm Changed
19
 
1
@@ -1,7 +1,7 @@
2
 ;*****************************************************************************
3
 ;* dct-a.asm: x86 transform and zigzag
4
 ;*****************************************************************************
5
-;* Copyright (C) 2003-2013 x264 project
6
+;* Copyright (C) 2003-2014 x264 project
7
 ;*
8
 ;* Authors: Holger Lubitz <holger@lubitz.org>
9
 ;*          Loren Merritt <lorenm@u.washington.edu>
10
@@ -675,7 +675,7 @@
11
     mova        m6, [pw_pixel_max]
12
     mova        m7, [pd_32]
13
     pxor        m5, m5
14
-.loop
15
+.loop:
16
     mova        m3, [r1]
17
     paddd       m3, m7
18
     psrad       m3, 6         ; dc0   0 dc1   0 dc2   0 dc3   0
19
x264-snapshot-20130723-2245.tar.bz2/common/x86/dct.h -> x264-snapshot-20140321-2245.tar.bz2/common/x86/dct.h Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * dct.h: x86 transform and zigzag
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Loren Merritt <lorenm@u.washington.edu>
9
  *          Laurent Aimar <fenrir@via.ecp.fr>
10
x264-snapshot-20130723-2245.tar.bz2/common/x86/deblock-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/deblock-a.asm Changed
271
 
1
@@ -1,7 +1,7 @@
2
 ;*****************************************************************************
3
 ;* deblock-a.asm: x86 deblocking
4
 ;*****************************************************************************
5
-;* Copyright (C) 2005-2013 x264 project
6
+;* Copyright (C) 2005-2014 x264 project
7
 ;*
8
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
9
 ;*          Jason Garrett-Glaser <darkshikari@gmail.com>
10
@@ -621,7 +621,7 @@
11
     mov     r6, 2
12
     mova    m0, [pw_2]
13
     LOAD_AB aa, bb, r2d, r3d
14
-.loop
15
+.loop:
16
     mova    p2, [r4+r1]
17
     mova    p1, [r4+2*r1]
18
     mova    p0, [r4+r5]
19
@@ -671,7 +671,7 @@
20
     add     r4, r0     ; pix+4*stride
21
     mov     r6, 2
22
     mova    m0, [pw_2]
23
-.loop
24
+.loop:
25
     movu    q3, [r0-8]
26
     movu    q2, [r0+r1-8]
27
     movu    q1, [r0+r1*2-8]
28
@@ -804,35 +804,6 @@
29
 %define PASS8ROWS(base, base3, stride, stride3, offset) \
30
     PASS8ROWS(base+offset, base3+offset, stride, stride3)
31
 
32
-; in: 8 rows of 4 bytes in %4..%11
33
-; out: 4 rows of 8 bytes in m0..m3
34
-%macro TRANSPOSE4x8_LOAD 11
35
-    movh       m0, %4
36
-    movh       m2, %5
37
-    movh       m1, %6
38
-    movh       m3, %7
39
-    punpckl%1  m0, m2
40
-    punpckl%1  m1, m3
41
-    mova       m2, m0
42
-    punpckl%2  m0, m1
43
-    punpckh%2  m2, m1
44
-
45
-    movh       m4, %8
46
-    movh       m6, %9
47
-    movh       m5, %10
48
-    movh       m7, %11
49
-    punpckl%1  m4, m6
50
-    punpckl%1  m5, m7
51
-    mova       m6, m4
52
-    punpckl%2  m4, m5
53
-    punpckh%2  m6, m5
54
-
55
-    punpckh%3  m1, m0, m4
56
-    punpckh%3  m3, m2, m6
57
-    punpckl%3  m0, m4
58
-    punpckl%3  m2, m6
59
-%endmacro
60
-
61
 ; in: 4 rows of 8 bytes in m0..m3
62
 ; out: 8 rows of 4 bytes in %1..%8
63
 %macro TRANSPOSE8x4B_STORE 8
64
@@ -844,24 +815,24 @@
65
     punpcklbw  m2, m3
66
     punpcklwd  m1, m0, m2
67
     punpckhwd  m0, m2
68
-    movh       %1, m1
69
+    movd       %1, m1
70
     punpckhdq  m1, m1
71
-    movh       %2, m1
72
-    movh       %3, m0
73
+    movd       %2, m1
74
+    movd       %3, m0
75
     punpckhdq  m0, m0
76
-    movh       %4, m0
77
+    movd       %4, m0
78
 
79
     punpckhdq  m3, m3
80
     punpcklbw  m4, m5
81
     punpcklbw  m6, m3
82
     punpcklwd  m5, m4, m6
83
     punpckhwd  m4, m6
84
-    movh       %5, m5
85
+    movd       %5, m5
86
     punpckhdq  m5, m5
87
-    movh       %6, m5
88
-    movh       %7, m4
89
+    movd       %6, m5
90
+    movd       %7, m4
91
     punpckhdq  m4, m4
92
-    movh       %8, m4
93
+    movd       %8, m4
94
 %endmacro
95
 
96
 ; in: 8 rows of 4 bytes in %9..%10
97
@@ -877,34 +848,94 @@
98
     pextrd %8, %10, 3
99
 %endmacro
100
 
101
-%macro TRANSPOSE4x8B_LOAD 8
102
-    TRANSPOSE4x8_LOAD bw, wd, dq, %1, %2, %3, %4, %5, %6, %7, %8
103
-%endmacro
104
-
105
-%macro TRANSPOSE4x8W_LOAD 8
106
-%if mmsize==16
107
-    TRANSPOSE4x8_LOAD wd, dq, qdq, %1, %2, %3, %4, %5, %6, %7, %8
108
-%else
109
+; in: 4 rows of 4 words in %1..%4
110
+; out: 4 rows of 4 word in m0..m3
111
+; clobbers: m4
112
+%macro TRANSPOSE4x4W_LOAD 4-8
113
+%if mmsize==8
114
     SWAP  1, 4, 2, 3
115
-    mova  m0, [t5]
116
-    mova  m1, [t5+r1]
117
-    mova  m2, [t5+r1*2]
118
-    mova  m3, [t5+t6]
119
+    movq  m0, %1
120
+    movq  m1, %2
121
+    movq  m2, %3
122
+    movq  m3, %4
123
     TRANSPOSE4x4W 0, 1, 2, 3, 4
124
+%else
125
+    movq       m0, %1
126
+    movq       m2, %2
127
+    movq       m1, %3
128
+    movq       m3, %4
129
+    punpcklwd  m0, m2
130
+    punpcklwd  m1, m3
131
+    mova       m2, m0
132
+    punpckldq  m0, m1
133
+    punpckhdq  m2, m1
134
+    movhlps    m1, m0
135
+    movhlps    m3, m2
136
 %endif
137
 %endmacro
138
 
139
-%macro TRANSPOSE8x2W_STORE 8
140
+; in: 2 rows of 4 words in m1..m2
141
+; out: 4 rows of 2 words in %1..%4
142
+; clobbers: m0, m1
143
+%macro TRANSPOSE4x2W_STORE 4-8
144
+%if mmsize==8
145
     punpckhwd  m0, m1, m2
146
     punpcklwd  m1, m2
147
-%if mmsize==8
148
+%else
149
+    punpcklwd  m1, m2
150
+    movhlps    m0, m1
151
+%endif
152
     movd       %3, m0
153
     movd       %1, m1
154
     psrlq      m1, 32
155
     psrlq      m0, 32
156
     movd       %2, m1
157
     movd       %4, m0
158
+%endmacro
159
+
160
+; in: 4/8 rows of 4 words in %1..%8
161
+; out: 4 rows of 4/8 word in m0..m3
162
+; clobbers: m4, m5, m6, m7
163
+%macro TRANSPOSE4x8W_LOAD 8
164
+%if mmsize==8
165
+    TRANSPOSE4x4W_LOAD %1, %2, %3, %4
166
+%else
167
+    movq       m0, %1
168
+    movq       m2, %2
169
+    movq       m1, %3
170
+    movq       m3, %4
171
+    punpcklwd  m0, m2
172
+    punpcklwd  m1, m3
173
+    mova       m2, m0
174
+    punpckldq  m0, m1
175
+    punpckhdq  m2, m1
176
+
177
+    movq       m4, %5
178
+    movq       m6, %6
179
+    movq       m5, %7
180
+    movq       m7, %8
181
+    punpcklwd  m4, m6
182
+    punpcklwd  m5, m7
183
+    mova       m6, m4
184
+    punpckldq  m4, m5
185
+    punpckhdq  m6, m5
186
+
187
+    punpckhqdq m1, m0, m4
188
+    punpckhqdq m3, m2, m6
189
+    punpcklqdq m0, m4
190
+    punpcklqdq m2, m6
191
+%endif
192
+%endmacro
193
+
194
+; in: 2 rows of 4/8 words in m1..m2
195
+; out: 4/8 rows of 2 words in %1..%8
196
+; clobbers: m0, m1
197
+%macro TRANSPOSE8x2W_STORE 8
198
+%if mmsize==8
199
+    TRANSPOSE4x2W_STORE %1, %2, %3, %4
200
 %else
201
+    punpckhwd  m0, m1, m2
202
+    punpcklwd  m1, m2
203
     movd       %5, m0
204
     movd       %1, m1
205
     psrldq     m1, 4
206
@@ -1118,7 +1149,7 @@
207
 %endif
208
     mova     m6, [pb_1]
209
     psubusb  m4, m6              ; alpha - 1
210
-    psubusb  m5, m6              ; alpha - 2
211
+    psubusb  m5, m6              ; beta - 1
212
 %if %0>2
213
     mova     %3, m4
214
 %endif
215
@@ -1361,19 +1392,18 @@
216
 ;-----------------------------------------------------------------------------
217
 ; void deblock_h_luma( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
218
 ;-----------------------------------------------------------------------------
219
-
220
 %if cpuflag(avx)
221
 INIT_XMM cpuname
222
 %else
223
 INIT_MMX cpuname
224
 %endif
225
-cglobal deblock_h_luma, 0,5,8,0x60+HAVE_ALIGNED_STACK*12
226
-    mov    r0, r0mp
227
+cglobal deblock_h_luma, 1,5,8,0x60+12
228
     mov    r3, r1m
229
     lea    r4, [r3*3]
230
     sub    r0, 4
231
     lea    r1, [r0+r4]
232
-    %define pix_tmp esp+12*HAVE_ALIGNED_STACK
233
+    %define pix_tmp esp+12
234
+    ; esp is intentionally misaligned to make it aligned after pushing the arguments for deblock_%1_luma.
235
 
236
     ; transpose 6x16 -> tmp space
237
     TRANSPOSE6x8_MEM  PASS8ROWS(r0, r1, r3, r4), pix_tmp
238
@@ -2098,17 +2128,14 @@
239
 ;-----------------------------------------------------------------------------
240
 %macro DEBLOCK_H_CHROMA_420_MBAFF 0
241
 cglobal deblock_h_chroma_mbaff, 5,7,8
242
-    sub    r0, 4
243
-    lea    t6, [r1*3]
244
-    mov    t5, r0
245
-    add    r0, t6
246
-    TRANSPOSE4x8W_LOAD PASS8ROWS(t5, r0, r1, t6)
247
+    CHROMA_H_START
248
+    TRANSPOSE4x4W_LOAD PASS8ROWS(t5, r0, r1, t6)
249
     LOAD_MASK  r2d, r3d
250
     movd       m6, [r4] ; tc0
251
     punpcklbw  m6, m6
252
     pand       m7, m6
253
     DEBLOCK_P0_Q0
254
-    TRANSPOSE8x2W_STORE PASS8ROWS(t5, r0, r1, t6, 2)
255
+    TRANSPOSE4x2W_STORE PASS8ROWS(t5, r0, r1, t6, 2)
256
     RET
257
 %endmacro
258
 
259
@@ -2249,9 +2276,9 @@
260
 INIT_MMX mmx2
261
 cglobal deblock_h_chroma_intra_mbaff, 4,6,8
262
     CHROMA_H_START
263
-    TRANSPOSE4x8W_LOAD  PASS8ROWS(t5, r0, r1, t6)
264
+    TRANSPOSE4x4W_LOAD  PASS8ROWS(t5, r0, r1, t6)
265
     call chroma_intra_body
266
-    TRANSPOSE8x2W_STORE PASS8ROWS(t5, r0, r1, t6, 2)
267
+    TRANSPOSE4x2W_STORE PASS8ROWS(t5, r0, r1, t6, 2)
268
     RET
269
 %endif ; !HIGH_BIT_DEPTH
270
 
271
x264-snapshot-20130723-2245.tar.bz2/common/x86/mc-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/mc-a.asm Changed
116
 
1
@@ -1,7 +1,7 @@
2
 ;*****************************************************************************
3
 ;* mc-a.asm: x86 motion compensation
4
 ;*****************************************************************************
5
-;* Copyright (C) 2003-2013 x264 project
6
+;* Copyright (C) 2003-2014 x264 project
7
 ;*
8
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
9
 ;*          Jason Garrett-Glaser <darkshikari@gmail.com>
10
@@ -1029,59 +1029,48 @@
11
     jg     .height_loop
12
     RET
13
 
14
+INIT_XMM
15
 cglobal pixel_avg2_w16_sse2, 6,7
16
     sub    r4, r2
17
     lea    r6, [r4+r3]
18
 .height_loop:
19
-    movdqu xmm0, [r2]
20
-    movdqu xmm2, [r2+r3]
21
-    movdqu xmm1, [r2+r4]
22
-    movdqu xmm3, [r2+r6]
23
+    movu   m0, [r2]
24
+    movu   m2, [r2+r3]
25
+    movu   m1, [r2+r4]
26
+    movu   m3, [r2+r6]
27
     lea    r2, [r2+r3*2]
28
-    pavgb  xmm0, xmm1
29
-    pavgb  xmm2, xmm3
30
-    movdqa [r0], xmm0
31
-    movdqa [r0+r1], xmm2
32
+    pavgb  m0, m1
33
+    pavgb  m2, m3
34
+    mova [r0], m0
35
+    mova [r0+r1], m2
36
     lea    r0, [r0+r1*2]
37
-    sub    r5d, 2
38
-    jg     .height_loop
39
+    sub   r5d, 2
40
+    jg .height_loop
41
     RET
42
 
43
-%macro AVG2_W20 1
44
-cglobal pixel_avg2_w20_%1, 6,7
45
+cglobal pixel_avg2_w20_sse2, 6,7
46
     sub    r2, r4
47
     lea    r6, [r2+r3]
48
 .height_loop:
49
-    movdqu xmm0, [r4]
50
-    movdqu xmm2, [r4+r3]
51
-%ifidn %1, sse2_misalign
52
-    movd   mm4,  [r4+16]
53
-    movd   mm5,  [r4+r3+16]
54
-    pavgb  xmm0, [r4+r2]
55
-    pavgb  xmm2, [r4+r6]
56
-%else
57
-    movdqu xmm1, [r4+r2]
58
-    movdqu xmm3, [r4+r6]
59
-    movd   mm4,  [r4+16]
60
-    movd   mm5,  [r4+r3+16]
61
-    pavgb  xmm0, xmm1
62
-    pavgb  xmm2, xmm3
63
-%endif
64
-    pavgb  mm4,  [r4+r2+16]
65
-    pavgb  mm5,  [r4+r6+16]
66
+    movu   m0, [r4]
67
+    movu   m2, [r4+r3]
68
+    movu   m1, [r4+r2]
69
+    movu   m3, [r4+r6]
70
+    movd  mm4, [r4+16]
71
+    movd  mm5, [r4+r3+16]
72
+    pavgb  m0, m1
73
+    pavgb  m2, m3
74
+    pavgb mm4, [r4+r2+16]
75
+    pavgb mm5, [r4+r6+16]
76
     lea    r4, [r4+r3*2]
77
-    movdqa [r0], xmm0
78
-    movd   [r0+16], mm4
79
-    movdqa [r0+r1], xmm2
80
-    movd   [r0+r1+16], mm5
81
+    mova [r0], m0
82
+    mova [r0+r1], m2
83
+    movd [r0+16], mm4
84
+    movd [r0+r1+16], mm5
85
     lea    r0, [r0+r1*2]
86
-    sub    r5d, 2
87
-    jg     .height_loop
88
+    sub   r5d, 2
89
+    jg .height_loop
90
     RET
91
-%endmacro
92
-
93
-AVG2_W20 sse2
94
-AVG2_W20 sse2_misalign
95
 
96
 INIT_YMM avx2
97
 cglobal pixel_avg2_w20, 6,7
98
@@ -1524,7 +1513,7 @@
99
 %endmacro
100
 %else ; !HIGH_BIT_DEPTH
101
 %macro UNPACK_UNALIGNED 3
102
-%if mmsize == 8 || cpuflag(misalign)
103
+%if mmsize == 8
104
     punpcklwd  %1, %3
105
 %else
106
     movh       %2, %3
107
@@ -2130,8 +2119,6 @@
108
 %else ; !HIGH_BIT_DEPTH
109
 INIT_MMX mmx2
110
 MC_CHROMA
111
-INIT_XMM sse2, misalign
112
-MC_CHROMA
113
 INIT_XMM sse2
114
 MC_CHROMA
115
 INIT_XMM ssse3
116
x264-snapshot-20130723-2245.tar.bz2/common/x86/mc-a2.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/mc-a2.asm Changed
570
 
1
@@ -1,7 +1,7 @@
2
 ;*****************************************************************************
3
 ;* mc-a2.asm: x86 motion compensation
4
 ;*****************************************************************************
5
-;* Copyright (C) 2005-2013 x264 project
6
+;* Copyright (C) 2005-2014 x264 project
7
 ;*
8
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
9
 ;*          Jason Garrett-Glaser <darkshikari@gmail.com>
10
@@ -32,6 +32,7 @@
11
 
12
 SECTION_RODATA 32
13
 
14
+pw_1024: times 16 dw 1024
15
 filt_mul20: times 32 db 20
16
 filt_mul15: times 16 db 1, -5
17
 filt_mul51: times 16 db -5, 1
18
@@ -39,17 +40,25 @@
19
 deinterleave_shuf: times 2 db 0,2,4,6,8,10,12,14,1,3,5,7,9,11,13,15
20
 
21
 %if HIGH_BIT_DEPTH
22
+v210_mask: times 4 dq 0xc00ffc003ff003ff
23
+v210_luma_shuf: times 2 db 1,2,4,5,6,7,9,10,12,13,14,15,12,13,14,15
24
+v210_chroma_shuf: times 2 db 0,1,2,3,5,6,8,9,10,11,13,14,10,11,13,14
25
+; vpermd indices {0,1,2,4,5,7,_,_} merged in the 3 lsb of each dword to save a register
26
+v210_mult: dw 0x2000,0x7fff,0x0801,0x2000,0x7ffa,0x0800,0x7ffc,0x0800
27
+           dw 0x1ffd,0x7fff,0x07ff,0x2000,0x7fff,0x0800,0x7fff,0x0800
28
+
29
 deinterleave_shuf32a: SHUFFLE_MASK_W 0,2,4,6,8,10,12,14
30
 deinterleave_shuf32b: SHUFFLE_MASK_W 1,3,5,7,9,11,13,15
31
 %else
32
+deinterleave_rgb_shuf: db 0,3,6,9,1,4,7,10,2,5,8,11,-1,-1,-1,-1
33
+                       db 0,4,8,12,1,5,9,13,2,6,10,14,-1,-1,-1,-1
34
+
35
 deinterleave_shuf32a: db 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30
36
 deinterleave_shuf32b: db 1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31
37
-%endif
38
-pw_1024: times 16 dw 1024
39
+%endif ; !HIGH_BIT_DEPTH
40
 
41
 pd_16: times 4 dd 16
42
 pd_0f: times 4 dd 0xffff
43
-pf_inv256: times 8 dd 0.00390625
44
 
45
 pad10: times 8 dw    10*PIXEL_MAX
46
 pad20: times 8 dw    20*PIXEL_MAX
47
@@ -60,16 +69,22 @@
48
 tap2: times 4 dw 20, 20
49
 tap3: times 4 dw -5,  1
50
 
51
+pw_0xc000: times 8 dw 0xc000
52
+pw_31: times 8 dw 31
53
+pd_4: times 4 dd 4
54
+
55
 SECTION .text
56
 
57
 cextern pb_0
58
 cextern pw_1
59
+cextern pw_8
60
 cextern pw_16
61
 cextern pw_32
62
 cextern pw_512
63
 cextern pw_00ff
64
 cextern pw_3fff
65
 cextern pw_pixel_max
66
+cextern pw_0to15
67
 cextern pd_ffff
68
 
69
 %macro LOAD_ADD 4
70
@@ -482,7 +497,7 @@
71
     %define pw_rnd [pw_32]
72
 %endif
73
 ; This doesn't seem to be faster (with AVX) on Sandy Bridge or Bulldozer...
74
-%if cpuflag(misalign) || mmsize==32
75
+%if mmsize==32
76
 .loop:
77
     movu    m4, [src-4]
78
     movu    m5, [src-2]
79
@@ -630,8 +645,6 @@
80
 HPEL_V 0
81
 INIT_XMM sse2
82
 HPEL_V 8
83
-INIT_XMM sse2, misalign
84
-HPEL_C
85
 %if ARCH_X86_64 == 0
86
 INIT_XMM sse2
87
 HPEL_C
88
@@ -1197,6 +1210,163 @@
89
     RET
90
 %endmacro ; PLANE_DEINTERLEAVE
91
 
92
+%macro PLANE_DEINTERLEAVE_RGB_CORE 9 ; pw, i_dsta, i_dstb, i_dstc, i_src, w, h, tmp1, tmp2
93
+%if cpuflag(ssse3)
94
+    mova        m3, [deinterleave_rgb_shuf+(%1-3)*16]
95
+%endif
96
+%%loopy:
97
+    mov         %8, r6
98
+    mov         %9, %6
99
+%%loopx:
100
+    movu        m0, [%8]
101
+    movu        m1, [%8+%1*mmsize/4]
102
+%if cpuflag(ssse3)
103
+    pshufb      m0, m3        ; b0 b1 b2 b3 g0 g1 g2 g3 r0 r1 r2 r3
104
+    pshufb      m1, m3        ; b4 b5 b6 b7 g4 g5 g6 g7 r4 r5 r6 r7
105
+%elif %1 == 3
106
+    psrldq      m2, m0, 6
107
+    punpcklqdq  m0, m1        ; b0 g0 r0 b1 g1 r1 __ __ b4 g4 r4 b5 g5 r5
108
+    psrldq      m1, 6
109
+    punpcklqdq  m2, m1        ; b2 g2 r2 b3 g3 r3 __ __ b6 g6 r6 b7 g7 r7
110
+    psrlq       m3, m0, 24
111
+    psrlq       m4, m2, 24
112
+    punpckhbw   m1, m0, m3    ; b4 b5 g4 g5 r4 r5
113
+    punpcklbw   m0, m3        ; b0 b1 g0 g1 r0 r1
114
+    punpckhbw   m3, m2, m4    ; b6 b7 g6 g7 r6 r7
115
+    punpcklbw   m2, m4        ; b2 b3 g2 g3 r2 r3
116
+    punpcklwd   m0, m2        ; b0 b1 b2 b3 g0 g1 g2 g3 r0 r1 r2 r3
117
+    punpcklwd   m1, m3        ; b4 b5 b6 b7 g4 g5 g6 g7 r4 r5 r6 r7
118
+%else
119
+    pshufd      m3, m0, q2301
120
+    pshufd      m4, m1, q2301
121
+    punpckhbw   m2, m0, m3    ; b2 b3 g2 g3 r2 r3
122
+    punpcklbw   m0, m3        ; b0 b1 g0 g1 r0 r1
123
+    punpckhbw   m3, m1, m4    ; b6 b7 g6 g7 r6 r7
124
+    punpcklbw   m1, m4        ; b4 b5 g4 g5 r4 r5
125
+    punpcklwd   m0, m2        ; b0 b1 b2 b3 g0 g1 g2 g3 r0 r1 r2 r3
126
+    punpcklwd   m1, m3        ; b4 b5 b6 b7 g4 g5 g6 g7 r4 r5 r6 r7
127
+%endif
128
+    punpckldq   m2, m0, m1    ; b0 b1 b2 b3 b4 b5 b6 b7 g0 g1 g2 g3 g4 g5 g6 g7
129
+    punpckhdq   m0, m1        ; r0 r1 r2 r3 r4 r5 r6 r7
130
+    movh   [r0+%9], m2
131
+    movhps [r2+%9], m2
132
+    movh   [r4+%9], m0
133
+    add         %8, %1*mmsize/2
134
+    add         %9, mmsize/2
135
+    jl %%loopx
136
+    add         r0, %2
137
+    add         r2, %3
138
+    add         r4, %4
139
+    add         r6, %5
140
+    dec        %7d
141
+    jg %%loopy
142
+%endmacro
143
+
144
+%macro PLANE_DEINTERLEAVE_RGB 0
145
+;-----------------------------------------------------------------------------
146
+; void x264_plane_copy_deinterleave_rgb( pixel *dsta, intptr_t i_dsta,
147
+;                                        pixel *dstb, intptr_t i_dstb,
148
+;                                        pixel *dstc, intptr_t i_dstc,
149
+;                                        pixel *src,  intptr_t i_src, int pw, int w, int h )
150
+;-----------------------------------------------------------------------------
151
+%if ARCH_X86_64
152
+cglobal plane_copy_deinterleave_rgb, 8,12
153
+    %define %%args r1, r3, r5, r7, r8, r9, r10, r11
154
+    mov        r8d, r9m
155
+    mov        r9d, r10m
156
+    add         r0, r8
157
+    add         r2, r8
158
+    add         r4, r8
159
+    neg         r8
160
+%else
161
+cglobal plane_copy_deinterleave_rgb, 1,7
162
+    %define %%args r1m, r3m, r5m, r7m, r9m, r1, r3, r5
163
+    mov         r1, r9m
164
+    mov         r2, r2m
165
+    mov         r4, r4m
166
+    mov         r6, r6m
167
+    add         r0, r1
168
+    add         r2, r1
169
+    add         r4, r1
170
+    neg         r1
171
+    mov        r9m, r1
172
+    mov         r1, r10m
173
+%endif
174
+    cmp  dword r8m, 4
175
+    je .pw4
176
+    PLANE_DEINTERLEAVE_RGB_CORE 3, %%args ; BGR
177
+    jmp .ret
178
+.pw4:
179
+    PLANE_DEINTERLEAVE_RGB_CORE 4, %%args ; BGRA
180
+.ret:
181
+    REP_RET
182
+%endmacro
183
+
184
+%if HIGH_BIT_DEPTH == 0
185
+INIT_XMM sse2
186
+PLANE_DEINTERLEAVE_RGB
187
+INIT_XMM ssse3
188
+PLANE_DEINTERLEAVE_RGB
189
+%endif ; !HIGH_BIT_DEPTH
190
+
191
+%macro PLANE_DEINTERLEAVE_V210 0
192
+;-----------------------------------------------------------------------------
193
+; void x264_plane_copy_deinterleave_v210( uint16_t *dsty, intptr_t i_dsty,
194
+;                                         uint16_t *dstc, intptr_t i_dstc,
195
+;                                         uint32_t *src, intptr_t i_src, int w, int h )
196
+;-----------------------------------------------------------------------------
197
+%if ARCH_X86_64
198
+cglobal plane_copy_deinterleave_v210, 8,10,7
199
+%define src   r8
200
+%define org_w r9
201
+%define h     r7d
202
+%else
203
+cglobal plane_copy_deinterleave_v210, 7,7,7
204
+%define src   r4m
205
+%define org_w r6m
206
+%define h     dword r7m
207
+%endif
208
+    FIX_STRIDES r1, r3, r6d
209
+    shl    r5, 2
210
+    add    r0, r6
211
+    add    r2, r6
212
+    neg    r6
213
+    mov   src, r4
214
+    mov org_w, r6
215
+    mova   m2, [v210_mask]
216
+    mova   m3, [v210_luma_shuf]
217
+    mova   m4, [v210_chroma_shuf]
218
+    mova   m5, [v210_mult] ; also functions as vpermd index for avx2
219
+    pshufd m6, m5, q1102
220
+
221
+ALIGN 16
222
+.loop:
223
+    movu   m1, [r4]
224
+    pandn  m0, m2, m1
225
+    pand   m1, m2
226
+    pshufb m0, m3
227
+    pshufb m1, m4
228
+    pmulhrsw m0, m5 ; y0 y1 y2 y3 y4 y5 __ __
229
+    pmulhrsw m1, m6 ; u0 v0 u1 v1 u2 v2 __ __
230
+%if mmsize == 32
231
+    vpermd m0, m5, m0
232
+    vpermd m1, m5, m1
233
+%endif
234
+    movu [r0+r6], m0
235
+    movu [r2+r6], m1
236
+    add    r4, mmsize
237
+    add    r6, 3*mmsize/4
238
+    jl .loop
239
+    add    r0, r1
240
+    add    r2, r3
241
+    add   src, r5
242
+    mov    r4, src
243
+    mov    r6, org_w
244
+    dec     h
245
+    jg .loop
246
+    RET
247
+%endmacro ; PLANE_DEINTERLEAVE_V210
248
+
249
 %if HIGH_BIT_DEPTH
250
 INIT_MMX mmx2
251
 PLANE_INTERLEAVE
252
@@ -1205,9 +1375,14 @@
253
 INIT_XMM sse2
254
 PLANE_INTERLEAVE
255
 PLANE_DEINTERLEAVE
256
+INIT_XMM ssse3
257
+PLANE_DEINTERLEAVE_V210
258
 INIT_XMM avx
259
 PLANE_INTERLEAVE
260
 PLANE_DEINTERLEAVE
261
+PLANE_DEINTERLEAVE_V210
262
+INIT_YMM avx2
263
+PLANE_DEINTERLEAVE_V210
264
 %else
265
 INIT_MMX mmx2
266
 PLANE_INTERLEAVE
267
@@ -1813,62 +1988,64 @@
268
 ;                             uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len )
269
 ;-----------------------------------------------------------------------------
270
 %macro MBTREE 0
271
-cglobal mbtree_propagate_cost, 7,7,7
272
-    add        r6d, r6d
273
-    lea         r0, [r0+r6*2]
274
-    add         r1, r6
275
-    add         r2, r6
276
-    add         r3, r6
277
-    add         r4, r6
278
-    neg         r6
279
-    pxor      xmm4, xmm4
280
-    movss     xmm6, [r5]
281
-    shufps    xmm6, xmm6, 0
282
-    mulps     xmm6, [pf_inv256]
283
-    movdqa    xmm5, [pw_3fff]
284
+cglobal mbtree_propagate_cost, 6,6,7
285
+    movss     m6, [r5]
286
+    mov      r5d, r6m
287
+    lea       r0, [r0+r5*2]
288
+    add      r5d, r5d
289
+    add       r1, r5
290
+    add       r2, r5
291
+    add       r3, r5
292
+    add       r4, r5
293
+    neg       r5
294
+    pxor      m4, m4
295
+    shufps    m6, m6, 0
296
+    mova      m5, [pw_3fff]
297
 .loop:
298
-    movq      xmm2, [r2+r6] ; intra
299
-    movq      xmm0, [r4+r6] ; invq
300
-    movq      xmm3, [r3+r6] ; inter
301
-    movq      xmm1, [r1+r6] ; prop
302
-    punpcklwd xmm2, xmm4
303
-    punpcklwd xmm0, xmm4
304
-    pmaddwd   xmm0, xmm2
305
-    pand      xmm3, xmm5
306
-    punpcklwd xmm1, xmm4
307
-    punpcklwd xmm3, xmm4
308
+    movq      m2, [r2+r5] ; intra
309
+    movq      m0, [r4+r5] ; invq
310
+    movq      m3, [r3+r5] ; inter
311
+    movq      m1, [r1+r5] ; prop
312
+    pand      m3, m5
313
+    pminsw    m3, m2
314
+    punpcklwd m2, m4
315
+    punpcklwd m0, m4
316
+    pmaddwd   m0, m2
317
+    punpcklwd m1, m4
318
+    punpcklwd m3, m4
319
 %if cpuflag(fma4)
320
-    cvtdq2ps  xmm0, xmm0
321
-    cvtdq2ps  xmm1, xmm1
322
-    fmaddps   xmm0, xmm0, xmm6, xmm1
323
-    cvtdq2ps  xmm1, xmm2
324
-    psubd     xmm2, xmm3
325
-    cvtdq2ps  xmm2, xmm2
326
-    rcpps     xmm3, xmm1
327
-    mulps     xmm1, xmm3
328
-    mulps     xmm0, xmm2
329
-    addps     xmm2, xmm3, xmm3
330
-    fnmaddps  xmm3, xmm1, xmm3, xmm2
331
-    mulps     xmm0, xmm3
332
+    cvtdq2ps  m0, m0
333
+    cvtdq2ps  m1, m1
334
+    fmaddps   m0, m0, m6, m1
335
+    cvtdq2ps  m1, m2
336
+    psubd     m2, m3
337
+    cvtdq2ps  m2, m2
338
+    rcpps     m3, m1
339
+    mulps     m1, m3
340
+    mulps     m0, m2
341
+    addps     m2, m3, m3
342
+    fnmaddps  m3, m1, m3, m2
343
+    mulps     m0, m3
344
 %else
345
-    cvtdq2ps  xmm0, xmm0
346
-    mulps     xmm0, xmm6    ; intra*invq*fps_factor>>8
347
-    cvtdq2ps  xmm1, xmm1    ; prop
348
-    addps     xmm0, xmm1    ; prop + (intra*invq*fps_factor>>8)
349
-    cvtdq2ps  xmm1, xmm2    ; intra
350
-    psubd     xmm2, xmm3    ; intra - inter
351
-    cvtdq2ps  xmm2, xmm2    ; intra - inter
352
-    rcpps     xmm3, xmm1    ; 1 / intra 1st approximation
353
-    mulps     xmm1, xmm3    ; intra * (1/intra 1st approx)
354
-    mulps     xmm1, xmm3    ; intra * (1/intra 1st approx)^2
355
-    mulps     xmm0, xmm2    ; (prop + (intra*invq*fps_factor>>8)) * (intra - inter)
356
-    addps     xmm3, xmm3    ; 2 * (1/intra 1st approx)
357
-    subps     xmm3, xmm1    ; 2nd approximation for 1/intra
358
-    mulps     xmm0, xmm3    ; / intra
359
-%endif
360
-    cvtps2dq  xmm0, xmm0
361
-    movdqa [r0+r6*2], xmm0
362
-    add         r6, 8
363
+    cvtdq2ps  m0, m0
364
+    mulps     m0, m6    ; intra*invq*fps_factor>>8
365
+    cvtdq2ps  m1, m1    ; prop
366
+    addps     m0, m1    ; prop + (intra*invq*fps_factor>>8)
367
+    cvtdq2ps  m1, m2    ; intra
368
+    psubd     m2, m3    ; intra - inter
369
+    cvtdq2ps  m2, m2    ; intra - inter
370
+    rcpps     m3, m1    ; 1 / intra 1st approximation
371
+    mulps     m1, m3    ; intra * (1/intra 1st approx)
372
+    mulps     m1, m3    ; intra * (1/intra 1st approx)^2
373
+    mulps     m0, m2    ; (prop + (intra*invq*fps_factor>>8)) * (intra - inter)
374
+    addps     m3, m3    ; 2 * (1/intra 1st approx)
375
+    subps     m3, m1    ; 2nd approximation for 1/intra
376
+    mulps     m0, m3    ; / intra
377
+%endif
378
+    cvtps2dq  m0, m0
379
+    packssdw  m0, m0
380
+    movh [r0+r5], m0
381
+    add       r5, 8
382
     jl .loop
383
     RET
384
 %endmacro
385
@@ -1880,34 +2057,35 @@
386
 MBTREE
387
 
388
 %macro INT16_UNPACK 1
389
-    vpunpckhwd   xm4, xm%1, xm7
390
-    vpunpcklwd  xm%1, xm7
391
-    vinsertf128  m%1, m%1, xm4, 1
392
+    punpckhwd   xm4, xm%1, xm7
393
+    punpcklwd  xm%1, xm7
394
+    vinsertf128 m%1, m%1, xm4, 1
395
 %endmacro
396
 
397
-; FIXME: align loads/stores to 16 bytes
398
-%macro MBTREE_AVX 0
399
-cglobal mbtree_propagate_cost, 7,7,8
400
-    add          r6d, r6d
401
-    lea           r0, [r0+r6*2]
402
-    add           r1, r6
403
-    add           r2, r6
404
-    add           r3, r6
405
-    add           r4, r6
406
-    neg           r6
407
-    mova         xm5, [pw_3fff]
408
-    vbroadcastss  m6, [r5]
409
-    mulps         m6, [pf_inv256]
410
+; FIXME: align loads to 16 bytes
411
+%macro MBTREE_AVX 1
412
+cglobal mbtree_propagate_cost, 6,6,%1
413
+    vbroadcastss m6, [r5]
414
+    mov         r5d, r6m
415
+    lea          r0, [r0+r5*2]
416
+    add         r5d, r5d
417
+    add          r1, r5
418
+    add          r2, r5
419
+    add          r3, r5
420
+    add          r4, r5
421
+    neg          r5
422
+    mova        xm5, [pw_3fff]
423
 %if notcpuflag(avx2)
424
-    pxor         xm7, xm7
425
+    pxor        xm7, xm7
426
 %endif
427
 .loop:
428
 %if cpuflag(avx2)
429
-    pmovzxwd     m0, [r2+r6]      ; intra
430
-    pmovzxwd     m1, [r4+r6]      ; invq
431
-    pmovzxwd     m2, [r1+r6]      ; prop
432
-    pand        xm3, xm5, [r3+r6] ; inter
433
+    pmovzxwd     m0, [r2+r5]      ; intra
434
+    pmovzxwd     m1, [r4+r5]      ; invq
435
+    pmovzxwd     m2, [r1+r5]      ; prop
436
+    pand        xm3, xm5, [r3+r5] ; inter
437
     pmovzxwd     m3, xm3
438
+    pminsd       m3, m0
439
     pmaddwd      m1, m0
440
     psubd        m4, m0, m3
441
     cvtdq2ps     m0, m0
442
@@ -1922,10 +2100,11 @@
443
     fnmaddps     m4, m2, m3, m4
444
     mulps        m1, m4
445
 %else
446
-    movu        xm0, [r2+r6]
447
-    movu        xm1, [r4+r6]
448
-    movu        xm2, [r1+r6]
449
-    pand        xm3, xm5, [r3+r6]
450
+    movu        xm0, [r2+r5]
451
+    movu        xm1, [r4+r5]
452
+    movu        xm2, [r1+r5]
453
+    pand        xm3, xm5, [r3+r5]
454
+    pminsw      xm3, xm0
455
     INT16_UNPACK 0
456
     INT16_UNPACK 1
457
     INT16_UNPACK 2
458
@@ -1947,13 +2126,107 @@
459
     mulps        m1, m3         ; / intra
460
 %endif
461
     vcvtps2dq    m1, m1
462
-    movu  [r0+r6*2], m1
463
-    add          r6, 16
464
+    vextractf128 xm2, m1, 1
465
+    packssdw    xm1, xm2
466
+    mova    [r0+r5], xm1
467
+    add          r5, 16
468
     jl .loop
469
     RET
470
 %endmacro
471
 
472
 INIT_YMM avx
473
-MBTREE_AVX
474
+MBTREE_AVX 8
475
 INIT_YMM avx2,fma3
476
-MBTREE_AVX
477
+MBTREE_AVX 7
478
+
479
+%macro MBTREE_PROPAGATE_LIST 0
480
+;-----------------------------------------------------------------------------
481
+; void mbtree_propagate_list_internal( int16_t (*mvs)[2], int *propagate_amount, uint16_t *lowres_costs,
482
+;                                      int16_t *output, int bipred_weight, int mb_y, int len )
483
+;-----------------------------------------------------------------------------
484
+cglobal mbtree_propagate_list_internal, 4,6,8
485
+    movh     m6, [pw_0to15] ; mb_x
486
+    movd     m7, r5m
487
+    pshuflw  m7, m7, 0
488
+    punpcklwd m6, m7       ; 0 y 1 y 2 y 3 y
489
+    movd     m7, r4m
490
+    SPLATW   m7, m7        ; bipred_weight
491
+    psllw    m7, 9         ; bipred_weight << 9
492
+
493
+    mov     r5d, r6m
494
+    xor     r4d, r4d
495
+.loop:
496
+    mova     m3, [r1+r4*2]
497
+    movu     m4, [r2+r4*2]
498
+    mova     m5, [pw_0xc000]
499
+    pand     m4, m5
500
+    pcmpeqw  m4, m5
501
+    pmulhrsw m5, m3, m7    ; propagate_amount = (propagate_amount * bipred_weight + 32) >> 6
502
+%if cpuflag(avx)
503
+    pblendvb m5, m3, m5, m4
504
+%else
505
+    pand     m5, m4
506
+    pandn    m4, m3
507
+    por      m5, m4        ; if( lists_used == 3 )
508
+                           ;     propagate_amount = (propagate_amount * bipred_weight + 32) >> 6
509
+%endif
510
+
511
+    movu     m0, [r0+r4*4] ; x,y
512
+    movu     m1, [r0+r4*4+mmsize]
513
+
514
+    psraw    m2, m0, 5
515
+    psraw    m3, m1, 5
516
+    mova     m4, [pd_4]
517
+    paddw    m2, m6        ; {mbx, mby} = ({x,y}>>5)+{h->mb.i_mb_x,h->mb.i_mb_y}
518
+    paddw    m6, m4        ; {mbx, mby} += {4, 0}
519
+    paddw    m3, m6        ; {mbx, mby} = ({x,y}>>5)+{h->mb.i_mb_x,h->mb.i_mb_y}
520
+    paddw    m6, m4        ; {mbx, mby} += {4, 0}
521
+
522
+    mova [r3+mmsize*0], m2
523
+    mova [r3+mmsize*1], m3
524
+
525
+    mova     m3, [pw_31]
526
+    pand     m0, m3        ; x &= 31
527
+    pand     m1, m3        ; y &= 31
528
+    packuswb m0, m1
529
+    psrlw    m1, m0, 3
530
+    pand     m0, m3        ; x
531
+    SWAP      1, 3
532
+    pandn    m1, m3        ; y premultiplied by (1<<5) for later use of pmulhrsw
533
+
534
+    mova     m3, [pw_32]
535
+    psubw    m3, m0        ; 32 - x
536
+    mova     m4, [pw_1024]
537
+    psubw    m4, m1        ; (32 - y) << 5
538
+
539
+    pmullw   m2, m3, m4    ; idx0weight = (32-y)*(32-x) << 5
540
+    pmullw   m4, m0        ; idx1weight = (32-y)*x << 5
541
+    pmullw   m0, m1        ; idx3weight = y*x << 5
542
+    pmullw   m1, m3        ; idx2weight = y*(32-x) << 5
543
+
544
+    ; avoid overflow in the input to pmulhrsw
545
+    psrlw    m3, m2, 15
546
+    psubw    m2, m3        ; idx0weight -= (idx0weight == 32768)
547
+
548
+    pmulhrsw m2, m5        ; idx0weight * propagate_amount + 512 >> 10
549
+    pmulhrsw m4, m5        ; idx1weight * propagate_amount + 512 >> 10
550
+    pmulhrsw m1, m5        ; idx2weight * propagate_amount + 512 >> 10
551
+    pmulhrsw m0, m5        ; idx3weight * propagate_amount + 512 >> 10
552
+
553
+    SBUTTERFLY wd, 2, 4, 3
554
+    SBUTTERFLY wd, 1, 0, 3
555
+    mova [r3+mmsize*2], m2
556
+    mova [r3+mmsize*3], m4
557
+    mova [r3+mmsize*4], m1
558
+    mova [r3+mmsize*5], m0
559
+    add     r4d, mmsize/2
560
+    add      r3, mmsize*6
561
+    cmp     r4d, r5d
562
+    jl .loop
563
+    REP_RET
564
+%endmacro
565
+
566
+INIT_XMM ssse3
567
+MBTREE_PROPAGATE_LIST
568
+INIT_XMM avx
569
+MBTREE_PROPAGATE_LIST
570
x264-snapshot-20130723-2245.tar.bz2/common/x86/mc-c.c -> x264-snapshot-20140321-2245.tar.bz2/common/x86/mc-c.c Changed
281
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * mc-c.c: x86 motion compensation
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
  *          Loren Merritt <lorenm@u.washington.edu>
10
@@ -116,6 +116,23 @@
11
 void x264_plane_copy_deinterleave_avx( uint16_t *dstu, intptr_t i_dstu,
12
                                        uint16_t *dstv, intptr_t i_dstv,
13
                                        uint16_t *src,  intptr_t i_src, int w, int h );
14
+void x264_plane_copy_deinterleave_rgb_sse2 ( pixel *dsta, intptr_t i_dsta,
15
+                                             pixel *dstb, intptr_t i_dstb,
16
+                                             pixel *dstc, intptr_t i_dstc,
17
+                                             pixel *src,  intptr_t i_src, int pw, int w, int h );
18
+void x264_plane_copy_deinterleave_rgb_ssse3( pixel *dsta, intptr_t i_dsta,
19
+                                             pixel *dstb, intptr_t i_dstb,
20
+                                             pixel *dstc, intptr_t i_dstc,
21
+                                             pixel *src,  intptr_t i_src, int pw, int w, int h );
22
+void x264_plane_copy_deinterleave_v210_ssse3( uint16_t *dstu, intptr_t i_dstu,
23
+                                              uint16_t *dstv, intptr_t i_dstv,
24
+                                              uint32_t *src,  intptr_t i_src, int w, int h );
25
+void x264_plane_copy_deinterleave_v210_avx  ( uint16_t *dstu, intptr_t i_dstu,
26
+                                              uint16_t *dstv, intptr_t i_dstv,
27
+                                              uint32_t *src,  intptr_t i_src, int w, int h );
28
+void x264_plane_copy_deinterleave_v210_avx2 ( uint16_t *dstu, intptr_t i_dstu,
29
+                                              uint16_t *dstv, intptr_t i_dstv,
30
+                                              uint32_t *src,  intptr_t i_src, int w, int h );
31
 void x264_store_interleave_chroma_mmx2( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
32
 void x264_store_interleave_chroma_sse2( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
33
 void x264_store_interleave_chroma_avx ( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
34
@@ -144,13 +161,13 @@
35
 void x264_integral_init8v_mmx ( uint16_t *sum8, intptr_t stride );
36
 void x264_integral_init8v_sse2( uint16_t *sum8, intptr_t stride );
37
 void x264_integral_init8v_avx2( uint16_t *sum8, intptr_t stride );
38
-void x264_mbtree_propagate_cost_sse2( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
39
+void x264_mbtree_propagate_cost_sse2( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
40
                                       uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
41
-void x264_mbtree_propagate_cost_avx ( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
42
+void x264_mbtree_propagate_cost_avx ( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
43
                                       uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
44
-void x264_mbtree_propagate_cost_fma4( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
45
+void x264_mbtree_propagate_cost_fma4( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
46
                                       uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
47
-void x264_mbtree_propagate_cost_avx2_fma3( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
48
+void x264_mbtree_propagate_cost_avx2_fma3( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
49
                                            uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
50
 
51
 #define MC_CHROMA(cpu)\
52
@@ -158,7 +175,6 @@
53
                            int dx, int dy, int i_width, int i_height );
54
 MC_CHROMA(mmx2)
55
 MC_CHROMA(sse2)
56
-MC_CHROMA(sse2_misalign)
57
 MC_CHROMA(ssse3)
58
 MC_CHROMA(ssse3_cache64)
59
 MC_CHROMA(avx)
60
@@ -186,7 +202,6 @@
61
 PIXEL_AVG_WALL(cache64_mmx2)
62
 PIXEL_AVG_WALL(cache64_sse2)
63
 PIXEL_AVG_WALL(sse2)
64
-PIXEL_AVG_WALL(sse2_misalign)
65
 PIXEL_AVG_WALL(cache64_ssse3)
66
 PIXEL_AVG_WALL(avx2)
67
 
68
@@ -227,7 +242,6 @@
69
 PIXEL_AVG_WTAB(cache64_mmx2, mmx2, cache64_mmx2, cache64_mmx2, cache64_mmx2, cache64_mmx2)
70
 #endif
71
 PIXEL_AVG_WTAB(sse2, mmx2, mmx2, sse2, sse2, sse2)
72
-PIXEL_AVG_WTAB(sse2_misalign, mmx2, mmx2, sse2, sse2, sse2_misalign)
73
 PIXEL_AVG_WTAB(cache64_sse2, mmx2, cache64_mmx2, cache64_sse2, cache64_sse2, cache64_sse2)
74
 PIXEL_AVG_WTAB(cache64_ssse3, mmx2, cache64_mmx2, cache64_ssse3, cache64_ssse3, cache64_sse2)
75
 PIXEL_AVG_WTAB(cache64_ssse3_atom, mmx2, mmx2, cache64_ssse3, cache64_ssse3, sse2)
76
@@ -429,7 +443,6 @@
77
 GET_REF(cache32_mmx2)
78
 GET_REF(cache64_mmx2)
79
 #endif
80
-GET_REF(sse2_misalign)
81
 GET_REF(cache64_sse2)
82
 GET_REF(cache64_ssse3)
83
 GET_REF(cache64_ssse3_atom)
84
@@ -477,7 +490,6 @@
85
 HPEL(16, avx, avx, avx, avx)
86
 HPEL(32, avx2, avx2, avx2, avx2)
87
 #endif
88
-HPEL(16, sse2_misalign, sse2, sse2_misalign, sse2)
89
 #endif // HIGH_BIT_DEPTH
90
 
91
 static void x264_plane_copy_mmx2( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h )
92
@@ -521,6 +533,113 @@
93
 PLANE_INTERLEAVE(avx)
94
 #endif
95
 
96
+#if HAVE_X86_INLINE_ASM
97
+#define CLIP_ADD(s,x)\
98
+do\
99
+{\
100
+    int temp;\
101
+    asm("movd       %0, %%xmm0     \n"\
102
+        "movd       %2, %%xmm1     \n"\
103
+        "paddsw %%xmm1, %%xmm0     \n"\
104
+        "movd   %%xmm0, %1         \n"\
105
+        :"+m"(s), "=&r"(temp)\
106
+        :"m"(x)\
107
+    );\
108
+    s = temp;\
109
+} while(0)
110
+
111
+#define CLIP_ADD2(s,x)\
112
+do\
113
+{\
114
+    asm("movd       %0, %%xmm0     \n"\
115
+        "movd       %1, %%xmm1     \n"\
116
+        "paddsw %%xmm1, %%xmm0     \n"\
117
+        "movd   %%xmm0, %0         \n"\
118
+        :"+m"(M32(s))\
119
+        :"m"(M32(x))\
120
+    );\
121
+} while(0)
122
+#else
123
+#define CLIP_ADD(s,x) (s) = X264_MIN((s)+(x),(1<<15)-1)
124
+#define CLIP_ADD2(s,x)\
125
+do\
126
+{\
127
+    CLIP_ADD((s)[0], (x)[0]);\
128
+    CLIP_ADD((s)[1], (x)[1]);\
129
+} while(0)
130
+#endif
131
+
132
+#define PROPAGATE_LIST(cpu)\
133
+void x264_mbtree_propagate_list_internal_##cpu( int16_t (*mvs)[2], int16_t *propagate_amount,\
134
+                                                uint16_t *lowres_costs, int16_t *output,\
135
+                                                int bipred_weight, int mb_y, int len );\
136
+\
137
+static void x264_mbtree_propagate_list_##cpu( x264_t *h, uint16_t *ref_costs, int16_t (*mvs)[2],\
138
+                                              int16_t *propagate_amount, uint16_t *lowres_costs,\
139
+                                              int bipred_weight, int mb_y, int len, int list )\
140
+{\
141
+    int16_t *current = h->scratch_buffer2;\
142
+\
143
+    x264_mbtree_propagate_list_internal_##cpu( mvs, propagate_amount, lowres_costs,\
144
+                                               current, bipred_weight, mb_y, len );\
145
+\
146
+    unsigned stride = h->mb.i_mb_stride;\
147
+    unsigned width = h->mb.i_mb_width;\
148
+    unsigned height = h->mb.i_mb_height;\
149
+\
150
+    for( unsigned i = 0; i < len; current += 32 )\
151
+    {\
152
+        int end = X264_MIN( i+8, len );\
153
+        for( ; i < end; i++, current += 2 )\
154
+        {\
155
+            if( !(lowres_costs[i] & (1 << (list+LOWRES_COST_SHIFT))) )\
156
+                continue;\
157
+\
158
+            unsigned mbx = current[0];\
159
+            unsigned mby = current[1];\
160
+            unsigned idx0 = mbx + mby * stride;\
161
+            unsigned idx2 = idx0 + stride;\
162
+\
163
+            /* Shortcut for the simple/common case of zero MV */\
164
+            if( !M32( mvs[i] ) )\
165
+            {\
166
+                CLIP_ADD( ref_costs[idx0], current[16] );\
167
+                continue;\
168
+            }\
169
+\
170
+            if( mbx < width-1 && mby < height-1 )\
171
+            {\
172
+                CLIP_ADD2( ref_costs+idx0, current+16 );\
173
+                CLIP_ADD2( ref_costs+idx2, current+32 );\
174
+            }\
175
+            else\
176
+            {\
177
+                /* Note: this takes advantage of unsigned representation to\
178
+                 * catch negative mbx/mby. */\
179
+                if( mby < height )\
180
+                {\
181
+                    if( mbx < width )\
182
+                        CLIP_ADD( ref_costs[idx0+0], current[16] );\
183
+                    if( mbx+1 < width )\
184
+                        CLIP_ADD( ref_costs[idx0+1], current[17] );\
185
+                }\
186
+                if( mby+1 < height )\
187
+                {\
188
+                    if( mbx < width )\
189
+                        CLIP_ADD( ref_costs[idx2+0], current[32] );\
190
+                    if( mbx+1 < width )\
191
+                        CLIP_ADD( ref_costs[idx2+1], current[33] );\
192
+                }\
193
+            }\
194
+        }\
195
+    }\
196
+}
197
+
198
+PROPAGATE_LIST(ssse3)
199
+PROPAGATE_LIST(avx)
200
+#undef CLIP_ADD
201
+#undef CLIP_ADD2
202
+
203
 void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
204
 {
205
     if( !(cpu&X264_CPU_MMX) )
206
@@ -632,6 +751,8 @@
207
         return;
208
 
209
     pf->frame_init_lowres_core = x264_frame_init_lowres_core_ssse3;
210
+    pf->plane_copy_deinterleave_v210 = x264_plane_copy_deinterleave_v210_ssse3;
211
+    pf->mbtree_propagate_list = x264_mbtree_propagate_list_ssse3;
212
 
213
     if( !(cpu&(X264_CPU_SLOW_SHUFFLE|X264_CPU_SLOW_ATOM|X264_CPU_SLOW_PALIGNR)) )
214
         pf->integral_init4v = x264_integral_init4v_ssse3;
215
@@ -644,6 +765,7 @@
216
     pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_avx;
217
     pf->plane_copy_interleave        = x264_plane_copy_interleave_avx;
218
     pf->plane_copy_deinterleave      = x264_plane_copy_deinterleave_avx;
219
+    pf->plane_copy_deinterleave_v210 = x264_plane_copy_deinterleave_v210_avx;
220
     pf->store_interleave_chroma      = x264_store_interleave_chroma_avx;
221
     pf->copy[PIXEL_16x16]            = x264_mc_copy_w16_aligned_avx;
222
 
223
@@ -654,7 +776,10 @@
224
         pf->frame_init_lowres_core = x264_frame_init_lowres_core_xop;
225
 
226
     if( cpu&X264_CPU_AVX2 )
227
+    {
228
         pf->mc_luma = mc_luma_avx2;
229
+        pf->plane_copy_deinterleave_v210 = x264_plane_copy_deinterleave_v210_avx2;
230
+    }
231
 #else // !HIGH_BIT_DEPTH
232
 
233
 #if ARCH_X86 // all x86_64 cpus with cacheline split issues use sse2 instead
234
@@ -679,6 +804,7 @@
235
     pf->integral_init8v = x264_integral_init8v_sse2;
236
     pf->hpel_filter = x264_hpel_filter_sse2_amd;
237
     pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_sse2;
238
+    pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_sse2;
239
 
240
     if( !(cpu&X264_CPU_SSE2_IS_SLOW) )
241
     {
242
@@ -696,8 +822,6 @@
243
         pf->avg[PIXEL_8x8]  = x264_pixel_avg_8x8_sse2;
244
         pf->avg[PIXEL_8x4]  = x264_pixel_avg_8x4_sse2;
245
         pf->hpel_filter = x264_hpel_filter_sse2;
246
-        if( cpu&X264_CPU_SSE_MISALIGN )
247
-            pf->hpel_filter = x264_hpel_filter_sse2_misalign;
248
         pf->frame_init_lowres_core = x264_frame_init_lowres_core_sse2;
249
         if( !(cpu&X264_CPU_STACK_MOD4) )
250
             pf->mc_chroma = x264_mc_chroma_sse2;
251
@@ -716,12 +840,6 @@
252
                 pf->mc_luma = mc_luma_cache64_sse2;
253
                 pf->get_ref = get_ref_cache64_sse2;
254
             }
255
-            if( cpu&X264_CPU_SSE_MISALIGN )
256
-            {
257
-                pf->get_ref = get_ref_sse2_misalign;
258
-                if( !(cpu&X264_CPU_STACK_MOD4) )
259
-                    pf->mc_chroma = x264_mc_chroma_sse2_misalign;
260
-            }
261
         }
262
     }
263
 
264
@@ -737,6 +855,8 @@
265
     pf->avg[PIXEL_4x8]   = x264_pixel_avg_4x8_ssse3;
266
     pf->avg[PIXEL_4x4]   = x264_pixel_avg_4x4_ssse3;
267
     pf->avg[PIXEL_4x2]   = x264_pixel_avg_4x2_ssse3;
268
+    pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_ssse3;
269
+    pf->mbtree_propagate_list = x264_mbtree_propagate_list_ssse3;
270
 
271
     if( !(cpu&X264_CPU_SLOW_PSHUFB) )
272
     {
273
@@ -813,6 +933,7 @@
274
         return;
275
     pf->memzero_aligned = x264_memzero_aligned_avx;
276
     pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_avx;
277
+    pf->mbtree_propagate_list = x264_mbtree_propagate_list_avx;
278
 
279
     if( cpu&X264_CPU_FMA4 )
280
         pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_fma4;
281
x264-snapshot-20130723-2245.tar.bz2/common/x86/mc.h -> x264-snapshot-20140321-2245.tar.bz2/common/x86/mc.h Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * mc.h: x86 motion compensation
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Loren Merritt <lorenm@u.washington.edu>
9
  *          Laurent Aimar <fenrir@via.ecp.fr>
10
x264-snapshot-20130723-2245.tar.bz2/common/x86/pixel-32.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/pixel-32.asm Changed
10
 
1
@@ -1,7 +1,7 @@
2
 ;*****************************************************************************
3
 ;* pixel-32.asm: x86_32 pixel metrics
4
 ;*****************************************************************************
5
-;* Copyright (C) 2003-2013 x264 project
6
+;* Copyright (C) 2003-2014 x264 project
7
 ;*
8
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
9
 ;*          Laurent Aimar <fenrir@via.ecp.fr>
10
x264-snapshot-20130723-2245.tar.bz2/common/x86/pixel-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/pixel-a.asm Changed
28
 
1
@@ -1,7 +1,7 @@
2
 ;*****************************************************************************
3
 ;* pixel.asm: x86 pixel metrics
4
 ;*****************************************************************************
5
-;* Copyright (C) 2003-2013 x264 project
6
+;* Copyright (C) 2003-2014 x264 project
7
 ;*
8
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
9
 ;*          Holger Lubitz <holger@lubitz.org>
10
@@ -205,7 +205,7 @@
11
     mov    r4d, %%n
12
 %endif
13
     pxor    m0, m0
14
-.loop
15
+.loop:
16
     mova    m1, [r0]
17
     mova    m2, [r0+offset0_1]
18
     mova    m3, [r0+offset0_2]
19
@@ -1265,7 +1265,7 @@
20
 ; clobber: m3..m7
21
 ; out: %1 = satd
22
 %macro SATD_4x4_MMX 3
23
-    %xdefine %%n n%1
24
+    %xdefine %%n nn%1
25
     %assign offset %2*SIZEOF_PIXEL
26
     LOAD_DIFF m4, m3, none, [r0+     offset], [r2+     offset]
27
     LOAD_DIFF m5, m3, none, [r0+  r1+offset], [r2+  r3+offset]
28
x264-snapshot-20130723-2245.tar.bz2/common/x86/pixel.h -> x264-snapshot-20140321-2245.tar.bz2/common/x86/pixel.h Changed
26
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * pixel.h: x86 pixel metrics
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
  *          Loren Merritt <lorenm@u.washington.edu>
10
@@ -47,7 +47,6 @@
11
 
12
 DECL_X1( sad, mmx2 )
13
 DECL_X1( sad, sse2 )
14
-DECL_X4( sad, sse2_misalign )
15
 DECL_X1( sad, sse3 )
16
 DECL_X1( sad, sse2_aligned )
17
 DECL_X1( sad, ssse3 )
18
@@ -57,6 +56,7 @@
19
 DECL_X4( sad, sse2 )
20
 DECL_X4( sad, sse3 )
21
 DECL_X4( sad, ssse3 )
22
+DECL_X4( sad, avx )
23
 DECL_X4( sad, avx2 )
24
 DECL_X1( ssd, mmx )
25
 DECL_X1( ssd, mmx2 )
26
x264-snapshot-20130723-2245.tar.bz2/common/x86/predict-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/predict-a.asm Changed
26
 
1
@@ -1,7 +1,7 @@
2
 ;*****************************************************************************
3
 ;* predict-a.asm: x86 intra prediction
4
 ;*****************************************************************************
5
-;* Copyright (C) 2005-2013 x264 project
6
+;* Copyright (C) 2005-2014 x264 project
7
 ;*
8
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
9
 ;*          Holger Lubitz <holger@lubitz.org>
10
@@ -31,7 +31,6 @@
11
 
12
 SECTION_RODATA 32
13
 
14
-pw_0to15:    dw 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
15
 pw_43210123: times 2 dw -3, -2, -1, 0, 1, 2, 3, 4
16
 pw_m3:       times 16 dw -3
17
 pw_m7:       times 16 dw -7
18
@@ -56,6 +55,7 @@
19
 cextern pw_16
20
 cextern pw_00ff
21
 cextern pw_pixel_max
22
+cextern pw_0to15
23
 
24
 %macro STORE8 1
25
     mova [r0+0*FDEC_STRIDEB], %1
26
x264-snapshot-20130723-2245.tar.bz2/common/x86/predict-c.c -> x264-snapshot-20140321-2245.tar.bz2/common/x86/predict-c.c Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * predict-c.c: intra prediction
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
  *          Loren Merritt <lorenm@u.washington.edu>
10
x264-snapshot-20130723-2245.tar.bz2/common/x86/predict.h -> x264-snapshot-20140321-2245.tar.bz2/common/x86/predict.h Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * predict.h: x86 intra prediction
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
  *          Loren Merritt <lorenm@u.washington.edu>
10
x264-snapshot-20130723-2245.tar.bz2/common/x86/quant-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/quant-a.asm Changed
10
 
1
@@ -1,7 +1,7 @@
2
 ;*****************************************************************************
3
 ;* quant-a.asm: x86 quantization and level-run
4
 ;*****************************************************************************
5
-;* Copyright (C) 2005-2013 x264 project
6
+;* Copyright (C) 2005-2014 x264 project
7
 ;*
8
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
9
 ;*          Jason Garrett-Glaser <darkshikari@gmail.com>
10
x264-snapshot-20130723-2245.tar.bz2/common/x86/quant.h -> x264-snapshot-20140321-2245.tar.bz2/common/x86/quant.h Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * quant.h: x86 quantization and level-run
4
  *****************************************************************************
5
- * Copyright (C) 2005-2013 x264 project
6
+ * Copyright (C) 2005-2014 x264 project
7
  *
8
  * Authors: Loren Merritt <lorenm@u.washington.edu>
9
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
10
x264-snapshot-20130723-2245.tar.bz2/common/x86/sad-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/sad-a.asm Changed
722
 
1
@@ -1,7 +1,7 @@
2
 ;*****************************************************************************
3
 ;* sad-a.asm: x86 sad functions
4
 ;*****************************************************************************
5
-;* Copyright (C) 2003-2013 x264 project
6
+;* Copyright (C) 2003-2014 x264 project
7
 ;*
8
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
9
 ;*          Jason Garrett-Glaser <darkshikari@gmail.com>
10
@@ -32,7 +32,6 @@
11
 SECTION_RODATA 32
12
 
13
 pb_shuf8x8c2: times 2 db 0,0,0,0,8,8,8,8,-1,-1,-1,-1,-1,-1,-1,-1
14
-deinterleave_sadx4: dd 0,4,2,6
15
 hpred_shuf: db 0,0,2,2,8,8,10,10,1,1,3,3,9,9,11,11
16
 
17
 SECTION .text
18
@@ -1009,62 +1008,56 @@
19
 ;=============================================================================
20
 
21
 %macro SAD_X3_START_1x16P_SSE2 0
22
-%if cpuflag(misalign)
23
-    mova   xmm2, [r0]
24
-    movu   xmm0, [r1]
25
-    movu   xmm1, [r2]
26
-    psadbw xmm0, xmm2
27
-    psadbw xmm1, xmm2
28
-    psadbw xmm2, [r3]
29
+    mova     m2, [r0]
30
+%if cpuflag(avx)
31
+    psadbw   m0, m2, [r1]
32
+    psadbw   m1, m2, [r2]
33
+    psadbw   m2, [r3]
34
 %else
35
-    mova   xmm3, [r0]
36
-    movu   xmm0, [r1]
37
-    movu   xmm1, [r2]
38
-    movu   xmm2, [r3]
39
-    psadbw xmm0, xmm3
40
-    psadbw xmm1, xmm3
41
-    psadbw xmm2, xmm3
42
+    movu     m0, [r1]
43
+    movu     m1, [r2]
44
+    movu     m3, [r3]
45
+    psadbw   m0, m2
46
+    psadbw   m1, m2
47
+    psadbw   m2, m3
48
 %endif
49
 %endmacro
50
 
51
 %macro SAD_X3_1x16P_SSE2 2
52
-%if cpuflag(misalign)
53
-    mova   xmm3, [r0+%1]
54
-    movu   xmm4, [r1+%2]
55
-    movu   xmm5, [r2+%2]
56
-    psadbw xmm4, xmm3
57
-    psadbw xmm5, xmm3
58
-    psadbw xmm3, [r3+%2]
59
-    paddw  xmm0, xmm4
60
-    paddw  xmm1, xmm5
61
-    paddw  xmm2, xmm3
62
+    mova     m3, [r0+%1]
63
+%if cpuflag(avx)
64
+    psadbw   m4, m3, [r1+%2]
65
+    psadbw   m5, m3, [r2+%2]
66
+    psadbw   m3, [r3+%2]
67
 %else
68
-    mova   xmm3, [r0+%1]
69
-    movu   xmm4, [r1+%2]
70
-    movu   xmm5, [r2+%2]
71
-    movu   xmm6, [r3+%2]
72
-    psadbw xmm4, xmm3
73
-    psadbw xmm5, xmm3
74
-    psadbw xmm6, xmm3
75
-    paddw  xmm0, xmm4
76
-    paddw  xmm1, xmm5
77
-    paddw  xmm2, xmm6
78
+    movu     m4, [r1+%2]
79
+    movu     m5, [r2+%2]
80
+    movu     m6, [r3+%2]
81
+    psadbw   m4, m3
82
+    psadbw   m5, m3
83
+    psadbw   m3, m6
84
 %endif
85
+    paddw    m0, m4
86
+    paddw    m1, m5
87
+    paddw    m2, m3
88
 %endmacro
89
 
90
+%if ARCH_X86_64
91
+    DECLARE_REG_TMP 6
92
+%else
93
+    DECLARE_REG_TMP 5
94
+%endif
95
+
96
 %macro SAD_X3_4x16P_SSE2 2
97
 %if %1==0
98
-%if UNIX64
99
-    mov  r6, r5
100
-%endif
101
-    lea  r5, [r4*3]
102
+    lea  t0, [r4*3]
103
     SAD_X3_START_1x16P_SSE2
104
 %else
105
     SAD_X3_1x16P_SSE2 FENC_STRIDE*(0+(%1&1)*4), r4*0
106
 %endif
107
     SAD_X3_1x16P_SSE2 FENC_STRIDE*(1+(%1&1)*4), r4*1
108
     SAD_X3_1x16P_SSE2 FENC_STRIDE*(2+(%1&1)*4), r4*2
109
-    SAD_X3_1x16P_SSE2 FENC_STRIDE*(3+(%1&1)*4), r5
110
+    SAD_X3_1x16P_SSE2 FENC_STRIDE*(3+(%1&1)*4), t0
111
 %if %1 != %2-1
112
 %if (%1&1) != 0
113
     add  r0, 8*FENC_STRIDE
114
@@ -1076,156 +1069,117 @@
115
 %endmacro
116
 
117
 %macro SAD_X3_START_2x8P_SSE2 0
118
-    movq    xmm7, [r0]
119
-    movq    xmm0, [r1]
120
-    movq    xmm1, [r2]
121
-    movq    xmm2, [r3]
122
-    movhps  xmm7, [r0+FENC_STRIDE]
123
-    movhps  xmm0, [r1+r4]
124
-    movhps  xmm1, [r2+r4]
125
-    movhps  xmm2, [r3+r4]
126
-    psadbw  xmm0, xmm7
127
-    psadbw  xmm1, xmm7
128
-    psadbw  xmm2, xmm7
129
+    movq     m3, [r0]
130
+    movq     m0, [r1]
131
+    movq     m1, [r2]
132
+    movq     m2, [r3]
133
+    movhps   m3, [r0+FENC_STRIDE]
134
+    movhps   m0, [r1+r4]
135
+    movhps   m1, [r2+r4]
136
+    movhps   m2, [r3+r4]
137
+    psadbw   m0, m3
138
+    psadbw   m1, m3
139
+    psadbw   m2, m3
140
 %endmacro
141
 
142
 %macro SAD_X3_2x8P_SSE2 4
143
-    movq    xmm7, [r0+%1]
144
-    movq    xmm3, [r1+%2]
145
-    movq    xmm4, [r2+%2]
146
-    movq    xmm5, [r3+%2]
147
-    movhps  xmm7, [r0+%3]
148
-    movhps  xmm3, [r1+%4]
149
-    movhps  xmm4, [r2+%4]
150
-    movhps  xmm5, [r3+%4]
151
-    psadbw  xmm3, xmm7
152
-    psadbw  xmm4, xmm7
153
-    psadbw  xmm5, xmm7
154
-    paddw   xmm0, xmm3
155
-    paddw   xmm1, xmm4
156
-    paddw   xmm2, xmm5
157
+    movq     m6, [r0+%1]
158
+    movq     m3, [r1+%2]
159
+    movq     m4, [r2+%2]
160
+    movq     m5, [r3+%2]
161
+    movhps   m6, [r0+%3]
162
+    movhps   m3, [r1+%4]
163
+    movhps   m4, [r2+%4]
164
+    movhps   m5, [r3+%4]
165
+    psadbw   m3, m6
166
+    psadbw   m4, m6
167
+    psadbw   m5, m6
168
+    paddw    m0, m3
169
+    paddw    m1, m4
170
+    paddw    m2, m5
171
 %endmacro
172
 
173
 %macro SAD_X4_START_2x8P_SSE2 0
174
-    movq    xmm7, [r0]
175
-    movq    xmm0, [r1]
176
-    movq    xmm1, [r2]
177
-    movq    xmm2, [r3]
178
-    movq    xmm3, [r4]
179
-    movhps  xmm7, [r0+FENC_STRIDE]
180
-    movhps  xmm0, [r1+r5]
181
-    movhps  xmm1, [r2+r5]
182
-    movhps  xmm2, [r3+r5]
183
-    movhps  xmm3, [r4+r5]
184
-    psadbw  xmm0, xmm7
185
-    psadbw  xmm1, xmm7
186
-    psadbw  xmm2, xmm7
187
-    psadbw  xmm3, xmm7
188
+    movq     m4, [r0]
189
+    movq     m0, [r1]
190
+    movq     m1, [r2]
191
+    movq     m2, [r3]
192
+    movq     m3, [r4]
193
+    movhps   m4, [r0+FENC_STRIDE]
194
+    movhps   m0, [r1+r5]
195
+    movhps   m1, [r2+r5]
196
+    movhps   m2, [r3+r5]
197
+    movhps   m3, [r4+r5]
198
+    psadbw   m0, m4
199
+    psadbw   m1, m4
200
+    psadbw   m2, m4
201
+    psadbw   m3, m4
202
 %endmacro
203
 
204
 %macro SAD_X4_2x8P_SSE2 4
205
-    movq    xmm7, [r0+%1]
206
-    movq    xmm4, [r1+%2]
207
-    movq    xmm5, [r2+%2]
208
-%if ARCH_X86_64
209
-    movq    xmm6, [r3+%2]
210
-    movq    xmm8, [r4+%2]
211
-    movhps  xmm7, [r0+%3]
212
-    movhps  xmm4, [r1+%4]
213
-    movhps  xmm5, [r2+%4]
214
-    movhps  xmm6, [r3+%4]
215
-    movhps  xmm8, [r4+%4]
216
-    psadbw  xmm4, xmm7
217
-    psadbw  xmm5, xmm7
218
-    psadbw  xmm6, xmm7
219
-    psadbw  xmm8, xmm7
220
-    paddw   xmm0, xmm4
221
-    paddw   xmm1, xmm5
222
-    paddw   xmm2, xmm6
223
-    paddw   xmm3, xmm8
224
-%else
225
-    movhps  xmm7, [r0+%3]
226
-    movhps  xmm4, [r1+%4]
227
-    movhps  xmm5, [r2+%4]
228
-    psadbw  xmm4, xmm7
229
-    psadbw  xmm5, xmm7
230
-    paddw   xmm0, xmm4
231
-    paddw   xmm1, xmm5
232
-    movq    xmm6, [r3+%2]
233
-    movq    xmm4, [r4+%2]
234
-    movhps  xmm6, [r3+%4]
235
-    movhps  xmm4, [r4+%4]
236
-    psadbw  xmm6, xmm7
237
-    psadbw  xmm4, xmm7
238
-    paddw   xmm2, xmm6
239
-    paddw   xmm3, xmm4
240
-%endif
241
+    movq     m6, [r0+%1]
242
+    movq     m4, [r1+%2]
243
+    movq     m5, [r2+%2]
244
+    movhps   m6, [r0+%3]
245
+    movhps   m4, [r1+%4]
246
+    movhps   m5, [r2+%4]
247
+    psadbw   m4, m6
248
+    psadbw   m5, m6
249
+    paddw    m0, m4
250
+    paddw    m1, m5
251
+    movq     m4, [r3+%2]
252
+    movq     m5, [r4+%2]
253
+    movhps   m4, [r3+%4]
254
+    movhps   m5, [r4+%4]
255
+    psadbw   m4, m6
256
+    psadbw   m5, m6
257
+    paddw    m2, m4
258
+    paddw    m3, m5
259
 %endmacro
260
 
261
 %macro SAD_X4_START_1x16P_SSE2 0
262
-%if cpuflag(misalign)
263
-    mova   xmm3, [r0]
264
-    movu   xmm0, [r1]
265
-    movu   xmm1, [r2]
266
-    movu   xmm2, [r3]
267
-    psadbw xmm0, xmm3
268
-    psadbw xmm1, xmm3
269
-    psadbw xmm2, xmm3
270
-    psadbw xmm3, [r4]
271
+    mova     m3, [r0]
272
+%if cpuflag(avx)
273
+    psadbw   m0, m3, [r1]
274
+    psadbw   m1, m3, [r2]
275
+    psadbw   m2, m3, [r3]
276
+    psadbw   m3, [r4]
277
 %else
278
-    mova   xmm7, [r0]
279
-    movu   xmm0, [r1]
280
-    movu   xmm1, [r2]
281
-    movu   xmm2, [r3]
282
-    movu   xmm3, [r4]
283
-    psadbw xmm0, xmm7
284
-    psadbw xmm1, xmm7
285
-    psadbw xmm2, xmm7
286
-    psadbw xmm3, xmm7
287
+    movu     m0, [r1]
288
+    movu     m1, [r2]
289
+    movu     m2, [r3]
290
+    movu     m4, [r4]
291
+    psadbw   m0, m3
292
+    psadbw   m1, m3
293
+    psadbw   m2, m3
294
+    psadbw   m3, m4
295
 %endif
296
 %endmacro
297
 
298
 %macro SAD_X4_1x16P_SSE2 2
299
-%if cpuflag(misalign)
300
-    mova   xmm7, [r0+%1]
301
-    movu   xmm4, [r1+%2]
302
-    movu   xmm5, [r2+%2]
303
-    movu   xmm6, [r3+%2]
304
-    psadbw xmm4, xmm7
305
-    psadbw xmm5, xmm7
306
-    psadbw xmm6, xmm7
307
-    psadbw xmm7, [r4+%2]
308
-    paddw  xmm0, xmm4
309
-    paddw  xmm1, xmm5
310
-    paddw  xmm2, xmm6
311
-    paddw  xmm3, xmm7
312
+    mova     m6, [r0+%1]
313
+%if cpuflag(avx)
314
+    psadbw   m4, m6, [r1+%2]
315
+    psadbw   m5, m6, [r2+%2]
316
 %else
317
-    mova   xmm7, [r0+%1]
318
-    movu   xmm4, [r1+%2]
319
-    movu   xmm5, [r2+%2]
320
-    movu   xmm6, [r3+%2]
321
-%if ARCH_X86_64
322
-    movu   xmm8, [r4+%2]
323
-    psadbw xmm4, xmm7
324
-    psadbw xmm5, xmm7
325
-    psadbw xmm6, xmm7
326
-    psadbw xmm8, xmm7
327
-    paddw  xmm0, xmm4
328
-    paddw  xmm1, xmm5
329
-    paddw  xmm2, xmm6
330
-    paddw  xmm3, xmm8
331
-%else
332
-    psadbw xmm4, xmm7
333
-    psadbw xmm5, xmm7
334
-    paddw  xmm0, xmm4
335
-    psadbw xmm6, xmm7
336
-    movu   xmm4, [r4+%2]
337
-    paddw  xmm1, xmm5
338
-    psadbw xmm4, xmm7
339
-    paddw  xmm2, xmm6
340
-    paddw  xmm3, xmm4
341
+    movu     m4, [r1+%2]
342
+    movu     m5, [r2+%2]
343
+    psadbw   m4, m6
344
+    psadbw   m5, m6
345
 %endif
346
+    paddw    m0, m4
347
+    paddw    m1, m5
348
+%if cpuflag(avx)
349
+    psadbw   m4, m6, [r3+%2]
350
+    psadbw   m5, m6, [r4+%2]
351
+%else
352
+    movu     m4, [r3+%2]
353
+    movu     m5, [r4+%2]
354
+    psadbw   m4, m6
355
+    psadbw   m5, m6
356
 %endif
357
+    paddw    m2, m4
358
+    paddw    m3, m5
359
 %endmacro
360
 
361
 %macro SAD_X4_4x16P_SSE2 2
362
@@ -1251,15 +1205,12 @@
363
 
364
 %macro SAD_X3_4x8P_SSE2 2
365
 %if %1==0
366
-%if UNIX64
367
-    mov  r6, r5
368
-%endif
369
-    lea  r5, [r4*3]
370
+    lea  t0, [r4*3]
371
     SAD_X3_START_2x8P_SSE2
372
 %else
373
     SAD_X3_2x8P_SSE2 FENC_STRIDE*(0+(%1&1)*4), r4*0, FENC_STRIDE*(1+(%1&1)*4), r4*1
374
 %endif
375
-    SAD_X3_2x8P_SSE2 FENC_STRIDE*(2+(%1&1)*4), r4*2, FENC_STRIDE*(3+(%1&1)*4), r5
376
+    SAD_X3_2x8P_SSE2 FENC_STRIDE*(2+(%1&1)*4), r4*2, FENC_STRIDE*(3+(%1&1)*4), t0
377
 %if %1 != %2-1
378
 %if (%1&1) != 0
379
     add  r0, 8*FENC_STRIDE
380
@@ -1290,78 +1241,86 @@
381
 %endmacro
382
 
383
 %macro SAD_X3_END_SSE2 0
384
-    movhlps xmm4, xmm0
385
-    movhlps xmm5, xmm1
386
-    movhlps xmm6, xmm2
387
-    paddw   xmm0, xmm4
388
-    paddw   xmm1, xmm5
389
-    paddw   xmm2, xmm6
390
-%if UNIX64
391
-    movd [r6+0], xmm0
392
-    movd [r6+4], xmm1
393
-    movd [r6+8], xmm2
394
+    movifnidn r5, r5mp
395
+%if cpuflag(ssse3)
396
+    packssdw m0, m1
397
+    packssdw m2, m2
398
+    phaddd   m0, m2
399
+    mova   [r5], m0
400
 %else
401
-    mov      r0, r5mp
402
-    movd [r0+0], xmm0
403
-    movd [r0+4], xmm1
404
-    movd [r0+8], xmm2
405
+    movhlps  m3, m0
406
+    movhlps  m4, m1
407
+    movhlps  m5, m2
408
+    paddw    m0, m3
409
+    paddw    m1, m4
410
+    paddw    m2, m5
411
+    movd [r5+0], m0
412
+    movd [r5+4], m1
413
+    movd [r5+8], m2
414
 %endif
415
     RET
416
 %endmacro
417
 
418
 %macro SAD_X4_END_SSE2 0
419
-    mov       r0, r6mp
420
-    psllq   xmm1, 32
421
-    psllq   xmm3, 32
422
-    paddw   xmm0, xmm1
423
-    paddw   xmm2, xmm3
424
-    movhlps xmm1, xmm0
425
-    movhlps xmm3, xmm2
426
-    paddw   xmm0, xmm1
427
-    paddw   xmm2, xmm3
428
-    movq  [r0+0], xmm0
429
-    movq  [r0+8], xmm2
430
+    mov      r0, r6mp
431
+%if cpuflag(ssse3)
432
+    packssdw m0, m1
433
+    packssdw m2, m3
434
+    phaddd   m0, m2
435
+    mova   [r0], m0
436
+%else
437
+    psllq    m1, 32
438
+    psllq    m3, 32
439
+    paddw    m0, m1
440
+    paddw    m2, m3
441
+    movhlps  m1, m0
442
+    movhlps  m3, m2
443
+    paddw    m0, m1
444
+    paddw    m2, m3
445
+    movq [r0+0], m0
446
+    movq [r0+8], m2
447
+%endif
448
     RET
449
 %endmacro
450
 
451
 %macro SAD_X4_START_2x8P_SSSE3 0
452
-    movddup xmm4, [r0]
453
-    movq    xmm0, [r1]
454
-    movq    xmm1, [r3]
455
-    movhps  xmm0, [r2]
456
-    movhps  xmm1, [r4]
457
-    movddup xmm5, [r0+FENC_STRIDE]
458
-    movq    xmm2, [r1+r5]
459
-    movq    xmm3, [r3+r5]
460
-    movhps  xmm2, [r2+r5]
461
-    movhps  xmm3, [r4+r5]
462
-    psadbw  xmm0, xmm4
463
-    psadbw  xmm1, xmm4
464
-    psadbw  xmm2, xmm5
465
-    psadbw  xmm3, xmm5
466
-    paddw   xmm0, xmm2
467
-    paddw   xmm1, xmm3
468
+    movddup  m4, [r0]
469
+    movq     m0, [r1]
470
+    movq     m1, [r3]
471
+    movhps   m0, [r2]
472
+    movhps   m1, [r4]
473
+    movddup  m5, [r0+FENC_STRIDE]
474
+    movq     m2, [r1+r5]
475
+    movq     m3, [r3+r5]
476
+    movhps   m2, [r2+r5]
477
+    movhps   m3, [r4+r5]
478
+    psadbw   m0, m4
479
+    psadbw   m1, m4
480
+    psadbw   m2, m5
481
+    psadbw   m3, m5
482
+    paddw    m0, m2
483
+    paddw    m1, m3
484
 %endmacro
485
 
486
 %macro SAD_X4_2x8P_SSSE3 4
487
-    movddup xmm6, [r0+%1]
488
-    movq    xmm2, [r1+%2]
489
-    movq    xmm3, [r3+%2]
490
-    movhps  xmm2, [r2+%2]
491
-    movhps  xmm3, [r4+%2]
492
-    movddup xmm7, [r0+%3]
493
-    movq    xmm4, [r1+%4]
494
-    movq    xmm5, [r3+%4]
495
-    movhps  xmm4, [r2+%4]
496
-    movhps  xmm5, [r4+%4]
497
-    psadbw  xmm2, xmm6
498
-    psadbw  xmm3, xmm6
499
-    psadbw  xmm4, xmm7
500
-    psadbw  xmm5, xmm7
501
-    paddw   xmm0, xmm2
502
-    paddw   xmm1, xmm3
503
-    paddw   xmm0, xmm4
504
-    paddw   xmm1, xmm5
505
+    movddup  m6, [r0+%1]
506
+    movq     m2, [r1+%2]
507
+    movq     m3, [r3+%2]
508
+    movhps   m2, [r2+%2]
509
+    movhps   m3, [r4+%2]
510
+    movddup  m7, [r0+%3]
511
+    movq     m4, [r1+%4]
512
+    movq     m5, [r3+%4]
513
+    movhps   m4, [r2+%4]
514
+    movhps   m5, [r4+%4]
515
+    psadbw   m2, m6
516
+    psadbw   m3, m6
517
+    psadbw   m4, m7
518
+    psadbw   m5, m7
519
+    paddw    m0, m2
520
+    paddw    m1, m3
521
+    paddw    m0, m4
522
+    paddw    m1, m5
523
 %endmacro
524
 
525
 %macro SAD_X4_4x8P_SSSE3 2
526
@@ -1384,9 +1343,9 @@
527
 %endmacro
528
 
529
 %macro SAD_X4_END_SSSE3 0
530
-    mov       r0, r6mp
531
-    packssdw xmm0, xmm1
532
-    movdqa  [r0], xmm0
533
+    mov      r0, r6mp
534
+    packssdw m0, m1
535
+    mova   [r0], m0
536
     RET
537
 %endmacro
538
 
539
@@ -1421,15 +1380,12 @@
540
 
541
 %macro SAD_X3_4x16P_AVX2 2
542
 %if %1==0
543
-%if UNIX64
544
-    mov  r6, r5
545
-%endif
546
-    lea  r5, [r4*3]
547
+    lea  t0, [r4*3]
548
     SAD_X3_START_2x16P_AVX2
549
 %else
550
     SAD_X3_2x16P_AVX2 FENC_STRIDE*(0+(%1&1)*4), r4*0, r4*1
551
 %endif
552
-    SAD_X3_2x16P_AVX2 FENC_STRIDE*(2+(%1&1)*4), r4*2, r5
553
+    SAD_X3_2x16P_AVX2 FENC_STRIDE*(2+(%1&1)*4), r4*2, t0
554
 %if %1 != %2-1
555
 %if (%1&1) != 0
556
     add  r0, 8*FENC_STRIDE
557
@@ -1444,12 +1400,12 @@
558
     vbroadcasti128 m4, [r0]
559
     vbroadcasti128 m5, [r0+FENC_STRIDE]
560
     movu   xm0, [r1]
561
-    movu   xm1, [r3]
562
+    movu   xm1, [r2]
563
     movu   xm2, [r1+r5]
564
-    movu   xm3, [r3+r5]
565
-    vinserti128 m0, m0, [r2], 1
566
+    movu   xm3, [r2+r5]
567
+    vinserti128 m0, m0, [r3], 1
568
     vinserti128 m1, m1, [r4], 1
569
-    vinserti128 m2, m2, [r2+r5], 1
570
+    vinserti128 m2, m2, [r3+r5], 1
571
     vinserti128 m3, m3, [r4+r5], 1
572
     psadbw  m0, m4
573
     psadbw  m1, m4
574
@@ -1463,12 +1419,12 @@
575
     vbroadcasti128 m6, [r0+%1]
576
     vbroadcasti128 m7, [r0+%3]
577
     movu   xm2, [r1+%2]
578
-    movu   xm3, [r3+%2]
579
+    movu   xm3, [r2+%2]
580
     movu   xm4, [r1+%4]
581
-    movu   xm5, [r3+%4]
582
-    vinserti128 m2, m2, [r2+%2], 1
583
+    movu   xm5, [r2+%4]
584
+    vinserti128 m2, m2, [r3+%2], 1
585
     vinserti128 m3, m3, [r4+%2], 1
586
-    vinserti128 m4, m4, [r2+%4], 1
587
+    vinserti128 m4, m4, [r3+%4], 1
588
     vinserti128 m5, m5, [r4+%4], 1
589
     psadbw  m2, m6
590
     psadbw  m3, m6
591
@@ -1500,41 +1456,22 @@
592
 %endmacro
593
 
594
 %macro SAD_X3_END_AVX2 0
595
-    vextracti128 xm4, m0, 1
596
-    vextracti128 xm5, m1, 1
597
-    vextracti128 xm6, m2, 1
598
-    paddw   xm0, xm4
599
-    paddw   xm1, xm5
600
-    paddw   xm2, xm6
601
-    movhlps xm4, xm0
602
-    movhlps xm5, xm1
603
-    movhlps xm6, xm2
604
-    paddw   xm0, xm4
605
-    paddw   xm1, xm5
606
-    paddw   xm2, xm6
607
-%if UNIX64
608
-    movd [r6+0], xm0
609
-    movd [r6+4], xm1
610
-    movd [r6+8], xm2
611
-%else
612
-    mov      r0, r5mp
613
-    movd [r0+0], xm0
614
-    movd [r0+4], xm1
615
-    movd [r0+8], xm2
616
-%endif
617
+    movifnidn r5, r5mp
618
+    packssdw  m0, m1        ; 0 0 1 1 0 0 1 1
619
+    packssdw  m2, m2        ; 2 2 _ _ 2 2 _ _
620
+    phaddd    m0, m2        ; 0 1 2 _ 0 1 2 _
621
+    vextracti128 xm1, m0, 1
622
+    paddd    xm0, xm1       ; 0 1 2 _
623
+    mova    [r5], xm0
624
     RET
625
 %endmacro
626
 
627
 %macro SAD_X4_END_AVX2 0
628
-    mov      r0, r6mp
629
-    punpckhqdq m2, m0, m0
630
-    punpckhqdq m3, m1, m1
631
-    paddw    m0, m2
632
-    paddw    m1, m3
633
-    packssdw m0, m1
634
-    mova    xm2, [deinterleave_sadx4]
635
-    vpermd   m0, m2, m0
636
-    mova   [r0], xm0
637
+    mov       r0, r6mp
638
+    packssdw  m0, m1        ; 0 0 1 1 2 2 3 3
639
+    vextracti128 xm1, m0, 1
640
+    phaddd   xm0, xm1       ; 0 1 2 3
641
+    mova    [r0], xm0
642
     RET
643
 %endmacro
644
 
645
@@ -1542,8 +1479,8 @@
646
 ; void pixel_sad_x3_16x16( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1,
647
 ;                          uint8_t *pix2, intptr_t i_stride, int scores[3] )
648
 ;-----------------------------------------------------------------------------
649
-%macro SAD_X_SSE2 3
650
-cglobal pixel_sad_x%1_%2x%3, 2+%1,3+%1,9
651
+%macro SAD_X_SSE2 4
652
+cglobal pixel_sad_x%1_%2x%3, 2+%1,3+%1,%4
653
 %assign x 0
654
 %rep %3/4
655
     SAD_X%1_4x%2P_SSE2 x, %3/4
656
@@ -1553,28 +1490,22 @@
657
 %endmacro
658
 
659
 INIT_XMM sse2
660
-SAD_X_SSE2 3, 16, 16
661
-SAD_X_SSE2 3, 16,  8
662
-SAD_X_SSE2 3,  8, 16
663
-SAD_X_SSE2 3,  8,  8
664
-SAD_X_SSE2 3,  8,  4
665
-SAD_X_SSE2 4, 16, 16
666
-SAD_X_SSE2 4, 16,  8
667
-SAD_X_SSE2 4,  8, 16
668
-SAD_X_SSE2 4,  8,  8
669
-SAD_X_SSE2 4,  8,  4
670
-
671
-INIT_XMM sse2, misalign
672
-SAD_X_SSE2 3, 16, 16
673
-SAD_X_SSE2 3, 16,  8
674
-SAD_X_SSE2 4, 16, 16
675
-SAD_X_SSE2 4, 16,  8
676
+SAD_X_SSE2 3, 16, 16, 7
677
+SAD_X_SSE2 3, 16,  8, 7
678
+SAD_X_SSE2 3,  8, 16, 7
679
+SAD_X_SSE2 3,  8,  8, 7
680
+SAD_X_SSE2 3,  8,  4, 7
681
+SAD_X_SSE2 4, 16, 16, 7
682
+SAD_X_SSE2 4, 16,  8, 7
683
+SAD_X_SSE2 4,  8, 16, 7
684
+SAD_X_SSE2 4,  8,  8, 7
685
+SAD_X_SSE2 4,  8,  4, 7
686
 
687
 INIT_XMM sse3
688
-SAD_X_SSE2 3, 16, 16
689
-SAD_X_SSE2 3, 16,  8
690
-SAD_X_SSE2 4, 16, 16
691
-SAD_X_SSE2 4, 16,  8
692
+SAD_X_SSE2 3, 16, 16, 7
693
+SAD_X_SSE2 3, 16,  8, 7
694
+SAD_X_SSE2 4, 16, 16, 7
695
+SAD_X_SSE2 4, 16,  8, 7
696
 
697
 %macro SAD_X_SSSE3 3
698
 cglobal pixel_sad_x%1_%2x%3, 2+%1,3+%1,8
699
@@ -1587,9 +1518,19 @@
700
 %endmacro
701
 
702
 INIT_XMM ssse3
703
-SAD_X_SSSE3 4, 8, 16
704
-SAD_X_SSSE3 4, 8,  8
705
-SAD_X_SSSE3 4, 8,  4
706
+SAD_X_SSE2  3, 16, 16, 7
707
+SAD_X_SSE2  3, 16,  8, 7
708
+SAD_X_SSE2  4, 16, 16, 7
709
+SAD_X_SSE2  4, 16,  8, 7
710
+SAD_X_SSSE3 4,  8, 16
711
+SAD_X_SSSE3 4,  8,  8
712
+SAD_X_SSSE3 4,  8,  4
713
+
714
+INIT_XMM avx
715
+SAD_X_SSE2 3, 16, 16, 6
716
+SAD_X_SSE2 3, 16,  8, 6
717
+SAD_X_SSE2 4, 16, 16, 7
718
+SAD_X_SSE2 4, 16,  8, 7
719
 
720
 %macro SAD_X_AVX2 4
721
 cglobal pixel_sad_x%1_%2x%3, 2+%1,3+%1,%4
722
x264-snapshot-20130723-2245.tar.bz2/common/x86/sad16-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/sad16-a.asm Changed
10
 
1
@@ -1,7 +1,7 @@
2
 ;*****************************************************************************
3
 ;* sad16-a.asm: x86 high depth sad functions
4
 ;*****************************************************************************
5
-;* Copyright (C) 2010-2013 x264 project
6
+;* Copyright (C) 2010-2014 x264 project
7
 ;*
8
 ;* Authors: Oskar Arvidsson <oskar@irock.se>
9
 ;*          Henrik Gramner <henrik@gramner.com>
10
x264-snapshot-20130723-2245.tar.bz2/common/x86/trellis-64.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/trellis-64.asm Changed
10
 
1
@@ -1,7 +1,7 @@
2
 ;*****************************************************************************
3
 ;* trellis-64.asm: x86_64 trellis quantization
4
 ;*****************************************************************************
5
-;* Copyright (C) 2012-2013 x264 project
6
+;* Copyright (C) 2012-2014 x264 project
7
 ;*
8
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
9
 ;*
10
x264-snapshot-20130723-2245.tar.bz2/common/x86/util.h -> x264-snapshot-20140321-2245.tar.bz2/common/x86/util.h Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * util.h: x86 inline asm
4
  *****************************************************************************
5
- * Copyright (C) 2008-2013 x264 project
6
+ * Copyright (C) 2008-2014 x264 project
7
  *
8
  * Authors: Jason Garrett-Glaser <darkshikari@gmail.com>
9
  *          Loren Merritt <lorenm@u.washington.edu>
10
x264-snapshot-20130723-2245.tar.bz2/common/x86/x86inc.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/x86inc.asm Changed
369
 
1
@@ -1,7 +1,7 @@
2
 ;*****************************************************************************
3
 ;* x86inc.asm: x264asm abstraction layer
4
 ;*****************************************************************************
5
-;* Copyright (C) 2005-2013 x264 project
6
+;* Copyright (C) 2005-2014 x264 project
7
 ;*
8
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
9
 ;*          Anton Mitrofanov <BugMaster@narod.ru>
10
@@ -42,6 +42,14 @@
11
     %define public_prefix private_prefix
12
 %endif
13
 
14
+%ifndef STACK_ALIGNMENT
15
+    %if ARCH_X86_64
16
+        %define STACK_ALIGNMENT 16
17
+    %else
18
+        %define STACK_ALIGNMENT 4
19
+    %endif
20
+%endif
21
+
22
 %define WIN64  0
23
 %define UNIX64 0
24
 %if ARCH_X86_64
25
@@ -49,6 +57,8 @@
26
         %define WIN64  1
27
     %elifidn __OUTPUT_FORMAT__,win64
28
         %define WIN64  1
29
+    %elifidn __OUTPUT_FORMAT__,x64
30
+        %define WIN64  1
31
     %else
32
         %define UNIX64 1
33
     %endif
34
@@ -92,8 +102,9 @@
35
 ; %1 = number of arguments. loads them from stack if needed.
36
 ; %2 = number of registers used. pushes callee-saved regs if needed.
37
 ; %3 = number of xmm registers used. pushes callee-saved xmm regs if needed.
38
-; %4 = (optional) stack size to be allocated. If not aligned (x86-32 ICC 10.x,
39
-;      MSVC or YMM), the stack will be manually aligned (to 16 or 32 bytes),
40
+; %4 = (optional) stack size to be allocated. The stack will be aligned before
41
+;      allocating the specified stack size. If the required stack alignment is
42
+;      larger than the known stack alignment the stack will be manually aligned
43
 ;      and an extra register will be allocated to hold the original stack
44
 ;      pointer (to not invalidate r0m etc.). To prevent the use of an extra
45
 ;      register as stack pointer, request a negative stack size.
46
@@ -101,8 +112,10 @@
47
 ; PROLOGUE can also be invoked by adding the same options to cglobal
48
 
49
 ; e.g.
50
-; cglobal foo, 2,3,0, dst, src, tmp
51
-; declares a function (foo), taking two args (dst and src) and one local variable (tmp)
52
+; cglobal foo, 2,3,7,0x40, dst, src, tmp
53
+; declares a function (foo) that automatically loads two arguments (dst and
54
+; src) into registers, uses one additional register (tmp) plus 7 vector
55
+; registers (m0-m6) and allocates 0x40 bytes of stack space.
56
 
57
 ; TODO Some functions can use some args directly from the stack. If they're the
58
 ; last args then you can just not declare them, but if they're in the middle
59
@@ -302,26 +315,28 @@
60
     %assign n_arg_names %0
61
 %endmacro
62
 
63
+%define required_stack_alignment ((mmsize + 15) & ~15)
64
+
65
 %macro ALLOC_STACK 1-2 0 ; stack_size, n_xmm_regs (for win64 only)
66
     %ifnum %1
67
         %if %1 != 0
68
-            %assign %%stack_alignment ((mmsize + 15) & ~15)
69
+            %assign %%pad 0
70
             %assign stack_size %1
71
             %if stack_size < 0
72
                 %assign stack_size -stack_size
73
             %endif
74
-            %assign stack_size_padded stack_size
75
             %if WIN64
76
-                %assign stack_size_padded stack_size_padded + 32 ; reserve 32 bytes for shadow space
77
+                %assign %%pad %%pad + 32 ; shadow space
78
                 %if mmsize != 8
79
                     %assign xmm_regs_used %2
80
                     %if xmm_regs_used > 8
81
-                        %assign stack_size_padded stack_size_padded + (xmm_regs_used-8)*16
82
+                        %assign %%pad %%pad + (xmm_regs_used-8)*16 ; callee-saved xmm registers
83
                     %endif
84
                 %endif
85
             %endif
86
-            %if mmsize <= 16 && HAVE_ALIGNED_STACK
87
-                %assign stack_size_padded stack_size_padded + %%stack_alignment - gprsize - (stack_offset & (%%stack_alignment - 1))
88
+            %if required_stack_alignment <= STACK_ALIGNMENT
89
+                ; maintain the current stack alignment
90
+                %assign stack_size_padded stack_size + %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1))
91
                 SUB rsp, stack_size_padded
92
             %else
93
                 %assign %%reg_num (regs_used - 1)
94
@@ -330,17 +345,17 @@
95
                 ; it, i.e. in [rsp+stack_size_padded], so we can restore the
96
                 ; stack in a single instruction (i.e. mov rsp, rstk or mov
97
                 ; rsp, [rsp+stack_size_padded])
98
-                mov  rstk, rsp
99
                 %if %1 < 0 ; need to store rsp on stack
100
-                    sub  rsp, gprsize+stack_size_padded
101
-                    and  rsp, ~(%%stack_alignment-1)
102
-                    %xdefine rstkm [rsp+stack_size_padded]
103
-                    mov rstkm, rstk
104
+                    %xdefine rstkm [rsp + stack_size + %%pad]
105
+                    %assign %%pad %%pad + gprsize
106
                 %else ; can keep rsp in rstk during whole function
107
-                    sub  rsp, stack_size_padded
108
-                    and  rsp, ~(%%stack_alignment-1)
109
                     %xdefine rstkm rstk
110
                 %endif
111
+                %assign stack_size_padded stack_size + ((%%pad + required_stack_alignment-1) & ~(required_stack_alignment-1))
112
+                mov rstk, rsp
113
+                and rsp, ~(required_stack_alignment-1)
114
+                sub rsp, stack_size_padded
115
+                movifnidn rstkm, rstk
116
             %endif
117
             WIN64_PUSH_XMM
118
         %endif
119
@@ -349,7 +364,7 @@
120
 
121
 %macro SETUP_STACK_POINTER 1
122
     %ifnum %1
123
-        %if %1 != 0 && (HAVE_ALIGNED_STACK == 0 || mmsize == 32)
124
+        %if %1 != 0 && required_stack_alignment > STACK_ALIGNMENT
125
             %if %1 > 0
126
                 %assign regs_used (regs_used + 1)
127
             %elif ARCH_X86_64 && regs_used == num_args && num_args <= 4 + UNIX64 * 2
128
@@ -423,7 +438,9 @@
129
     %assign xmm_regs_used %1
130
     ASSERT xmm_regs_used <= 16
131
     %if xmm_regs_used > 8
132
-        %assign stack_size_padded (xmm_regs_used-8)*16 + (~stack_offset&8) + 32
133
+        ; Allocate stack space for callee-saved xmm registers plus shadow space and align the stack.
134
+        %assign %%pad (xmm_regs_used-8)*16 + 32
135
+        %assign stack_size_padded %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1))
136
         SUB rsp, stack_size_padded
137
     %endif
138
     WIN64_PUSH_XMM
139
@@ -439,7 +456,7 @@
140
         %endrep
141
     %endif
142
     %if stack_size_padded > 0
143
-        %if stack_size > 0 && (mmsize == 32 || HAVE_ALIGNED_STACK == 0)
144
+        %if stack_size > 0 && required_stack_alignment > STACK_ALIGNMENT
145
             mov rsp, rstkm
146
         %else
147
             add %1, stack_size_padded
148
@@ -505,7 +522,7 @@
149
 
150
 %macro RET 0
151
 %if stack_size_padded > 0
152
-%if mmsize == 32 || HAVE_ALIGNED_STACK == 0
153
+%if required_stack_alignment > STACK_ALIGNMENT
154
     mov rsp, rstkm
155
 %else
156
     add rsp, stack_size_padded
157
@@ -561,7 +578,7 @@
158
 
159
 %macro RET 0
160
 %if stack_size_padded > 0
161
-%if mmsize == 32 || HAVE_ALIGNED_STACK == 0
162
+%if required_stack_alignment > STACK_ALIGNMENT
163
     mov rsp, rstkm
164
 %else
165
     add rsp, stack_size_padded
166
@@ -731,11 +748,10 @@
167
 %assign cpuflags_cache64  (1<<17)
168
 %assign cpuflags_slowctz  (1<<18)
169
 %assign cpuflags_lzcnt    (1<<19)
170
-%assign cpuflags_misalign (1<<20)
171
-%assign cpuflags_aligned  (1<<21) ; not a cpu feature, but a function variant
172
-%assign cpuflags_atom     (1<<22)
173
-%assign cpuflags_bmi1     (1<<23)|cpuflags_lzcnt
174
-%assign cpuflags_bmi2     (1<<24)|cpuflags_bmi1
175
+%assign cpuflags_aligned  (1<<20) ; not a cpu feature, but a function variant
176
+%assign cpuflags_atom     (1<<21)
177
+%assign cpuflags_bmi1     (1<<22)|cpuflags_lzcnt
178
+%assign cpuflags_bmi2     (1<<23)|cpuflags_bmi1
179
 
180
 %define    cpuflag(x) ((cpuflags & (cpuflags_ %+ x)) == (cpuflags_ %+ x))
181
 %define notcpuflag(x) ((cpuflags & (cpuflags_ %+ x)) != (cpuflags_ %+ x))
182
@@ -777,9 +793,9 @@
183
 %endmacro
184
 
185
 ; Merge mmx and sse*
186
-; m# is a simd regsiter of the currently selected size
187
-; xm# is the corresponding xmmreg (if selcted xmm or ymm size), or mmreg (if selected mmx)
188
-; ym# is the corresponding ymmreg (if selcted xmm or ymm size), or mmreg (if selected mmx)
189
+; m# is a simd register of the currently selected size
190
+; xm# is the corresponding xmm register if mmsize >= 16, otherwise the same as m#
191
+; ym# is the corresponding ymm register if mmsize >= 32, otherwise the same as m#
192
 ; (All 3 remain in sync through SWAP.)
193
 
194
 %macro CAT_XDEFINE 3
195
@@ -802,12 +818,12 @@
196
     %assign %%i 0
197
     %rep 8
198
     CAT_XDEFINE m, %%i, mm %+ %%i
199
-    CAT_XDEFINE nmm, %%i, %%i
200
+    CAT_XDEFINE nnmm, %%i, %%i
201
     %assign %%i %%i+1
202
     %endrep
203
     %rep 8
204
     CAT_UNDEF m, %%i
205
-    CAT_UNDEF nmm, %%i
206
+    CAT_UNDEF nnmm, %%i
207
     %assign %%i %%i+1
208
     %endrep
209
     INIT_CPUFLAGS %1
210
@@ -828,7 +844,7 @@
211
     %assign %%i 0
212
     %rep num_mmregs
213
     CAT_XDEFINE m, %%i, xmm %+ %%i
214
-    CAT_XDEFINE nxmm, %%i, %%i
215
+    CAT_XDEFINE nnxmm, %%i, %%i
216
     %assign %%i %%i+1
217
     %endrep
218
     INIT_CPUFLAGS %1
219
@@ -865,7 +881,7 @@
220
     %define xmmxmm%1 xmm%1
221
     %define xmmymm%1 xmm%1
222
     %define ymmmm%1   mm%1
223
-    %define ymmxmm%1 ymm%1
224
+    %define ymmxmm%1 xmm%1
225
     %define ymmymm%1 ymm%1
226
     %define xm%1 xmm %+ m%1
227
     %define ym%1 ymm %+ m%1
228
@@ -898,7 +914,7 @@
229
 %endrep
230
 %rep %0/2
231
     %xdefine m%1 %%tmp%2
232
-    CAT_XDEFINE n, m%1, %1
233
+    CAT_XDEFINE nn, m%1, %1
234
     %rotate 2
235
 %endrep
236
 %endmacro
237
@@ -916,16 +932,16 @@
238
         %xdefine %%tmp m%1
239
         %xdefine m%1 m%2
240
         %xdefine m%2 %%tmp
241
-        CAT_XDEFINE n, m%1, %1
242
-        CAT_XDEFINE n, m%2, %2
243
+        CAT_XDEFINE nn, m%1, %1
244
+        CAT_XDEFINE nn, m%2, %2
245
     %rotate 1
246
     %endrep
247
 %endmacro
248
 
249
 %macro SWAP_INTERNAL_NAME 2-*
250
-    %xdefine %%args n %+ %1
251
+    %xdefine %%args nn %+ %1
252
     %rep %0-1
253
-        %xdefine %%args %%args, n %+ %2
254
+        %xdefine %%args %%args, nn %+ %2
255
     %rotate 1
256
     %endrep
257
     SWAP_INTERNAL_NUM %%args
258
@@ -952,7 +968,7 @@
259
         %assign %%i 0
260
         %rep num_mmregs
261
             CAT_XDEFINE m, %%i, %1_m %+ %%i
262
-            CAT_XDEFINE n, m %+ %%i, %%i
263
+            CAT_XDEFINE nn, m %+ %%i, %%i
264
         %assign %%i %%i+1
265
         %endrep
266
     %endif
267
@@ -1031,25 +1047,25 @@
268
 ;%5+: operands
269
 %macro RUN_AVX_INSTR 5-8+
270
     %ifnum sizeof%6
271
-        %assign %%sizeofreg sizeof%6
272
+        %assign __sizeofreg sizeof%6
273
     %elifnum sizeof%5
274
-        %assign %%sizeofreg sizeof%5
275
+        %assign __sizeofreg sizeof%5
276
     %else
277
-        %assign %%sizeofreg mmsize
278
+        %assign __sizeofreg mmsize
279
     %endif
280
-    %assign %%emulate_avx 0
281
-    %if avx_enabled && %%sizeofreg >= 16
282
-        %xdefine %%instr v%1
283
+    %assign __emulate_avx 0
284
+    %if avx_enabled && __sizeofreg >= 16
285
+        %xdefine __instr v%1
286
     %else
287
-        %xdefine %%instr %1
288
+        %xdefine __instr %1
289
         %if %0 >= 7+%3
290
-            %assign %%emulate_avx 1
291
+            %assign __emulate_avx 1
292
         %endif
293
     %endif
294
 
295
-    %if %%emulate_avx
296
-        %xdefine %%src1 %6
297
-        %xdefine %%src2 %7
298
+    %if __emulate_avx
299
+        %xdefine __src1 %6
300
+        %xdefine __src2 %7
301
         %ifnidn %5, %6
302
             %if %0 >= 8
303
                 CHECK_AVX_INSTR_EMU {%1 %5, %6, %7, %8}, %5, %7, %8
304
@@ -1061,31 +1077,31 @@
305
                     ; 3-operand AVX instructions with a memory arg can only have it in src2,
306
                     ; whereas SSE emulation prefers to have it in src1 (i.e. the mov).
307
                     ; So, if the instruction is commutative with a memory arg, swap them.
308
-                    %xdefine %%src1 %7
309
-                    %xdefine %%src2 %6
310
+                    %xdefine __src1 %7
311
+                    %xdefine __src2 %6
312
                 %endif
313
             %endif
314
-            %if %%sizeofreg == 8
315
-                MOVQ %5, %%src1
316
+            %if __sizeofreg == 8
317
+                MOVQ %5, __src1
318
             %elif %2
319
-                MOVAPS %5, %%src1
320
+                MOVAPS %5, __src1
321
             %else
322
-                MOVDQA %5, %%src1
323
+                MOVDQA %5, __src1
324
             %endif
325
         %endif
326
         %if %0 >= 8
327
-            %1 %5, %%src2, %8
328
+            %1 %5, __src2, %8
329
         %else
330
-            %1 %5, %%src2
331
+            %1 %5, __src2
332
         %endif
333
     %elif %0 >= 8
334
-        %%instr %5, %6, %7, %8
335
+        __instr %5, %6, %7, %8
336
     %elif %0 == 7
337
-        %%instr %5, %6, %7
338
+        __instr %5, %6, %7
339
     %elif %0 == 6
340
-        %%instr %5, %6
341
+        __instr %5, %6
342
     %else
343
-        %%instr %5
344
+        __instr %5
345
     %endif
346
 %endmacro
347
 
348
@@ -1384,15 +1400,18 @@
349
     %macro %1 4-7 %1, %2, %3
350
         %if cpuflag(xop)
351
             v%5 %1, %2, %3, %4
352
-        %else
353
+        %elifnidn %1, %4
354
             %6 %1, %2, %3
355
             %7 %1, %4
356
+        %else
357
+            %error non-xop emulation of ``%5 %1, %2, %3, %4'' is not supported
358
         %endif
359
     %endmacro
360
 %endmacro
361
 
362
-FMA_INSTR  pmacsdd,  pmulld, paddd
363
 FMA_INSTR  pmacsww,  pmullw, paddw
364
+FMA_INSTR  pmacsdd,  pmulld, paddd ; sse4 emulation
365
+FMA_INSTR pmacsdql,  pmuldq, paddq ; sse4 emulation
366
 FMA_INSTR pmadcswd, pmaddwd, paddd
367
 
368
 ; convert FMA4 to FMA3 if possible
369
x264-snapshot-20130723-2245.tar.bz2/common/x86/x86util.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/x86util.asm Changed
10
 
1
@@ -1,7 +1,7 @@
2
 ;*****************************************************************************
3
 ;* x86util.asm: x86 utility macros
4
 ;*****************************************************************************
5
-;* Copyright (C) 2008-2013 x264 project
6
+;* Copyright (C) 2008-2014 x264 project
7
 ;*
8
 ;* Authors: Holger Lubitz <holger@lubitz.org>
9
 ;*          Loren Merritt <lorenm@u.washington.edu>
10
x264-snapshot-20130723-2245.tar.bz2/configure -> x264-snapshot-20140321-2245.tar.bz2/configure Changed
346
 
1
@@ -30,7 +30,6 @@
2
   --disable-thread         disable multithreaded encoding
3
   --enable-win32thread     use win32threads (windows only)
4
   --disable-interlaced     disable interlaced encoding support
5
-  --enable-visualize       enable visualization (X11 only)
6
   --bit-depth=BIT_DEPTH    set output bit depth (8-10) [8]
7
   --chroma-format=FORMAT   output chroma format (420, 422, 444, all) [all]
8
 
9
@@ -52,6 +51,7 @@
10
   --disable-lavf           disable libavformat support
11
   --disable-ffms           disable ffmpegsource support
12
   --disable-gpac           disable gpac support
13
+  --disable-lsmash         disable lsmash support
14
 
15
 EOF
16
 exit 1
17
@@ -264,6 +264,8 @@
18
 lavf="auto"
19
 ffms="auto"
20
 gpac="auto"
21
+lsmash="auto"
22
+mp4="no"
23
 gpl="yes"
24
 thread="auto"
25
 swscale="auto"
26
@@ -273,7 +275,6 @@
27
 gprof="no"
28
 strip="no"
29
 pic="no"
30
-vis="no"
31
 bit_depth="8"
32
 chroma_format="all"
33
 compiler="GNU"
34
@@ -290,7 +291,8 @@
35
 EXE=""
36
 
37
 # list of all preprocessor HAVE values we can define
38
-CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F VISUALIZE SWSCALE LAVF FFMS GPAC GF_MALLOC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL"
39
+CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F SWSCALE \
40
+             LAVF FFMS GPAC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL THP LSMASH"
41
 
42
 # parse options
43
 
44
@@ -342,6 +344,9 @@
45
         --disable-gpac)
46
             gpac="no"
47
             ;;
48
+        --disable-lsmash)
49
+            lsmash="no"
50
+            ;;
51
         --disable-gpl)
52
             gpl="no"
53
             ;;
54
@@ -380,9 +385,6 @@
55
         --enable-pic)
56
             pic="yes"
57
             ;;
58
-        --enable-visualize)
59
-            vis="yes"
60
-            ;;
61
         --host=*)
62
             host="$optarg"
63
             ;;
64
@@ -423,6 +425,7 @@
65
 AR="${AR-${cross_prefix}ar}"
66
 RANLIB="${RANLIB-${cross_prefix}ranlib}"
67
 STRIP="${STRIP-${cross_prefix}strip}"
68
+INSTALL="${INSTALL-install}"
69
 
70
 if [ "x$host" = x ]; then
71
     host=`${SRCPATH}/config.guess`
72
@@ -503,12 +506,13 @@
73
             CFLAGS="$CFLAGS -mno-cygwin"
74
             LDFLAGS="$LDFLAGS -mno-cygwin"
75
         fi
76
-        if cpp_check "" "" "defined(__CYGWIN32__)" ; then
77
+        if cpp_check "" "" "defined(__CYGWIN__)" ; then
78
             define HAVE_MALLOC_H
79
             SYS="CYGWIN"
80
         else
81
             SYS="WINDOWS"
82
             DEVNULL="NUL"
83
+            LDFLAGSCLI="$LDFLAGSCLI -lshell32"
84
             RC="${RC-${cross_prefix}windres}"
85
         fi
86
         ;;
87
@@ -516,6 +520,7 @@
88
         SYS="WINDOWS"
89
         EXE=".exe"
90
         DEVNULL="NUL"
91
+        LDFLAGSCLI="$LDFLAGSCLI -lshell32"
92
         [ $compiler = ICL ] && RC="${RC-rc}" || RC="${RC-${cross_prefix}windres}"
93
         ;;
94
     sunos*|solaris*)
95
@@ -527,6 +532,15 @@
96
         else
97
             LDFLAGS="$LDFLAGS /usr/lib/values-xpg6.o"
98
         fi
99
+        if test -x /usr/ucb/install ; then
100
+            INSTALL=/usr/ucb/install
101
+        elif test -x /usr/bin/ginstall ; then
102
+            # OpenSolaris
103
+            INSTALL=/usr/bin/ginstall
104
+        elif test -x /usr/gnu/bin/install ; then
105
+            # OpenSolaris
106
+            INSTALL=/usr/gnu/bin/install
107
+        fi
108
         HAVE_GETOPT_LONG=0
109
         ;;
110
     *qnx*)
111
@@ -543,7 +557,7 @@
112
 
113
 LDFLAGS="$LDFLAGS $libm"
114
 
115
-aligned_stack=1
116
+stack_alignment=16
117
 case $host_cpu in
118
     i*86)
119
         ARCH="X86"
120
@@ -563,8 +577,7 @@
121
             if [ $SYS = LINUX ]; then
122
                 # < 11 is completely incapable of keeping a mod16 stack
123
                 if cpp_check "" "" "__INTEL_COMPILER < 1100" ; then
124
-                    define BROKEN_STACK_ALIGNMENT
125
-                    aligned_stack=0
126
+                    stack_alignment=4
127
                 # 11 <= x < 12 is capable of keeping a mod16 stack, but defaults to not doing so.
128
                 elif cpp_check "" "" "__INTEL_COMPILER < 1200" ; then
129
                     CFLAGS="$CFLAGS -falign-stack=assume-16-byte"
130
@@ -572,7 +585,7 @@
131
                 # >= 12 defaults to a mod16 stack
132
             fi
133
             # icl on windows has no mod16 stack support
134
-            [ $SYS = WINDOWS ] && define BROKEN_STACK_ALIGNMENT && aligned_stack=0
135
+            [ $SYS = WINDOWS ] && stack_alignment=4
136
         fi
137
         if [ "$SYS" = MACOSX ]; then
138
             ASFLAGS="$ASFLAGS -f macho -DPREFIX"
139
@@ -595,7 +608,7 @@
140
                 CFLAGS="$CFLAGS -arch x86_64"
141
                 LDFLAGS="$LDFLAGS -arch x86_64"
142
             fi
143
-        elif [ "$SYS" = WINDOWS ]; then
144
+        elif [ "$SYS" = WINDOWS -o "$SYS" = CYGWIN ]; then
145
             ASFLAGS="$ASFLAGS -f win32 -m amd64"
146
             # only the GNU toolchain is inconsistent in prefixing function names with _
147
             [ $compiler = GNU ] && cc_check "" "-S" && grep -q "_main:" conftest && ASFLAGS="$ASFLAGS -DPREFIX"
148
@@ -667,7 +680,6 @@
149
         ARCH="$(echo $host_cpu | tr a-z A-Z)"
150
         ;;
151
 esac
152
-ASFLAGS="$ASFLAGS -DHAVE_ALIGNED_STACK=${aligned_stack}"
153
 
154
 if [ $SYS = WINDOWS ]; then
155
     if ! rc_check "0 RCDATA {0}" ; then
156
@@ -719,10 +731,11 @@
157
         echo "If you really want to compile without asm, configure with --disable-asm."
158
         exit 1
159
     fi
160
+    ASFLAGS="$ASFLAGS -Worphan-labels"
161
     define HAVE_MMX
162
-    if cc_check '' -mpreferred-stack-boundary=5 ; then
163
+    if [ $compiler = GNU ] && cc_check '' -mpreferred-stack-boundary=5 ; then
164
         CFLAGS="$CFLAGS -mpreferred-stack-boundary=5"
165
-        define HAVE_32B_STACK_ALIGNMENT
166
+        stack_alignment=32
167
     fi
168
 fi
169
 
170
@@ -747,6 +760,9 @@
171
 define ARCH_$ARCH
172
 define SYS_$SYS
173
 
174
+define STACK_ALIGNMENT $stack_alignment
175
+ASFLAGS="$ASFLAGS -DSTACK_ALIGNMENT=$stack_alignment"
176
+
177
 # skip endianness check for Intel Compiler, as all supported platforms are little. the -ipo flag will also cause the check to fail
178
 if [ $compiler = GNU ]; then
179
     echo "int i[2] = {0x42494745,0}; double f[2] = {0x1.0656e6469616ep+102,0};" > conftest.c
180
@@ -792,10 +808,15 @@
181
             fi
182
             ;;
183
         QNX)
184
-            cc_check pthread.h -lc && thread="posix" && libpthread="-lc"
185
+            cc_check pthread.h -lc "pthread_create(0,0,0,0);" && thread="posix" && libpthread="-lc"
186
             ;;
187
         *)
188
-            cc_check pthread.h -lpthread && thread="posix" && libpthread="-lpthread"
189
+            if cc_check pthread.h -lpthread "pthread_create(0,0,0,0);" ; then
190
+               thread="posix"
191
+               libpthread="-lpthread"
192
+            else
193
+                cc_check pthread.h "" "pthread_create(0,0,0,0);" && thread="posix" && libpthread=""
194
+            fi
195
             ;;
196
     esac
197
 fi
198
@@ -820,16 +841,8 @@
199
     define HAVE_LOG2F
200
 fi
201
 
202
-if [ "$vis" = "yes" ] ; then
203
-    save_CFLAGS="$CFLAGS"
204
-    CFLAGS="$CFLAGS -I/usr/X11R6/include"
205
-    if cc_check "X11/Xlib.h" "-L/usr/X11R6/lib -lX11" "XOpenDisplay(0);" ; then
206
-        LDFLAGS="-L/usr/X11R6/lib -lX11 $LDFLAGS"
207
-        define HAVE_VISUALIZE
208
-    else
209
-        vis="no"
210
-        CFLAGS="$save_CFLAGS"
211
-   fi
212
+if [ "$SYS" = "LINUX" -a \( "$ARCH" = "X86" -o "$ARCH" = "X86_64" \) ] && cc_check "sys/mman.h" "" "MADV_HUGEPAGE;" ; then
213
+    define HAVE_THP
214
 fi
215
 
216
 if [ "$swscale" = "auto" ] ; then
217
@@ -841,10 +854,10 @@
218
     [ -z "$SWSCALE_LIBS" ] && SWSCALE_LIBS="-lswscale -lavutil"
219
 
220
     if cc_check "libswscale/swscale.h" "$SWSCALE_CFLAGS $SWSCALE_LIBS" "sws_init_context(0,0,0);" ; then
221
-        if cpp_check "libavutil/pixdesc.h" "$SWSCALE_CFLAGS $SWSCALE_LIBS" "defined(PIX_FMT_RGB)" ; then
222
+        if cpp_check "libavutil/pixdesc.h" "$SWSCALE_CFLAGS $SWSCALE_LIBS" "defined(AV_PIX_FMT_FLAG_RGB)" ; then
223
             swscale="yes"
224
         else
225
-            echo "Warning: PIX_FMT_RGB is missing from libavutil, update for swscale support"
226
+            echo "Warning: AV_PIX_FMT_FLAG_RGB is missing from libavutil, update for swscale support"
227
         fi
228
     fi
229
 fi
230
@@ -857,7 +870,7 @@
231
     fi
232
     if [ -z "$LAVF_LIBS" -a -z "$LAVF_CFLAGS" ]; then
233
         LAVF_LIBS="-lavformat"
234
-        for lib in -lpostproc -lavcodec -lavcore -lswscale -lavutil -lm -lz -lbz2 $libpthread -lavifil32; do
235
+        for lib in -lpostproc -lavcodec -lavcore -lswscale -lavutil -lm -lz -lbz2 $libpthread -lavifil32 -lws2_32; do
236
             cc_check "" $lib && LAVF_LIBS="$LAVF_LIBS $lib"
237
         done
238
     fi
239
@@ -915,11 +928,30 @@
240
     fi
241
 fi
242
 
243
-if [ "$gpac" = "auto" ] ; then
244
+if [ "$lsmash" = "auto" ] ; then
245
+    lsmash="no"
246
+    if ${cross_prefix}pkg-config --exists liblsmash 2>/dev/null; then
247
+        LSMASH_LIBS="$LSMASH_LIBS $(${cross_prefix}pkg-config --libs liblsmash)"
248
+        LSMASH_CFLAGS="$LSMASH_CFLAGS $(${cross_prefix}pkg-config --cflags liblsmash)"
249
+    fi
250
+    [ -z "$LSMASH_LIBS" ] && LSMASH_LIBS="-llsmash"
251
+
252
+    if cc_check lsmash.h "$LSMASH_CFLAGS $LSMASH_LIBS" ; then
253
+        if cpp_check lsmash.h "$LSMASH_CFLAGS" "LSMASH_VERSION_MAJOR > 0 || (LSMASH_VERSION_MAJOR == 0 && LSMASH_VERSION_MINOR >= 1)" ; then
254
+            lsmash="yes"
255
+        else
256
+            echo "Warning: lsmash is too old, update to rev.751 or later"
257
+        fi
258
+    fi
259
+fi
260
+
261
+if [ "$gpac" = "auto" -a "$lsmash" != "yes" ] ; then
262
     gpac="no"
263
-    cc_check "" -lz && GPAC_LIBS="-lgpac_static -lz" || GPAC_LIBS="-lgpac_static"
264
+    GPAC_LIBS="-lgpac_static"
265
+    cc_check "" -lz && GPAC_LIBS="$GPAC_LIBS -lz"
266
     if [ "$SYS" = "WINDOWS" ] ; then
267
-        GPAC_LIBS="$GPAC_LIBS -lwinmm"
268
+        cc_check "" -lws2_32 && GPAC_LIBS="$GPAC_LIBS -lws2_32"
269
+        cc_check "" -lwinmm && GPAC_LIBS="$GPAC_LIBS -lwinmm"
270
     fi
271
     if cc_check gpac/isomedia.h "$GPAC_LIBS" ; then
272
         if cc_check gpac/isomedia.h "$GPAC_LIBS" "gf_isom_set_pixel_aspect_ratio(0,0,0,0,0);" ; then
273
@@ -929,18 +961,22 @@
274
         fi
275
     fi
276
 fi
277
-if [ "$gpac" = "yes" ] ; then
278
+
279
+if [ "$lsmash" = "yes" ] ; then
280
+    mp4="lsmash"
281
+    LDFLAGSCLI="$LSMASH_LIBS $LDFLAGSCLI"
282
+    CFLAGS="$CFLAGS $LSMASH_CFLAGS"
283
+    define HAVE_LSMASH
284
+elif [ "$gpac" = "yes" ] ; then
285
+    mp4="gpac"
286
     define HAVE_GPAC
287
-    if cc_check gpac/isomedia.h "-Werror $GPAC_LIBS" "void *p; p = gf_malloc(1); gf_free(p);" ; then
288
-        define HAVE_GF_MALLOC
289
-    fi
290
     LDFLAGSCLI="$GPAC_LIBS $LDFLAGSCLI"
291
 fi
292
 
293
 if [ "$avs" = "auto" ] ; then
294
     avs="no"
295
     # cygwin can use avisynth if it can use LoadLibrary
296
-    if [ $SYS = WINDOWS ] || ([ $SYS = CYGWIN ] && cc_check windows.h "" "LoadLibrary(0);") ; then
297
+    if [ $SYS = WINDOWS ] || ([ $SYS = CYGWIN ] && cc_check windows.h "" "LoadLibraryW(0);") ; then
298
         avs="avisynth"
299
         define HAVE_AVS
300
         define USE_AVXSYNTH 0
301
@@ -1038,7 +1074,7 @@
302
     fi
303
     log_ok
304
     # cygwin can use opencl if it can use LoadLibrary
305
-    if [ $SYS = WINDOWS ] || ([ $SYS = CYGWIN ] && cc_check windows.h "" "LoadLibrary(0);") ; then
306
+    if [ $SYS = WINDOWS ] || ([ $SYS = CYGWIN ] && cc_check windows.h "" "LoadLibraryW(0);") ; then
307
         opencl="yes"
308
         define HAVE_OPENCL
309
     elif [ "$SYS" = "LINUX" -o "$SYS" = "MACOSX" ] ; then
310
@@ -1129,6 +1165,7 @@
311
 AR=$AR
312
 RANLIB=$RANLIB
313
 STRIP=$STRIP
314
+INSTALL=$INSTALL
315
 AS=$AS
316
 ASFLAGS=$ASFLAGS
317
 RC=$RC
318
@@ -1219,8 +1256,8 @@
319
 Name: x264
320
 Description: H.264 (MPEG4 AVC) encoder library
321
 Version: $(grep POINTVER < x264_config.h | sed -e 's/.* "//; s/".*//')
322
-Libs: -L$libdir -lx264
323
-Libs.private: $libpthread $libm $libdl
324
+Libs: -L$libdir -lx264 $([ "$shared" = "yes" ] || echo $libpthread $libm $libdl)
325
+Libs.private: $([ "$shared" = "yes" ] && echo $libpthread $libm $libdl)
326
 Cflags: -I$includedir
327
 EOF
328
 
329
@@ -1241,7 +1278,7 @@
330
 avs:           $avs
331
 lavf:          $lavf
332
 ffms:          $ffms
333
-gpac:          $gpac
334
+mp4:           $mp4
335
 gpl:           $gpl
336
 thread:        $thread
337
 opencl:        $opencl
338
@@ -1250,7 +1287,6 @@
339
 gprof:         $gprof
340
 strip:         $strip
341
 PIC:           $pic
342
-visualize:     $vis
343
 bit depth:     $bit_depth
344
 chroma format: $chroma_format
345
 EOF
346
x264-snapshot-20130723-2245.tar.bz2/encoder/analyse.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/analyse.c Changed
193
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * analyse.c: macroblock analysis
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
  *          Loren Merritt <lorenm@u.washington.edu>
10
@@ -436,7 +436,7 @@
11
     /* non-RD PCM decision is inaccurate (as is psy-rd), so don't do it.
12
      * PCM cost can overflow with high lambda2, so cap it at COST_MAX. */
13
     uint64_t pcm_cost = ((uint64_t)X264_PCM_COST*a->i_lambda2 + 128) >> 8;
14
-    a->i_satd_pcm = !h->mb.i_psy_rd && a->i_mbrd && pcm_cost < COST_MAX ? pcm_cost : COST_MAX;
15
+    a->i_satd_pcm = !h->param.i_avcintra_class && !h->mb.i_psy_rd && a->i_mbrd && pcm_cost < COST_MAX ? pcm_cost : COST_MAX;
16
 
17
     a->b_fast_intra = 0;
18
     a->b_avoid_topright = 0;
19
@@ -618,6 +618,24 @@
20
     {I_PRED_CHROMA_V, I_PRED_CHROMA_H, I_PRED_CHROMA_DC, I_PRED_CHROMA_P, -1},
21
 };
22
 
23
+static const int8_t i8x8_mode_available[2][5][10] =
24
+{
25
+    {
26
+        {I_PRED_4x4_DC_128, -1, -1, -1, -1, -1, -1, -1, -1, -1},
27
+        {I_PRED_4x4_DC_LEFT, I_PRED_4x4_H, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1},
28
+        {I_PRED_4x4_DC_TOP, I_PRED_4x4_V, I_PRED_4x4_DDL, I_PRED_4x4_VL, -1, -1, -1, -1, -1, -1},
29
+        {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDL, I_PRED_4x4_VL, I_PRED_4x4_HU, -1, -1, -1, -1},
30
+        {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDL, I_PRED_4x4_DDR, I_PRED_4x4_VR, I_PRED_4x4_HD, I_PRED_4x4_VL, I_PRED_4x4_HU, -1},
31
+    },
32
+    {
33
+        {I_PRED_4x4_DC_128, -1, -1, -1, -1, -1, -1, -1, -1, -1},
34
+        {I_PRED_4x4_DC_LEFT, I_PRED_4x4_H, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1},
35
+        {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
36
+        {I_PRED_4x4_H, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1, -1},
37
+        {I_PRED_4x4_H, I_PRED_4x4_HD, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1},
38
+    }
39
+};
40
+
41
 static const int8_t i4x4_mode_available[2][5][10] =
42
 {
43
     {
44
@@ -632,7 +650,7 @@
45
         {I_PRED_4x4_DC_LEFT, I_PRED_4x4_H, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1},
46
         {I_PRED_4x4_DC_TOP, I_PRED_4x4_V, -1, -1, -1, -1, -1, -1, -1, -1},
47
         {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1},
48
-        {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDR, I_PRED_4x4_VR, I_PRED_4x4_HD, I_PRED_4x4_HU, -1},
49
+        {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDR, I_PRED_4x4_VR, I_PRED_4x4_HD, I_PRED_4x4_HU, -1, -1, -1},
50
     }
51
 };
52
 
53
@@ -655,7 +673,7 @@
54
     int avoid_topright = force_intra && (i&1);
55
     int idx = i_neighbour & (MB_TOP|MB_LEFT|MB_TOPLEFT);
56
     idx = (idx == (MB_TOP|MB_LEFT|MB_TOPLEFT)) ? 4 : idx & (MB_TOP|MB_LEFT);
57
-    return i4x4_mode_available[avoid_topright][idx];
58
+    return i8x8_mode_available[avoid_topright][idx];
59
 }
60
 
61
 static ALWAYS_INLINE const int8_t *predict_4x4_mode_available( int force_intra, int i_neighbour, int i )
62
@@ -793,58 +811,60 @@
63
     int lambda = a->i_lambda;
64
 
65
     /*---------------- Try all mode and calculate their score ---------------*/
66
+    /* Disabled i16x16 for AVC-Intra compat */
67
+    if( !h->param.i_avcintra_class )
68
+    {
69
+        const int8_t *predict_mode = predict_16x16_mode_available( h->mb.i_neighbour_intra );
70
 
71
-    /* 16x16 prediction selection */
72
-    const int8_t *predict_mode = predict_16x16_mode_available( h->mb.i_neighbour_intra );
73
+        /* Not heavily tuned */
74
+        static const uint8_t i16x16_thresh_lut[11] = { 2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4 };
75
+        int i16x16_thresh = a->b_fast_intra ? (i16x16_thresh_lut[h->mb.i_subpel_refine]*i_satd_inter)>>1 : COST_MAX;
76
 
77
-    /* Not heavily tuned */
78
-    static const uint8_t i16x16_thresh_lut[11] = { 2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4 };
79
-    int i16x16_thresh = a->b_fast_intra ? (i16x16_thresh_lut[h->mb.i_subpel_refine]*i_satd_inter)>>1 : COST_MAX;
80
-
81
-    if( !h->mb.b_lossless && predict_mode[3] >= 0 )
82
-    {
83
-        h->pixf.intra_mbcmp_x3_16x16( p_src, p_dst, a->i_satd_i16x16_dir );
84
-        a->i_satd_i16x16_dir[0] += lambda * bs_size_ue(0);
85
-        a->i_satd_i16x16_dir[1] += lambda * bs_size_ue(1);
86
-        a->i_satd_i16x16_dir[2] += lambda * bs_size_ue(2);
87
-        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[0], a->i_predict16x16, 0 );
88
-        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[1], a->i_predict16x16, 1 );
89
-        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[2], a->i_predict16x16, 2 );
90
-
91
-        /* Plane is expensive, so don't check it unless one of the previous modes was useful. */
92
-        if( a->i_satd_i16x16 <= i16x16_thresh )
93
-        {
94
-            h->predict_16x16[I_PRED_16x16_P]( p_dst );
95
-            a->i_satd_i16x16_dir[I_PRED_16x16_P] = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE );
96
-            a->i_satd_i16x16_dir[I_PRED_16x16_P] += lambda * bs_size_ue(3);
97
-            COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[I_PRED_16x16_P], a->i_predict16x16, 3 );
98
+        if( !h->mb.b_lossless && predict_mode[3] >= 0 )
99
+        {
100
+            h->pixf.intra_mbcmp_x3_16x16( p_src, p_dst, a->i_satd_i16x16_dir );
101
+            a->i_satd_i16x16_dir[0] += lambda * bs_size_ue(0);
102
+            a->i_satd_i16x16_dir[1] += lambda * bs_size_ue(1);
103
+            a->i_satd_i16x16_dir[2] += lambda * bs_size_ue(2);
104
+            COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[0], a->i_predict16x16, 0 );
105
+            COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[1], a->i_predict16x16, 1 );
106
+            COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[2], a->i_predict16x16, 2 );
107
+
108
+            /* Plane is expensive, so don't check it unless one of the previous modes was useful. */
109
+            if( a->i_satd_i16x16 <= i16x16_thresh )
110
+            {
111
+                h->predict_16x16[I_PRED_16x16_P]( p_dst );
112
+                a->i_satd_i16x16_dir[I_PRED_16x16_P] = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE );
113
+                a->i_satd_i16x16_dir[I_PRED_16x16_P] += lambda * bs_size_ue(3);
114
+                COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[I_PRED_16x16_P], a->i_predict16x16, 3 );
115
+            }
116
         }
117
-    }
118
-    else
119
-    {
120
-        for( ; *predict_mode >= 0; predict_mode++ )
121
+        else
122
         {
123
-            int i_satd;
124
-            int i_mode = *predict_mode;
125
+            for( ; *predict_mode >= 0; predict_mode++ )
126
+            {
127
+                int i_satd;
128
+                int i_mode = *predict_mode;
129
 
130
-            if( h->mb.b_lossless )
131
-                x264_predict_lossless_16x16( h, 0, i_mode );
132
-            else
133
-                h->predict_16x16[i_mode]( p_dst );
134
+                if( h->mb.b_lossless )
135
+                    x264_predict_lossless_16x16( h, 0, i_mode );
136
+                else
137
+                    h->predict_16x16[i_mode]( p_dst );
138
 
139
-            i_satd = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE ) +
140
-                     lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] );
141
-            COPY2_IF_LT( a->i_satd_i16x16, i_satd, a->i_predict16x16, i_mode );
142
-            a->i_satd_i16x16_dir[i_mode] = i_satd;
143
+                i_satd = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE ) +
144
+                         lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] );
145
+                COPY2_IF_LT( a->i_satd_i16x16, i_satd, a->i_predict16x16, i_mode );
146
+                a->i_satd_i16x16_dir[i_mode] = i_satd;
147
+            }
148
         }
149
-    }
150
 
151
-    if( h->sh.i_type == SLICE_TYPE_B )
152
-        /* cavlc mb type prefix */
153
-        a->i_satd_i16x16 += lambda * i_mb_b_cost_table[I_16x16];
154
+        if( h->sh.i_type == SLICE_TYPE_B )
155
+            /* cavlc mb type prefix */
156
+            a->i_satd_i16x16 += lambda * i_mb_b_cost_table[I_16x16];
157
 
158
-    if( a->i_satd_i16x16 > i16x16_thresh )
159
-        return;
160
+        if( a->i_satd_i16x16 > i16x16_thresh )
161
+            return;
162
+    }
163
 
164
     uint16_t *cost_i4x4_mode = (uint16_t*)ALIGN((intptr_t)x264_cost_i4x4_mode,64) + a->i_qp*32 + 8;
165
     /* 8x8 prediction selection */
166
@@ -870,7 +890,7 @@
167
             int i_best = COST_MAX;
168
             int i_pred_mode = x264_mb_predict_intra4x4_mode( h, 4*idx );
169
 
170
-            predict_mode = predict_8x8_mode_available( a->b_avoid_topright, h->mb.i_neighbour8[idx], idx );
171
+            const int8_t *predict_mode = predict_8x8_mode_available( a->b_avoid_topright, h->mb.i_neighbour8[idx], idx );
172
             h->predict_8x8_filter( p_dst_by, edge, h->mb.i_neighbour8[idx], ALL_NEIGHBORS );
173
 
174
             if( h->pixf.intra_mbcmp_x9_8x8 && predict_mode[8] >= 0 )
175
@@ -985,7 +1005,7 @@
176
             int i_best = COST_MAX;
177
             int i_pred_mode = x264_mb_predict_intra4x4_mode( h, idx );
178
 
179
-            predict_mode = predict_4x4_mode_available( a->b_avoid_topright, h->mb.i_neighbour4[idx], idx );
180
+            const int8_t *predict_mode = predict_4x4_mode_available( a->b_avoid_topright, h->mb.i_neighbour4[idx], idx );
181
 
182
             if( (h->mb.i_neighbour4[idx] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )
183
                 /* emulate missing topright samples */
184
@@ -2101,7 +2121,7 @@
185
         int cost00 = h->pixf.mbcmp[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE, pix0, 16 )
186
                    + ref_costs + l0_mv_cost + l1_mv_cost;
187
 
188
-        if( h->mb.b_chroma_me )
189
+        if( h->mb.b_chroma_me && cost00 < a->i_cost16x16bi )
190
         {
191
             ALIGNED_ARRAY_16( pixel, bi, [16*FENC_STRIDE] );
192
 
193
x264-snapshot-20130723-2245.tar.bz2/encoder/analyse.h -> x264-snapshot-20140321-2245.tar.bz2/encoder/analyse.h Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * analyse.h: macroblock analysis
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
  *          Loren Merritt <lorenm@u.washington.edu>
10
x264-snapshot-20130723-2245.tar.bz2/encoder/cabac.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/cabac.c Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * cabac.c: cabac bitstream writing
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
  *          Loren Merritt <lorenm@u.washington.edu>
10
x264-snapshot-20130723-2245.tar.bz2/encoder/cavlc.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/cavlc.c Changed
20
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * cavlc.c: cavlc bitstream writing
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
  *          Loren Merritt <lorenm@u.washington.edu>
10
@@ -500,6 +500,9 @@
11
         && (!(h->mb.i_mb_y & 1) || IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride])) )
12
     {
13
         bs_write1( s, MB_INTERLACED );
14
+#if !RDO_SKIP_BS
15
+        h->mb.field_decoding_flag = MB_INTERLACED;
16
+#endif
17
     }
18
 
19
 #if !RDO_SKIP_BS
20
x264-snapshot-20130723-2245.tar.bz2/encoder/encoder.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/encoder.c Changed
923
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * encoder.c: top-level encoder functions
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
  *          Loren Merritt <lorenm@u.washington.edu>
10
@@ -33,10 +33,6 @@
11
 #include "macroblock.h"
12
 #include "me.h"
13
 
14
-#if HAVE_VISUALIZE
15
-#include "common/visualize.h"
16
-#endif
17
-
18
 //#define DEBUG_MB_TYPE
19
 
20
 #define bs_write_ue bs_write_ue_big
21
@@ -82,7 +78,7 @@
22
 
23
 static void x264_frame_dump( x264_t *h )
24
 {
25
-    FILE *f = fopen( h->param.psz_dump_yuv, "r+b" );
26
+    FILE *f = x264_fopen( h->param.psz_dump_yuv, "r+b" );
27
     if( !f )
28
         return;
29
 
30
@@ -403,21 +399,6 @@
31
 {
32
     if( h->param.i_sync_lookahead )
33
         x264_lower_thread_priority( 10 );
34
-
35
-#if HAVE_MMX
36
-    /* Misalign mask has to be set separately for each thread. */
37
-    if( h->param.cpu&X264_CPU_SSE_MISALIGN )
38
-        x264_cpu_mask_misalign_sse();
39
-#endif
40
-}
41
-
42
-static void x264_lookahead_thread_init( x264_t *h )
43
-{
44
-#if HAVE_MMX
45
-    /* Misalign mask has to be set separately for each thread. */
46
-    if( h->param.cpu&X264_CPU_SSE_MISALIGN )
47
-        x264_cpu_mask_misalign_sse();
48
-#endif
49
 }
50
 #endif
51
 
52
@@ -486,7 +467,7 @@
53
         x264_log( h, X264_LOG_ERROR, "not compiled with 4:2:0 support\n" );
54
         return -1;
55
     }
56
-    else if( CHROMA_FORMAT != CHROMA_422 && i_csp >= X264_CSP_I422 && i_csp <= X264_CSP_NV16 )
57
+    else if( CHROMA_FORMAT != CHROMA_422 && i_csp >= X264_CSP_I422 && i_csp <= X264_CSP_V210 )
58
     {
59
         x264_log( h, X264_LOG_ERROR, "not compiled with 4:2:2 support\n" );
60
         return -1;
61
@@ -532,6 +513,12 @@
62
         return -1;
63
     }
64
 
65
+    if( h->param.vui.i_sar_width <= 0 || h->param.vui.i_sar_height <= 0 )
66
+    {
67
+        h->param.vui.i_sar_width = 0;
68
+        h->param.vui.i_sar_height = 0;
69
+    }
70
+
71
     if( h->param.i_threads == X264_THREADS_AUTO )
72
         h->param.i_threads = x264_cpu_num_processors() * (h->param.b_sliced_threads?2:3)/2;
73
     int max_sliced_threads = X264_MAX( 1, (h->param.i_height+15)/16 / 4 );
74
@@ -583,6 +570,8 @@
75
     {
76
         h->param.b_intra_refresh = 0;
77
         h->param.analyse.i_weighted_pred = 0;
78
+        h->param.i_frame_reference = 1;
79
+        h->param.i_dpb_size = 1;
80
     }
81
 
82
     h->param.i_frame_packing = x264_clip3( h->param.i_frame_packing, -1, 5 );
83
@@ -616,6 +605,188 @@
84
         x264_log( h, X264_LOG_ERROR, "no ratecontrol method specified\n" );
85
         return -1;
86
     }
87
+
88
+    if( PARAM_INTERLACED )
89
+        h->param.b_pic_struct = 1;
90
+
91
+    if( h->param.i_avcintra_class )
92
+    {
93
+        if( BIT_DEPTH != 10 )
94
+        {
95
+            x264_log( h, X264_LOG_ERROR, "%2d-bit AVC-Intra is not widely compatible\n", BIT_DEPTH );
96
+            x264_log( h, X264_LOG_ERROR, "10-bit x264 is required to encode AVC-Intra\n" );
97
+            return -1;
98
+        }
99
+
100
+        int type = h->param.i_avcintra_class == 200 ? 2 :
101
+                   h->param.i_avcintra_class == 100 ? 1 :
102
+                   h->param.i_avcintra_class == 50 ? 0 : -1;
103
+        if( type < 0 )
104
+        {
105
+            x264_log( h, X264_LOG_ERROR, "Invalid AVC-Intra class\n" );
106
+            return -1;
107
+        }
108
+
109
+        /* [50/100/200][res][fps] */
110
+        static const struct
111
+        {
112
+            uint16_t fps_num;
113
+            uint16_t fps_den;
114
+            uint8_t interlaced;
115
+            uint16_t frame_size;
116
+            const uint8_t *cqm_4ic;
117
+            const uint8_t *cqm_8iy;
118
+        } avcintra_lut[3][2][7] =
119
+        {
120
+            {{{ 60000, 1001, 0,  912, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
121
+              {    50,    1, 0, 1100, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
122
+              { 30000, 1001, 0,  912, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
123
+              {    25,    1, 0, 1100, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
124
+              { 24000, 1001, 0,  912, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy }},
125
+             {{ 30000, 1001, 1, 1820, x264_cqm_avci50_4ic, x264_cqm_avci50_1080i_8iy },
126
+              {    25,    1, 1, 2196, x264_cqm_avci50_4ic, x264_cqm_avci50_1080i_8iy },
127
+              { 60000, 1001, 0, 1820, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
128
+              { 30000, 1001, 0, 1820, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
129
+              {    50,    1, 0, 2196, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
130
+              {    25,    1, 0, 2196, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
131
+              { 24000, 1001, 0, 1820, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy }}},
132
+            {{{ 60000, 1001, 0, 1848, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy },
133
+              {    50,    1, 0, 2224, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy },
134
+              { 30000, 1001, 0, 1848, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy },
135
+              {    25,    1, 0, 2224, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy },
136
+              { 24000, 1001, 0, 1848, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy }},
137
+             {{ 30000, 1001, 1, 3692, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080i_8iy },
138
+              {    25,    1, 1, 4444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080i_8iy },
139
+              { 60000, 1001, 0, 3692, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
140
+              { 30000, 1001, 0, 3692, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
141
+              {    50,    1, 0, 4444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
142
+              {    25,    1, 0, 4444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
143
+              { 24000, 1001, 0, 3692, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy }}},
144
+            {{{ 60000, 1001, 0, 3724, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy },
145
+              {    50,    1, 0, 4472, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy }},
146
+             {{ 30000, 1001, 1, 7444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080i_8iy },
147
+              {    25,    1, 1, 8940, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080i_8iy },
148
+              { 60000, 1001, 0, 7444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
149
+              { 30000, 1001, 0, 7444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
150
+              {    50,    1, 0, 8940, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
151
+              {    25,    1, 0, 8940, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
152
+              { 24000, 1001, 0, 7444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy }}}
153
+        };
154
+
155
+        int res = -1;
156
+        if( i_csp >= X264_CSP_I420 && i_csp < X264_CSP_I422 && !type )
157
+        {
158
+            if(      h->param.i_width == 1440 && h->param.i_height == 1080 ) res =  1;
159
+            else if( h->param.i_width ==  960 && h->param.i_height ==  720 ) res =  0;
160
+        }
161
+        else if( i_csp >= X264_CSP_I422 && i_csp < X264_CSP_I444 && type )
162
+        {
163
+            if(      h->param.i_width == 1920 && h->param.i_height == 1080 ) res =  1;
164
+            else if( h->param.i_width == 1280 && h->param.i_height ==  720 ) res =  0;
165
+        }
166
+        else
167
+        {
168
+            x264_log( h, X264_LOG_ERROR, "Invalid colorspace for AVC-Intra %d\n", h->param.i_avcintra_class );
169
+            return -1;
170
+        }
171
+
172
+        if( res < 0 )
173
+        {
174
+            x264_log( h, X264_LOG_ERROR, "Resolution %dx%d invalid for AVC-Intra %d\n",
175
+                      h->param.i_width, h->param.i_height, h->param.i_avcintra_class );
176
+            return -1;
177
+        }
178
+
179
+        if( h->param.nalu_process )
180
+        {
181
+            x264_log( h, X264_LOG_ERROR, "nalu_process is not supported in AVC-Intra mode\n" );
182
+            return -1;
183
+        }
184
+
185
+        if( !h->param.b_repeat_headers )
186
+        {
187
+            x264_log( h, X264_LOG_ERROR, "Separate headers not supported in AVC-Intra mode\n" );
188
+            return -1;
189
+        }
190
+
191
+        int i;
192
+        uint32_t fps_num = h->param.i_fps_num, fps_den = h->param.i_fps_den;
193
+        x264_reduce_fraction( &fps_num, &fps_den );
194
+        for( i = 0; i < 7; i++ )
195
+        {
196
+            if( avcintra_lut[type][res][i].fps_num == fps_num &&
197
+                avcintra_lut[type][res][i].fps_den == fps_den &&
198
+                avcintra_lut[type][res][i].interlaced == PARAM_INTERLACED )
199
+            {
200
+                break;
201
+            }
202
+        }
203
+        if( i == 7 )
204
+        {
205
+            x264_log( h, X264_LOG_ERROR, "FPS %d/%d%c not compatible with AVC-Intra\n",
206
+                      h->param.i_fps_num, h->param.i_fps_den, PARAM_INTERLACED ? 'i' : 'p' );
207
+            return -1;
208
+        }
209
+
210
+        h->param.i_keyint_max = 1;
211
+        h->param.b_intra_refresh = 0;
212
+        h->param.analyse.i_weighted_pred = 0;
213
+        h->param.i_frame_reference = 1;
214
+        h->param.i_dpb_size = 1;
215
+
216
+        h->param.b_bluray_compat = 0;
217
+        h->param.b_vfr_input = 0;
218
+        h->param.b_aud = 1;
219
+        h->param.vui.i_chroma_loc = 0;
220
+        h->param.i_nal_hrd = X264_NAL_HRD_NONE;
221
+        h->param.b_deblocking_filter = 0;
222
+        h->param.b_stitchable = 1;
223
+        h->param.b_pic_struct = 0;
224
+        h->param.analyse.b_transform_8x8 = 1;
225
+        h->param.analyse.intra = X264_ANALYSE_I8x8;
226
+        h->param.analyse.i_chroma_qp_offset = res && type ? 3 : 4;
227
+        h->param.b_cabac = !type;
228
+        h->param.rc.i_vbv_buffer_size = avcintra_lut[type][res][i].frame_size;
229
+        h->param.rc.i_vbv_max_bitrate =
230
+        h->param.rc.i_bitrate = h->param.rc.i_vbv_buffer_size * fps_num / fps_den;
231
+        h->param.rc.i_rc_method = X264_RC_ABR;
232
+        h->param.rc.f_vbv_buffer_init = 1.0;
233
+        h->param.rc.b_filler = 1;
234
+        h->param.i_cqm_preset = X264_CQM_CUSTOM;
235
+        memcpy( h->param.cqm_4iy, x264_cqm_jvt4i, sizeof(h->param.cqm_4iy) );
236
+        memcpy( h->param.cqm_4ic, avcintra_lut[type][res][i].cqm_4ic, sizeof(h->param.cqm_4ic) );
237
+        memcpy( h->param.cqm_8iy, avcintra_lut[type][res][i].cqm_8iy, sizeof(h->param.cqm_8iy) );
238
+
239
+        /* Need exactly 10 slices of equal MB count... why?  $deity knows... */
240
+        h->param.i_slice_max_mbs = ((h->param.i_width + 15) / 16) * ((h->param.i_height + 15) / 16) / 10;
241
+        h->param.i_slice_max_size = 0;
242
+        /* The slice structure only allows a maximum of 2 threads for 1080i/p
243
+         * and 1 or 5 threads for 720p */
244
+        if( h->param.b_sliced_threads )
245
+        {
246
+            if( res )
247
+                h->param.i_threads = X264_MIN( 2, h->param.i_threads );
248
+            else
249
+            {
250
+                h->param.i_threads = X264_MIN( 5, h->param.i_threads );
251
+                if( h->param.i_threads < 5 )
252
+                    h->param.i_threads = 1;
253
+            }
254
+        }
255
+
256
+        if( type )
257
+            h->param.vui.i_sar_width = h->param.vui.i_sar_height = 1;
258
+        else
259
+        {
260
+            h->param.vui.i_sar_width  = 4;
261
+            h->param.vui.i_sar_height = 3;
262
+        }
263
+
264
+        /* Official encoder doesn't appear to go under 13
265
+         * and Avid cannot handle negative QPs */
266
+        h->param.rc.i_qp_min = X264_MAX( h->param.rc.i_qp_min, QP_BD_OFFSET + 1 );
267
+    }
268
+
269
     h->param.rc.f_rf_constant = x264_clip3f( h->param.rc.f_rf_constant, -QP_BD_OFFSET, 51 );
270
     h->param.rc.f_rf_constant_max = x264_clip3f( h->param.rc.f_rf_constant_max, -QP_BD_OFFSET, 51 );
271
     h->param.rc.i_qp_constant = x264_clip3( h->param.rc.i_qp_constant, 0, QP_MAX );
272
@@ -917,10 +1088,10 @@
273
         h->param.analyse.i_chroma_qp_offset += 6;
274
     /* Psy RDO increases overall quantizers to improve the quality of luma--this indirectly hurts chroma quality */
275
     /* so we lower the chroma QP offset to compensate */
276
-    if( b_open && h->mb.i_psy_rd )
277
+    if( b_open && h->mb.i_psy_rd && !h->param.i_avcintra_class )
278
         h->param.analyse.i_chroma_qp_offset -= h->param.analyse.f_psy_rd < 0.25 ? 1 : 2;
279
     /* Psy trellis has a similar effect. */
280
-    if( b_open && h->mb.i_psy_trellis )
281
+    if( b_open && h->mb.i_psy_trellis && !h->param.i_avcintra_class )
282
         h->param.analyse.i_chroma_qp_offset -= h->param.analyse.f_psy_trellis < 0.25 ? 1 : 2;
283
     h->param.analyse.i_chroma_qp_offset = x264_clip3(h->param.analyse.i_chroma_qp_offset, -12, 12);
284
     /* MB-tree requires AQ to be on, even if the strength is zero. */
285
@@ -1041,9 +1212,6 @@
286
 
287
     h->param.i_sps_id &= 31;
288
 
289
-    if( PARAM_INTERLACED )
290
-        h->param.b_pic_struct = 1;
291
-
292
     h->param.i_nal_hrd = x264_clip3( h->param.i_nal_hrd, X264_NAL_HRD_NONE, X264_NAL_HRD_CBR );
293
 
294
     if( h->param.i_nal_hrd && !h->param.rc.i_vbv_buffer_size )
295
@@ -1059,6 +1227,9 @@
296
         h->param.i_nal_hrd = X264_NAL_HRD_VBR;
297
     }
298
 
299
+    if( h->param.i_nal_hrd == X264_NAL_HRD_CBR )
300
+        h->param.rc.b_filler = 1;
301
+
302
     /* ensure the booleans are 0 or 1 so they can be used in math */
303
 #define BOOLIFY(x) h->param.x = !!h->param.x
304
     BOOLIFY( b_cabac );
305
@@ -1068,7 +1239,6 @@
306
     BOOLIFY( b_sliced_threads );
307
     BOOLIFY( b_interlaced );
308
     BOOLIFY( b_intra_refresh );
309
-    BOOLIFY( b_visualize );
310
     BOOLIFY( b_aud );
311
     BOOLIFY( b_repeat_headers );
312
     BOOLIFY( b_annexb );
313
@@ -1094,6 +1264,7 @@
314
     BOOLIFY( rc.b_stat_write );
315
     BOOLIFY( rc.b_stat_read );
316
     BOOLIFY( rc.b_mb_tree );
317
+    BOOLIFY( rc.b_filler );
318
 #undef BOOLIFY
319
 
320
     return 0;
321
@@ -1187,7 +1358,6 @@
322
                 h->param.vui.i_sar_width = i_w;
323
                 h->param.vui.i_sar_height = i_h;
324
             }
325
-            x264_sps_init( h->sps, h->param.i_sps_id, &h->param );
326
         }
327
     }
328
 }
329
@@ -1241,11 +1411,11 @@
330
         goto fail;
331
     }
332
 
333
+    x264_set_aspect_ratio( h, &h->param, 1 );
334
+
335
     x264_sps_init( h->sps, h->param.i_sps_id, &h->param );
336
     x264_pps_init( h->pps, h->param.i_sps_id, &h->param, h->sps );
337
 
338
-    x264_set_aspect_ratio( h, &h->param, 1 );
339
-
340
     x264_validate_levels( h, 1 );
341
 
342
     h->chroma_qp_table = i_chroma_qp_table + 12 + h->pps->i_chroma_qp_index_offset;
343
@@ -1396,11 +1566,13 @@
344
     h->nal_buffer_size = h->out.i_bitstream * 3/2 + 4 + 64; /* +4 for startcode, +64 for nal_escape assembly padding */
345
     CHECKED_MALLOC( h->nal_buffer, h->nal_buffer_size );
346
 
347
+    CHECKED_MALLOC( h->reconfig_h, sizeof(x264_t) );
348
+
349
     if( h->param.i_threads > 1 &&
350
         x264_threadpool_init( &h->threadpool, h->param.i_threads, (void*)x264_encoder_thread_init, h ) )
351
         goto fail;
352
     if( h->param.i_lookahead_threads > 1 &&
353
-        x264_threadpool_init( &h->lookaheadpool, h->param.i_lookahead_threads, (void*)x264_lookahead_thread_init, h ) )
354
+        x264_threadpool_init( &h->lookaheadpool, h->param.i_lookahead_threads, NULL, NULL ) )
355
         goto fail;
356
 
357
 #if HAVE_OPENCL
358
@@ -1424,6 +1596,7 @@
359
             CHECKED_MALLOC( h->lookahead_thread[i], sizeof(x264_t) );
360
             *h->lookahead_thread[i] = *h;
361
         }
362
+    *h->reconfig_h = *h;
363
 
364
     for( int i = 0; i < h->param.i_threads; i++ )
365
     {
366
@@ -1479,7 +1652,7 @@
367
     if( h->param.psz_dump_yuv )
368
     {
369
         /* create or truncate the reconstructed video file */
370
-        FILE *f = fopen( h->param.psz_dump_yuv, "w" );
371
+        FILE *f = x264_fopen( h->param.psz_dump_yuv, "w" );
372
         if( !f )
373
         {
374
             x264_log( h, X264_LOG_ERROR, "dump_yuv: can't write to %s\n", h->param.psz_dump_yuv );
375
@@ -1523,18 +1696,10 @@
376
     return NULL;
377
 }
378
 
379
-/****************************************************************************
380
- * x264_encoder_reconfig:
381
- ****************************************************************************/
382
-int x264_encoder_reconfig( x264_t *h, x264_param_t *param )
383
+/****************************************************************************/
384
+static int x264_encoder_try_reconfig( x264_t *h, x264_param_t *param, int *rc_reconfig )
385
 {
386
-    /* If the previous frame isn't done encoding, reconfiguring is probably dangerous. */
387
-    if( h->param.b_sliced_threads )
388
-        if( x264_threadpool_wait_all( h ) < 0 )
389
-            return -1;
390
-
391
-    int rc_reconfig = 0;
392
-    h = h->thread[h->thread[0]->i_thread_phase];
393
+    *rc_reconfig = 0;
394
     x264_set_aspect_ratio( h, param, 0 );
395
 #define COPY(var) h->param.var = param->var
396
     COPY( i_frame_reference ); // but never uses more refs than initially specified
397
@@ -1583,22 +1748,30 @@
398
     if( h->param.rc.i_vbv_max_bitrate > 0 && h->param.rc.i_vbv_buffer_size > 0 &&
399
           param->rc.i_vbv_max_bitrate > 0 &&   param->rc.i_vbv_buffer_size > 0 )
400
     {
401
-        rc_reconfig |= h->param.rc.i_vbv_max_bitrate != param->rc.i_vbv_max_bitrate;
402
-        rc_reconfig |= h->param.rc.i_vbv_buffer_size != param->rc.i_vbv_buffer_size;
403
-        rc_reconfig |= h->param.rc.i_bitrate != param->rc.i_bitrate;
404
+        *rc_reconfig |= h->param.rc.i_vbv_max_bitrate != param->rc.i_vbv_max_bitrate;
405
+        *rc_reconfig |= h->param.rc.i_vbv_buffer_size != param->rc.i_vbv_buffer_size;
406
+        *rc_reconfig |= h->param.rc.i_bitrate != param->rc.i_bitrate;
407
         COPY( rc.i_vbv_max_bitrate );
408
         COPY( rc.i_vbv_buffer_size );
409
         COPY( rc.i_bitrate );
410
     }
411
-    rc_reconfig |= h->param.rc.f_rf_constant != param->rc.f_rf_constant;
412
-    rc_reconfig |= h->param.rc.f_rf_constant_max != param->rc.f_rf_constant_max;
413
+    *rc_reconfig |= h->param.rc.f_rf_constant != param->rc.f_rf_constant;
414
+    *rc_reconfig |= h->param.rc.f_rf_constant_max != param->rc.f_rf_constant_max;
415
     COPY( rc.f_rf_constant );
416
     COPY( rc.f_rf_constant_max );
417
 #undef COPY
418
 
419
-    mbcmp_init( h );
420
+    return x264_validate_parameters( h, 0 );
421
+}
422
 
423
-    int ret = x264_validate_parameters( h, 0 );
424
+int x264_encoder_reconfig_apply( x264_t *h, x264_param_t *param )
425
+{
426
+    int rc_reconfig;
427
+    int ret = x264_encoder_try_reconfig( h, param, &rc_reconfig );
428
+
429
+    mbcmp_init( h );
430
+    if( !ret )
431
+        x264_sps_init( h->sps, h->param.i_sps_id, &h->param );
432
 
433
     /* Supported reconfiguration options (1-pass only):
434
      * vbv-maxrate
435
@@ -1612,6 +1785,25 @@
436
 }
437
 
438
 /****************************************************************************
439
+ * x264_encoder_reconfig:
440
+ ****************************************************************************/
441
+int x264_encoder_reconfig( x264_t *h, x264_param_t *param )
442
+{
443
+    h = h->thread[h->thread[0]->i_thread_phase];
444
+    x264_param_t param_save = h->reconfig_h->param;
445
+    h->reconfig_h->param = h->param;
446
+
447
+    int rc_reconfig;
448
+    int ret = x264_encoder_try_reconfig( h->reconfig_h, param, &rc_reconfig );
449
+    if( !ret )
450
+        h->reconfig = 1;
451
+    else
452
+        h->reconfig_h->param = param_save;
453
+
454
+    return ret;
455
+}
456
+
457
+/****************************************************************************
458
  * x264_encoder_parameters:
459
  ****************************************************************************/
460
 void x264_encoder_parameters( x264_t *h, x264_param_t *param )
461
@@ -1630,6 +1822,7 @@
462
 
463
     nal->i_payload= 0;
464
     nal->p_payload= &h->out.p_bitstream[bs_pos( &h->out.bs ) / 8];
465
+    nal->i_padding= 0;
466
 }
467
 
468
 /* if number of allocated nals is not enough, re-allocate a larger one. */
469
@@ -1663,6 +1856,30 @@
470
     return x264_nal_check_buffer( h );
471
 }
472
 
473
+static int x264_check_encapsulated_buffer( x264_t *h, x264_t *h0, int start,
474
+                                           int previous_nal_size, int necessary_size )
475
+{
476
+    if( h0->nal_buffer_size < necessary_size )
477
+    {
478
+        necessary_size *= 2;
479
+        uint8_t *buf = x264_malloc( necessary_size );
480
+        if( !buf )
481
+            return -1;
482
+        if( previous_nal_size )
483
+            memcpy( buf, h0->nal_buffer, previous_nal_size );
484
+
485
+        intptr_t delta = buf - h0->nal_buffer;
486
+        for( int i = 0; i < start; i++ )
487
+            h->out.nal[i].p_payload += delta;
488
+
489
+        x264_free( h0->nal_buffer );
490
+        h0->nal_buffer = buf;
491
+        h0->nal_buffer_size = necessary_size;
492
+    }
493
+
494
+    return 0;
495
+}
496
+
497
 static int x264_encoder_encapsulate_nals( x264_t *h, int start )
498
 {
499
     x264_t *h0 = h->thread[0];
500
@@ -1683,31 +1900,31 @@
501
 
502
     /* Worst-case NAL unit escaping: reallocate the buffer if it's too small. */
503
     int necessary_size = previous_nal_size + nal_size * 3/2 + h->out.i_nal * 4 + 4 + 64;
504
-    if( h0->nal_buffer_size < necessary_size )
505
-    {
506
-        necessary_size *= 2;
507
-        uint8_t *buf = x264_malloc( necessary_size );
508
-        if( !buf )
509
-            return -1;
510
-        if( previous_nal_size )
511
-            memcpy( buf, h0->nal_buffer, previous_nal_size );
512
-
513
-        intptr_t delta = buf - h0->nal_buffer;
514
-        for( int i = 0; i < start; i++ )
515
-            h->out.nal[i].p_payload += delta;
516
-
517
-        x264_free( h0->nal_buffer );
518
-        h0->nal_buffer = buf;
519
-        h0->nal_buffer_size = necessary_size;
520
-    }
521
+    for( int i = start; i < h->out.i_nal; i++ )
522
+        necessary_size += h->out.nal[i].i_padding;
523
+    if( x264_check_encapsulated_buffer( h, h0, start, previous_nal_size, necessary_size ) )
524
+        return -1;
525
 
526
     uint8_t *nal_buffer = h0->nal_buffer + previous_nal_size;
527
 
528
     for( int i = start; i < h->out.i_nal; i++ )
529
     {
530
-        h->out.nal[i].b_long_startcode = !i || h->out.nal[i].i_type == NAL_SPS || h->out.nal[i].i_type == NAL_PPS;
531
+        int old_payload_len = h->out.nal[i].i_payload;
532
+        h->out.nal[i].b_long_startcode = !i || h->out.nal[i].i_type == NAL_SPS || h->out.nal[i].i_type == NAL_PPS ||
533
+                                         h->param.i_avcintra_class;
534
         x264_nal_encode( h, nal_buffer, &h->out.nal[i] );
535
         nal_buffer += h->out.nal[i].i_payload;
536
+        if( h->param.i_avcintra_class )
537
+        {
538
+            h->out.nal[i].i_padding -= h->out.nal[i].i_payload - (old_payload_len + NALU_OVERHEAD);
539
+            if( h->out.nal[i].i_padding > 0 )
540
+            {
541
+                memset( nal_buffer, 0, h->out.nal[i].i_padding );
542
+                nal_buffer += h->out.nal[i].i_padding;
543
+                h->out.nal[i].i_payload += h->out.nal[i].i_padding;
544
+            }
545
+            h->out.nal[i].i_padding = X264_MAX( h->out.nal[i].i_padding, 0 );
546
+        }
547
     }
548
 
549
     x264_emms();
550
@@ -2340,7 +2557,7 @@
551
     }
552
 }
553
 
554
-static int x264_slice_write( x264_t *h )
555
+static intptr_t x264_slice_write( x264_t *h )
556
 {
557
     int i_skip;
558
     int mb_xy, i_mb_x, i_mb_y;
559
@@ -2350,7 +2567,8 @@
560
      * other inaccuracies. */
561
     int overhead_guess = (NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal)) + 1 + h->param.b_cabac + 5;
562
     int slice_max_size = h->param.i_slice_max_size > 0 ? (h->param.i_slice_max_size-overhead_guess)*8 : 0;
563
-    int back_up_bitstream = slice_max_size || (!h->param.b_cabac && h->sps->i_profile_idc < PROFILE_HIGH);
564
+    int back_up_bitstream_cavlc = !h->param.b_cabac && h->sps->i_profile_idc < PROFILE_HIGH;
565
+    int back_up_bitstream = slice_max_size || back_up_bitstream_cavlc;
566
     int starting_bits = bs_pos(&h->out.bs);
567
     int b_deblock = h->sh.i_disable_deblocking_filter_idc != 1;
568
     int b_hpel = h->fdec->b_kept_as_ref;
569
@@ -2358,9 +2576,10 @@
570
     int thread_last_mb = h->i_threadslice_end * h->mb.i_mb_width - 1;
571
     uint8_t *last_emu_check;
572
 #define BS_BAK_SLICE_MAX_SIZE 0
573
-#define BS_BAK_SLICE_MIN_MBS  1
574
-#define BS_BAK_ROW_VBV        2
575
-    x264_bs_bak_t bs_bak[3];
576
+#define BS_BAK_CAVLC_OVERFLOW 1
577
+#define BS_BAK_SLICE_MIN_MBS  2
578
+#define BS_BAK_ROW_VBV        3
579
+    x264_bs_bak_t bs_bak[4];
580
     b_deblock &= b_hpel || h->param.b_full_recon || h->param.psz_dump_yuv;
581
     bs_realign( &h->out.bs );
582
 
583
@@ -2413,11 +2632,16 @@
584
                 x264_fdec_filter_row( h, i_mb_y, 0 );
585
         }
586
 
587
-        if( !(i_mb_y & SLICE_MBAFF) && back_up_bitstream )
588
+        if( back_up_bitstream )
589
         {
590
-            x264_bitstream_backup( h, &bs_bak[BS_BAK_SLICE_MAX_SIZE], i_skip, 0 );
591
-            if( slice_max_size && (thread_last_mb+1-mb_xy) == h->param.i_slice_min_mbs )
592
-                x264_bitstream_backup( h, &bs_bak[BS_BAK_SLICE_MIN_MBS], i_skip, 0 );
593
+            if( back_up_bitstream_cavlc )
594
+                x264_bitstream_backup( h, &bs_bak[BS_BAK_CAVLC_OVERFLOW], i_skip, 0 );
595
+            if( slice_max_size && !(i_mb_y & SLICE_MBAFF) )
596
+            {
597
+                x264_bitstream_backup( h, &bs_bak[BS_BAK_SLICE_MAX_SIZE], i_skip, 0 );
598
+                if( (thread_last_mb+1-mb_xy) == h->param.i_slice_min_mbs )
599
+                    x264_bitstream_backup( h, &bs_bak[BS_BAK_SLICE_MIN_MBS], i_skip, 0 );
600
+            }
601
         }
602
 
603
         if( PARAM_INTERLACED )
604
@@ -2481,7 +2705,7 @@
605
                     h->mb.i_skip_intra = 0;
606
                     h->mb.b_skip_mc = 0;
607
                     h->mb.b_overflow = 0;
608
-                    x264_bitstream_restore( h, &bs_bak[BS_BAK_SLICE_MAX_SIZE], &i_skip, 0 );
609
+                    x264_bitstream_restore( h, &bs_bak[BS_BAK_CAVLC_OVERFLOW], &i_skip, 0 );
610
                     goto reencode;
611
                 }
612
             }
613
@@ -2552,11 +2776,6 @@
614
 cont:
615
         h->mb.b_reencode_mb = 0;
616
 
617
-#if HAVE_VISUALIZE
618
-        if( h->param.b_visualize )
619
-            x264_visualize_mb( h );
620
-#endif
621
-
622
         /* save cache */
623
         x264_macroblock_cache_save( h );
624
 
625
@@ -2732,10 +2951,11 @@
626
     x264_frame_push_unused( src, dst->fdec );
627
 
628
     // copy everything except the per-thread pointers and the constants.
629
-    memcpy( &dst->i_frame, &src->i_frame, offsetof(x264_t, mb.type) - offsetof(x264_t, i_frame) );
630
+    memcpy( &dst->i_frame, &src->i_frame, offsetof(x264_t, mb.base) - offsetof(x264_t, i_frame) );
631
     dst->param = src->param;
632
     dst->stat = src->stat;
633
     dst->pixf = src->pixf;
634
+    dst->reconfig = src->reconfig;
635
 }
636
 
637
 static void x264_thread_sync_stat( x264_t *dst, x264_t *src )
638
@@ -2750,12 +2970,6 @@
639
     int i_slice_num = 0;
640
     int last_thread_mb = h->sh.i_last_mb;
641
 
642
-#if HAVE_VISUALIZE
643
-    if( h->param.b_visualize )
644
-        if( x264_visualize_init( h ) )
645
-            goto fail;
646
-#endif
647
-
648
     /* init stats */
649
     memset( &h->stat.frame, 0, sizeof(h->stat.frame) );
650
     h->mb.b_reencode_mb = 0;
651
@@ -2801,14 +3015,6 @@
652
             h->sh.i_first_mb -= h->mb.i_mb_stride;
653
     }
654
 
655
-#if HAVE_VISUALIZE
656
-    if( h->param.b_visualize )
657
-    {
658
-        x264_visualize_show( h );
659
-        x264_visualize_close( h );
660
-    }
661
-#endif
662
-
663
     return (void *)0;
664
 
665
 fail:
666
@@ -2949,10 +3155,6 @@
667
         thread_current =
668
         thread_oldest  = h;
669
     }
670
-#if HAVE_MMX
671
-    if( h->param.cpu&X264_CPU_SSE_MISALIGN )
672
-        x264_cpu_mask_misalign_sse();
673
-#endif
674
     h->i_cpb_delay_pir_offset = h->i_cpb_delay_pir_offset_next;
675
 
676
     /* no data out */
677
@@ -3058,9 +3260,14 @@
678
 
679
     if( h->i_frame == h->i_thread_frames - 1 )
680
         h->i_reordered_pts_delay = h->fenc->i_reordered_pts;
681
+    if( h->reconfig )
682
+    {
683
+        x264_encoder_reconfig_apply( h, &h->reconfig_h->param );
684
+        h->reconfig = 0;
685
+    }
686
     if( h->fenc->param )
687
     {
688
-        x264_encoder_reconfig( h, h->fenc->param );
689
+        x264_encoder_reconfig_apply( h, h->fenc->param );
690
         if( h->fenc->param->param_free )
691
         {
692
             h->fenc->param->param_free( h->fenc->param );
693
@@ -3207,7 +3414,7 @@
694
         bs_rbsp_trailing( &h->out.bs );
695
         if( x264_nal_end( h ) )
696
             return -1;
697
-        overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal-1);
698
+        overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD;
699
     }
700
 
701
     h->i_nal_type = i_nal_type;
702
@@ -3259,14 +3466,19 @@
703
             x264_sps_write( &h->out.bs, h->sps );
704
             if( x264_nal_end( h ) )
705
                 return -1;
706
-            overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD;
707
+            /* Pad AUD/SPS to 256 bytes like Panasonic */
708
+            if( h->param.i_avcintra_class )
709
+                h->out.nal[h->out.i_nal-1].i_padding = 256 - bs_pos( &h->out.bs ) / 8 - 2*NALU_OVERHEAD;
710
+            overhead += h->out.nal[h->out.i_nal-1].i_payload + h->out.nal[h->out.i_nal-1].i_padding + NALU_OVERHEAD;
711
 
712
             /* generate picture parameters */
713
             x264_nal_start( h, NAL_PPS, NAL_PRIORITY_HIGHEST );
714
             x264_pps_write( &h->out.bs, h->sps, h->pps );
715
             if( x264_nal_end( h ) )
716
                 return -1;
717
-            overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD;
718
+            if( h->param.i_avcintra_class )
719
+                h->out.nal[h->out.i_nal-1].i_padding = 256 - h->out.nal[h->out.i_nal-1].i_payload - NALU_OVERHEAD;
720
+            overhead += h->out.nal[h->out.i_nal-1].i_payload + h->out.nal[h->out.i_nal-1].i_padding + NALU_OVERHEAD;
721
         }
722
 
723
         /* when frame threading is used, buffering period sei is written in x264_encoder_frame_end */
724
@@ -3277,7 +3489,7 @@
725
             x264_sei_buffering_period_write( h, &h->out.bs );
726
             if( x264_nal_end( h ) )
727
                return -1;
728
-            overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD;
729
+            overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
730
         }
731
     }
732
 
733
@@ -3289,7 +3501,7 @@
734
                         h->fenc->extra_sei.payloads[i].payload_type );
735
         if( x264_nal_end( h ) )
736
             return -1;
737
-        overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal-1);
738
+        overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
739
         if( h->fenc->extra_sei.sei_free )
740
         {
741
             h->fenc->extra_sei.sei_free( h->fenc->extra_sei.payloads[i].payload );
742
@@ -3306,7 +3518,8 @@
743
 
744
     if( h->fenc->b_keyframe )
745
     {
746
-        if( h->param.b_repeat_headers && h->fenc->i_frame == 0 )
747
+        /* Avid's decoder strictly wants two SEIs for AVC-Intra so we can't insert the x264 SEI */
748
+        if( h->param.b_repeat_headers && h->fenc->i_frame == 0 && !h->param.i_avcintra_class )
749
         {
750
             /* identify ourself */
751
             x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
752
@@ -3314,7 +3527,7 @@
753
                 return -1;
754
             if( x264_nal_end( h ) )
755
                 return -1;
756
-            overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal-1);
757
+            overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
758
         }
759
 
760
         if( h->fenc->i_type != X264_TYPE_IDR )
761
@@ -3324,16 +3537,16 @@
762
             x264_sei_recovery_point_write( h, &h->out.bs, time_to_recovery );
763
             if( x264_nal_end( h ) )
764
                 return -1;
765
-            overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal-1);
766
+            overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
767
         }
768
 
769
-        if ( h->param.i_frame_packing >= 0 )
770
+        if( h->param.i_frame_packing >= 0 )
771
         {
772
             x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
773
             x264_sei_frame_packing_write( h, &h->out.bs );
774
             if( x264_nal_end( h ) )
775
                 return -1;
776
-            overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal-1);
777
+            overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
778
         }
779
     }
780
 
781
@@ -3344,7 +3557,7 @@
782
         x264_sei_pic_timing_write( h, &h->out.bs );
783
         if( x264_nal_end( h ) )
784
             return -1;
785
-        overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal-1);
786
+        overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
787
     }
788
 
789
     /* As required by Blu-ray. */
790
@@ -3355,12 +3568,54 @@
791
         x264_sei_dec_ref_pic_marking_write( h, &h->out.bs );
792
         if( x264_nal_end( h ) )
793
             return -1;
794
-        overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal-1);
795
+        overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
796
     }
797
 
798
     if( h->fenc->b_keyframe && h->param.b_intra_refresh )
799
         h->i_cpb_delay_pir_offset_next = h->fenc->i_cpb_delay;
800
 
801
+    /* Filler space: 10 or 18 SEIs' worth of space, depending on resolution */
802
+    if( h->param.i_avcintra_class )
803
+    {
804
+        /* Write an empty filler NAL to mimic the AUD in the P2 format*/
805
+        x264_nal_start( h, NAL_FILLER, NAL_PRIORITY_DISPOSABLE );
806
+        x264_filler_write( h, &h->out.bs, 0 );
807
+        if( x264_nal_end( h ) )
808
+            return -1;
809
+        overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD;
810
+
811
+        /* All lengths are magic lengths that decoders expect to see */
812
+        /* "UMID" SEI */
813
+        x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
814
+        if( x264_sei_avcintra_umid_write( h, &h->out.bs ) < 0 )
815
+            return -1;
816
+        if( x264_nal_end( h ) )
817
+            return -1;
818
+        overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
819
+
820
+        int unpadded_len;
821
+        int total_len;
822
+        if( h->param.i_height == 1080 )
823
+        {
824
+            unpadded_len = 5780;
825
+            total_len = 17*512;
826
+        }
827
+        else
828
+        {
829
+            unpadded_len = 2900;
830
+            total_len = 9*512;
831
+        }
832
+        /* "VANC" SEI */
833
+        x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
834
+        if( x264_sei_avcintra_vanc_write( h, &h->out.bs, unpadded_len ) < 0 )
835
+            return -1;
836
+        if( x264_nal_end( h ) )
837
+            return -1;
838
+
839
+        h->out.nal[h->out.i_nal-1].i_padding = total_len - h->out.nal[h->out.i_nal-1].i_payload - SEI_OVERHEAD;
840
+        overhead += h->out.nal[h->out.i_nal-1].i_payload + h->out.nal[h->out.i_nal-1].i_padding + SEI_OVERHEAD;
841
+    }
842
+
843
     /* Init the rate control */
844
     /* FIXME: Include slice header bit cost. */
845
     x264_ratecontrol_start( h, h->fenc->i_qpplus1, overhead*8 );
846
@@ -3490,30 +3745,46 @@
847
     pic_out->hrd_timing = h->fenc->hrd_timing;
848
     pic_out->prop.f_crf_avg = h->fdec->f_crf_avg;
849
 
850
-    while( filler > 0 )
851
+    /* Filler in AVC-Intra mode is written as zero bytes to the last slice
852
+     * We don't know the size of the last slice until encapsulation so we add filler to the encapsulated NAL */
853
+    if( h->param.i_avcintra_class )
854
+    {
855
+        x264_t *h0 = h->thread[0];
856
+        int ret = x264_check_encapsulated_buffer( h, h0, h->out.i_nal, frame_size, frame_size + filler );
857
+        if( ret < 0 )
858
+            return -1;
859
+        memset( h->out.nal[0].p_payload + frame_size, 0, filler );
860
+        h->out.nal[h->out.i_nal-1].i_payload += filler;
861
+        h->out.nal[h->out.i_nal-1].i_padding = filler;
862
+        frame_size += filler;
863
+    }
864
+    else
865
     {
866
-        int f, overhead;
867
-        overhead = (FILLER_OVERHEAD - h->param.b_annexb);
868
-        if( h->param.i_slice_max_size && filler > h->param.i_slice_max_size )
869
-        {
870
-            int next_size = filler - h->param.i_slice_max_size;
871
-            int overflow = X264_MAX( overhead - next_size, 0 );
872
-            f = h->param.i_slice_max_size - overhead - overflow;
873
-        }
874
-        else
875
-            f = X264_MAX( 0, filler - overhead );
876
+        while( filler > 0 )
877
+        {
878
+            int f, overhead;
879
+            overhead = (FILLER_OVERHEAD - h->param.b_annexb);
880
+            if( h->param.i_slice_max_size && filler > h->param.i_slice_max_size )
881
+            {
882
+                int next_size = filler - h->param.i_slice_max_size;
883
+                int overflow = X264_MAX( overhead - next_size, 0 );
884
+                f = h->param.i_slice_max_size - overhead - overflow;
885
+            }
886
+            else
887
+                f = X264_MAX( 0, filler - overhead );
888
 
889
-        if( x264_bitstream_check_buffer_filler( h, f ) )
890
-            return -1;
891
-        x264_nal_start( h, NAL_FILLER, NAL_PRIORITY_DISPOSABLE );
892
-        x264_filler_write( h, &h->out.bs, f );
893
-        if( x264_nal_end( h ) )
894
-            return -1;
895
-        int total_size = x264_encoder_encapsulate_nals( h, h->out.i_nal-1 );
896
-        if( total_size < 0 )
897
-            return -1;
898
-        frame_size += total_size;
899
-        filler -= total_size;
900
+            if( x264_bitstream_check_buffer_filler( h, f ) )
901
+                return -1;
902
+            x264_nal_start( h, NAL_FILLER, NAL_PRIORITY_DISPOSABLE );
903
+            x264_filler_write( h, &h->out.bs, f );
904
+            if( x264_nal_end( h ) )
905
+                return -1;
906
+            int total_size = x264_encoder_encapsulate_nals( h, h->out.i_nal-1 );
907
+            if( total_size < 0 )
908
+                return -1;
909
+            frame_size += total_size;
910
+            filler -= total_size;
911
+        }
912
     }
913
 
914
     /* End bitstream, set output  */
915
@@ -3985,6 +4256,7 @@
916
 
917
     x264_cqm_delete( h );
918
     x264_free( h->nal_buffer );
919
+    x264_free( h->reconfig_h );
920
     x264_analyse_free_costs( h );
921
 
922
     if( h->i_thread_frames > 1 )
923
x264-snapshot-20130723-2245.tar.bz2/encoder/lookahead.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/lookahead.c Changed
28
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * lookahead.c: high-level lookahead functions
4
  *****************************************************************************
5
- * Copyright (C) 2010-2013 Avail Media and x264 project
6
+ * Copyright (C) 2010-2014 Avail Media and x264 project
7
  *
8
  * Authors: Michael Kazmier <mkazmier@availmedia.com>
9
  *          Alex Giladi <agiladi@availmedia.com>
10
@@ -89,16 +89,11 @@
11
 
12
 static void *x264_lookahead_thread( x264_t *h )
13
 {
14
-    int shift;
15
-#if HAVE_MMX
16
-    if( h->param.cpu&X264_CPU_SSE_MISALIGN )
17
-        x264_cpu_mask_misalign_sse();
18
-#endif
19
     while( !h->lookahead->b_exit_thread )
20
     {
21
         x264_pthread_mutex_lock( &h->lookahead->ifbuf.mutex );
22
         x264_pthread_mutex_lock( &h->lookahead->next.mutex );
23
-        shift = X264_MIN( h->lookahead->next.i_max_size - h->lookahead->next.i_size, h->lookahead->ifbuf.i_size );
24
+        int shift = X264_MIN( h->lookahead->next.i_max_size - h->lookahead->next.i_size, h->lookahead->ifbuf.i_size );
25
         x264_lookahead_shift( &h->lookahead->next, &h->lookahead->ifbuf, shift );
26
         x264_pthread_mutex_unlock( &h->lookahead->next.mutex );
27
         if( h->lookahead->next.i_size <= h->lookahead->i_slicetype_length + h->param.b_vfr_input )
28
x264-snapshot-20130723-2245.tar.bz2/encoder/macroblock.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/macroblock.c Changed
28
 
1
@@ -1,12 +1,12 @@
2
 /*****************************************************************************
3
  * macroblock.c: macroblock encoding
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
  *          Loren Merritt <lorenm@u.washington.edu>
10
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
11
- *          Henrik Gramner <hengar-6@student.ltu.se>
12
+ *          Henrik Gramner <henrik@gramner.com>
13
  *
14
  * This program is free software; you can redistribute it and/or modify
15
  * it under the terms of the GNU General Public License as published by
16
@@ -157,10 +157,7 @@
17
         return;
18
     }
19
 
20
-    M32( &h->mb.cache.non_zero_count[x264_scan8[ 0+p*16]] ) = 0;
21
-    M32( &h->mb.cache.non_zero_count[x264_scan8[ 2+p*16]] ) = 0;
22
-    M32( &h->mb.cache.non_zero_count[x264_scan8[ 8+p*16]] ) = 0;
23
-    M32( &h->mb.cache.non_zero_count[x264_scan8[10+p*16]] ) = 0;
24
+    CLEAR_16x16_NNZ( p );
25
 
26
     h->dctf.sub16x16_dct( dct4x4, p_src, p_dst );
27
 
28
x264-snapshot-20130723-2245.tar.bz2/encoder/macroblock.h -> x264-snapshot-20140321-2245.tar.bz2/encoder/macroblock.h Changed
25
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * macroblock.h: macroblock encoding
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Loren Merritt <lorenm@u.washington.edu>
9
  *          Laurent Aimar <fenrir@via.ecp.fr>
10
@@ -98,10 +98,10 @@
11
 #define CLEAR_16x16_NNZ( p ) \
12
 do\
13
 {\
14
-    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p+ 0]] ) = 0;\
15
-    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p+ 2]] ) = 0;\
16
-    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p+ 8]] ) = 0;\
17
-    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p+10]] ) = 0;\
18
+    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p] + 0*8] ) = 0;\
19
+    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p] + 1*8] ) = 0;\
20
+    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p] + 2*8] ) = 0;\
21
+    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p] + 3*8] ) = 0;\
22
 } while(0)
23
 
24
 /* A special for loop that iterates branchlessly over each set
25
x264-snapshot-20130723-2245.tar.bz2/encoder/me.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/me.c Changed
38
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * me.c: motion estimation
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Loren Merritt <lorenm@u.washington.edu>
9
  *          Laurent Aimar <fenrir@via.ecp.fr>
10
@@ -365,14 +365,14 @@
11
 
12
             /* hexagon */
13
             COST_MV_X3_DIR( -2,0, -1, 2,  1, 2, costs   );
14
-            COST_MV_X3_DIR(  2,0,  1,-2, -1,-2, costs+3 );
15
+            COST_MV_X3_DIR(  2,0,  1,-2, -1,-2, costs+4 ); /* +4 for 16-byte alignment */
16
             bcost <<= 3;
17
             COPY1_IF_LT( bcost, (costs[0]<<3)+2 );
18
             COPY1_IF_LT( bcost, (costs[1]<<3)+3 );
19
             COPY1_IF_LT( bcost, (costs[2]<<3)+4 );
20
-            COPY1_IF_LT( bcost, (costs[3]<<3)+5 );
21
-            COPY1_IF_LT( bcost, (costs[4]<<3)+6 );
22
-            COPY1_IF_LT( bcost, (costs[5]<<3)+7 );
23
+            COPY1_IF_LT( bcost, (costs[4]<<3)+5 );
24
+            COPY1_IF_LT( bcost, (costs[5]<<3)+6 );
25
+            COPY1_IF_LT( bcost, (costs[6]<<3)+7 );
26
 
27
             if( bcost&7 )
28
             {
29
@@ -671,7 +671,7 @@
30
                     for( i = 0; i < xn-2; i += 3 )
31
                     {
32
                         pixel *ref = p_fref_w+min_x+my*stride;
33
-                        int sads[3];
34
+                        ALIGNED_ARRAY_16( int, sads,[4] ); /* padded to [4] for asm */
35
                         h->pixf.sad_x3[i_pixel]( p_fenc, ref+xs[i], ref+xs[i+1], ref+xs[i+2], stride, sads );
36
                         for( int j = 0; j < 3; j++ )
37
                         {
38
x264-snapshot-20130723-2245.tar.bz2/encoder/me.h -> x264-snapshot-20140321-2245.tar.bz2/encoder/me.h Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * me.h: motion estimation
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Loren Merritt <lorenm@u.washington.edu>
9
  *          Laurent Aimar <fenrir@via.ecp.fr>
10
x264-snapshot-20130723-2245.tar.bz2/encoder/ratecontrol.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/ratecontrol.c Changed
171
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * ratecontrol.c: ratecontrol
4
  *****************************************************************************
5
- * Copyright (C) 2005-2013 x264 project
6
+ * Copyright (C) 2005-2014 x264 project
7
  *
8
  * Authors: Loren Merritt <lorenm@u.washington.edu>
9
  *          Michael Niedermayer <michaelni@gmx.at>
10
@@ -101,7 +101,7 @@
11
     double vbv_max_rate;        /* # of bits added to buffer_fill per second */
12
     predictor_t *pred;          /* predict frame size from satd */
13
     int single_frame_vbv;
14
-    double rate_factor_max_increment; /* Don't allow RF above (CRF + this value). */
15
+    float rate_factor_max_increment; /* Don't allow RF above (CRF + this value). */
16
 
17
     /* ABR stuff */
18
     int    last_satd;
19
@@ -653,8 +653,9 @@
20
                       h->param.rc.i_vbv_buffer_size );
21
         }
22
 
23
-        int vbv_buffer_size = h->param.rc.i_vbv_buffer_size * 1000;
24
-        int vbv_max_bitrate = h->param.rc.i_vbv_max_bitrate * 1000;
25
+        int kilobit_size = h->param.i_avcintra_class ? 1024 : 1000;
26
+        int vbv_buffer_size = h->param.rc.i_vbv_buffer_size * kilobit_size;
27
+        int vbv_max_bitrate = h->param.rc.i_vbv_max_bitrate * kilobit_size;
28
 
29
         /* Init HRD */
30
         if( h->param.i_nal_hrd && b_init )
31
@@ -666,15 +667,12 @@
32
             #define BR_SHIFT  6
33
             #define CPB_SHIFT 4
34
 
35
-            int bitrate = 1000*h->param.rc.i_vbv_max_bitrate;
36
-            int bufsize = 1000*h->param.rc.i_vbv_buffer_size;
37
-
38
             // normalize HRD size and rate to the value / scale notation
39
-            h->sps->vui.hrd.i_bit_rate_scale = x264_clip3( x264_ctz( bitrate ) - BR_SHIFT, 0, 15 );
40
-            h->sps->vui.hrd.i_bit_rate_value = bitrate >> ( h->sps->vui.hrd.i_bit_rate_scale + BR_SHIFT );
41
+            h->sps->vui.hrd.i_bit_rate_scale = x264_clip3( x264_ctz( vbv_max_bitrate ) - BR_SHIFT, 0, 15 );
42
+            h->sps->vui.hrd.i_bit_rate_value = vbv_max_bitrate >> ( h->sps->vui.hrd.i_bit_rate_scale + BR_SHIFT );
43
             h->sps->vui.hrd.i_bit_rate_unscaled = h->sps->vui.hrd.i_bit_rate_value << ( h->sps->vui.hrd.i_bit_rate_scale + BR_SHIFT );
44
-            h->sps->vui.hrd.i_cpb_size_scale = x264_clip3( x264_ctz( bufsize ) - CPB_SHIFT, 0, 15 );
45
-            h->sps->vui.hrd.i_cpb_size_value = bufsize >> ( h->sps->vui.hrd.i_cpb_size_scale + CPB_SHIFT );
46
+            h->sps->vui.hrd.i_cpb_size_scale = x264_clip3( x264_ctz( vbv_buffer_size ) - CPB_SHIFT, 0, 15 );
47
+            h->sps->vui.hrd.i_cpb_size_value = vbv_buffer_size >> ( h->sps->vui.hrd.i_cpb_size_scale + CPB_SHIFT );
48
             h->sps->vui.hrd.i_cpb_size_unscaled = h->sps->vui.hrd.i_cpb_size_value << ( h->sps->vui.hrd.i_cpb_size_scale + CPB_SHIFT );
49
 
50
             #undef CPB_SHIFT
51
@@ -705,7 +703,7 @@
52
         h->sps->vui.hrd.i_cpb_size_unscaled = vbv_buffer_size;
53
 
54
         if( rc->b_vbv_min_rate )
55
-            rc->bitrate = h->param.rc.i_bitrate * 1000.;
56
+            rc->bitrate = (double)h->param.rc.i_bitrate * kilobit_size;
57
         rc->buffer_rate = vbv_max_bitrate / rc->fps;
58
         rc->vbv_max_rate = vbv_max_bitrate;
59
         rc->buffer_size = vbv_buffer_size;
60
@@ -761,7 +759,7 @@
61
     else
62
         rc->qcompress = h->param.rc.f_qcompress;
63
 
64
-    rc->bitrate = h->param.rc.i_bitrate * 1000.;
65
+    rc->bitrate = h->param.rc.i_bitrate * (h->param.i_avcintra_class ? 1024. : 1000.);
66
     rc->rate_tolerance = h->param.rc.f_rate_tolerance;
67
     rc->nmb = h->mb.i_mb_count;
68
     rc->last_non_b_pict_type = -1;
69
@@ -872,7 +870,7 @@
70
             char *mbtree_stats_in = x264_strcat_filename( h->param.rc.psz_stat_in, ".mbtree" );
71
             if( !mbtree_stats_in )
72
                 return -1;
73
-            rc->p_mbtree_stat_file_in = fopen( mbtree_stats_in, "rb" );
74
+            rc->p_mbtree_stat_file_in = x264_fopen( mbtree_stats_in, "rb" );
75
             x264_free( mbtree_stats_in );
76
             if( !rc->p_mbtree_stat_file_in )
77
             {
78
@@ -913,7 +911,7 @@
79
              * so we'll at least try to roughly approximate this effect. */
80
             res_factor_bits = powf( res_factor, 0.7 );
81
 
82
-            if( ( p = strstr( opts, "timebase=" ) ) && sscanf( p, "timebase=%u/%u", &k, &l ) != 2 )
83
+            if( !( p = strstr( opts, "timebase=" ) ) || sscanf( p, "timebase=%u/%u", &k, &l ) != 2 )
84
             {
85
                 x264_log( h, X264_LOG_ERROR, "timebase specified in stats file not valid\n" );
86
                 return -1;
87
@@ -1140,7 +1138,7 @@
88
         if( !rc->psz_stat_file_tmpname )
89
             return -1;
90
 
91
-        rc->p_stat_file_out = fopen( rc->psz_stat_file_tmpname, "wb" );
92
+        rc->p_stat_file_out = x264_fopen( rc->psz_stat_file_tmpname, "wb" );
93
         if( rc->p_stat_file_out == NULL )
94
         {
95
             x264_log( h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n" );
96
@@ -1158,7 +1156,7 @@
97
             if( !rc->psz_mbtree_stat_file_tmpname || !rc->psz_mbtree_stat_file_name )
98
                 return -1;
99
 
100
-            rc->p_mbtree_stat_file_out = fopen( rc->psz_mbtree_stat_file_tmpname, "wb" );
101
+            rc->p_mbtree_stat_file_out = x264_fopen( rc->psz_mbtree_stat_file_tmpname, "wb" );
102
             if( rc->p_mbtree_stat_file_out == NULL )
103
             {
104
                 x264_log( h, X264_LOG_ERROR, "ratecontrol_init: can't open mbtree stats file\n" );
105
@@ -1338,7 +1336,7 @@
106
         b_regular_file = x264_is_regular_file( rc->p_stat_file_out );
107
         fclose( rc->p_stat_file_out );
108
         if( h->i_frame >= rc->num_entries && b_regular_file )
109
-            if( rename( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out ) != 0 )
110
+            if( x264_rename( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out ) != 0 )
111
             {
112
                 x264_log( h, X264_LOG_ERROR, "failed to rename \"%s\" to \"%s\"\n",
113
                           rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out );
114
@@ -1350,7 +1348,7 @@
115
         b_regular_file = x264_is_regular_file( rc->p_mbtree_stat_file_out );
116
         fclose( rc->p_mbtree_stat_file_out );
117
         if( h->i_frame >= rc->num_entries && b_regular_file )
118
-            if( rename( rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name ) != 0 )
119
+            if( x264_rename( rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name ) != 0 )
120
             {
121
                 x264_log( h, X264_LOG_ERROR, "failed to rename \"%s\" to \"%s\"\n",
122
                           rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name );
123
@@ -1398,7 +1396,7 @@
124
     x264_emms();
125
 
126
     if( zone && (!rc->prev_zone || zone->param != rc->prev_zone->param) )
127
-        x264_encoder_reconfig( h, zone->param );
128
+        x264_encoder_reconfig_apply( h, zone->param );
129
     rc->prev_zone = zone;
130
 
131
     if( h->param.rc.b_stat_read )
132
@@ -2108,15 +2106,25 @@
133
     rct->buffer_fill_final -= (uint64_t)bits * h->sps->vui.i_time_scale;
134
 
135
     if( rct->buffer_fill_final < 0 )
136
-        x264_log( h, X264_LOG_WARNING, "VBV underflow (frame %d, %.0f bits)\n", h->i_frame, (double)rct->buffer_fill_final / h->sps->vui.i_time_scale );
137
+    {
138
+        double underflow = (double)rct->buffer_fill_final / h->sps->vui.i_time_scale;
139
+        if( rcc->rate_factor_max_increment && rcc->qpm >= rcc->qp_novbv + rcc->rate_factor_max_increment )
140
+            x264_log( h, X264_LOG_DEBUG, "VBV underflow due to CRF-max (frame %d, %.0f bits)\n", h->i_frame, underflow );
141
+        else
142
+            x264_log( h, X264_LOG_WARNING, "VBV underflow (frame %d, %.0f bits)\n", h->i_frame, underflow );
143
+    }
144
     rct->buffer_fill_final = X264_MAX( rct->buffer_fill_final, 0 );
145
-    rct->buffer_fill_final += (uint64_t)bitrate * h->sps->vui.i_num_units_in_tick * h->fenc->i_cpb_duration;
146
 
147
-    if( h->sps->vui.hrd.b_cbr_hrd && rct->buffer_fill_final > buffer_size )
148
+    if( h->param.i_avcintra_class )
149
+        rct->buffer_fill_final += buffer_size;
150
+    else
151
+        rct->buffer_fill_final += (uint64_t)bitrate * h->sps->vui.i_num_units_in_tick * h->fenc->i_cpb_duration;
152
+
153
+    if( h->param.rc.b_filler && rct->buffer_fill_final > buffer_size )
154
     {
155
         int64_t scale = (int64_t)h->sps->vui.i_time_scale * 8;
156
         filler = (rct->buffer_fill_final - buffer_size + scale - 1) / scale;
157
-        bits = X264_MAX( (FILLER_OVERHEAD - h->param.b_annexb), filler ) * 8;
158
+        bits = h->param.i_avcintra_class ? filler * 8 : X264_MAX( (FILLER_OVERHEAD - h->param.b_annexb), filler ) * 8;
159
         rct->buffer_fill_final -= (uint64_t)bits * h->sps->vui.i_time_scale;
160
     }
161
     else
162
@@ -2719,7 +2727,7 @@
163
      * we're adding or removing bits), and starting on the earliest frame that
164
      * can influence the buffer fill of that end frame. */
165
     x264_ratecontrol_t *rcc = h->rc;
166
-    const double buffer_min = (over ? .1 : .1) * rcc->buffer_size;
167
+    const double buffer_min = .1 * rcc->buffer_size;
168
     const double buffer_max = .9 * rcc->buffer_size;
169
     double fill = fills[*t0-1];
170
     double parity = over ? 1. : -1.;
171
x264-snapshot-20130723-2245.tar.bz2/encoder/ratecontrol.h -> x264-snapshot-20140321-2245.tar.bz2/encoder/ratecontrol.h Changed
18
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * ratecontrol.h: ratecontrol
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Loren Merritt <lorenm@u.washington.edu>
9
  *          Laurent Aimar <fenrir@via.ecp.fr>
10
@@ -43,6 +43,7 @@
11
 void x264_ratecontrol_delete( x264_t * );
12
 
13
 void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init );
14
+int x264_encoder_reconfig_apply( x264_t *h, x264_param_t *param );
15
 
16
 void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame, float *quant_offsets );
17
 int  x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame, float *quant_offsets );
18
x264-snapshot-20130723-2245.tar.bz2/encoder/rdo.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/rdo.c Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * rdo.c: rate-distortion optimization
4
  *****************************************************************************
5
- * Copyright (C) 2005-2013 x264 project
6
+ * Copyright (C) 2005-2014 x264 project
7
  *
8
  * Authors: Loren Merritt <lorenm@u.washington.edu>
9
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
10
x264-snapshot-20130723-2245.tar.bz2/encoder/set.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/set.c Changed
114
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * set: header writing
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
  *          Loren Merritt <lorenm@u.washington.edu>
10
@@ -31,6 +31,7 @@
11
 
12
 // Indexed by pic_struct values
13
 static const uint8_t num_clock_ts[10] = { 0, 1, 1, 1, 2, 2, 3, 3, 2, 3 };
14
+const static uint8_t avcintra_uuid[] = {0xF7, 0x49, 0x3E, 0xB3, 0xD4, 0x00, 0x47, 0x96, 0x86, 0x86, 0xC9, 0x70, 0x7B, 0x64, 0x37, 0x2A};
15
 
16
 static void transpose( uint8_t *buf, int w )
17
 {
18
@@ -91,7 +92,7 @@
19
     bs_write( s, 8, payload_size-i );
20
 
21
     for( i = 0; i < payload_size; i++ )
22
-        bs_write(s, 8, payload[i] );
23
+        bs_write( s, 8, payload[i] );
24
 
25
     bs_rbsp_trailing( s );
26
     bs_flush( s );
27
@@ -227,7 +228,8 @@
28
     }
29
 
30
     /* FIXME: not sufficient for interlaced video */
31
-    sps->vui.b_chroma_loc_info_present = param->vui.i_chroma_loc > 0 && param->vui.i_chroma_loc <= 5;
32
+    sps->vui.b_chroma_loc_info_present = param->vui.i_chroma_loc > 0 && param->vui.i_chroma_loc <= 5 &&
33
+                                         sps->i_chroma_format_idc == CHROMA_420;
34
     if( sps->vui.b_chroma_loc_info_present )
35
     {
36
         sps->vui.i_chroma_loc_top = param->vui.i_chroma_loc;
37
@@ -249,7 +251,7 @@
38
 
39
     // NOTE: HRD related parts of the SPS are initialised in x264_ratecontrol_init_reconfigurable
40
 
41
-    sps->vui.b_bitstream_restriction = 1;
42
+    sps->vui.b_bitstream_restriction = param->i_keyint_max > 1;
43
     if( sps->vui.b_bitstream_restriction )
44
     {
45
         sps->vui.b_motion_vectors_over_pic_boundaries = 1;
46
@@ -421,7 +423,7 @@
47
     pps->i_sps_id = sps->i_id;
48
     pps->b_cabac = param->b_cabac;
49
 
50
-    pps->b_pic_order = param->b_interlaced;
51
+    pps->b_pic_order = !param->i_avcintra_class && param->b_interlaced;
52
     pps->i_num_slice_groups = 1;
53
 
54
     pps->i_num_ref_idx_l0_default_active = param->i_frame_reference;
55
@@ -575,7 +577,7 @@
56
 
57
     memcpy( payload, uuid, 16 );
58
     sprintf( payload+16, "x264 - core %d%s - H.264/MPEG-4 AVC codec - "
59
-             "Copy%s 2003-2013 - http://www.videolan.org/x264.html - options: %s",
60
+             "Copy%s 2003-2014 - http://www.videolan.org/x264.html - options: %s",
61
              X264_BUILD, X264_VERSION, HAVE_GPL?"left":"right", opts );
62
     length = strlen(payload)+1;
63
 
64
@@ -725,6 +727,49 @@
65
     x264_sei_write( s, tmp_buf, bs_pos( &q ) / 8, SEI_DEC_REF_PIC_MARKING );
66
 }
67
 
68
+int x264_sei_avcintra_umid_write( x264_t *h, bs_t *s )
69
+{
70
+    uint8_t data[512];
71
+    const char *msg = "UMID";
72
+    const int len = 497;
73
+
74
+    memset( data, 0xff, len );
75
+    memcpy( data, avcintra_uuid, sizeof(avcintra_uuid) );
76
+    memcpy( data+16, msg, strlen(msg) );
77
+
78
+    data[20] = 0x13;
79
+    /* These bytes appear to be some sort of frame/seconds counter in certain applications,
80
+     * but others jump around, so leave them as zero for now */
81
+    data[21] = data[22] = 0;
82
+
83
+    data[28] = 0x14;
84
+    data[36] = 0x60;
85
+    data[41] = 0x22; /* Believed to be some sort of end of basic UMID identifier */
86
+
87
+    x264_sei_write( &h->out.bs, data, len, SEI_USER_DATA_UNREGISTERED );
88
+
89
+    return 0;
90
+}
91
+
92
+int x264_sei_avcintra_vanc_write( x264_t *h, bs_t *s, int len )
93
+{
94
+    uint8_t data[6000];
95
+    const char *msg = "VANC";
96
+    if( len > sizeof(data) )
97
+    {
98
+        x264_log( h, X264_LOG_ERROR, "AVC-Intra SEI is too large (%d)\n", len );
99
+        return -1;
100
+    }
101
+
102
+    memset( data, 0xff, len );
103
+    memcpy( data, avcintra_uuid, sizeof(avcintra_uuid) );
104
+    memcpy( data+16, msg, strlen(msg) );
105
+
106
+    x264_sei_write( &h->out.bs, data, len, SEI_USER_DATA_UNREGISTERED );
107
+
108
+    return 0;
109
+}
110
+
111
 const x264_level_t x264_levels[] =
112
 {
113
     { 10,    1485,    99,    396,     64,    175,  64, 64,  0, 2, 0, 0, 1 },
114
x264-snapshot-20130723-2245.tar.bz2/encoder/set.h -> x264-snapshot-20140321-2245.tar.bz2/encoder/set.h Changed
19
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * set.h: header writing
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
  *          Loren Merritt <lorenm@u.washington.edu>
10
@@ -38,6 +38,8 @@
11
 void x264_sei_pic_timing_write( x264_t *h, bs_t *s );
12
 void x264_sei_dec_ref_pic_marking_write( x264_t *h, bs_t *s );
13
 void x264_sei_frame_packing_write( x264_t *h, bs_t *s );
14
+int  x264_sei_avcintra_umid_write( x264_t *h, bs_t *s );
15
+int  x264_sei_avcintra_vanc_write( x264_t *h, bs_t *s, int len );
16
 void x264_sei_write( bs_t *s, uint8_t *payload, int payload_size, int payload_type );
17
 void x264_filler_write( x264_t *h, bs_t *s, int filler );
18
 
19
x264-snapshot-20130723-2245.tar.bz2/encoder/slicetype-cl.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/slicetype-cl.c Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * slicetype-cl.c: OpenCL slicetype decision code (lowres lookahead)
4
  *****************************************************************************
5
- * Copyright (C) 2012-2013 x264 project
6
+ * Copyright (C) 2012-2014 x264 project
7
  *
8
  * Authors: Steve Borho <sborho@multicorewareinc.com>
9
  *
10
x264-snapshot-20130723-2245.tar.bz2/encoder/slicetype.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/slicetype.c Changed
119
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * slicetype.c: lookahead analysis
4
  *****************************************************************************
5
- * Copyright (C) 2005-2013 x264 project
6
+ * Copyright (C) 2005-2014 x264 project
7
  *
8
  * Authors: Jason Garrett-Glaser <darkshikari@gmail.com>
9
  *          Loren Merritt <lorenm@u.washington.edu>
10
@@ -1022,9 +1022,12 @@
11
     return i_score;
12
 }
13
 
14
+/* Trade off precision in mbtree for increased range */
15
+#define MBTREE_PRECISION 0.5f
16
+
17
 static void x264_macroblock_tree_finish( x264_t *h, x264_frame_t *frame, float average_duration, int ref0_distance )
18
 {
19
-    int fps_factor = round( CLIP_DURATION(average_duration) / CLIP_DURATION(frame->f_duration) * 256 );
20
+    int fps_factor = round( CLIP_DURATION(average_duration) / CLIP_DURATION(frame->f_duration) * 256 / MBTREE_PRECISION );
21
     float weightdelta = 0.0;
22
     if( ref0_distance && frame->f_weighted_cost_delta[ref0_distance-1] > 0 )
23
         weightdelta = (1.0 - frame->f_weighted_cost_delta[ref0_distance-1]);
24
@@ -1051,11 +1054,12 @@
25
     int i_bipred_weight = h->param.analyse.b_weighted_bipred ? 64 - (dist_scale_factor>>2) : 32;
26
     int16_t (*mvs[2])[2] = { frames[b]->lowres_mvs[0][b-p0-1], frames[b]->lowres_mvs[1][p1-b-1] };
27
     int bipred_weights[2] = {i_bipred_weight, 64 - i_bipred_weight};
28
-    int *buf = h->scratch_buffer;
29
+    int16_t *buf = h->scratch_buffer;
30
     uint16_t *propagate_cost = frames[b]->i_propagate_cost;
31
+    uint16_t *lowres_costs = frames[b]->lowres_costs[b-p0][p1-b];
32
 
33
     x264_emms();
34
-    float fps_factor = CLIP_DURATION(frames[b]->f_duration) / CLIP_DURATION(average_duration);
35
+    float fps_factor = CLIP_DURATION(frames[b]->f_duration) / (CLIP_DURATION(average_duration) * 256.0f) * MBTREE_PRECISION;
36
 
37
     /* For non-reffed frames the source costs are always zero, so just memset one row and re-use it. */
38
     if( !referenced )
39
@@ -1065,72 +1069,17 @@
40
     {
41
         int mb_index = h->mb.i_mb_y*h->mb.i_mb_stride;
42
         h->mc.mbtree_propagate_cost( buf, propagate_cost,
43
-            frames[b]->i_intra_cost+mb_index, frames[b]->lowres_costs[b-p0][p1-b]+mb_index,
44
+            frames[b]->i_intra_cost+mb_index, lowres_costs+mb_index,
45
             frames[b]->i_inv_qscale_factor+mb_index, &fps_factor, h->mb.i_mb_width );
46
         if( referenced )
47
             propagate_cost += h->mb.i_mb_width;
48
-        for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->mb.i_mb_width; h->mb.i_mb_x++, mb_index++ )
49
+
50
+        h->mc.mbtree_propagate_list( h, ref_costs[0], &mvs[0][mb_index], buf, &lowres_costs[mb_index],
51
+                                     bipred_weights[0], h->mb.i_mb_y, h->mb.i_mb_width, 0 );
52
+        if( b != p1 )
53
         {
54
-            int propagate_amount = buf[h->mb.i_mb_x];
55
-            /* Don't propagate for an intra block. */
56
-            if( propagate_amount > 0 )
57
-            {
58
-                /* Access width-2 bitfield. */
59
-                int lists_used = frames[b]->lowres_costs[b-p0][p1-b][mb_index] >> LOWRES_COST_SHIFT;
60
-                /* Follow the MVs to the previous frame(s). */
61
-                for( int list = 0; list < 2; list++ )
62
-                    if( (lists_used >> list)&1 )
63
-                    {
64
-#define CLIP_ADD(s,x) (s) = X264_MIN((s)+(x),(1<<16)-1)
65
-                        int listamount = propagate_amount;
66
-                        /* Apply bipred weighting. */
67
-                        if( lists_used == 3 )
68
-                            listamount = (listamount * bipred_weights[list] + 32) >> 6;
69
-
70
-                        /* Early termination for simple case of mv0. */
71
-                        if( !M32( mvs[list][mb_index] ) )
72
-                        {
73
-                            CLIP_ADD( ref_costs[list][mb_index], listamount );
74
-                            continue;
75
-                        }
76
-
77
-                        int x = mvs[list][mb_index][0];
78
-                        int y = mvs[list][mb_index][1];
79
-                        int mbx = (x>>5)+h->mb.i_mb_x;
80
-                        int mby = (y>>5)+h->mb.i_mb_y;
81
-                        int idx0 = mbx + mby * h->mb.i_mb_stride;
82
-                        int idx1 = idx0 + 1;
83
-                        int idx2 = idx0 + h->mb.i_mb_stride;
84
-                        int idx3 = idx0 + h->mb.i_mb_stride + 1;
85
-                        x &= 31;
86
-                        y &= 31;
87
-                        int idx0weight = (32-y)*(32-x);
88
-                        int idx1weight = (32-y)*x;
89
-                        int idx2weight = y*(32-x);
90
-                        int idx3weight = y*x;
91
-
92
-                        /* We could just clip the MVs, but pixels that lie outside the frame probably shouldn't
93
-                         * be counted. */
94
-                        if( mbx < h->mb.i_mb_width-1 && mby < h->mb.i_mb_height-1 && mbx >= 0 && mby >= 0 )
95
-                        {
96
-                            CLIP_ADD( ref_costs[list][idx0], (listamount*idx0weight+512)>>10 );
97
-                            CLIP_ADD( ref_costs[list][idx1], (listamount*idx1weight+512)>>10 );
98
-                            CLIP_ADD( ref_costs[list][idx2], (listamount*idx2weight+512)>>10 );
99
-                            CLIP_ADD( ref_costs[list][idx3], (listamount*idx3weight+512)>>10 );
100
-                        }
101
-                        else /* Check offsets individually */
102
-                        {
103
-                            if( mbx < h->mb.i_mb_width && mby < h->mb.i_mb_height && mbx >= 0 && mby >= 0 )
104
-                                CLIP_ADD( ref_costs[list][idx0], (listamount*idx0weight+512)>>10 );
105
-                            if( mbx+1 < h->mb.i_mb_width && mby < h->mb.i_mb_height && mbx+1 >= 0 && mby >= 0 )
106
-                                CLIP_ADD( ref_costs[list][idx1], (listamount*idx1weight+512)>>10 );
107
-                            if( mbx < h->mb.i_mb_width && mby+1 < h->mb.i_mb_height && mbx >= 0 && mby+1 >= 0 )
108
-                                CLIP_ADD( ref_costs[list][idx2], (listamount*idx2weight+512)>>10 );
109
-                            if( mbx+1 < h->mb.i_mb_width && mby+1 < h->mb.i_mb_height && mbx+1 >= 0 && mby+1 >= 0 )
110
-                                CLIP_ADD( ref_costs[list][idx3], (listamount*idx3weight+512)>>10 );
111
-                        }
112
-                    }
113
-            }
114
+            h->mc.mbtree_propagate_list( h, ref_costs[1], &mvs[1][mb_index], buf, &lowres_costs[mb_index],
115
+                                         bipred_weights[1], h->mb.i_mb_y, h->mb.i_mb_width, 1 );
116
         }
117
     }
118
 
119
x264-snapshot-20130723-2245.tar.bz2/filters/filters.c -> x264-snapshot-20140321-2245.tar.bz2/filters/filters.c Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * filters.c: common filter functions
4
  *****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
  *
8
  * Authors: Diogo Franco <diogomfranco@gmail.com>
9
  *          Steven Walters <kemuri9@gmail.com>
10
x264-snapshot-20130723-2245.tar.bz2/filters/filters.h -> x264-snapshot-20140321-2245.tar.bz2/filters/filters.h Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * filters.h: common filter functions
4
  *****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
  *
8
  * Authors: Diogo Franco <diogomfranco@gmail.com>
9
  *          Steven Walters <kemuri9@gmail.com>
10
x264-snapshot-20130723-2245.tar.bz2/filters/video/cache.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/cache.c Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * cache.c: cache video filter
4
  *****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
  *
8
  * Authors: Steven Walters <kemuri9@gmail.com>
9
  *
10
x264-snapshot-20130723-2245.tar.bz2/filters/video/crop.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/crop.c Changed
20
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * crop.c: crop video filter
4
  *****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
  *
8
  * Authors: Steven Walters <kemuri9@gmail.com>
9
  *          James Darnley <james.darnley@gmail.com>
10
@@ -105,8 +105,7 @@
11
     for( int i = 0; i < output->img.planes; i++ )
12
     {
13
         intptr_t offset = output->img.stride[i] * h->dims[1] * h->csp->height[i];
14
-        offset += h->dims[0] * h->csp->width[i];
15
-        offset *= x264_cli_csp_depth_factor( output->img.csp );
16
+        offset += h->dims[0] * h->csp->width[i] * x264_cli_csp_depth_factor( output->img.csp );
17
         output->img.plane[i] += offset;
18
     }
19
     return 0;
20
x264-snapshot-20130723-2245.tar.bz2/filters/video/depth.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/depth.c Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * depth.c: bit-depth conversion video filter
4
  *****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
  *
8
  * Authors: Oskar Arvidsson <oskar@irock.se>
9
  *
10
x264-snapshot-20130723-2245.tar.bz2/filters/video/fix_vfr_pts.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/fix_vfr_pts.c Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * fix_vfr_pts.c: vfr pts fixing video filter
4
  *****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
  *
8
  * Authors: Steven Walters <kemuri9@gmail.com>
9
  *
10
x264-snapshot-20130723-2245.tar.bz2/filters/video/internal.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/internal.c Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * internal.c: video filter utilities
4
  *****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
  *
8
  * Authors: Steven Walters <kemuri9@gmail.com>
9
  *
10
x264-snapshot-20130723-2245.tar.bz2/filters/video/internal.h -> x264-snapshot-20140321-2245.tar.bz2/filters/video/internal.h Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * internal.h: video filter utilities
4
  *****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
  *
8
  * Authors: Steven Walters <kemuri9@gmail.com>
9
  *
10
x264-snapshot-20130723-2245.tar.bz2/filters/video/resize.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/resize.c Changed
118
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * resize.c: resize video filter
4
  *****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
  *
8
  * Authors: Steven Walters <kemuri9@gmail.com>
9
  *
10
@@ -45,8 +45,8 @@
11
 #include <libavutil/opt.h>
12
 #include <libavutil/pixdesc.h>
13
 
14
-#ifndef PIX_FMT_BGRA64
15
-#define PIX_FMT_BGRA64 PIX_FMT_NONE
16
+#ifndef AV_PIX_FMT_BGRA64
17
+#define AV_PIX_FMT_BGRA64 AV_PIX_FMT_NONE
18
 #endif
19
 
20
 typedef struct
21
@@ -94,9 +94,12 @@
22
 
23
     for( int i = X264_CSP_NONE+1; i < X264_CSP_CLI_MAX; i++ )
24
     {
25
-        printf( "%s", x264_cli_csps[i].name );
26
-        if( i+1 < X264_CSP_CLI_MAX )
27
-            printf( ", " );
28
+        if( x264_cli_csps[i].name )
29
+        {
30
+            printf( "%s", x264_cli_csps[i].name );
31
+            if( i+1 < X264_CSP_CLI_MAX )
32
+                printf( ", " );
33
+        }
34
     }
35
     printf( "\n"
36
             "               - depth: 8 or 16 bits per pixel [keep current]\n"
37
@@ -143,19 +146,19 @@
38
     switch( csp&X264_CSP_MASK )
39
     {
40
         case X264_CSP_YV12: /* specially handled via swapping chroma */
41
-        case X264_CSP_I420: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV420P16 : PIX_FMT_YUV420P;
42
+        case X264_CSP_I420: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_YUV420P16 : AV_PIX_FMT_YUV420P;
43
         case X264_CSP_YV16: /* specially handled via swapping chroma */
44
-        case X264_CSP_I422: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV422P16 : PIX_FMT_YUV422P;
45
+        case X264_CSP_I422: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_YUV422P16 : AV_PIX_FMT_YUV422P;
46
         case X264_CSP_YV24: /* specially handled via swapping chroma */
47
-        case X264_CSP_I444: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV444P16 : PIX_FMT_YUV444P;
48
-        case X264_CSP_RGB:  return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_RGB48     : PIX_FMT_RGB24;
49
-        case X264_CSP_BGR:  return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_BGR48     : PIX_FMT_BGR24;
50
-        case X264_CSP_BGRA: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_BGRA64    : PIX_FMT_BGRA;
51
+        case X264_CSP_I444: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_YUV444P;
52
+        case X264_CSP_RGB:  return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_RGB48     : AV_PIX_FMT_RGB24;
53
+        case X264_CSP_BGR:  return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_BGR48     : AV_PIX_FMT_BGR24;
54
+        case X264_CSP_BGRA: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_BGRA64    : AV_PIX_FMT_BGRA;
55
         /* the next csp has no equivalent 16bit depth in swscale */
56
-        case X264_CSP_NV12: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_NONE      : PIX_FMT_NV12;
57
+        case X264_CSP_NV12: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_NONE      : AV_PIX_FMT_NV12;
58
         /* the next csp is no supported by swscale at all */
59
         case X264_CSP_NV16:
60
-        default:            return PIX_FMT_NONE;
61
+        default:            return AV_PIX_FMT_NONE;
62
     }
63
 }
64
 
65
@@ -175,12 +178,12 @@
66
     int pix_fmt = convert_csp_to_pix_fmt( csp );
67
     // first determine the base csp
68
     int ret = X264_CSP_NONE;
69
-    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_descriptors+pix_fmt;
70
-    if( (unsigned)pix_fmt >= PIX_FMT_NB || !pix_desc->name )
71
+    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get( pix_fmt );
72
+    if( !pix_desc || !pix_desc->name )
73
         return ret;
74
 
75
     const char *pix_fmt_name = pix_desc->name;
76
-    int is_rgb = pix_desc->flags & (PIX_FMT_RGB | PIX_FMT_PAL);
77
+    int is_rgb = pix_desc->flags & (AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_PAL);
78
     int is_bgr = !!strstr( pix_fmt_name, "bgr" );
79
     if( is_bgr || is_rgb )
80
     {
81
@@ -243,8 +246,11 @@
82
         if( strlen( str_csp ) == 0 )
83
             csp = info->csp & X264_CSP_MASK;
84
         else
85
-            for( csp = X264_CSP_CLI_MAX-1; x264_cli_csps[csp].name && strcasecmp( x264_cli_csps[csp].name, str_csp ); )
86
-                csp--;
87
+            for( csp = X264_CSP_CLI_MAX-1; csp > X264_CSP_NONE; csp-- )
88
+            {
89
+                if( x264_cli_csps[csp].name && !strcasecmp( x264_cli_csps[csp].name, str_csp ) )
90
+                    break;
91
+            }
92
         FAIL_IF_ERROR( csp == X264_CSP_NONE, "unsupported colorspace `%s'\n", str_csp );
93
         h->dst_csp = csp;
94
         if( depth == 16 )
95
@@ -392,7 +398,7 @@
96
     h->scale = input_prop;
97
     if( !h->buffer_allocated )
98
     {
99
-        if( x264_cli_pic_alloc( &h->buffer, h->dst_csp, h->dst.width, h->dst.height ) )
100
+        if( x264_cli_pic_alloc_aligned( &h->buffer, h->dst_csp, h->dst.width, h->dst.height ) )
101
             return -1;
102
         h->buffer_allocated = 1;
103
     }
104
@@ -462,11 +468,11 @@
105
     int dst_pix_fmt_inv = convert_csp_to_pix_fmt( h->dst_csp ^ X264_CSP_HIGH_DEPTH );
106
 
107
     /* confirm swscale can support this conversion */
108
-    FAIL_IF_ERROR( src_pix_fmt == PIX_FMT_NONE && src_pix_fmt_inv != PIX_FMT_NONE,
109
+    FAIL_IF_ERROR( src_pix_fmt == AV_PIX_FMT_NONE && src_pix_fmt_inv != AV_PIX_FMT_NONE,
110
                    "input colorspace %s with bit depth %d is not supported\n", av_get_pix_fmt_name( src_pix_fmt_inv ),
111
                    info->csp & X264_CSP_HIGH_DEPTH ? 16 : 8 );
112
     FAIL_IF_ERROR( !sws_isSupportedInput( src_pix_fmt ), "input colorspace %s is not supported\n", av_get_pix_fmt_name( src_pix_fmt ) )
113
-    FAIL_IF_ERROR( h->dst.pix_fmt == PIX_FMT_NONE && dst_pix_fmt_inv != PIX_FMT_NONE,
114
+    FAIL_IF_ERROR( h->dst.pix_fmt == AV_PIX_FMT_NONE && dst_pix_fmt_inv != AV_PIX_FMT_NONE,
115
                    "input colorspace %s with bit depth %d is not supported\n", av_get_pix_fmt_name( dst_pix_fmt_inv ),
116
                    h->dst_csp & X264_CSP_HIGH_DEPTH ? 16 : 8 );
117
     FAIL_IF_ERROR( !sws_isSupportedOutput( h->dst.pix_fmt ), "output colorspace %s is not supported\n", av_get_pix_fmt_name( h->dst.pix_fmt ) )
118
x264-snapshot-20130723-2245.tar.bz2/filters/video/select_every.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/select_every.c Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * select_every.c: select-every video filter
4
  *****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
  *
8
  * Authors: Steven Walters <kemuri9@gmail.com>
9
  *
10
x264-snapshot-20130723-2245.tar.bz2/filters/video/source.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/source.c Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * source.c: source video filter
4
  *****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
  *
8
  * Authors: Steven Walters <kemuri9@gmail.com>
9
  *
10
x264-snapshot-20130723-2245.tar.bz2/filters/video/video.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/video.c Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * video.c: video filters
4
  *****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
  *
8
  * Authors: Steven Walters <kemuri9@gmail.com>
9
  *
10
x264-snapshot-20130723-2245.tar.bz2/filters/video/video.h -> x264-snapshot-20140321-2245.tar.bz2/filters/video/video.h Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * video.h: video filters
4
  *****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
  *
8
  * Authors: Steven Walters <kemuri9@gmail.com>
9
  *
10
x264-snapshot-20130723-2245.tar.bz2/input/avs.c -> x264-snapshot-20140321-2245.tar.bz2/input/avs.c Changed
69
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * avs.c: avisynth input
4
  *****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
  *
8
  * Authors: Steven Walters <kemuri9@gmail.com>
9
  *
10
@@ -35,7 +35,7 @@
11
 #define avs_address dlsym
12
 #else
13
 #include <windows.h>
14
-#define avs_open LoadLibrary( "avisynth" )
15
+#define avs_open LoadLibraryW( L"avisynth" )
16
 #define avs_close FreeLibrary
17
 #define avs_address GetProcAddress
18
 #endif
19
@@ -172,7 +172,7 @@
20
 
21
 static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, cli_input_opt_t *opt )
22
 {
23
-    FILE *fh = fopen( psz_filename, "r" );
24
+    FILE *fh = x264_fopen( psz_filename, "r" );
25
     if( !fh )
26
         return -1;
27
     FAIL_IF_ERROR( !x264_is_regular_file( fh ), "AVS input is incompatible with non-regular file `%s'\n", psz_filename );
28
@@ -192,7 +192,16 @@
29
     if( avs_version <= 0 )
30
         return -1;
31
     x264_cli_log( "avs", X264_LOG_DEBUG, "using avisynth version %.2f\n", avs_version );
32
+
33
+#ifdef _WIN32
34
+    /* Avisynth doesn't support Unicode filenames. */
35
+    char ansi_filename[MAX_PATH];
36
+    FAIL_IF_ERROR( !x264_ansi_filename( psz_filename, ansi_filename, MAX_PATH, 0 ), "invalid ansi filename\n" );
37
+    AVS_Value arg = avs_new_value_string( ansi_filename );
38
+#else
39
     AVS_Value arg = avs_new_value_string( psz_filename );
40
+#endif
41
+
42
     AVS_Value res;
43
     char *filename_ext = get_filename_extension( psz_filename );
44
 
45
@@ -329,11 +338,11 @@
46
         info->csp = X264_CSP_I420;
47
 #if HAVE_SWSCALE
48
     else if( avs_is_yuy2( vi ) )
49
-        info->csp = PIX_FMT_YUYV422 | X264_CSP_OTHER;
50
+        info->csp = AV_PIX_FMT_YUYV422 | X264_CSP_OTHER;
51
     else if( avs_is_yv411( vi ) )
52
-        info->csp = PIX_FMT_YUV411P | X264_CSP_OTHER;
53
+        info->csp = AV_PIX_FMT_YUV411P | X264_CSP_OTHER;
54
     else if( avs_is_y8( vi ) )
55
-        info->csp = PIX_FMT_GRAY8 | X264_CSP_OTHER;
56
+        info->csp = AV_PIX_FMT_GRAY8 | X264_CSP_OTHER;
57
 #endif
58
     else
59
         info->csp = X264_CSP_NONE;
60
@@ -352,7 +361,7 @@
61
     if( cli_csp )
62
         pic->img.planes = cli_csp->planes;
63
 #if HAVE_SWSCALE
64
-    else if( csp == (PIX_FMT_YUV411P | X264_CSP_OTHER) )
65
+    else if( csp == (AV_PIX_FMT_YUV411P | X264_CSP_OTHER) )
66
         pic->img.planes = 3;
67
     else
68
         pic->img.planes = 1; //y8 and yuy2 are one plane
69
x264-snapshot-20130723-2245.tar.bz2/input/ffms.c -> x264-snapshot-20140321-2245.tar.bz2/input/ffms.c Changed
105
 
1
@@ -1,10 +1,11 @@
2
 /*****************************************************************************
3
  * ffms.c: ffmpegsource input
4
  *****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
  *
8
  * Authors: Mike Gurlitz <mike.gurlitz@gmail.com>
9
  *          Steven Walters <kemuri9@gmail.com>
10
+ *          Henrik Gramner <henrik@gramner.com>
11
  *
12
  * This program is free software; you can redistribute it and/or modify
13
  * it under the terms of the GNU General Public License as published by
14
@@ -34,8 +35,6 @@
15
 
16
 #ifdef _WIN32
17
 #include <windows.h>
18
-#else
19
-#define SetConsoleTitle(t)
20
 #endif
21
 
22
 typedef struct
23
@@ -60,7 +59,7 @@
24
     char buf[200];
25
     sprintf( buf, "ffms [info]: indexing input file [%.1f%%]", 100.0 * current / total );
26
     fprintf( stderr, "%s  \r", buf+5 );
27
-    SetConsoleTitle( buf );
28
+    x264_cli_set_console_title( buf );
29
     fflush( stderr );
30
     return 0;
31
 }
32
@@ -70,9 +69,9 @@
33
 {
34
     switch( csp )
35
     {
36
-        case PIX_FMT_YUVJ420P: *fullrange = 1; return PIX_FMT_YUV420P;
37
-        case PIX_FMT_YUVJ422P: *fullrange = 1; return PIX_FMT_YUV422P;
38
-        case PIX_FMT_YUVJ444P: *fullrange = 1; return PIX_FMT_YUV444P;
39
+        case AV_PIX_FMT_YUVJ420P: *fullrange = 1; return AV_PIX_FMT_YUV420P;
40
+        case AV_PIX_FMT_YUVJ422P: *fullrange = 1; return AV_PIX_FMT_YUV422P;
41
+        case AV_PIX_FMT_YUVJ444P: *fullrange = 1; return AV_PIX_FMT_YUV444P;
42
         default:                               return csp;
43
     }
44
 }
45
@@ -82,7 +81,21 @@
46
     ffms_hnd_t *h = calloc( 1, sizeof(ffms_hnd_t) );
47
     if( !h )
48
         return -1;
49
+
50
+#ifdef __MINGW32__
51
+    /* FFMS supports UTF-8 filenames, but it uses std::fstream internally which is broken with Unicode in MinGW. */
52
     FFMS_Init( 0, 0 );
53
+    char src_filename[MAX_PATH];
54
+    char idx_filename[MAX_PATH];
55
+    FAIL_IF_ERROR( !x264_ansi_filename( psz_filename, src_filename, MAX_PATH, 0 ), "invalid ansi filename\n" );
56
+    if( opt->index_file )
57
+        FAIL_IF_ERROR( !x264_ansi_filename( opt->index_file, idx_filename, MAX_PATH, 1 ), "invalid ansi filename\n" );
58
+#else
59
+    FFMS_Init( 0, 1 );
60
+    char *src_filename = psz_filename;
61
+    char *idx_filename = opt->index_file;
62
+#endif
63
+
64
     FFMS_ErrorInfo e;
65
     e.BufferSize = 0;
66
     int seekmode = opt->seek ? FFMS_SEEK_NORMAL : FFMS_SEEK_LINEAR_NO_RW;
67
@@ -90,29 +103,29 @@
68
     FFMS_Index *idx = NULL;
69
     if( opt->index_file )
70
     {
71
-        struct stat index_s, input_s;
72
-        if( !stat( opt->index_file, &index_s ) && !stat( psz_filename, &input_s ) &&
73
-            input_s.st_mtime < index_s.st_mtime )
74
-            idx = FFMS_ReadIndex( opt->index_file, &e );
75
+        x264_struct_stat index_s, input_s;
76
+        if( !x264_stat( opt->index_file, &index_s ) && !x264_stat( psz_filename, &input_s ) &&
77
+            input_s.st_mtime < index_s.st_mtime && index_s.st_size )
78
+            idx = FFMS_ReadIndex( idx_filename, &e );
79
     }
80
     if( !idx )
81
     {
82
         if( opt->progress )
83
         {
84
-            idx = FFMS_MakeIndex( psz_filename, 0, 0, NULL, NULL, 0, update_progress, &h->time, &e );
85
+            idx = FFMS_MakeIndex( src_filename, 0, 0, NULL, NULL, 0, update_progress, &h->time, &e );
86
             fprintf( stderr, "                                            \r" );
87
         }
88
         else
89
-            idx = FFMS_MakeIndex( psz_filename, 0, 0, NULL, NULL, 0, NULL, NULL, &e );
90
+            idx = FFMS_MakeIndex( src_filename, 0, 0, NULL, NULL, 0, NULL, NULL, &e );
91
         FAIL_IF_ERROR( !idx, "could not create index\n" )
92
-        if( opt->index_file && FFMS_WriteIndex( opt->index_file, idx, &e ) )
93
+        if( opt->index_file && FFMS_WriteIndex( idx_filename, idx, &e ) )
94
             x264_cli_log( "ffms", X264_LOG_WARNING, "could not write index file\n" );
95
     }
96
 
97
     int trackno = FFMS_GetFirstTrackOfType( idx, FFMS_TYPE_VIDEO, &e );
98
     FAIL_IF_ERROR( trackno < 0, "could not find video track\n" )
99
 
100
-    h->video_source = FFMS_CreateVideoSource( psz_filename, trackno, idx, 1, seekmode, &e );
101
+    h->video_source = FFMS_CreateVideoSource( src_filename, trackno, idx, 1, seekmode, &e );
102
     FAIL_IF_ERROR( !h->video_source, "could not create video source\n" )
103
 
104
     h->track = FFMS_GetTrackFromVideo( h->video_source );
105
x264-snapshot-20130723-2245.tar.bz2/input/input.c -> x264-snapshot-20140321-2245.tar.bz2/input/input.c Changed
63
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * input.c: common input functions
4
  *****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
  *
8
  * Authors: Steven Walters <kemuri9@gmail.com>
9
  *
10
@@ -42,7 +42,8 @@
11
 int x264_cli_csp_is_invalid( int csp )
12
 {
13
     int csp_mask = csp & X264_CSP_MASK;
14
-    return csp_mask <= X264_CSP_NONE || csp_mask >= X264_CSP_CLI_MAX || csp & X264_CSP_OTHER;
15
+    return csp_mask <= X264_CSP_NONE || csp_mask >= X264_CSP_CLI_MAX ||
16
+           csp_mask == X264_CSP_V210 || csp & X264_CSP_OTHER;
17
 }
18
 
19
 int x264_cli_csp_depth_factor( int csp )
20
@@ -74,7 +75,7 @@
21
     return size;
22
 }
23
 
24
-int x264_cli_pic_alloc( cli_pic_t *pic, int csp, int width, int height )
25
+static int x264_cli_pic_alloc_internal( cli_pic_t *pic, int csp, int width, int height, int align )
26
 {
27
     memset( pic, 0, sizeof(cli_pic_t) );
28
     int csp_mask = csp & X264_CSP_MASK;
29
@@ -87,15 +88,29 @@
30
     pic->img.height = height;
31
     for( int i = 0; i < pic->img.planes; i++ )
32
     {
33
-         pic->img.plane[i] = x264_malloc( x264_cli_pic_plane_size( csp, width, height, i ) );
34
-         if( !pic->img.plane[i] )
35
-             return -1;
36
-         pic->img.stride[i] = width * x264_cli_csps[csp_mask].width[i] * x264_cli_csp_depth_factor( csp );
37
+        int stride = width * x264_cli_csps[csp_mask].width[i];
38
+        stride *= x264_cli_csp_depth_factor( csp );
39
+        stride = ALIGN( stride, align );
40
+        uint64_t size = (uint64_t)(height * x264_cli_csps[csp_mask].height[i]) * stride;
41
+        pic->img.plane[i] = x264_malloc( size );
42
+        if( !pic->img.plane[i] )
43
+            return -1;
44
+        pic->img.stride[i] = stride;
45
     }
46
 
47
     return 0;
48
 }
49
 
50
+int x264_cli_pic_alloc( cli_pic_t *pic, int csp, int width, int height )
51
+{
52
+    return x264_cli_pic_alloc_internal( pic, csp, width, height, 1 );
53
+}
54
+
55
+int x264_cli_pic_alloc_aligned( cli_pic_t *pic, int csp, int width, int height )
56
+{
57
+    return x264_cli_pic_alloc_internal( pic, csp, width, height, NATIVE_ALIGN );
58
+}
59
+
60
 void x264_cli_pic_clean( cli_pic_t *pic )
61
 {
62
     for( int i = 0; i < pic->img.planes; i++ )
63
x264-snapshot-20130723-2245.tar.bz2/input/input.h -> x264-snapshot-20140321-2245.tar.bz2/input/input.h Changed
18
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * input.h: file input
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
  *          Loren Merritt <lorenm@u.washington.edu>
10
@@ -124,6 +124,7 @@
11
 int      x264_cli_csp_is_invalid( int csp );
12
 int      x264_cli_csp_depth_factor( int csp );
13
 int      x264_cli_pic_alloc( cli_pic_t *pic, int csp, int width, int height );
14
+int      x264_cli_pic_alloc_aligned( cli_pic_t *pic, int csp, int width, int height );
15
 void     x264_cli_pic_clean( cli_pic_t *pic );
16
 uint64_t x264_cli_pic_plane_size( int csp, int width, int height, int plane );
17
 uint64_t x264_cli_pic_size( int csp, int width, int height );
18
x264-snapshot-20130723-2245.tar.bz2/input/lavf.c -> x264-snapshot-20140321-2245.tar.bz2/input/lavf.c Changed
41
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * lavf.c: libavformat input
4
  *****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
  *
8
  * Authors: Mike Gurlitz <mike.gurlitz@gmail.com>
9
  *          Steven Walters <kemuri9@gmail.com>
10
@@ -53,9 +53,9 @@
11
 {
12
     switch( csp )
13
     {
14
-        case PIX_FMT_YUVJ420P: *fullrange = 1; return PIX_FMT_YUV420P;
15
-        case PIX_FMT_YUVJ422P: *fullrange = 1; return PIX_FMT_YUV422P;
16
-        case PIX_FMT_YUVJ444P: *fullrange = 1; return PIX_FMT_YUV444P;
17
+        case AV_PIX_FMT_YUVJ420P: *fullrange = 1; return AV_PIX_FMT_YUV420P;
18
+        case AV_PIX_FMT_YUVJ422P: *fullrange = 1; return AV_PIX_FMT_YUV422P;
19
+        case AV_PIX_FMT_YUVJ444P: *fullrange = 1; return AV_PIX_FMT_YUV444P;
20
         default:                               return csp;
21
     }
22
 }
23
@@ -162,7 +162,7 @@
24
     if( opt->resolution )
25
     {
26
         av_dict_set( &options, "video_size", opt->resolution, 0 );
27
-        const char *csp = opt->colorspace ? opt->colorspace : av_get_pix_fmt_name( PIX_FMT_YUV420P );
28
+        const char *csp = opt->colorspace ? opt->colorspace : av_get_pix_fmt_name( AV_PIX_FMT_YUV420P );
29
         av_dict_set( &options, "pixel_format", csp, 0 );
30
     }
31
 
32
@@ -210,7 +210,7 @@
33
 
34
     /* avisynth stores rgb data vertically flipped. */
35
     if( !strcasecmp( get_filename_extension( psz_filename ), "avs" ) &&
36
-        (c->pix_fmt == PIX_FMT_BGRA || c->pix_fmt == PIX_FMT_BGR24) )
37
+        (c->pix_fmt == AV_PIX_FMT_BGRA || c->pix_fmt == AV_PIX_FMT_BGR24) )
38
         info->csp |= X264_CSP_VFLIP;
39
 
40
     *p_handle = h;
41
x264-snapshot-20130723-2245.tar.bz2/input/raw.c -> x264-snapshot-20140321-2245.tar.bz2/input/raw.c Changed
66
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * raw.c: raw input
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
  *          Loren Merritt <lorenm@u.washington.edu>
10
@@ -55,8 +55,11 @@
11
     FAIL_IF_ERROR( !info->width || !info->height, "raw input requires a resolution.\n" )
12
     if( opt->colorspace )
13
     {
14
-        for( info->csp = X264_CSP_CLI_MAX-1; x264_cli_csps[info->csp].name && strcasecmp( x264_cli_csps[info->csp].name, opt->colorspace ); )
15
-            info->csp--;
16
+        for( info->csp = X264_CSP_CLI_MAX-1; info->csp > X264_CSP_NONE; info->csp-- )
17
+        {
18
+            if( x264_cli_csps[info->csp].name && !strcasecmp( x264_cli_csps[info->csp].name, opt->colorspace ) )
19
+                break;
20
+        }
21
         FAIL_IF_ERROR( info->csp == X264_CSP_NONE, "unsupported colorspace `%s'\n", opt->colorspace );
22
     }
23
     else /* default */
24
@@ -70,7 +73,7 @@
25
     if( !strcmp( psz_filename, "-" ) )
26
         h->fh = stdin;
27
     else
28
-        h->fh = fopen( psz_filename, "rb" );
29
+        h->fh = x264_fopen( psz_filename, "rb" );
30
     if( h->fh == NULL )
31
         return -1;
32
 
33
@@ -99,14 +102,14 @@
34
     return 0;
35
 }
36
 
37
-static int read_frame_internal( cli_pic_t *pic, raw_hnd_t *h )
38
+static int read_frame_internal( cli_pic_t *pic, raw_hnd_t *h, int bit_depth_uc )
39
 {
40
     int error = 0;
41
     int pixel_depth = x264_cli_csp_depth_factor( pic->img.csp );
42
     for( int i = 0; i < pic->img.planes && !error; i++ )
43
     {
44
         error |= fread( pic->img.plane[i], pixel_depth, h->plane_size[i], h->fh ) != h->plane_size[i];
45
-        if( h->bit_depth & 7 )
46
+        if( bit_depth_uc )
47
         {
48
             /* upconvert non 16bit high depth planes to 16bit using the same
49
              * algorithm as used in the depth filter. */
50
@@ -131,13 +134,13 @@
51
         else
52
             while( i_frame > h->next_frame )
53
             {
54
-                if( read_frame_internal( pic, h ) )
55
+                if( read_frame_internal( pic, h, 0 ) )
56
                     return -1;
57
                 h->next_frame++;
58
             }
59
     }
60
 
61
-    if( read_frame_internal( pic, h ) )
62
+    if( read_frame_internal( pic, h, h->bit_depth & 7 ) )
63
         return -1;
64
 
65
     h->next_frame = i_frame+1;
66
x264-snapshot-20130723-2245.tar.bz2/input/thread.c -> x264-snapshot-20140321-2245.tar.bz2/input/thread.c Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * thread.c: threaded input
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
  *          Loren Merritt <lorenm@u.washington.edu>
10
x264-snapshot-20130723-2245.tar.bz2/input/timecode.c -> x264-snapshot-20140321-2245.tar.bz2/input/timecode.c Changed
19
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * timecode.c: timecode file input
4
  *****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
  *
8
  * Authors: Yusuke Nakamura <muken.the.vfrmaniac@gmail.com>
9
  *
10
@@ -368,7 +368,7 @@
11
     timecode_input.picture_alloc = h->input.picture_alloc;
12
     timecode_input.picture_clean = h->input.picture_clean;
13
 
14
-    tcfile_in = fopen( psz_filename, "rb" );
15
+    tcfile_in = x264_fopen( psz_filename, "rb" );
16
     FAIL_IF_ERROR( !tcfile_in, "can't open `%s'\n", psz_filename )
17
     else if( !x264_is_regular_file( tcfile_in ) )
18
     {
19
x264-snapshot-20130723-2245.tar.bz2/input/y4m.c -> x264-snapshot-20140321-2245.tar.bz2/input/y4m.c Changed
53
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * y4m.c: y4m input
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
  *          Loren Merritt <lorenm@u.washington.edu>
10
@@ -81,7 +81,7 @@
11
     if( !strcmp( psz_filename, "-" ) )
12
         h->fh = stdin;
13
     else
14
-        h->fh = fopen(psz_filename, "rb");
15
+        h->fh = x264_fopen(psz_filename, "rb");
16
     if( h->fh == NULL )
17
         return -1;
18
 
19
@@ -223,7 +223,7 @@
20
     return 0;
21
 }
22
 
23
-static int read_frame_internal( cli_pic_t *pic, y4m_hnd_t *h )
24
+static int read_frame_internal( cli_pic_t *pic, y4m_hnd_t *h, int bit_depth_uc )
25
 {
26
     size_t slen = strlen( Y4M_FRAME_MAGIC );
27
     int pixel_depth = x264_cli_csp_depth_factor( pic->img.csp );
28
@@ -249,7 +249,7 @@
29
     for( i = 0; i < pic->img.planes && !error; i++ )
30
     {
31
         error |= fread( pic->img.plane[i], pixel_depth, h->plane_size[i], h->fh ) != h->plane_size[i];
32
-        if( h->bit_depth & 7 )
33
+        if( bit_depth_uc )
34
         {
35
             /* upconvert non 16bit high depth planes to 16bit using the same
36
              * algorithm as used in the depth filter. */
37
@@ -274,13 +274,13 @@
38
         else
39
             while( i_frame > h->next_frame )
40
             {
41
-                if( read_frame_internal( pic, h ) )
42
+                if( read_frame_internal( pic, h, 0 ) )
43
                     return -1;
44
                 h->next_frame++;
45
             }
46
     }
47
 
48
-    if( read_frame_internal( pic, h ) )
49
+    if( read_frame_internal( pic, h, h->bit_depth & 7 ) )
50
         return -1;
51
 
52
     h->next_frame = i_frame+1;
53
x264-snapshot-20130723-2245.tar.bz2/output/flv.c -> x264-snapshot-20140321-2245.tar.bz2/output/flv.c Changed
23
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * flv.c: flv muxer
4
  *****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
  *
8
  * Authors: Kieran Kunhya <kieran@kunhya.com>
9
  *
10
@@ -75,11 +75,10 @@
11
 
12
 static int open_file( char *psz_filename, hnd_t *p_handle, cli_output_opt_t *opt )
13
 {
14
-    flv_hnd_t *p_flv = malloc( sizeof(*p_flv) );
15
     *p_handle = NULL;
16
+    flv_hnd_t *p_flv = calloc( 1, sizeof(flv_hnd_t) );
17
     if( !p_flv )
18
         return -1;
19
-    memset( p_flv, 0, sizeof(*p_flv) );
20
 
21
     p_flv->b_dts_compress = opt->use_dts_compress;
22
 
23
x264-snapshot-20130723-2245.tar.bz2/output/flv_bytestream.c -> x264-snapshot-20140321-2245.tar.bz2/output/flv_bytestream.c Changed
29
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * flv_bytestream.c: flv muxer utilities
4
  *****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
  *
8
  * Authors: Kieran Kunhya <kieran@kunhya.com>
9
  *
10
@@ -87,16 +87,14 @@
11
 
12
 flv_buffer *flv_create_writer( const char *filename )
13
 {
14
-    flv_buffer *c = malloc( sizeof(*c) );
15
-
16
+    flv_buffer *c = calloc( 1, sizeof(flv_buffer) );
17
     if( !c )
18
         return NULL;
19
-    memset( c, 0, sizeof(*c) );
20
 
21
     if( !strcmp( filename, "-" ) )
22
         c->fp = stdout;
23
     else
24
-        c->fp = fopen( filename, "wb" );
25
+        c->fp = x264_fopen( filename, "wb" );
26
     if( !c->fp )
27
     {
28
         free( c );
29
x264-snapshot-20130723-2245.tar.bz2/output/flv_bytestream.h -> x264-snapshot-20140321-2245.tar.bz2/output/flv_bytestream.h Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * flv_bytestream.h: flv muxer utilities
4
  *****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
  *
8
  * Authors: Kieran Kunhya <kieran@kunhya.com>
9
  *
10
x264-snapshot-20130723-2245.tar.bz2/output/matroska.c -> x264-snapshot-20140321-2245.tar.bz2/output/matroska.c Changed
53
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * matroska.c: matroska muxer
4
  *****************************************************************************
5
- * Copyright (C) 2005-2013 x264 project
6
+ * Copyright (C) 2005-2014 x264 project
7
  *
8
  * Authors: Mike Matsnev <mike@haali.su>
9
  *
10
@@ -33,6 +33,7 @@
11
     int width, height, d_width, d_height;
12
 
13
     int display_size_units;
14
+    int stereo_mode;
15
 
16
     int64_t frame_duration;
17
 
18
@@ -44,16 +45,11 @@
19
 
20
 static int open_file( char *psz_filename, hnd_t *p_handle, cli_output_opt_t *opt )
21
 {
22
-    mkv_hnd_t *p_mkv;
23
-
24
     *p_handle = NULL;
25
-
26
-    p_mkv  = malloc( sizeof(*p_mkv) );
27
+    mkv_hnd_t *p_mkv = calloc( 1, sizeof(mkv_hnd_t) );
28
     if( !p_mkv )
29
         return -1;
30
 
31
-    memset( p_mkv, 0, sizeof(*p_mkv) );
32
-
33
     p_mkv->w = mk_create_writer( psz_filename );
34
     if( !p_mkv->w )
35
     {
36
@@ -84,6 +80,7 @@
37
     p_mkv->width = p_mkv->d_width = p_param->i_width;
38
     p_mkv->height = p_mkv->d_height = p_param->i_height;
39
     p_mkv->display_size_units = DS_PIXELS;
40
+    p_mkv->stereo_mode = p_param->i_frame_packing;
41
 
42
     if( p_param->vui.i_sar_width && p_param->vui.i_sar_height
43
         && p_param->vui.i_sar_width != p_param->vui.i_sar_height )
44
@@ -152,7 +149,7 @@
45
     ret = mk_write_header( p_mkv->w, "x264" X264_VERSION, "V_MPEG4/ISO/AVC",
46
                            avcC, avcC_len, p_mkv->frame_duration, 50000,
47
                            p_mkv->width, p_mkv->height,
48
-                           p_mkv->d_width, p_mkv->d_height, p_mkv->display_size_units );
49
+                           p_mkv->d_width, p_mkv->d_height, p_mkv->display_size_units, p_mkv->stereo_mode );
50
     if( ret < 0 )
51
         return ret;
52
 
53
x264-snapshot-20130723-2245.tar.bz2/output/matroska_ebml.c -> x264-snapshot-20140321-2245.tar.bz2/output/matroska_ebml.c Changed
71
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * matroska_ebml.c: matroska muxer utilities
4
  *****************************************************************************
5
- * Copyright (C) 2005-2013 x264 project
6
+ * Copyright (C) 2005-2014 x264 project
7
  *
8
  * Authors: Mike Matsnev <mike@haali.su>
9
  *
10
@@ -74,10 +74,9 @@
11
     }
12
     else
13
     {
14
-        c = malloc( sizeof(*c) );
15
+        c = calloc( 1, sizeof(mk_context) );
16
         if( !c )
17
             return NULL;
18
-        memset( c, 0, sizeof(*c) );
19
     }
20
 
21
     c->parent = parent;
22
@@ -291,12 +290,10 @@
23
 
24
 mk_writer *mk_create_writer( const char *filename )
25
 {
26
-    mk_writer *w = malloc( sizeof(*w) );
27
+    mk_writer *w = calloc( 1, sizeof(mk_writer) );
28
     if( !w )
29
         return NULL;
30
 
31
-    memset( w, 0, sizeof(*w) );
32
-
33
     w->root = mk_create_context( w, NULL, 0 );
34
     if( !w->root )
35
     {
36
@@ -307,7 +304,7 @@
37
     if( !strcmp( filename, "-" ) )
38
         w->fp = stdout;
39
     else
40
-        w->fp = fopen( filename, "wb" );
41
+        w->fp = x264_fopen( filename, "wb" );
42
     if( !w->fp )
43
     {
44
         mk_destroy_contexts( w );
45
@@ -320,13 +317,15 @@
46
     return w;
47
 }
48
 
49
+static const uint8_t mk_stereo_modes[6] = {5,9,7,1,3,13};
50
+
51
 int mk_write_header( mk_writer *w, const char *writing_app,
52
                      const char *codec_id,
53
                      const void *codec_private, unsigned codec_private_size,
54
                      int64_t default_frame_duration,
55
                      int64_t timescale,
56
                      unsigned width, unsigned height,
57
-                     unsigned d_width, unsigned d_height, int display_size_units )
58
+                     unsigned d_width, unsigned d_height, int display_size_units, int stereo_mode )
59
 {
60
     mk_context  *c, *ti, *v;
61
 
62
@@ -382,6 +381,8 @@
63
     CHECK( mk_write_uint( v, 0x54b2, display_size_units ) );
64
     CHECK( mk_write_uint( v, 0x54b0, d_width ) );
65
     CHECK( mk_write_uint( v, 0x54ba, d_height ) );
66
+    if( stereo_mode >= 0 && stereo_mode <= 5 )
67
+        CHECK( mk_write_uint( v, 0x53b8, mk_stereo_modes[stereo_mode] ) );
68
     CHECK( mk_close_context( v, 0 ) );
69
 
70
     CHECK( mk_close_context( ti, 0 ) );
71
x264-snapshot-20130723-2245.tar.bz2/output/matroska_ebml.h -> x264-snapshot-20140321-2245.tar.bz2/output/matroska_ebml.h Changed
19
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * matroska_ebml.h: matroska muxer utilities
4
  *****************************************************************************
5
- * Copyright (C) 2005-2013 x264 project
6
+ * Copyright (C) 2005-2014 x264 project
7
  *
8
  * Authors: Mike Matsnev <mike@haali.su>
9
  *
10
@@ -42,7 +42,7 @@
11
                      int64_t default_frame_duration,
12
                      int64_t timescale,
13
                      unsigned width, unsigned height,
14
-                     unsigned d_width, unsigned d_height, int display_size_units );
15
+                     unsigned d_width, unsigned d_height, int display_size_units, int stereo_mode );
16
 
17
 int mk_start_frame( mk_writer *w );
18
 int mk_add_frame_data( mk_writer *w, const void *data, unsigned size );
19
x264-snapshot-20130723-2245.tar.bz2/output/mp4.c -> x264-snapshot-20140321-2245.tar.bz2/output/mp4.c Changed
57
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * mp4.c: mp4 muxer
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
  *          Loren Merritt <lorenm@u.washington.edu>
10
@@ -27,13 +27,8 @@
11
 #include "output.h"
12
 #include <gpac/isomedia.h>
13
 
14
-#if HAVE_GF_MALLOC
15
-#undef malloc
16
-#undef free
17
-#undef realloc
18
-#define malloc gf_malloc
19
-#define free gf_free
20
-#define realloc gf_realloc
21
+#ifdef _WIN32
22
+#include <windows.h>
23
 #endif
24
 
25
 typedef struct
26
@@ -170,20 +165,25 @@
27
 
28
 static int open_file( char *psz_filename, hnd_t *p_handle, cli_output_opt_t *opt )
29
 {
30
-    mp4_hnd_t *p_mp4;
31
-
32
     *p_handle = NULL;
33
-    FILE *fh = fopen( psz_filename, "w" );
34
+    FILE *fh = x264_fopen( psz_filename, "w" );
35
     if( !fh )
36
         return -1;
37
     FAIL_IF_ERR( !x264_is_regular_file( fh ), "mp4", "MP4 output is incompatible with non-regular file `%s'\n", psz_filename )
38
     fclose( fh );
39
 
40
-    if( !(p_mp4 = malloc( sizeof(mp4_hnd_t) )) )
41
+    mp4_hnd_t *p_mp4 = calloc( 1, sizeof(mp4_hnd_t) );
42
+    if( !p_mp4 )
43
         return -1;
44
 
45
-    memset( p_mp4, 0, sizeof(mp4_hnd_t) );
46
+#ifdef _WIN32
47
+    /* GPAC doesn't support Unicode filenames. */
48
+    char ansi_filename[MAX_PATH];
49
+    FAIL_IF_ERR( !x264_ansi_filename( psz_filename, ansi_filename, MAX_PATH, 1 ), "mp4", "invalid ansi filename\n" )
50
+    p_mp4->p_file = gf_isom_open( ansi_filename, GF_ISOM_OPEN_WRITE, NULL );
51
+#else
52
     p_mp4->p_file = gf_isom_open( psz_filename, GF_ISOM_OPEN_WRITE, NULL );
53
+#endif
54
 
55
     p_mp4->b_dts_compress = opt->use_dts_compress;
56
 
57
x264-snapshot-20140321-2245.tar.bz2/output/mp4_lsmash.c Added
421
 
1
@@ -0,0 +1,419 @@
2
+/*****************************************************************************
3
+ * mp4_lsmash.c: mp4 muxer using L-SMASH
4
+ *****************************************************************************
5
+ * Copyright (C) 2003-2014 x264 project
6
+ *
7
+ * Authors: Laurent Aimar <fenrir@via.ecp.fr>
8
+ *          Loren Merritt <lorenm@u.washington.edu>
9
+ *          Yusuke Nakamura <muken.the.vfrmaniac@gmail.com>
10
+ *          Takashi Hirata <silverfilain@gmail.com>
11
+ *          golgol7777 <golgol7777@gmail.com>
12
+ *
13
+ * This program is free software; you can redistribute it and/or modify
14
+ * it under the terms of the GNU General Public License as published by
15
+ * the Free Software Foundation; either version 2 of the License, or
16
+ * (at your option) any later version.
17
+ *
18
+ * This program is distributed in the hope that it will be useful,
19
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
20
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21
+ * GNU General Public License for more details.
22
+ *
23
+ * You should have received a copy of the GNU General Public License
24
+ * along with this program; if not, write to the Free Software
25
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
26
+ *
27
+ * This program is also available under a commercial proprietary license.
28
+ * For more information, contact us at licensing@x264.com.
29
+ *****************************************************************************/
30
+
31
+#include "output.h"
32
+#include <lsmash.h>
33
+
34
+#define H264_NALU_LENGTH_SIZE 4
35
+
36
+/*******************/
37
+
38
+#define MP4_LOG_ERROR( ... )                x264_cli_log( "mp4", X264_LOG_ERROR, __VA_ARGS__ )
39
+#define MP4_LOG_WARNING( ... )              x264_cli_log( "mp4", X264_LOG_WARNING, __VA_ARGS__ )
40
+#define MP4_LOG_INFO( ... )                 x264_cli_log( "mp4", X264_LOG_INFO, __VA_ARGS__ )
41
+#define MP4_FAIL_IF_ERR( cond, ... )        FAIL_IF_ERR( cond, "mp4", __VA_ARGS__ )
42
+
43
+/* For close_file() */
44
+#define MP4_LOG_IF_ERR( cond, ... )\
45
+if( cond )\
46
+{\
47
+    MP4_LOG_ERROR( __VA_ARGS__ );\
48
+}
49
+
50
+/* For open_file() */
51
+#define MP4_FAIL_IF_ERR_EX( cond, ... )\
52
+if( cond )\
53
+{\
54
+    remove_mp4_hnd( p_mp4 );\
55
+    MP4_LOG_ERROR( __VA_ARGS__ );\
56
+    return -1;\
57
+}
58
+
59
+/*******************/
60
+
61
+typedef struct
62
+{
63
+    lsmash_root_t *p_root;
64
+    lsmash_video_summary_t *summary;
65
+    int b_stdout;
66
+    uint32_t i_movie_timescale;
67
+    uint32_t i_video_timescale;
68
+    uint32_t i_track;
69
+    uint32_t i_sample_entry;
70
+    uint64_t i_time_inc;
71
+    int64_t i_start_offset;
72
+    uint64_t i_first_cts;
73
+    uint64_t i_prev_dts;
74
+    uint32_t i_sei_size;
75
+    uint8_t *p_sei_buffer;
76
+    int i_numframe;
77
+    int64_t i_init_delta;
78
+    int i_delay_frames;
79
+    int b_dts_compress;
80
+    int i_dts_compress_multiplier;
81
+    int b_use_recovery;
82
+    int b_fragments;
83
+} mp4_hnd_t;
84
+
85
+/*******************/
86
+
87
+static void remove_mp4_hnd( hnd_t handle )
88
+{
89
+    mp4_hnd_t *p_mp4 = handle;
90
+    if( !p_mp4 )
91
+        return;
92
+    if( p_mp4->p_sei_buffer )
93
+    {
94
+        free( p_mp4->p_sei_buffer );
95
+        p_mp4->p_sei_buffer = NULL;
96
+    }
97
+    if( p_mp4->p_root )
98
+    {
99
+        lsmash_destroy_root( p_mp4->p_root );
100
+        p_mp4->p_root = NULL;
101
+    }
102
+    free( p_mp4 );
103
+}
104
+
105
+/*******************/
106
+
107
+static int close_file( hnd_t handle, int64_t largest_pts, int64_t second_largest_pts )
108
+{
109
+    mp4_hnd_t *p_mp4 = handle;
110
+
111
+    if( !p_mp4 )
112
+        return 0;
113
+
114
+    if( p_mp4->p_root )
115
+    {
116
+        double actual_duration = 0;
117
+        if( p_mp4->i_track )
118
+        {
119
+            /* Flush the rest of samples and add the last sample_delta. */
120
+            uint32_t last_delta = largest_pts - second_largest_pts;
121
+            MP4_LOG_IF_ERR( lsmash_flush_pooled_samples( p_mp4->p_root, p_mp4->i_track, (last_delta ? last_delta : 1) * p_mp4->i_time_inc ),
122
+                            "failed to flush the rest of samples.\n" );
123
+
124
+            if( p_mp4->i_movie_timescale != 0 && p_mp4->i_video_timescale != 0 )    /* avoid zero division */
125
+                actual_duration = ((double)((largest_pts + last_delta) * p_mp4->i_time_inc) / p_mp4->i_video_timescale) * p_mp4->i_movie_timescale;
126
+            else
127
+                MP4_LOG_ERROR( "timescale is broken.\n" );
128
+
129
+            /*
130
+             * Declare the explicit time-line mapping.
131
+             * A segment_duration is given by movie timescale, while a media_time that is the start time of this segment
132
+             * is given by not the movie timescale but rather the media timescale.
133
+             * The reason is that ISO media have two time-lines, presentation and media time-line,
134
+             * and an edit maps the presentation time-line to the media time-line.
135
+             * According to QuickTime file format specification and the actual playback in QuickTime Player,
136
+             * if the Edit Box doesn't exist in the track, the ratio of the summation of sample durations and track's duration becomes
137
+             * the track's media_rate so that the entire media can be used by the track.
138
+             * So, we add Edit Box here to avoid this implicit media_rate could distort track's presentation timestamps slightly.
139
+             * Note: Any demuxers should follow the Edit List Box if it exists.
140
+             */
141
+            lsmash_edit_t edit;
142
+            edit.duration   = actual_duration;
143
+            edit.start_time = p_mp4->i_first_cts;
144
+            edit.rate       = ISOM_EDIT_MODE_NORMAL;
145
+            if( !p_mp4->b_fragments )
146
+            {
147
+                MP4_LOG_IF_ERR( lsmash_create_explicit_timeline_map( p_mp4->p_root, p_mp4->i_track, edit ),
148
+                                "failed to set timeline map for video.\n" );
149
+            }
150
+            else if( !p_mp4->b_stdout )
151
+                MP4_LOG_IF_ERR( lsmash_modify_explicit_timeline_map( p_mp4->p_root, p_mp4->i_track, 1, edit ),
152
+                                "failed to update timeline map for video.\n" );
153
+        }
154
+
155
+        MP4_LOG_IF_ERR( lsmash_finish_movie( p_mp4->p_root, NULL ), "failed to finish movie.\n" );
156
+    }
157
+
158
+    remove_mp4_hnd( p_mp4 ); /* including lsmash_destroy_root( p_mp4->p_root ); */
159
+
160
+    return 0;
161
+}
162
+
163
+static int open_file( char *psz_filename, hnd_t *p_handle, cli_output_opt_t *opt )
164
+{
165
+    *p_handle = NULL;
166
+
167
+    int b_regular = strcmp( psz_filename, "-" );
168
+    b_regular = b_regular && x264_is_regular_file_path( psz_filename );
169
+    if( b_regular )
170
+    {
171
+        FILE *fh = x264_fopen( psz_filename, "wb" );
172
+        MP4_FAIL_IF_ERR( !fh, "cannot open output file `%s'.\n", psz_filename );
173
+        b_regular = x264_is_regular_file( fh );
174
+        fclose( fh );
175
+    }
176
+
177
+    mp4_hnd_t *p_mp4 = calloc( 1, sizeof(mp4_hnd_t) );
178
+    MP4_FAIL_IF_ERR( !p_mp4, "failed to allocate memory for muxer information.\n" );
179
+
180
+    p_mp4->b_dts_compress = opt->use_dts_compress;
181
+    p_mp4->b_use_recovery = 0; // we don't really support recovery
182
+    p_mp4->b_fragments    = !b_regular;
183
+    p_mp4->b_stdout       = !strcmp( psz_filename, "-" );
184
+
185
+    p_mp4->p_root = lsmash_open_movie( psz_filename, p_mp4->b_fragments ? LSMASH_FILE_MODE_WRITE_FRAGMENTED : LSMASH_FILE_MODE_WRITE );
186
+    MP4_FAIL_IF_ERR_EX( !p_mp4->p_root, "failed to create root.\n" );
187
+
188
+    p_mp4->summary = (lsmash_video_summary_t *)lsmash_create_summary( LSMASH_SUMMARY_TYPE_VIDEO );
189
+    MP4_FAIL_IF_ERR_EX( !p_mp4->summary,
190
+                        "failed to allocate memory for summary information of video.\n" );
191
+    p_mp4->summary->sample_type = ISOM_CODEC_TYPE_AVC1_VIDEO;
192
+
193
+    *p_handle = p_mp4;
194
+
195
+    return 0;
196
+}
197
+
198
+static int set_param( hnd_t handle, x264_param_t *p_param )
199
+{
200
+    mp4_hnd_t *p_mp4 = handle;
201
+    uint64_t i_media_timescale;
202
+
203
+    p_mp4->i_delay_frames = p_param->i_bframe ? (p_param->i_bframe_pyramid ? 2 : 1) : 0;
204
+    p_mp4->i_dts_compress_multiplier = p_mp4->b_dts_compress * p_mp4->i_delay_frames + 1;
205
+
206
+    i_media_timescale = (uint64_t)p_param->i_timebase_den * p_mp4->i_dts_compress_multiplier;
207
+    p_mp4->i_time_inc = (uint64_t)p_param->i_timebase_num * p_mp4->i_dts_compress_multiplier;
208
+    MP4_FAIL_IF_ERR( i_media_timescale > UINT32_MAX, "MP4 media timescale %"PRIu64" exceeds maximum\n", i_media_timescale );
209
+
210
+    /* Select brands. */
211
+    lsmash_brand_type brands[6] = { 0 };
212
+    uint32_t brand_count = 0;
213
+    brands[brand_count++] = ISOM_BRAND_TYPE_MP42;
214
+    brands[brand_count++] = ISOM_BRAND_TYPE_MP41;
215
+    brands[brand_count++] = ISOM_BRAND_TYPE_ISOM;
216
+    if( p_mp4->b_use_recovery )
217
+    {
218
+        brands[brand_count++] = ISOM_BRAND_TYPE_AVC1;   /* sdtp, sgpd, sbgp and visual roll recovery grouping */
219
+        if( p_param->b_open_gop )
220
+            brands[brand_count++] = ISOM_BRAND_TYPE_ISO6;   /* cslg and visual random access grouping */
221
+    }
222
+
223
+    /* Set movie parameters. */
224
+    lsmash_movie_parameters_t movie_param;
225
+    lsmash_initialize_movie_parameters( &movie_param );
226
+    movie_param.major_brand = ISOM_BRAND_TYPE_MP42;
227
+    movie_param.brands = brands;
228
+    movie_param.number_of_brands = brand_count;
229
+    MP4_FAIL_IF_ERR( lsmash_set_movie_parameters( p_mp4->p_root, &movie_param ),
230
+                     "failed to set movie parameters.\n" );
231
+    p_mp4->i_movie_timescale = lsmash_get_movie_timescale( p_mp4->p_root );
232
+    MP4_FAIL_IF_ERR( !p_mp4->i_movie_timescale, "movie timescale is broken.\n" );
233
+
234
+    /* Create a video track. */
235
+    p_mp4->i_track = lsmash_create_track( p_mp4->p_root, ISOM_MEDIA_HANDLER_TYPE_VIDEO_TRACK );
236
+    MP4_FAIL_IF_ERR( !p_mp4->i_track, "failed to create a video track.\n" );
237
+
238
+    p_mp4->summary->width = p_param->i_width;
239
+    p_mp4->summary->height = p_param->i_height;
240
+    uint32_t i_display_width = p_param->i_width << 16;
241
+    uint32_t i_display_height = p_param->i_height << 16;
242
+    if( p_param->vui.i_sar_width && p_param->vui.i_sar_height )
243
+    {
244
+        double sar = (double)p_param->vui.i_sar_width / p_param->vui.i_sar_height;
245
+        if( sar > 1.0 )
246
+            i_display_width *= sar;
247
+        else
248
+            i_display_height /= sar;
249
+        p_mp4->summary->par_h = p_param->vui.i_sar_width;
250
+        p_mp4->summary->par_v = p_param->vui.i_sar_height;
251
+    }
252
+    p_mp4->summary->color.primaries_index = p_param->vui.i_colorprim;
253
+    p_mp4->summary->color.transfer_index  = p_param->vui.i_transfer;
254
+    p_mp4->summary->color.matrix_index    = p_param->vui.i_colmatrix >= 0 ? p_param->vui.i_colmatrix : ISOM_MATRIX_INDEX_UNSPECIFIED;
255
+    p_mp4->summary->color.full_range      = p_param->vui.b_fullrange >= 0 ? p_param->vui.b_fullrange : 0;
256
+
257
+    /* Set video track parameters. */
258
+    lsmash_track_parameters_t track_param;
259
+    lsmash_initialize_track_parameters( &track_param );
260
+    lsmash_track_mode track_mode = ISOM_TRACK_ENABLED | ISOM_TRACK_IN_MOVIE | ISOM_TRACK_IN_PREVIEW;
261
+    track_param.mode = track_mode;
262
+    track_param.display_width = i_display_width;
263
+    track_param.display_height = i_display_height;
264
+    MP4_FAIL_IF_ERR( lsmash_set_track_parameters( p_mp4->p_root, p_mp4->i_track, &track_param ),
265
+                     "failed to set track parameters for video.\n" );
266
+
267
+    /* Set video media parameters. */
268
+    lsmash_media_parameters_t media_param;
269
+    lsmash_initialize_media_parameters( &media_param );
270
+    media_param.timescale = i_media_timescale;
271
+    media_param.media_handler_name = "L-SMASH Video Media Handler";
272
+    if( p_mp4->b_use_recovery )
273
+    {
274
+        media_param.roll_grouping = p_param->b_intra_refresh;
275
+        media_param.rap_grouping = p_param->b_open_gop;
276
+    }
277
+    MP4_FAIL_IF_ERR( lsmash_set_media_parameters( p_mp4->p_root, p_mp4->i_track, &media_param ),
278
+                     "failed to set media parameters for video.\n" );
279
+    p_mp4->i_video_timescale = lsmash_get_media_timescale( p_mp4->p_root, p_mp4->i_track );
280
+    MP4_FAIL_IF_ERR( !p_mp4->i_video_timescale, "media timescale for video is broken.\n" );
281
+
282
+    return 0;
283
+}
284
+
285
+static int write_headers( hnd_t handle, x264_nal_t *p_nal )
286
+{ /* Registers the H.264 sample entry using p_nal[0] as SPS and p_nal[1] as PPS, and keeps a copy of p_nal[2] (SEI) for write_frame. Returns sei_size + sps_size + pps_size; the explicit error paths below return -1. */
287
+    mp4_hnd_t *p_mp4 = handle;
288
+
289
+    uint32_t sps_size = p_nal[0].i_payload - H264_NALU_LENGTH_SIZE; /* payload size minus the first H264_NALU_LENGTH_SIZE bytes */
290
+    uint32_t pps_size = p_nal[1].i_payload - H264_NALU_LENGTH_SIZE;
291
+    uint32_t sei_size = p_nal[2].i_payload; /* SEI is kept whole, including its first H264_NALU_LENGTH_SIZE bytes */
292
+
293
+    uint8_t *sps = p_nal[0].p_payload + H264_NALU_LENGTH_SIZE; /* skip the first H264_NALU_LENGTH_SIZE bytes of the payload */
294
+    uint8_t *pps = p_nal[1].p_payload + H264_NALU_LENGTH_SIZE;
295
+    uint8_t *sei = p_nal[2].p_payload;
296
+
297
+    lsmash_codec_specific_t *cs = lsmash_create_codec_specific_data( LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_VIDEO_H264,
298
+                                                                     LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED );
299
+
300
+    lsmash_h264_specific_parameters_t *param = (lsmash_h264_specific_parameters_t *)cs->data.structured; /* NOTE(review): cs is dereferenced without a NULL check, although the second creation call below is guarded by if( cs ) -- confirm */
301
+    param->lengthSizeMinusOne = H264_NALU_LENGTH_SIZE - 1;
302
+
303
+    /* SPS
304
+     * The remaining parameters are automatically set by SPS. */
305
+    if( lsmash_append_h264_parameter_set( param, H264_PARAMETER_SET_TYPE_SPS, sps, sps_size ) )
306
+    {
307
+        MP4_LOG_ERROR( "failed to append SPS.\n" );
308
+        return -1; /* NOTE(review): cs is not destroyed on this path */
309
+    }
310
+
311
+    /* PPS */
312
+    if( lsmash_append_h264_parameter_set( param, H264_PARAMETER_SET_TYPE_PPS, pps, pps_size ) )
313
+    {
314
+        MP4_LOG_ERROR( "failed to append PPS.\n" );
315
+        return -1; /* NOTE(review): cs is not destroyed on this path */
316
+    }
317
+
318
+    if( lsmash_add_codec_specific_data( (lsmash_summary_t *)p_mp4->summary, cs ) )
319
+    {
320
+        MP4_LOG_ERROR( "failed to add H.264 specific info.\n" );
321
+        return -1; /* NOTE(review): cs is not destroyed on this path */
322
+    }
323
+
324
+    lsmash_destroy_codec_specific_data( cs ); /* cs is destroyed here after being added to the summary */
325
+
326
+    /* Additional extensions */
327
+    /* Bitrate info */
328
+    cs = lsmash_create_codec_specific_data( LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_VIDEO_H264_BITRATE,
329
+                                            LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED );
330
+    if( cs )
331
+        lsmash_add_codec_specific_data( (lsmash_summary_t *)p_mp4->summary, cs ); /* result is ignored: the bitrate info is added on a best-effort basis */
332
+    lsmash_destroy_codec_specific_data( cs ); /* reached even when cs is NULL; NOTE(review): confirm the destroy call accepts NULL */
333
+
334
+    p_mp4->i_sample_entry = lsmash_add_sample_entry( p_mp4->p_root, p_mp4->i_track, p_mp4->summary );
335
+    MP4_FAIL_IF_ERR( !p_mp4->i_sample_entry, /* a zero sample-entry index is treated as failure */
336
+                     "failed to add sample entry for video.\n" );
337
+
338
+    /* SEI */
339
+    p_mp4->p_sei_buffer = malloc( sei_size ); /* private copy of the SEI; write_frame frees it after copying it into a sample */
340
+    MP4_FAIL_IF_ERR( !p_mp4->p_sei_buffer,
341
+                     "failed to allocate sei transition buffer.\n" );
342
+    memcpy( p_mp4->p_sei_buffer, sei, sei_size );
343
+    p_mp4->i_sei_size = sei_size;
344
+
345
+    return sei_size + sps_size + pps_size;
346
+}
347
+
348
+static int write_frame( hnd_t handle, uint8_t *p_nalu, int i_size, x264_picture_t *p_picture )
349
+{ /* Wraps one encoded frame (p_nalu, i_size bytes) in an L-SMASH sample, prepends any SEI still held in p_sei_buffer, computes dts/cts and appends the sample to the track. Returns i_size on the success path. */
350
+    mp4_hnd_t *p_mp4 = handle;
351
+    uint64_t dts, cts;
352
+
353
+    if( !p_mp4->i_numframe ) /* first frame only */
354
+    {
355
+        p_mp4->i_start_offset = p_picture->i_dts * -1; /* negated dts of the first frame; added to later timestamps below */
356
+        p_mp4->i_first_cts = p_mp4->b_dts_compress ? 0 : p_mp4->i_start_offset * p_mp4->i_time_inc;
357
+        if( p_mp4->b_fragments )
358
+        {
359
+            lsmash_edit_t edit;
360
+            edit.duration   = ISOM_EDIT_DURATION_UNKNOWN32;     /* QuickTime doesn't support 64bit duration. */
361
+            edit.start_time = p_mp4->i_first_cts;
362
+            edit.rate       = ISOM_EDIT_MODE_NORMAL;
363
+            MP4_LOG_IF_ERR( lsmash_create_explicit_timeline_map( p_mp4->p_root, p_mp4->i_track, edit ), /* NOTE(review): MP4_LOG_IF_ERR is defined outside this view; presumably it only logs -- confirm */
364
+                            "failed to set timeline map for video.\n" );
365
+        }
366
+    }
367
+
368
+    lsmash_sample_t *p_sample = lsmash_create_sample( i_size + p_mp4->i_sei_size ); /* room for the frame plus any pending SEI */
369
+    MP4_FAIL_IF_ERR( !p_sample,
370
+                     "failed to create a video sample data.\n" );
371
+
372
+    if( p_mp4->p_sei_buffer )
373
+    {
374
+        memcpy( p_sample->data, p_mp4->p_sei_buffer, p_mp4->i_sei_size ); /* the pending SEI goes first in the sample */
375
+        free( p_mp4->p_sei_buffer );
376
+        p_mp4->p_sei_buffer = NULL;
377
+    }
378
+
379
+    memcpy( p_sample->data + p_mp4->i_sei_size, p_nalu, i_size ); /* frame data follows the SEI bytes, or starts at offset 0 when i_sei_size is 0 */
380
+    p_mp4->i_sei_size = 0; /* later samples get no SEI prefix */
381
+
382
+    if( p_mp4->b_dts_compress )
383
+    {
384
+        if( p_mp4->i_numframe == 1 ) /* second frame: remember its offset-adjusted dts as i_init_delta */
385
+            p_mp4->i_init_delta = (p_picture->i_dts + p_mp4->i_start_offset) * p_mp4->i_time_inc;
386
+        dts = p_mp4->i_numframe > p_mp4->i_delay_frames /* past the delay frames the unshifted dts is used; before that, dts advances in steps of i_init_delta / i_dts_compress_multiplier */
387
+            ? p_picture->i_dts * p_mp4->i_time_inc
388
+            : p_mp4->i_numframe * (p_mp4->i_init_delta / p_mp4->i_dts_compress_multiplier);
389
+        cts = p_picture->i_pts * p_mp4->i_time_inc;
390
+    }
391
+    else
392
+    {
393
+        dts = (p_picture->i_dts + p_mp4->i_start_offset) * p_mp4->i_time_inc; /* shifted by i_start_offset so the first frame's dts becomes 0 */
394
+        cts = (p_picture->i_pts + p_mp4->i_start_offset) * p_mp4->i_time_inc;
395
+    }
396
+
397
+    p_sample->dts = dts;
398
+    p_sample->cts = cts;
399
+    p_sample->index = p_mp4->i_sample_entry; /* sample entry index stored by write_headers */
400
+    p_sample->prop.ra_flags = p_picture->b_keyframe ? ISOM_SAMPLE_RANDOM_ACCESS_FLAG_SYNC : ISOM_SAMPLE_RANDOM_ACCESS_FLAG_NONE;
401
+
402
+    if( p_mp4->b_fragments && p_mp4->i_numframe && p_sample->prop.ra_flags != ISOM_SAMPLE_RANDOM_ACCESS_FLAG_NONE ) /* fragmented output: every keyframe after the first frame starts a new movie fragment */
403
+    {
404
+        MP4_FAIL_IF_ERR( lsmash_flush_pooled_samples( p_mp4->p_root, p_mp4->i_track, p_sample->dts - p_mp4->i_prev_dts ), /* NOTE(review): p_sample is not released in this function if this or the next check fails; the macro and L-SMASH ownership rules are outside this view -- confirm */
405
+                         "failed to flush the rest of samples.\n" );
406
+        MP4_FAIL_IF_ERR( lsmash_create_fragment_movie( p_mp4->p_root ),
407
+                         "failed to create a movie fragment.\n" );
408
+    }
409
+
410
+    /* Append data per sample. */
411
+    MP4_FAIL_IF_ERR( lsmash_append_sample( p_mp4->p_root, p_mp4->i_track, p_sample ), /* NOTE(review): p_sample is never freed in this function; presumably the muxer takes ownership -- confirm, including on failure */
412
+                     "failed to append a video frame.\n" );
413
+
414
+    p_mp4->i_prev_dts = dts; /* used for the flush delta at the next fragment boundary */
415
+    p_mp4->i_numframe++;
416
+
417
+    return i_size;
418
+}
419
+
420
+const cli_output_t mp4_output = { open_file, set_param, write_headers, write_frame, close_file }; /* Callback table of this L-SMASH based MP4 muxer; x264.c assigns it to cli_output for "mp4" output. */
421
x264-snapshot-20130723-2245.tar.bz2/output/output.h -> x264-snapshot-20140321-2245.tar.bz2/output/output.h Changed
10
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * output.h: x264 file output modules
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
  *          Loren Merritt <lorenm@u.washington.edu>
10
x264-snapshot-20130723-2245.tar.bz2/output/raw.c -> x264-snapshot-20140321-2245.tar.bz2/output/raw.c Changed
19
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * raw.c: raw muxer
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
  *          Loren Merritt <lorenm@u.washington.edu>
10
@@ -30,7 +30,7 @@
11
 {
12
     if( !strcmp( psz_filename, "-" ) )
13
         *p_handle = stdout;
14
-    else if( !(*p_handle = fopen( psz_filename, "w+b" )) )
15
+    else if( !(*p_handle = x264_fopen( psz_filename, "w+b" )) )
16
         return -1;
17
 
18
     return 0;
19
x264-snapshot-20130723-2245.tar.bz2/tools/checkasm-a.asm -> x264-snapshot-20140321-2245.tar.bz2/tools/checkasm-a.asm Changed
10
 
1
@@ -1,7 +1,7 @@
2
 ;*****************************************************************************
3
 ;* checkasm-a.asm: assembly check tool
4
 ;*****************************************************************************
5
-;* Copyright (C) 2008-2013 x264 project
6
+;* Copyright (C) 2008-2014 x264 project
7
 ;*
8
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
9
 ;*          Henrik Gramner <henrik@gramner.com>
10
x264-snapshot-20130723-2245.tar.bz2/tools/checkasm.c -> x264-snapshot-20140321-2245.tar.bz2/tools/checkasm.c Changed
219
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * checkasm.c: assembly check tool
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Loren Merritt <lorenm@u.washington.edu>
9
  *          Laurent Aimar <fenrir@via.ecp.fr>
10
@@ -191,7 +191,6 @@
11
                     b->cpu&X264_CPU_SLOW_ATOM && b->cpu&X264_CPU_CACHELINE_64 ? "_c64_atom" :
12
                     b->cpu&X264_CPU_CACHELINE_64 ? "_c64" :
13
                     b->cpu&X264_CPU_SLOW_SHUFFLE ? "_slowshuffle" :
14
-                    b->cpu&X264_CPU_SSE_MISALIGN ? "_misalign" :
15
                     b->cpu&X264_CPU_LZCNT ? "_lzcnt" :
16
                     b->cpu&X264_CPU_BMI2 ? "_bmi2" :
17
                     b->cpu&X264_CPU_BMI1 ? "_bmi1" :
18
@@ -201,7 +200,7 @@
19
                     b->cpu&X264_CPU_FAST_NEON_MRC ? "_fast_mrc" :
20
 #endif
21
                     "",
22
-                    ((int64_t)10*b->cycles/b->den - nop_time)/4 );
23
+                    (int64_t)(10*b->cycles/b->den - nop_time)/4 );
24
         }
25
 }
26
 
27
@@ -407,7 +406,7 @@
28
                 } \
29
                 else \
30
                     call_a( pixel_asm.sad_x3[i], pbuf1, pix2, pix2+6, pix2+1, (intptr_t)64, res_asm ); \
31
-                if( memcmp(res_c, res_asm, sizeof(res_c)) ) \
32
+                if( memcmp(res_c, res_asm, N*sizeof(int)) ) \
33
                 { \
34
                     ok = 0; \
35
                     fprintf( stderr, "sad_x"#N"[%d]: %d,%d,%d,%d != %d,%d,%d,%d [FAILED]\n", \
36
@@ -1452,8 +1451,66 @@
37
                 }
38
         }
39
     }
40
+
41
+    if( mc_a.plane_copy_deinterleave_rgb != mc_ref.plane_copy_deinterleave_rgb )
42
+    {
43
+        set_func_name( "plane_copy_deinterleave_rgb" );
44
+        used_asm = 1;
45
+        for( int i = 0; i < sizeof(plane_specs)/sizeof(*plane_specs); i++ )
46
+        {
47
+            int w = (plane_specs[i].w + 2) >> 2;
48
+            int h = plane_specs[i].h;
49
+            intptr_t src_stride = plane_specs[i].src_stride;
50
+            intptr_t dst_stride = ALIGN( w, 16 );
51
+            intptr_t offv = dst_stride*h + 16;
52
+
53
+            for( int pw = 3; pw <= 4; pw++ )
54
+            {
55
+                memset( pbuf3, 0, 0x1000 );
56
+                memset( pbuf4, 0, 0x1000 );
57
+                call_c( mc_c.plane_copy_deinterleave_rgb, pbuf3, dst_stride, pbuf3+offv, dst_stride, pbuf3+2*offv, dst_stride, pbuf1, src_stride, pw, w, h );
58
+                call_a( mc_a.plane_copy_deinterleave_rgb, pbuf4, dst_stride, pbuf4+offv, dst_stride, pbuf4+2*offv, dst_stride, pbuf1, src_stride, pw, w, h );
59
+                for( int y = 0; y < h; y++ )
60
+                    if( memcmp( pbuf3+y*dst_stride+0*offv, pbuf4+y*dst_stride+0*offv, w ) ||
61
+                        memcmp( pbuf3+y*dst_stride+1*offv, pbuf4+y*dst_stride+1*offv, w ) ||
62
+                        memcmp( pbuf3+y*dst_stride+2*offv, pbuf4+y*dst_stride+2*offv, w ) )
63
+                    {
64
+                        ok = 0;
65
+                        fprintf( stderr, "plane_copy_deinterleave_rgb FAILED: w=%d h=%d stride=%d pw=%d\n", w, h, (int)src_stride, pw );
66
+                        break;
67
+                    }
68
+            }
69
+        }
70
+    }
71
     report( "plane_copy :" );
72
 
73
+    if( mc_a.plane_copy_deinterleave_v210 != mc_ref.plane_copy_deinterleave_v210 )
74
+    {
75
+        set_func_name( "plane_copy_deinterleave_v210" );
76
+        used_asm = 1;
77
+        for( int i = 0; i < sizeof(plane_specs)/sizeof(*plane_specs); i++ )
78
+        {
79
+            int w = (plane_specs[i].w + 1) >> 1;
80
+            int h = plane_specs[i].h;
81
+            intptr_t dst_stride = ALIGN( w, 16 );
82
+            intptr_t src_stride = (w + 47) / 48 * 128 / sizeof(uint32_t);
83
+            intptr_t offv = dst_stride*h + 32;
84
+            memset( pbuf3, 0, 0x1000 );
85
+            memset( pbuf4, 0, 0x1000 );
86
+            call_c( mc_c.plane_copy_deinterleave_v210, pbuf3, dst_stride, pbuf3+offv, dst_stride, (uint32_t *)buf1, src_stride, w, h );
87
+            call_a( mc_a.plane_copy_deinterleave_v210, pbuf4, dst_stride, pbuf4+offv, dst_stride, (uint32_t *)buf1, src_stride, w, h );
88
+            for( int y = 0; y < h; y++ )
89
+                if( memcmp( pbuf3+y*dst_stride,      pbuf4+y*dst_stride,      w*sizeof(uint16_t) ) ||
90
+                    memcmp( pbuf3+y*dst_stride+offv, pbuf4+y*dst_stride+offv, w*sizeof(uint16_t) ) )
91
+                {
92
+                    ok = 0;
93
+                    fprintf( stderr, "plane_copy_deinterleave_v210 FAILED: w=%d h=%d stride=%d\n", w, h, (int)src_stride );
94
+                    break;
95
+                }
96
+        }
97
+    }
98
+    report( "v210 :" );
99
+
100
     if( mc_a.hpel_filter != mc_ref.hpel_filter )
101
     {
102
         pixel *srchpel = pbuf1+8+2*64;
103
@@ -1541,16 +1598,17 @@
104
     INTEGRAL_INIT( integral_init8v, 9, sum, stride );
105
     report( "integral init :" );
106
 
107
+    ok = 1; used_asm = 0;
108
     if( mc_a.mbtree_propagate_cost != mc_ref.mbtree_propagate_cost )
109
     {
110
-        ok = 1; used_asm = 1;
111
+        used_asm = 1;
112
         x264_emms();
113
         for( int i = 0; i < 10; i++ )
114
         {
115
-            float fps_factor = (rand()&65535) / 256.;
116
-            set_func_name( "mbtree_propagate" );
117
-            int *dsta = (int*)buf3;
118
-            int *dstc = dsta+400;
119
+            float fps_factor = (rand()&65535) / 65535.0f;
120
+            set_func_name( "mbtree_propagate_cost" );
121
+            int16_t *dsta = (int16_t*)buf3;
122
+            int16_t *dstc = dsta+400;
123
             uint16_t *prop = (uint16_t*)buf1;
124
             uint16_t *intra = (uint16_t*)buf4;
125
             uint16_t *inter = intra+128;
126
@@ -1572,12 +1630,60 @@
127
             {
128
                 ok &= abs( dstc[j]-dsta[j] ) <= 1 || fabs( (double)dstc[j]/dsta[j]-1 ) < 1e-4;
129
                 if( !ok )
130
-                    fprintf( stderr, "mbtree_propagate FAILED: %f !~= %f\n", (double)dstc[j], (double)dsta[j] );
131
+                    fprintf( stderr, "mbtree_propagate_cost FAILED: %f !~= %f\n", (double)dstc[j], (double)dsta[j] );
132
             }
133
         }
134
-        report( "mbtree propagate :" );
135
     }
136
 
137
+    if( mc_a.mbtree_propagate_list != mc_ref.mbtree_propagate_list )
138
+    {
139
+        used_asm = 1;
140
+        for( int i = 0; i < 8; i++ )
141
+        {
142
+            set_func_name( "mbtree_propagate_list" );
143
+            x264_t h;
144
+            int height = 4;
145
+            int width = 128;
146
+            int size = width*height;
147
+            h.mb.i_mb_stride = width;
148
+            h.mb.i_mb_width = width;
149
+            h.mb.i_mb_height = height;
150
+
151
+            uint16_t *ref_costsc = (uint16_t*)buf3;
152
+            uint16_t *ref_costsa = (uint16_t*)buf4;
153
+            int16_t (*mvs)[2] = (int16_t(*)[2])(ref_costsc + size);
154
+            int16_t *propagate_amount = (int16_t*)(mvs + width);
155
+            uint16_t *lowres_costs = (uint16_t*)(propagate_amount + width);
156
+            h.scratch_buffer2 = (uint8_t*)(ref_costsa + size);
157
+            int bipred_weight = (rand()%63)+1;
158
+            int list = i&1;
159
+            for( int j = 0; j < size; j++ )
160
+                ref_costsc[j] = ref_costsa[j] = rand()&32767;
161
+            for( int j = 0; j < width; j++ )
162
+            {
163
+                static const uint8_t list_dist[2][8] = {{0,1,1,1,1,1,1,1},{1,1,3,3,3,3,3,2}};
164
+                for( int k = 0; k < 2; k++ )
165
+                    mvs[j][k] = (rand()&127) - 64;
166
+                propagate_amount[j] = rand()&32767;
167
+                lowres_costs[j] = list_dist[list][rand()&7] << LOWRES_COST_SHIFT;
168
+            }
169
+
170
+            call_c1( mc_c.mbtree_propagate_list, &h, ref_costsc, mvs, propagate_amount, lowres_costs, bipred_weight, 0, width, list );
171
+            call_a1( mc_a.mbtree_propagate_list, &h, ref_costsa, mvs, propagate_amount, lowres_costs, bipred_weight, 0, width, list );
172
+
173
+            for( int j = 0; j < size && ok; j++ )
174
+            {
175
+                ok &= abs(ref_costsa[j] - ref_costsc[j]) <= 1;
176
+                if( !ok )
177
+                    fprintf( stderr, "mbtree_propagate_list FAILED at %d: %d !~= %d\n", j, ref_costsc[j], ref_costsa[j] );
178
+            }
179
+
180
+            call_c2( mc_c.mbtree_propagate_list, &h, ref_costsc, mvs, propagate_amount, lowres_costs, bipred_weight, 0, width, list );
181
+            call_a2( mc_a.mbtree_propagate_list, &h, ref_costsa, mvs, propagate_amount, lowres_costs, bipred_weight, 0, width, list );
182
+        }
183
+    }
184
+    report( "mbtree :" );
185
+
186
     if( mc_a.memcpy_aligned != mc_ref.memcpy_aligned )
187
     {
188
         set_func_name( "memcpy_aligned" );
189
@@ -1779,7 +1885,7 @@
190
         }
191
 
192
         h->param.rc.i_qp_min = 0;
193
-        h->param.rc.i_qp_max = QP_MAX;
194
+        h->param.rc.i_qp_max = QP_MAX_SPEC;
195
         x264_cqm_init( h );
196
         x264_quant_init( h, 0, &qf_c );
197
         x264_quant_init( h, cpu_ref, &qf_ref );
198
@@ -2504,7 +2610,7 @@
199
 {
200
     *cpu_ref = *cpu_new;
201
     *cpu_new |= flags;
202
-#if BROKEN_STACK_ALIGNMENT
203
+#if STACK_ALIGNMENT < 16
204
     *cpu_new |= X264_CPU_STACK_MOD4;
205
 #endif
206
     if( *cpu_new & X264_CPU_SSE2_IS_FAST )
207
@@ -2549,11 +2655,6 @@
208
         ret |= add_flags( &cpu0, &cpu1, X264_CPU_SLOW_CTZ, "SSE2 SlowCTZ" );
209
         cpu1 &= ~X264_CPU_SLOW_CTZ;
210
     }
211
-    if( x264_cpu_detect() & X264_CPU_SSE_MISALIGN )
212
-    {
213
-        ret |= add_flags( &cpu0, &cpu1, X264_CPU_SSE_MISALIGN, "SSE_Misalign" );
214
-        cpu1 &= ~X264_CPU_SSE_MISALIGN;
215
-    }
216
     if( x264_cpu_detect() & X264_CPU_LZCNT )
217
     {
218
         ret |= add_flags( &cpu0, &cpu1, X264_CPU_LZCNT, "SSE_LZCNT" );
219
x264-snapshot-20130723-2245.tar.bz2/tools/cltostr.pl -> x264-snapshot-20140321-2245.tar.bz2/tools/cltostr.pl Changed
9
 
1
@@ -1,6 +1,6 @@
2
 # Perl script used for compiling OpenCL src into x264 binary
3
 #
4
-# Copyright (C) 2013 x264 project
5
+# Copyright (C) 2013-2014 x264 project
6
 # Authors: Steve Borho <sborho@multicorewareinc.com>
7
 
8
 use Digest::MD5 qw(md5_hex);
9
x264-snapshot-20130723-2245.tar.bz2/version.sh -> x264-snapshot-20140321-2245.tar.bz2/version.sh Changed
30
 
1
@@ -1,5 +1,8 @@
2
-#!/bin/bash
3
+#!/bin/sh
4
 [ -n "$1" ] && cd $1
5
+
6
+git_version() {
7
+trap 'rm -f config.git-hash' EXIT
8
 git rev-list HEAD | sort > config.git-hash
9
 LOCALVER=`wc -l config.git-hash | awk '{print $1}'`
10
 if [ $LOCALVER \> 1 ] ; then
11
@@ -14,11 +17,13 @@
12
         VER="${VER}M"
13
     fi
14
     VER="$VER $(git rev-list HEAD -n 1 | cut -c 1-7)"
15
-    echo "#define X264_VERSION \" r$VER\""
16
-else
17
-    echo "#define X264_VERSION \"\""
18
-    VER="x"
19
+    VERSION=" r$VER"
20
 fi
21
-rm -f config.git-hash
22
+}
23
+
24
+VER="x"
25
+VERSION=""
26
+[ -d .git ] && (type git >/dev/null 2>&1) && git_version
27
+echo "#define X264_VERSION \"$VERSION\""
28
 API=`grep '#define X264_BUILD' < x264.h | sed -e 's/.* \([1-9][0-9]*\).*/\1/'`
29
 echo "#define X264_POINTVER \"0.$API.$VER\""
30
x264-snapshot-20130723-2245.tar.bz2/x264.c -> x264-snapshot-20140321-2245.tar.bz2/x264.c Changed
418
 
1
@@ -1,13 +1,14 @@
2
 /*****************************************************************************
3
  * x264: top-level x264cli functions
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Loren Merritt <lorenm@u.washington.edu>
9
  *          Laurent Aimar <fenrir@via.ecp.fr>
10
  *          Steven Walters <kemuri9@gmail.com>
11
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
12
  *          Kieran Kunhya <kieran@kunhya.com>
13
+ *          Henrik Gramner <henrik@gramner.com>
14
  *
15
  * This program is free software; you can redistribute it and/or modify
16
  * it under the terms of the GNU General Public License as published by
17
@@ -27,6 +28,15 @@
18
  * For more information, contact us at licensing@x264.com.
19
  *****************************************************************************/
20
 
21
+#ifdef _WIN32
22
+/* The following two defines must be located before the inclusion of any system header files. */
23
+#define WINVER       0x0500
24
+#define _WIN32_WINNT 0x0500
25
+#include <windows.h>
26
+#include <io.h>       /* _setmode() */
27
+#include <fcntl.h>    /* _O_BINARY */
28
+#endif
29
+
30
 #include <signal.h>
31
 #define _GNU_SOURCE
32
 #include <getopt.h>
33
@@ -38,13 +48,6 @@
34
 
35
 #define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "x264", __VA_ARGS__ )
36
 
37
-#ifdef _WIN32
38
-#include <windows.h>
39
-#else
40
-#define GetConsoleTitle(t,n)
41
-#define SetConsoleTitle(t)
42
-#endif
43
-
44
 #if HAVE_LAVF
45
 #undef DECLARE_ALIGNED
46
 #include <libavformat/avformat.h>
47
@@ -61,18 +64,89 @@
48
 #include <ffms.h>
49
 #endif
50
 
51
+#ifdef _WIN32
52
+#define CONSOLE_TITLE_SIZE 200
53
+static wchar_t org_console_title[CONSOLE_TITLE_SIZE] = L"";
54
+
55
+void x264_cli_set_console_title( const char *title )
56
+{ /* Sets the console window title from title; does nothing when utf8_to_utf16 returns zero. */
57
+    wchar_t title_utf16[CONSOLE_TITLE_SIZE]; /* NOTE(review): utf8_to_utf16 is defined outside this view and gets no explicit length here -- confirm it cannot write past this buffer */
58
+    if( utf8_to_utf16( title, title_utf16 ) )
59
+        SetConsoleTitleW( title_utf16 );
60
+}
61
+
62
+static int utf16_to_ansi( const wchar_t *utf16, char *ansi, int size )
63
+{ /* Converts the NUL-terminated string utf16 to the ANSI code page (CP_ACP), writing at most size bytes to ansi. Returns nonzero only if the conversion succeeded and no character had to be replaced by the default character. */
64
+    int invalid; /* set by WideCharToMultiByte when a default character was used; only read when the call returned nonzero */
65
+    return WideCharToMultiByte( CP_ACP, WC_NO_BEST_FIT_CHARS, utf16, -1, ansi, size, NULL, &invalid ) && !invalid;
66
+}
67
+
68
+/* Some external libraries don't support Unicode in filenames,
69
+ * as a workaround we can try to get an ANSI filename instead. */
70
+int x264_ansi_filename( const char *filename, char *ansi_filename, int size, int create_file )
71
+{ /* Returns 1 with ansi_filename (capacity size) filled in when an ANSI spelling of filename is found, 0 otherwise. */
72
+    wchar_t filename_utf16[MAX_PATH];
73
+    if( utf8_to_utf16( filename, filename_utf16 ) )
74
+    {
75
+        if( create_file )
76
+        {
77
+            /* Create the file using the Unicode filename if it doesn't already exist. */
78
+            FILE *fh = _wfopen( filename_utf16, L"ab" ); /* append mode: creates a missing file without truncating an existing one */
79
+            if( fh )
80
+                fclose( fh );
81
+        }
82
+
83
+        /* Check if the filename already is valid ANSI. */
84
+        if( utf16_to_ansi( filename_utf16, ansi_filename, size ) )
85
+            return 1;
86
+
87
+        /* Check for a legacy 8.3 short filename. */
88
+        int short_length = GetShortPathNameW( filename_utf16, filename_utf16, MAX_PATH ); /* short path overwrites filename_utf16 in place */
89
+        if( short_length > 0 && short_length < MAX_PATH ) /* accept only a nonzero length that fits in MAX_PATH */
90
+            if( utf16_to_ansi( filename_utf16, ansi_filename, size ) )
91
+                return 1;
92
+    }
93
+    return 0; /* NOTE(review): a file created above is left in place when no ANSI name is found */
94
+}
95
+
96
+/* Retrieve command line arguments as UTF-8. */
97
+static int get_argv_utf8( int *argc_ptr, char ***argv_ptr )
98
+{ /* Returns 1 after storing a newly malloc'ed, NULL-terminated argument vector in *argv_ptr and the argument count in *argc_ptr; returns 0 on failure. */
99
+    int ret = 0;
100
+    wchar_t **argv_utf16 = CommandLineToArgvW( GetCommandLineW(), argc_ptr ); /* also writes the argument count to *argc_ptr */
101
+    if( argv_utf16 )
102
+    {
103
+        int argc = *argc_ptr;
104
+        int offset = (argc+1) * sizeof(char*); /* the pointer table (argc entries plus a NULL terminator) sits at the start of the allocation */
105
+        int size = offset;
106
+
107
+        for( int i = 0; i < argc; i++ ) /* first pass: a NULL output buffer of size 0 only measures the bytes each argument needs */
108
+            size += WideCharToMultiByte( CP_UTF8, 0, argv_utf16[i], -1, NULL, 0, NULL, NULL );
109
+
110
+        char **argv = *argv_ptr = malloc( size ); /* one block holds both the table and the strings; NOTE(review): *argv_ptr is overwritten even when malloc fails */
111
+        if( argv )
112
+        {
113
+            for( int i = 0; i < argc; i++ ) /* second pass: convert each argument into the space after the table */
114
+            {
115
+                argv[i] = (char*)argv + offset;
116
+                offset += WideCharToMultiByte( CP_UTF8, 0, argv_utf16[i], -1, argv[i], size-offset, NULL, NULL );
117
+            }
118
+            argv[argc] = NULL;
119
+            ret = 1;
120
+        }
121
+        LocalFree( argv_utf16 ); /* the UTF-16 argument array is released on both the success and the failure path */
122
+    }
123
+    return ret;
124
+}
125
+#endif
126
+
127
 /* Ctrl-C handler */
128
 static volatile int b_ctrl_c = 0;
129
-static int          b_exit_on_ctrl_c = 0;
130
 static void sigint_handler( int a )
131
 {
132
-    if( b_exit_on_ctrl_c )
133
-        exit(0);
134
     b_ctrl_c = 1;
135
 }
136
 
137
-static char UNUSED originalCTitle[200] = "";
138
-
139
 typedef struct {
140
     int b_progress;
141
     int i_seek;
142
@@ -114,7 +188,7 @@
143
     "raw",
144
     "mkv",
145
     "flv",
146
-#if HAVE_GPAC
147
+#if HAVE_GPAC || HAVE_LSMASH
148
     "mp4",
149
 #endif
150
     0
151
@@ -211,7 +285,7 @@
152
     fprintf( stderr, "%s [%s]: ", name, s_level );
153
     va_list arg;
154
     va_start( arg, fmt );
155
-    vfprintf( stderr, fmt, arg );
156
+    x264_vfprintf( stderr, fmt, arg );
157
     va_end( arg );
158
 }
159
 
160
@@ -221,7 +295,7 @@
161
         return;
162
     va_list arg;
163
     va_start( arg, fmt );
164
-    vfprintf( stderr, fmt, arg );
165
+    x264_vfprintf( stderr, fmt, arg );
166
     va_end( arg );
167
 }
168
 
169
@@ -275,18 +349,22 @@
170
     FAIL_IF_ERROR( x264_threading_init(), "unable to initialize threading\n" )
171
 
172
 #ifdef _WIN32
173
-    _setmode(_fileno(stdin), _O_BINARY);
174
-    _setmode(_fileno(stdout), _O_BINARY);
175
-#endif
176
+    FAIL_IF_ERROR( !get_argv_utf8( &argc, &argv ), "unable to convert command line to UTF-8\n" )
177
 
178
-    GetConsoleTitle( originalCTitle, sizeof(originalCTitle) );
179
+    GetConsoleTitleW( org_console_title, CONSOLE_TITLE_SIZE );
180
+    _setmode( _fileno( stdin ),  _O_BINARY );
181
+    _setmode( _fileno( stdout ), _O_BINARY );
182
+    _setmode( _fileno( stderr ), _O_BINARY );
183
+#endif
184
 
185
     /* Parse command line */
186
     if( parse( argc, argv, &param, &opt ) < 0 )
187
         ret = -1;
188
 
189
+#ifdef _WIN32
190
     /* Restore title; it can be changed by input modules */
191
-    SetConsoleTitle( originalCTitle );
192
+    SetConsoleTitleW( org_console_title );
193
+#endif
194
 
195
     /* Control-C handler */
196
     signal( SIGINT, sigint_handler );
197
@@ -306,7 +384,10 @@
198
     if( opt.qpfile )
199
         fclose( opt.qpfile );
200
 
201
-    SetConsoleTitle( originalCTitle );
202
+#ifdef _WIN32
203
+    SetConsoleTitleW( org_console_title );
204
+    free( argv );
205
+#endif
206
 
207
     return ret;
208
 }
209
@@ -339,16 +420,19 @@
210
     printf( INDENT );
211
     for( int i = X264_CSP_NONE+1; i < X264_CSP_CLI_MAX; i++ )
212
     {
213
-        printf( "%s", x264_cli_csps[i].name );
214
-        if( i+1 < X264_CSP_CLI_MAX )
215
-            printf( ", " );
216
+        if( x264_cli_csps[i].name )
217
+        {
218
+            printf( "%s", x264_cli_csps[i].name );
219
+            if( i+1 < X264_CSP_CLI_MAX )
220
+                printf( ", " );
221
+        }
222
     }
223
 #if HAVE_LAVF
224
     printf( "\n" );
225
     printf( "                              - valid csps for `lavf' demuxer:\n" );
226
     printf( INDENT );
227
     size_t line_len = strlen( INDENT );
228
-    for( enum PixelFormat i = PIX_FMT_NONE+1; i < PIX_FMT_NB; i++ )
229
+    for( enum PixelFormat i = AV_PIX_FMT_NONE+1; i < AV_PIX_FMT_NB; i++ )
230
     {
231
         const char *pfname = av_get_pix_fmt_name( i );
232
         if( pfname )
233
@@ -361,7 +445,7 @@
234
             }
235
             printf( "%s", pfname );
236
             line_len += name_len;
237
-            if( i+1 < PIX_FMT_NB )
238
+            if( i+1 < AV_PIX_FMT_NB )
239
             {
240
                 printf( ", " );
241
                 line_len += 2;
242
@@ -389,7 +473,7 @@
243
         " .264 -> Raw bytestream\n"
244
         " .mkv -> Matroska\n"
245
         " .flv -> Flash Video\n"
246
-        " .mp4 -> MP4 if compiled with GPAC support (%s)\n"
247
+        " .mp4 -> MP4 if compiled with GPAC or L-SMASH support (%s)\n"
248
         "Output bit depth: %d (configured at compile time)\n"
249
         "\n"
250
         "Options:\n"
251
@@ -415,7 +499,9 @@
252
         "no",
253
 #endif
254
 #if HAVE_GPAC
255
-        "yes",
256
+        "gpac",
257
+#elif HAVE_LSMASH
258
+        "lsmash",
259
 #else
260
         "no",
261
 #endif
262
@@ -764,6 +850,8 @@
263
 
264
     H2( "      --nal-hrd <string>      Signal HRD information (requires vbv-bufsize)\n"
265
         "                                  - none, vbr, cbr (cbr not allowed in .mp4)\n" );
266
+    H2( "      --filler                Force hard-CBR and generate filler (implied by\n"
267
+        "                              --nal-hrd cbr)\n" );
268
     H2( "      --pic-struct            Force pic_struct in Picture Timing SEI\n" );
269
     H2( "      --crop-rect <string>    Add 'left,top,right,bottom' to the bitstream-level\n"
270
         "                              cropping rectangle\n" );
271
@@ -792,6 +880,8 @@
272
     H0( "      --frames <integer>      Maximum number of frames to encode\n" );
273
     H0( "      --level <string>        Specify level (as defined by Annex A)\n" );
274
     H1( "      --bluray-compat         Enable compatibility hacks for Blu-ray support\n" );
275
+    H1( "      --avcintra-class <integer> Use compatibility hacks for AVC-Intra class\n"
276
+        "                                  - 50, 100, 200\n" );
277
     H1( "      --stitchable            Don't optimize headers based on video content\n"
278
         "                              Ensures ability to recombine a segmented encode\n" );
279
     H1( "\n" );
280
@@ -815,8 +905,7 @@
281
     H2( "      --no-asm                Disable all CPU optimizations\n" );
282
     H2( "      --opencl                Enable use of OpenCL\n" );
283
     H2( "      --opencl-clbin <string> Specify path of compiled OpenCL kernel cache\n" );
284
-    H2( "      --opencl-device <integer>  Specify OpenCL device ordinal\n" );
285
-    H2( "      --visualize             Show MB types overlayed on the encoded video\n" );
286
+    H2( "      --opencl-device <integer> Specify OpenCL device ordinal\n" );
287
     H2( "      --dump-yuv <string>     Save reconstructed frames\n" );
288
     H2( "      --sps-id <integer>      Set SPS and PPS id numbers [%d]\n", defaults->i_sps_id );
289
     H2( "      --aud                   Use access unit delimiters\n" );
290
@@ -848,7 +937,6 @@
291
     OPT_THREAD_INPUT,
292
     OPT_QUIET,
293
     OPT_NOPROGRESS,
294
-    OPT_VISUALIZE,
295
     OPT_LONGHELP,
296
     OPT_PROFILE,
297
     OPT_PRESET,
298
@@ -895,6 +983,7 @@
299
     { "b-pyramid",   required_argument, NULL, 0 },
300
     { "open-gop",          no_argument, NULL, 0 },
301
     { "bluray-compat",     no_argument, NULL, 0 },
302
+    { "avcintra-class", required_argument, NULL, 0 },
303
     { "min-keyint",  required_argument, NULL, 'i' },
304
     { "keyint",      required_argument, NULL, 'I' },
305
     { "intra-refresh",     no_argument, NULL, 0 },
306
@@ -997,7 +1086,6 @@
307
     { "verbose",           no_argument, NULL, 'v' },
308
     { "log-level",   required_argument, NULL, OPT_LOG_LEVEL },
309
     { "no-progress",       no_argument, NULL, OPT_NOPROGRESS },
310
-    { "visualize",         no_argument, NULL, OPT_VISUALIZE },
311
     { "dump-yuv",    required_argument, NULL, 0 },
312
     { "sps-id",      required_argument, NULL, 0 },
313
     { "aud",               no_argument, NULL, 0 },
314
@@ -1041,6 +1129,7 @@
315
     { "output-csp",  required_argument, NULL, OPT_OUTPUT_CSP },
316
     { "input-range", required_argument, NULL, OPT_INPUT_RANGE },
317
     { "stitchable",        no_argument, NULL, 0 },
318
+    { "filler",            no_argument, NULL, 0 },
319
     {0, 0, 0, 0}
320
 };
321
 
322
@@ -1052,7 +1141,7 @@
323
 
324
     if( !strcasecmp( ext, "mp4" ) )
325
     {
326
-#if HAVE_GPAC
327
+#if HAVE_GPAC || HAVE_LSMASH
328
         cli_output = mp4_output;
329
         param->b_annexb = 0;
330
         param->b_repeat_headers = 0;
331
@@ -1094,7 +1183,7 @@
332
     b_regular = b_regular && x264_is_regular_file_path( filename );
333
     if( b_regular )
334
     {
335
-        FILE *f = fopen( filename, "r" );
336
+        FILE *f = x264_fopen( filename, "r" );
337
         if( f )
338
         {
339
             b_regular = x264_is_regular_file( f );
340
@@ -1197,7 +1286,7 @@
341
     int csp = info->csp & X264_CSP_MASK;
342
     if( output_csp == X264_CSP_I420 && (csp < X264_CSP_I420 || csp > X264_CSP_NV12) )
343
         param->i_csp = X264_CSP_I420;
344
-    else if( output_csp == X264_CSP_I422 && (csp < X264_CSP_I422 || csp > X264_CSP_NV16) )
345
+    else if( output_csp == X264_CSP_I422 && (csp < X264_CSP_I422 || csp > X264_CSP_V210) )
346
         param->i_csp = X264_CSP_I422;
347
     else if( output_csp == X264_CSP_I444 && (csp < X264_CSP_I444 || csp > X264_CSP_YV24) )
348
         param->i_csp = X264_CSP_I444;
349
@@ -1338,7 +1427,7 @@
350
                 input_opt.index_file = optarg;
351
                 break;
352
             case OPT_QPFILE:
353
-                opt->qpfile = fopen( optarg, "rb" );
354
+                opt->qpfile = x264_fopen( optarg, "rb" );
355
                 FAIL_IF_ERROR( !opt->qpfile, "can't open qpfile `%s'\n", optarg )
356
                 if( !x264_is_regular_file( opt->qpfile ) )
357
                 {
358
@@ -1366,14 +1455,6 @@
359
             case OPT_NOPROGRESS:
360
                 opt->b_progress = 0;
361
                 break;
362
-            case OPT_VISUALIZE:
363
-#if HAVE_VISUALIZE
364
-                param->b_visualize = 1;
365
-                b_exit_on_ctrl_c = 1;
366
-#else
367
-                x264_cli_log( "x264", X264_LOG_WARNING, "not compiled with visualization support\n" );
368
-#endif
369
-                break;
370
             case OPT_TUNE:
371
             case OPT_PRESET:
372
                 break;
373
@@ -1397,7 +1478,7 @@
374
                 tcfile_name = optarg;
375
                 break;
376
             case OPT_TCFILE_OUT:
377
-                opt->tcfile_out = fopen( optarg, "wb" );
378
+                opt->tcfile_out = x264_fopen( optarg, "wb" );
379
                 FAIL_IF_ERROR( !opt->tcfile_out, "can't open `%s'\n", optarg )
380
                 break;
381
             case OPT_TIMEBASE:
382
@@ -1498,8 +1579,11 @@
383
     info.fps_den    = param->i_fps_den;
384
     info.fullrange  = input_opt.input_range == RANGE_PC;
385
     info.interlaced = param->b_interlaced;
386
-    info.sar_width  = param->vui.i_sar_width;
387
-    info.sar_height = param->vui.i_sar_height;
388
+    if( param->vui.i_sar_width > 0 && param->vui.i_sar_height > 0 )
389
+    {
390
+        info.sar_width  = param->vui.i_sar_width;
391
+        info.sar_height = param->vui.i_sar_height;
392
+    }
393
     info.tff        = param->b_tff;
394
     info.vfr        = param->b_vfr_input;
395
 
396
@@ -1542,7 +1626,7 @@
397
 #endif
398
 
399
     /* override detected values by those specified by the user */
400
-    if( param->vui.i_sar_width && param->vui.i_sar_height )
401
+    if( param->vui.i_sar_width > 0 && param->vui.i_sar_height > 0 )
402
     {
403
         info.sar_width  = param->vui.i_sar_width;
404
         info.sar_height = param->vui.i_sar_height;
405
@@ -1721,11 +1805,9 @@
406
                  eta/3600, (eta/60)%60, eta%60 );
407
     }
408
     else
409
-    {
410
         sprintf( buf, "x264 %d frames: %.2f fps, %.2f kb/s", i_frame, fps, bitrate );
411
-    }
412
     fprintf( stderr, "%s  \r", buf+5 );
413
-    SetConsoleTitle( buf );
414
+    x264_cli_set_console_title( buf );
415
     fflush( stderr ); // needed in windows
416
     return i_time;
417
 }
418
x264-snapshot-20130723-2245.tar.bz2/x264.h -> x264-snapshot-20140321-2245.tar.bz2/x264.h Changed
182
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * x264.h: x264 public header
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
  *          Loren Merritt <lorenm@u.washington.edu>
10
@@ -41,7 +41,7 @@
11
 
12
 #include "x264_config.h"
13
 
14
-#define X264_BUILD 135
15
+#define X264_BUILD 142
16
 
17
 /* Application developers planning to link against a shared library version of
18
  * libx264 from a Microsoft Visual Studio or similar development environment
19
@@ -98,12 +98,15 @@
20
     int i_first_mb; /* If this NAL is a slice, the index of the first MB in the slice. */
21
     int i_last_mb;  /* If this NAL is a slice, the index of the last MB in the slice. */
22
 
23
-    /* Size of payload in bytes. */
24
+    /* Size of payload (including any padding) in bytes. */
25
     int     i_payload;
26
     /* If param->b_annexb is set, Annex-B bytestream with startcode.
27
      * Otherwise, startcode is replaced with a 4-byte size.
28
      * This size is the size used in mp4/similar muxing; it is equal to i_payload-4 */
29
     uint8_t *p_payload;
30
+
31
+    /* Size of padding in bytes. */
32
+    int i_padding;
33
 } x264_nal_t;
34
 
35
 /****************************************************************************
36
@@ -122,30 +125,29 @@
37
 #define X264_CPU_SSSE3           0x0000040
38
 #define X264_CPU_SSE4            0x0000080  /* SSE4.1 */
39
 #define X264_CPU_SSE42           0x0000100  /* SSE4.2 */
40
-#define X264_CPU_SSE_MISALIGN    0x0000200  /* Phenom support for misaligned SSE instruction arguments */
41
-#define X264_CPU_LZCNT           0x0000400  /* Phenom support for "leading zero count" instruction. */
42
-#define X264_CPU_AVX             0x0000800  /* AVX support: requires OS support even if YMM registers aren't used. */
43
-#define X264_CPU_XOP             0x0001000  /* AMD XOP */
44
-#define X264_CPU_FMA4            0x0002000  /* AMD FMA4 */
45
-#define X264_CPU_AVX2            0x0004000  /* AVX2 */
46
-#define X264_CPU_FMA3            0x0008000  /* Intel FMA3 */
47
-#define X264_CPU_BMI1            0x0010000  /* BMI1 */
48
-#define X264_CPU_BMI2            0x0020000  /* BMI2 */
49
+#define X264_CPU_LZCNT           0x0000200  /* Phenom support for "leading zero count" instruction. */
50
+#define X264_CPU_AVX             0x0000400  /* AVX support: requires OS support even if YMM registers aren't used. */
51
+#define X264_CPU_XOP             0x0000800  /* AMD XOP */
52
+#define X264_CPU_FMA4            0x0001000  /* AMD FMA4 */
53
+#define X264_CPU_AVX2            0x0002000  /* AVX2 */
54
+#define X264_CPU_FMA3            0x0004000  /* Intel FMA3 */
55
+#define X264_CPU_BMI1            0x0008000  /* BMI1 */
56
+#define X264_CPU_BMI2            0x0010000  /* BMI2 */
57
 /* x86 modifiers */
58
-#define X264_CPU_CACHELINE_32    0x0040000  /* avoid memory loads that span the border between two cachelines */
59
-#define X264_CPU_CACHELINE_64    0x0080000  /* 32/64 is the size of a cacheline in bytes */
60
-#define X264_CPU_SSE2_IS_SLOW    0x0100000  /* avoid most SSE2 functions on Athlon64 */
61
-#define X264_CPU_SSE2_IS_FAST    0x0200000  /* a few functions are only faster on Core2 and Phenom */
62
-#define X264_CPU_SLOW_SHUFFLE    0x0400000  /* The Conroe has a slow shuffle unit (relative to overall SSE performance) */
63
-#define X264_CPU_STACK_MOD4      0x0800000  /* if stack is only mod4 and not mod16 */
64
-#define X264_CPU_SLOW_CTZ        0x1000000  /* BSR/BSF x86 instructions are really slow on some CPUs */
65
-#define X264_CPU_SLOW_ATOM       0x2000000  /* The Atom is terrible: slow SSE unaligned loads, slow
66
+#define X264_CPU_CACHELINE_32    0x0020000  /* avoid memory loads that span the border between two cachelines */
67
+#define X264_CPU_CACHELINE_64    0x0040000  /* 32/64 is the size of a cacheline in bytes */
68
+#define X264_CPU_SSE2_IS_SLOW    0x0080000  /* avoid most SSE2 functions on Athlon64 */
69
+#define X264_CPU_SSE2_IS_FAST    0x0100000  /* a few functions are only faster on Core2 and Phenom */
70
+#define X264_CPU_SLOW_SHUFFLE    0x0200000  /* The Conroe has a slow shuffle unit (relative to overall SSE performance) */
71
+#define X264_CPU_STACK_MOD4      0x0400000  /* if stack is only mod4 and not mod16 */
72
+#define X264_CPU_SLOW_CTZ        0x0800000  /* BSR/BSF x86 instructions are really slow on some CPUs */
73
+#define X264_CPU_SLOW_ATOM       0x1000000  /* The Atom is terrible: slow SSE unaligned loads, slow
74
                                              * SIMD multiplies, slow SIMD variable shifts, slow pshufb,
75
                                              * cacheline split penalties -- gather everything here that
76
                                              * isn't shared by other CPUs to avoid making half a dozen
77
                                              * new SLOW flags. */
78
-#define X264_CPU_SLOW_PSHUFB     0x4000000  /* such as on the Intel Atom */
79
-#define X264_CPU_SLOW_PALIGNR    0x8000000  /* such as on the AMD Bobcat */
80
+#define X264_CPU_SLOW_PSHUFB     0x2000000  /* such as on the Intel Atom */
81
+#define X264_CPU_SLOW_PALIGNR    0x4000000  /* such as on the AMD Bobcat */
82
 
83
 /* PowerPC */
84
 #define X264_CPU_ALTIVEC         0x0000001
85
@@ -213,12 +215,13 @@
86
 #define X264_CSP_I422           0x0004  /* yuv 4:2:2 planar */
87
 #define X264_CSP_YV16           0x0005  /* yvu 4:2:2 planar */
88
 #define X264_CSP_NV16           0x0006  /* yuv 4:2:2, with one y plane and one packed u+v */
89
-#define X264_CSP_I444           0x0007  /* yuv 4:4:4 planar */
90
-#define X264_CSP_YV24           0x0008  /* yvu 4:4:4 planar */
91
-#define X264_CSP_BGR            0x0009  /* packed bgr 24bits   */
92
-#define X264_CSP_BGRA           0x000a  /* packed bgr 32bits   */
93
-#define X264_CSP_RGB            0x000b  /* packed rgb 24bits   */
94
-#define X264_CSP_MAX            0x000c  /* end of list */
95
+#define X264_CSP_V210           0x0007  /* 10-bit yuv 4:2:2 packed in 32 */
96
+#define X264_CSP_I444           0x0008  /* yuv 4:4:4 planar */
97
+#define X264_CSP_YV24           0x0009  /* yvu 4:4:4 planar */
98
+#define X264_CSP_BGR            0x000a  /* packed bgr 24bits   */
99
+#define X264_CSP_BGRA           0x000b  /* packed bgr 32bits   */
100
+#define X264_CSP_RGB            0x000c  /* packed rgb 24bits   */
101
+#define X264_CSP_MAX            0x000d  /* end of list */
102
 #define X264_CSP_VFLIP          0x1000  /* the csp is vertically flipped */
103
 #define X264_CSP_HIGH_DEPTH     0x2000  /* the csp has a depth of 16 bits per pixel component */
104
 
105
@@ -319,6 +322,7 @@
106
     int         i_bframe_pyramid;   /* Keep some B-frames as references: 0=off, 1=strict hierarchical, 2=normal */
107
     int         b_open_gop;
108
     int         b_bluray_compat;
109
+    int         i_avcintra_class;
110
 
111
     int         b_deblocking_filter;
112
     int         i_deblocking_filter_alphac0;    /* [-6, 6] -6 light filter, 6 strong */
113
@@ -331,7 +335,7 @@
114
     int         b_constrained_intra;
115
 
116
     int         i_cqm_preset;
117
-    char        *psz_cqm_file;      /* JM format */
118
+    char        *psz_cqm_file;      /* filename (in UTF-8) of CQM file, JM format */
119
     uint8_t     cqm_4iy[16];        /* used only if i_cqm_preset == X264_CQM_CUSTOM */
120
     uint8_t     cqm_4py[16];
121
     uint8_t     cqm_4ic[16];
122
@@ -345,9 +349,8 @@
123
     void        (*pf_log)( void *, int i_level, const char *psz, va_list );
124
     void        *p_log_private;
125
     int         i_log_level;
126
-    int         b_visualize;
127
     int         b_full_recon;   /* fully reconstruct frames, even when not necessary for encoding.  Implied by psz_dump_yuv */
128
-    char        *psz_dump_yuv;  /* filename for reconstructed frames */
129
+    char        *psz_dump_yuv;  /* filename (in UTF-8) for reconstructed frames */
130
 
131
     /* Encoder analyser parameters */
132
     struct
133
@@ -406,6 +409,10 @@
134
         float       f_ip_factor;
135
         float       f_pb_factor;
136
 
137
+        /* VBV filler: force CBR VBV and use filler bytes to ensure hard-CBR.
138
+         * Implied by NAL-HRD CBR. */
139
+        int         b_filler;
140
+
141
         int         i_aq_mode;      /* psy adaptive QP. (X264_AQ_*) */
142
         float       f_aq_strength;
143
         int         b_mb_tree;      /* Macroblock-tree ratecontrol. */
144
@@ -413,9 +420,9 @@
145
 
146
         /* 2pass */
147
         int         b_stat_write;   /* Enable stat writing in psz_stat_out */
148
-        char        *psz_stat_out;
149
+        char        *psz_stat_out;  /* output filename (in UTF-8) of the 2pass stats file */
150
         int         b_stat_read;    /* Read stat from psz_stat_in and use it */
151
-        char        *psz_stat_in;
152
+        char        *psz_stat_in;   /* input filename (in UTF-8) of the 2pass stats file */
153
 
154
         /* 2pass params (same as ffmpeg ones) */
155
         float       f_qcompress;    /* 0.0 => cbr, 1.0 => constant qp */
156
@@ -483,7 +490,7 @@
157
     int b_opencl;            /* use OpenCL when available */
158
     int i_opencl_device;     /* specify count of GPU devices to skip, for CLI users */
159
     void *opencl_device_id;  /* pass explicit cl_device_id as void*, for API users */
160
-    char *psz_clbin_file;    /* compiled OpenCL kernel cache file */
161
+    char *psz_clbin_file;    /* filename (in UTF-8) of the compiled OpenCL kernel cache file */
162
 
163
     /* Slicing parameters */
164
     int i_slice_max_size;    /* Max size per slice in bytes; includes estimated NAL overhead. */
165
@@ -877,13 +884,15 @@
166
 /* x264_encoder_headers:
167
  *      return the SPS and PPS that will be used for the whole stream.
168
  *      *pi_nal is the number of NAL units outputted in pp_nal.
169
+ *      returns the number of bytes in the returned NALs.
170
  *      returns negative on error.
171
  *      the payloads of all output NALs are guaranteed to be sequential in memory. */
172
 int     x264_encoder_headers( x264_t *, x264_nal_t **pp_nal, int *pi_nal );
173
 /* x264_encoder_encode:
174
  *      encode one picture.
175
  *      *pi_nal is the number of NAL units outputted in pp_nal.
176
- *      returns negative on error, zero if no NAL units returned.
177
+ *      returns the number of bytes in the returned NALs.
178
+ *      returns negative on error and zero if no NAL units returned.
179
  *      the payloads of all output NALs are guaranteed to be sequential in memory. */
180
 int     x264_encoder_encode( x264_t *, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t *pic_out );
181
 /* x264_encoder_close:
182
x264-snapshot-20130723-2245.tar.bz2/x264cli.h -> x264-snapshot-20140321-2245.tar.bz2/x264cli.h Changed
24
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * x264cli.h: x264cli common
4
  *****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
  *
8
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
  *          Loren Merritt <lorenm@u.washington.edu>
10
@@ -63,6 +63,13 @@
11
 void x264_cli_log( const char *name, int i_level, const char *fmt, ... );
12
 void x264_cli_printf( int i_level, const char *fmt, ... );
13
 
14
+#ifdef _WIN32
15
+void x264_cli_set_console_title( const char *title );
16
+int x264_ansi_filename( const char *filename, char *ansi_filename, int size, int create_file );
17
+#else
18
+#define x264_cli_set_console_title( title )
19
+#endif
20
+
21
 #define RETURN_IF_ERR( cond, name, ret, ... )\
22
 if( cond )\
23
 {\
24
x264-snapshot-20130723-2245.tar.bz2/x264dll.c -> x264-snapshot-20140321-2245.tar.bz2/x264dll.c Changed
19
 
1
@@ -1,7 +1,7 @@
2
 /*****************************************************************************
3
  * x264dll: x264 DLLMain for win32
4
  *****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
  *
8
  * Authors: Anton Mitrofanov <BugMaster@narod.ru>
9
  *
10
@@ -27,7 +27,7 @@
11
 #include <windows.h>
12
 
13
 /* Callback for our DLL so we can initialize pthread */
14
-BOOL WINAPI DllMain( HANDLE hinstDLL, DWORD fdwReason, LPVOID lpvReserved )
15
+BOOL WINAPI DllMain( HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved )
16
 {
17
 #if PTW32_STATIC_LIB
18
     switch( fdwReason )
19
x264-snapshot-20130723-2245.tar.bz2/x264res.rc -> x264-snapshot-20140321-2245.tar.bz2/x264res.rc Changed
39
 
1
@@ -1,9 +1,9 @@
2
 /*****************************************************************************
3
  * x264res.rc: windows resource file
4
  *****************************************************************************
5
- * Copyright (C) 2012-2013 x264 project
6
+ * Copyright (C) 2012-2014 x264 project
7
  *
8
- * Authors: Henrik Gramner <hengar-6@student.ltu.se>
9
+ * Authors: Henrik Gramner <henrik@gramner.com>
10
  *
11
  * This program is free software; you can redistribute it and/or modify
12
  * it under the terms of the GNU General Public License as published by
13
@@ -50,7 +50,7 @@
14
 BEGIN
15
     BLOCK "StringFileInfo"
16
     BEGIN
17
-        BLOCK "040904E4"
18
+        BLOCK "040904B0"
19
         BEGIN
20
             VALUE "CompanyName",      "x264 project"
21
 #ifdef DLL
22
@@ -60,7 +60,7 @@
23
 #endif
24
             VALUE "FileVersion",      X264_POINTVER
25
             VALUE "InternalName",     "x264"
26
-            VALUE "LegalCopyright",   "Copyright (C) 2003-2013 x264 project"
27
+            VALUE "LegalCopyright",   "Copyright (C) 2003-2014 x264 project"
28
 #ifdef DLL
29
             VALUE "OriginalFilename", "libx264-" xstr(X264_BUILD) ".dll"
30
 #else
31
@@ -73,6 +73,6 @@
32
 
33
     BLOCK "VarFileInfo"
34
     BEGIN
35
-        VALUE "Translation", 0x0409, 0x04E4
36
+        VALUE "Translation", 0x0409, 0x04B0 /* U.S. English (Unicode) */
37
     END
38
 END
39