Packman Build Service PMBS

libx264.changes Changed

​x
 
@@ -1,4 +1,9 @@
 -------------------------------------------------------------------
+Sat Mar 22 17:10:14 UTC 2014 - i@margueirte.su
+
+- update version 20140321.
+
+-------------------------------------------------------------------
 Tue Nov 19 07:53:08 UTC 2013 - obs@botter.cc
 
 - add -fno-aggressive-loop-optimizations to extra-cflags in
​

libx264.spec Changed

 
@@ -14,8 +14,8 @@
 # Please submit bugfixes or comments via http://bugs.links2linux.org/
 
 Name:           libx264
-%define soname  135
-%define svn     20130723
+%define soname  142
+%define svn     20140321
 Version:        0.%{soname}svn%{svn}
 Release:        1
 License:        GPL-2.0+
​

x264-snapshot-20130723-2245.tar.bz2/common/display-x11.c Deleted

@@ -1,218 +0,0 @@
-/*****************************************************************************
- * display-x11.c: x11 interface
- *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
- *
- * Authors: Tuukka Toivonen <tuukkat@ee.oulu.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
- *
- * This program is also available under a commercial proprietary license.
- * For more information, contact us at licensing@x264.com.
- *****************************************************************************/
-
-#include <X11/Xlib.h>
-#include <X11/Xutil.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "common.h"
-#include "display.h"
-
-static long event_mask = ConfigureNotify|ExposureMask|KeyPressMask|ButtonPressMask|StructureNotifyMask|ResizeRedirectMask;
-
-static Display *disp_display = NULL;
-static struct disp_window
-{
-    int init;
-    Window window;
-} disp_window[10];
-
-static inline void disp_chkerror( int cond, char *e )
-{
-    if( !cond )
-        return;
-    fprintf( stderr, "error: %s\n", e ? e : "?" );
-    abort();
-}
-
-static void disp_init_display( void )
-{
-    Visual *visual;
-    int dpy_class;
-    int screen;
-    int dpy_depth;
-
-    if( disp_display )
-        return;
-    memset( &disp_window, 0, sizeof(disp_window) );
-    disp_display = XOpenDisplay( "" );
-    disp_chkerror( !disp_display, "no display" );
-    screen = DefaultScreen( disp_display );
-    visual = DefaultVisual( disp_display, screen );
-    dpy_class = visual->class;
-    dpy_depth = DefaultDepth( disp_display, screen );
-    disp_chkerror( !((dpy_class == TrueColor && dpy_depth == 32)
-        || (dpy_class == TrueColor && dpy_depth == 24)
-        || (dpy_class == TrueColor && dpy_depth == 16)
-        || (dpy_class == PseudoColor && dpy_depth == 8)),
-        "requires 8 bit PseudoColor or 16/24/32 bit TrueColor display" );
-}
-
-static void disp_init_window( int num, int width, int height, const unsigned char *title )
-{
-    XSetWindowAttributes xswa;
-    XEvent xev;
-    int screen = DefaultScreen(disp_display);
-    Visual *visual = DefaultVisual (disp_display, screen);
-    char buf[200];
-    Window window;
-
-    if( title )
-        snprintf( buf, 200, "%s: %i/disp", title, num );
-    else
-        snprintf( buf, 200, "%i/disp", num );
-
-    XSizeHints *shint = XAllocSizeHints();
-    disp_chkerror( !shint, "memerror" );
-    shint->min_width = shint->max_width = shint->width = width;
-    shint->min_height = shint->max_height = shint->height = height;
-    shint->flags = PSize | PMinSize | PMaxSize;
-    disp_chkerror( num < 0 || num >= 10, "bad win num" );
-    if( !disp_window[num].init )
-    {
-        unsigned int mask = 0;
-        disp_window[num].init = 1;
-        unsigned int bg = WhitePixel( disp_display, screen );
-        unsigned int fg = BlackPixel( disp_display, screen );
-        int dpy_depth = DefaultDepth( disp_display, screen );
-        if( dpy_depth==32 || dpy_depth==24 || dpy_depth==16 )
-        {
-            mask |= CWColormap;
-            xswa.colormap = XCreateColormap( disp_display, DefaultRootWindow( disp_display ), visual, AllocNone );
-        }
-        xswa.background_pixel = bg;
-        xswa.border_pixel = fg;
-        xswa.backing_store = Always;
-        xswa.backing_planes = -1;
-        xswa.bit_gravity = NorthWestGravity;
-        mask = CWBackPixel | CWBorderPixel | CWBackingStore | CWBackingPlanes | CWBitGravity;
-        window = XCreateWindow( disp_display, DefaultRootWindow( disp_display ),
-                                shint->x, shint->y, shint->width, shint->height,
-                                1, dpy_depth, InputOutput, visual, mask, &xswa );
-        disp_window[num].window = window;
-
-        XSelectInput( disp_display, window, event_mask );
-        XSetStandardProperties( disp_display, window, buf, buf, None, NULL, 0, shint );
-        XMapWindow( disp_display, window );
-
-        do {
-            XNextEvent( disp_display, &xev );
-        } while( xev.type != MapNotify || xev.xmap.event != window );
-    }
-    window = disp_window[num].window;
-    XSetStandardProperties( disp_display, window, buf, buf, None, NULL, 0, shint );
-    XResizeWindow( disp_display, window, width, height );
-    XSync( disp_display, 1 );
-    XFree( shint );
-}
-
-void disp_sync( void )
-{
-    XSync( disp_display, 1 );
-}
-
-void disp_setcolor( unsigned char *name )
-{
-    XColor c_exact, c_nearest;
-
-    int screen = DefaultScreen( disp_display );
-    GC gc = DefaultGC( disp_display, screen );
-    Colormap cm = DefaultColormap( disp_display, screen );
-    Status st = XAllocNamedColor( disp_display, cm, name, &c_nearest, &c_exact );
-    disp_chkerror( st != 1, "XAllocNamedColor error" );
-    XSetForeground( disp_display, gc, c_nearest.pixel );
-}
-
-void disp_gray( int num, char *data, int width, int height, int stride, const unsigned char *title )
-{
-    char dummy;
-
-    disp_init_display();
-    disp_init_window( num, width, height, title );
-    int screen = DefaultScreen( disp_display );
-    Visual *visual = DefaultVisual( disp_display, screen );
-    int dpy_depth = DefaultDepth( disp_display, screen );
-    XImage *ximage = XCreateImage( disp_display, visual, dpy_depth, ZPixmap, 0, &dummy, width, height, 8, 0 );
-    disp_chkerror( !ximage, "no ximage" );
-#if WORDS_BIGENDIAN
-    ximage->byte_order = MSBFirst;
-    ximage->bitmap_bit_order = MSBFirst;
-#else
-    ximage->byte_order = LSBFirst;
-    ximage->bitmap_bit_order = LSBFirst;
-#endif
-
-    int pixelsize = dpy_depth>8 ? sizeof(int) : sizeof(unsigned char);
-    uint8_t *image = malloc( width * height * pixelsize );
-    disp_chkerror( !image, "malloc failed" );
-    for( int y = 0; y < height; y++ )
-        for( int x = 0; x < width; x++ )
-            memset( &image[(width*y + x)*pixelsize], data[y*stride+x], pixelsize );
-    ximage->data = image;
-    GC gc = DefaultGC( disp_display, screen );
-
-    XPutImage( disp_display, disp_window[num].window, gc, ximage, 0, 0, 0, 0, width, height );
-    XPutImage( disp_display, disp_window[num].window, gc, ximage, 0, 0, 0, 0, width, height );
-
-    XDestroyImage( ximage );
-    XSync( disp_display, 1 );
-
-}
-
-void disp_gray_zoom(int num, char *data, int width, int height, int stride, const unsigned char *title, int zoom)
-{
-    unsigned char *dataz = malloc( width*zoom * height*zoom );
-    disp_chkerror( !dataz, "malloc" );
-    for( int y = 0; y < height; y++ )
-        for( int x = 0; x < width; x++ )
-            for( int y0 = 0; y0 < zoom; y0++ )
-                for( int x0 = 0; x0 < zoom; x0++ )
-                    dataz[(y*zoom + y0)*width*zoom + x*zoom + x0] = data[y*stride+x];
-    disp_gray( num, dataz, width*zoom, height*zoom, width*zoom, title );
-    free( dataz );
-}
-
-void disp_point( int num, int x1, int y1 )

 
@@ -1,218 +0,0 @@
-/*****************************************************************************
- * display-x11.c: x11 interface
- *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
- *
- * Authors: Tuukka Toivonen <tuukkat@ee.oulu.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
- *
- * This program is also available under a commercial proprietary license.
- * For more information, contact us at licensing@x264.com.
- *****************************************************************************/
-
-#include <X11/Xlib.h>
-#include <X11/Xutil.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "common.h"
-#include "display.h"
-
-static long event_mask = ConfigureNotify|ExposureMask|KeyPressMask|ButtonPressMask|StructureNotifyMask|ResizeRedirectMask;
-
-static Display *disp_display = NULL;
-static struct disp_window
-{
-    int init;
-    Window window;
-} disp_window[10];
-
-static inline void disp_chkerror( int cond, char *e )
-{
-    if( !cond )
-        return;
-    fprintf( stderr, "error: %s\n", e ? e : "?" );
-    abort();
-}
-
-static void disp_init_display( void )
-{
-    Visual *visual;
-    int dpy_class;
-    int screen;
-    int dpy_depth;
-
-    if( disp_display )
-        return;
-    memset( &disp_window, 0, sizeof(disp_window) );
-    disp_display = XOpenDisplay( "" );
-    disp_chkerror( !disp_display, "no display" );
-    screen = DefaultScreen( disp_display );
-    visual = DefaultVisual( disp_display, screen );
-    dpy_class = visual->class;
-    dpy_depth = DefaultDepth( disp_display, screen );
-    disp_chkerror( !((dpy_class == TrueColor && dpy_depth == 32)
-        || (dpy_class == TrueColor && dpy_depth == 24)
-        || (dpy_class == TrueColor && dpy_depth == 16)
-        || (dpy_class == PseudoColor && dpy_depth == 8)),
-        "requires 8 bit PseudoColor or 16/24/32 bit TrueColor display" );
-}
-
-static void disp_init_window( int num, int width, int height, const unsigned char *title )
-{
-    XSetWindowAttributes xswa;
-    XEvent xev;
-    int screen = DefaultScreen(disp_display);
-    Visual *visual = DefaultVisual (disp_display, screen);
-    char buf[200];
-    Window window;
-
-    if( title )
-        snprintf( buf, 200, "%s: %i/disp", title, num );
-    else
-        snprintf( buf, 200, "%i/disp", num );
-
-    XSizeHints *shint = XAllocSizeHints();
-    disp_chkerror( !shint, "memerror" );
-    shint->min_width = shint->max_width = shint->width = width;
-    shint->min_height = shint->max_height = shint->height = height;
-    shint->flags = PSize | PMinSize | PMaxSize;
-    disp_chkerror( num < 0 || num >= 10, "bad win num" );
-    if( !disp_window[num].init )
-    {
-        unsigned int mask = 0;
-        disp_window[num].init = 1;
-        unsigned int bg = WhitePixel( disp_display, screen );
-        unsigned int fg = BlackPixel( disp_display, screen );
-        int dpy_depth = DefaultDepth( disp_display, screen );
-        if( dpy_depth==32 || dpy_depth==24 || dpy_depth==16 )
-        {
-            mask |= CWColormap;
-            xswa.colormap = XCreateColormap( disp_display, DefaultRootWindow( disp_display ), visual, AllocNone );
-        }
-        xswa.background_pixel = bg;
-        xswa.border_pixel = fg;
-        xswa.backing_store = Always;
-        xswa.backing_planes = -1;
-        xswa.bit_gravity = NorthWestGravity;
-        mask = CWBackPixel | CWBorderPixel | CWBackingStore | CWBackingPlanes | CWBitGravity;
-        window = XCreateWindow( disp_display, DefaultRootWindow( disp_display ),
-                                shint->x, shint->y, shint->width, shint->height,
-                                1, dpy_depth, InputOutput, visual, mask, &xswa );
-        disp_window[num].window = window;
-
-        XSelectInput( disp_display, window, event_mask );
-        XSetStandardProperties( disp_display, window, buf, buf, None, NULL, 0, shint );
-        XMapWindow( disp_display, window );
-
-        do {
-            XNextEvent( disp_display, &xev );
-        } while( xev.type != MapNotify || xev.xmap.event != window );
-    }
-    window = disp_window[num].window;
-    XSetStandardProperties( disp_display, window, buf, buf, None, NULL, 0, shint );
-    XResizeWindow( disp_display, window, width, height );
-    XSync( disp_display, 1 );
-    XFree( shint );
-}
-
-void disp_sync( void )
-{
-    XSync( disp_display, 1 );
-}
-
-void disp_setcolor( unsigned char *name )
-{
-    XColor c_exact, c_nearest;
-
-    int screen = DefaultScreen( disp_display );
-    GC gc = DefaultGC( disp_display, screen );
-    Colormap cm = DefaultColormap( disp_display, screen );
-    Status st = XAllocNamedColor( disp_display, cm, name, &c_nearest, &c_exact );
-    disp_chkerror( st != 1, "XAllocNamedColor error" );
-    XSetForeground( disp_display, gc, c_nearest.pixel );
-}
-
-void disp_gray( int num, char *data, int width, int height, int stride, const unsigned char *title )
-{
-    char dummy;
-
-    disp_init_display();
-    disp_init_window( num, width, height, title );
-    int screen = DefaultScreen( disp_display );
-    Visual *visual = DefaultVisual( disp_display, screen );
-    int dpy_depth = DefaultDepth( disp_display, screen );
-    XImage *ximage = XCreateImage( disp_display, visual, dpy_depth, ZPixmap, 0, &dummy, width, height, 8, 0 );
-    disp_chkerror( !ximage, "no ximage" );
-#if WORDS_BIGENDIAN
-    ximage->byte_order = MSBFirst;
-    ximage->bitmap_bit_order = MSBFirst;
-#else
-    ximage->byte_order = LSBFirst;
-    ximage->bitmap_bit_order = LSBFirst;
-#endif
-
-    int pixelsize = dpy_depth>8 ? sizeof(int) : sizeof(unsigned char);
-    uint8_t *image = malloc( width * height * pixelsize );
-    disp_chkerror( !image, "malloc failed" );
-    for( int y = 0; y < height; y++ )
-        for( int x = 0; x < width; x++ )
-            memset( &image[(width*y + x)*pixelsize], data[y*stride+x], pixelsize );
-    ximage->data = image;
-    GC gc = DefaultGC( disp_display, screen );
-
-    XPutImage( disp_display, disp_window[num].window, gc, ximage, 0, 0, 0, 0, width, height );
-    XPutImage( disp_display, disp_window[num].window, gc, ximage, 0, 0, 0, 0, width, height );
-
-    XDestroyImage( ximage );
-    XSync( disp_display, 1 );
-
-}
-
-void disp_gray_zoom(int num, char *data, int width, int height, int stride, const unsigned char *title, int zoom)
-{
-    unsigned char *dataz = malloc( width*zoom * height*zoom );
-    disp_chkerror( !dataz, "malloc" );
-    for( int y = 0; y < height; y++ )
-        for( int x = 0; x < width; x++ )
-            for( int y0 = 0; y0 < zoom; y0++ )
-                for( int x0 = 0; x0 < zoom; x0++ )
-                    dataz[(y*zoom + y0)*width*zoom + x*zoom + x0] = data[y*stride+x];
-    disp_gray( num, dataz, width*zoom, height*zoom, width*zoom, title );
-    free( dataz );
-}
-
-void disp_point( int num, int x1, int y1 )
​

x264-snapshot-20130723-2245.tar.bz2/common/display.h Deleted

@@ -1,41 +0,0 @@
-/*****************************************************************************
- * display.h: x11 visualization interface
- *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
- *
- * Authors: Tuukka Toivonen <tuukkat@ee.oulu.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
- *
- * This program is also available under a commercial proprietary license.
- * For more information, contact us at licensing@x264.com.
- *****************************************************************************/
-
-#ifndef X264_DISPLAY_H
-#define X264_DISPLAY_H
-
-void disp_sync(void);
-void disp_setcolor(unsigned char *name);
-/* Display a region of byte wide memory as a grayscale image.
- * num is the window to use for displaying. */
-void disp_gray(int num, char *data, int width, int height,
-               int stride, const unsigned char *title);
-void disp_gray_zoom(int num, char *data, int width, int height,
-               int stride, const unsigned char *title, int zoom);
-void disp_point(int num, int x1, int y1);
-void disp_line(int num, int x1, int y1, int x2, int y2);
-void disp_rect(int num, int x1, int y1, int x2, int y2);
-
-#endif

 
@@ -1,41 +0,0 @@
-/*****************************************************************************
- * display.h: x11 visualization interface
- *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
- *
- * Authors: Tuukka Toivonen <tuukkat@ee.oulu.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
- *
- * This program is also available under a commercial proprietary license.
- * For more information, contact us at licensing@x264.com.
- *****************************************************************************/
-
-#ifndef X264_DISPLAY_H
-#define X264_DISPLAY_H
-
-void disp_sync(void);
-void disp_setcolor(unsigned char *name);
-/* Display a region of byte wide memory as a grayscale image.
- * num is the window to use for displaying. */
-void disp_gray(int num, char *data, int width, int height,
-               int stride, const unsigned char *title);
-void disp_gray_zoom(int num, char *data, int width, int height,
-               int stride, const unsigned char *title, int zoom);
-void disp_point(int num, int x1, int y1);
-void disp_line(int num, int x1, int y1, int x2, int y2);
-void disp_rect(int num, int x1, int y1, int x2, int y2);
-
-#endif
​

x264-snapshot-20130723-2245.tar.bz2/common/visualize.c Deleted

@@ -1,341 +0,0 @@
-/*****************************************************************************
- * visualize.c: visualization
- *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
- *
- * Authors: Tuukka Toivonen <tuukkat@ee.oulu.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
- *
- * This program is also available under a commercial proprietary license.
- * For more information, contact us at licensing@x264.com.
- *****************************************************************************/
-
-/*
- * Some explanation of the symbols used:
- * Red/pink: intra block
- * Blue: inter block
- * Green: skip block
- * Yellow: B-block (not visualized properly yet)
- *
- * Motion vectors have black dot at their target (ie. at the MB center),
- * instead of arrowhead. The black dot is enclosed in filled diamond with radius
- * depending on reference frame number (one frame back = zero width, normal case).
- *
- * The intra blocks have generally lines drawn perpendicular
- * to the prediction direction, so for example, if there is a pink block
- * with horizontal line at the top of it, it is interpolated by assuming
- * luma to be vertically constant.
- * DC predicted blocks have both horizontal and vertical lines,
- * pink blocks with a diagonal line are predicted using the planar function.
- */
-
-#include "common.h"
-#include "visualize.h"
-#include "display.h"
-
-typedef struct
-{
-    int     i_type;
-    int     i_partition;
-    int     i_sub_partition[4];
-    int     i_intra16x16_pred_mode;
-    int     intra4x4_pred_mode[4][4];
-    int8_t  ref[2][4][4];                  /* [list][y][x] */
-    int16_t mv[2][4][4][2];                /* [list][y][x][mvxy] */
-} visualize_t;
-
-/* Return string from stringlist corresponding to the given code */
-#define GET_STRING(sl, code) get_string((sl), sizeof(sl)/sizeof(*(sl)), code)
-
-typedef struct
-{
-    int code;
-    char *string;
-} stringlist_t;
-
-static char *get_string( const stringlist_t *sl, int entries, int code )
-{
-    for( int i = 0; i < entries; i++ )
-        if( sl[i].code == code )
-            return sl[i].string;
-    return "?";
-}
-
-/* Plot motion vector */
-static void mv( int x0, int y0, int16_t dmv[2], int ref, int zoom, char *col )
-{
-    int dx = dmv[0];
-    int dy = dmv[1];
-
-    dx = (dx * zoom + 2) >> 2;
-    dy = (dy * zoom + 2) >> 2;
-    disp_line( 0, x0, y0, x0+dx, y0+dy );
-    for( int i = 1; i < ref; i++ )
-    {
-        disp_line( 0, x0  , y0-i, x0+i, y0   );
-        disp_line( 0, x0+i, y0  , x0  , y0+i );
-        disp_line( 0, x0  , y0+i, x0-i, y0   );
-        disp_line( 0, x0-i, y0  , x0  , y0-i );
-    }
-    disp_setcolor( "black" );
-    disp_point( 0, x0, y0 );
-    disp_setcolor( col );
-}
-
-int x264_visualize_init( x264_t *h )
-{
-    CHECKED_MALLOC( h->visualize, h->mb.i_mb_width * h->mb.i_mb_height * sizeof(visualize_t) );
-    return 0;
-fail:
-    return -1;
-}
-
-void x264_visualize_mb( x264_t *h )
-{
-    visualize_t *v = (visualize_t*)h->visualize + h->mb.i_mb_xy;
-
-    /* Save all data for the MB that we need for drawing the visualization */
-    v->i_type = h->mb.i_type;
-    v->i_partition = h->mb.i_partition;
-    for( int i = 0; i < 4; i++ )
-        v->i_sub_partition[i] = h->mb.i_sub_partition[i];
-    for( int y = 0; y < 4; y++ )
-        for( int x = 0; x < 4; x++ )
-            v->intra4x4_pred_mode[y][x] = h->mb.cache.intra4x4_pred_mode[X264_SCAN8_0+y*8+x];
-    for( int l = 0; l < 2; l++ )
-        for( int y = 0; y < 4; y++ )
-            for( int x = 0; x < 4; x++ )
-            {
-                for( int i = 0; i < 2; i++ )
-                    v->mv[l][y][x][i] = h->mb.cache.mv[l][X264_SCAN8_0+y*8+x][i];
-                v->ref[l][y][x] = h->mb.cache.ref[l][X264_SCAN8_0+y*8+x];
-            }
-    v->i_intra16x16_pred_mode = h->mb.i_intra16x16_pred_mode;
-}
-
-void x264_visualize_close( x264_t *h )
-{
-    x264_free(h->visualize);
-}
-
-/* Display visualization (block types, MVs) of the encoded frame */
-/* FIXME: B-type MBs not handled yet properly */
-void x264_visualize_show( x264_t *h )
-{
-    static const stringlist_t mb_types[] =
-    {
-        /* Block types marked as NULL will not be drawn */
-        { I_4x4   , "red" },
-        { I_8x8   , "#ff5640" },
-        { I_16x16 , "#ff8060" },
-        { I_PCM   , "violet" },
-        { P_L0    , "SlateBlue" },
-        { P_8x8   , "blue" },
-        { P_SKIP  , "green" },
-        { B_DIRECT, "yellow" },
-        { B_L0_L0 , "yellow" },
-        { B_L0_L1 , "yellow" },
-        { B_L0_BI , "yellow" },
-        { B_L1_L0 , "yellow" },
-        { B_L1_L1 , "yellow" },
-        { B_L1_BI , "yellow" },
-        { B_BI_L0 , "yellow" },
-        { B_BI_L1 , "yellow" },
-        { B_BI_BI , "yellow" },
-        { B_8x8   , "yellow" },
-        { B_SKIP  , "yellow" },
-    };
-
-    static const int waitkey = 1;     /* Wait for enter after each frame */
-    static const int drawbox = 1;     /* Draw box around each block */
-    static const int borders = 0;     /* Display extrapolated borders outside frame */
-    static const int zoom = 2;        /* Zoom factor */
-
-    static const int pad = 32;
-    pixel *const frame = h->fdec->plane[0];
-    const int width = h->param.i_width;
-    const int height = h->param.i_height;
-    const int stride = h->fdec->i_stride[0];
-
-    if( borders )
-        disp_gray_zoom( 0, frame - pad*stride - pad, width+2*pad, height+2*pad, stride, "fdec", zoom );
-    else
-        disp_gray_zoom( 0, frame, width, height, stride, "fdec", zoom );
-
-    for( int mb_xy = 0; mb_xy < h->mb.i_mb_width * h->mb.i_mb_height; mb_xy++ )
-    {
-        visualize_t *const v = (visualize_t*)h->visualize + mb_xy;
-        const int mb_y = mb_xy / h->mb.i_mb_width;
-        const int mb_x = mb_xy % h->mb.i_mb_width;
-        char *const col = GET_STRING( mb_types, v->i_type );
-        int x = mb_x*16*zoom;
-        int y = mb_y*16*zoom;
-        int l = 0;
-
-        if( !col )
-            continue;
-
-        if( borders )
-        {
-            x += pad*zoom;
-            y += pad*zoom;
-        }
-
-        disp_setcolor( col );
-        if( drawbox ) disp_rect( 0, x, y, x+16*zoom-1, y+16*zoom-1 );
-

 
@@ -1,341 +0,0 @@
-/*****************************************************************************
- * visualize.c: visualization
- *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
- *
- * Authors: Tuukka Toivonen <tuukkat@ee.oulu.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
- *
- * This program is also available under a commercial proprietary license.
- * For more information, contact us at licensing@x264.com.
- *****************************************************************************/
-
-/*
- * Some explanation of the symbols used:
- * Red/pink: intra block
- * Blue: inter block
- * Green: skip block
- * Yellow: B-block (not visualized properly yet)
- *
- * Motion vectors have black dot at their target (ie. at the MB center),
- * instead of arrowhead. The black dot is enclosed in filled diamond with radius
- * depending on reference frame number (one frame back = zero width, normal case).
- *
- * The intra blocks have generally lines drawn perpendicular
- * to the prediction direction, so for example, if there is a pink block
- * with horizontal line at the top of it, it is interpolated by assuming
- * luma to be vertically constant.
- * DC predicted blocks have both horizontal and vertical lines,
- * pink blocks with a diagonal line are predicted using the planar function.
- */
-
-#include "common.h"
-#include "visualize.h"
-#include "display.h"
-
-typedef struct
-{
-    int     i_type;
-    int     i_partition;
-    int     i_sub_partition[4];
-    int     i_intra16x16_pred_mode;
-    int     intra4x4_pred_mode[4][4];
-    int8_t  ref[2][4][4];                  /* [list][y][x] */
-    int16_t mv[2][4][4][2];                /* [list][y][x][mvxy] */
-} visualize_t;
-
-/* Return string from stringlist corresponding to the given code */
-#define GET_STRING(sl, code) get_string((sl), sizeof(sl)/sizeof(*(sl)), code)
-
-typedef struct
-{
-    int code;
-    char *string;
-} stringlist_t;
-
-static char *get_string( const stringlist_t *sl, int entries, int code )
-{
-    for( int i = 0; i < entries; i++ )
-        if( sl[i].code == code )
-            return sl[i].string;
-    return "?";
-}
-
-/* Plot motion vector */
-static void mv( int x0, int y0, int16_t dmv[2], int ref, int zoom, char *col )
-{
-    int dx = dmv[0];
-    int dy = dmv[1];
-
-    dx = (dx * zoom + 2) >> 2;
-    dy = (dy * zoom + 2) >> 2;
-    disp_line( 0, x0, y0, x0+dx, y0+dy );
-    for( int i = 1; i < ref; i++ )
-    {
-        disp_line( 0, x0  , y0-i, x0+i, y0   );
-        disp_line( 0, x0+i, y0  , x0  , y0+i );
-        disp_line( 0, x0  , y0+i, x0-i, y0   );
-        disp_line( 0, x0-i, y0  , x0  , y0-i );
-    }
-    disp_setcolor( "black" );
-    disp_point( 0, x0, y0 );
-    disp_setcolor( col );
-}
-
-int x264_visualize_init( x264_t *h )
-{
-    CHECKED_MALLOC( h->visualize, h->mb.i_mb_width * h->mb.i_mb_height * sizeof(visualize_t) );
-    return 0;
-fail:
-    return -1;
-}
-
-void x264_visualize_mb( x264_t *h )
-{
-    visualize_t *v = (visualize_t*)h->visualize + h->mb.i_mb_xy;
-
-    /* Save all data for the MB that we need for drawing the visualization */
-    v->i_type = h->mb.i_type;
-    v->i_partition = h->mb.i_partition;
-    for( int i = 0; i < 4; i++ )
-        v->i_sub_partition[i] = h->mb.i_sub_partition[i];
-    for( int y = 0; y < 4; y++ )
-        for( int x = 0; x < 4; x++ )
-            v->intra4x4_pred_mode[y][x] = h->mb.cache.intra4x4_pred_mode[X264_SCAN8_0+y*8+x];
-    for( int l = 0; l < 2; l++ )
-        for( int y = 0; y < 4; y++ )
-            for( int x = 0; x < 4; x++ )
-            {
-                for( int i = 0; i < 2; i++ )
-                    v->mv[l][y][x][i] = h->mb.cache.mv[l][X264_SCAN8_0+y*8+x][i];
-                v->ref[l][y][x] = h->mb.cache.ref[l][X264_SCAN8_0+y*8+x];
-            }
-    v->i_intra16x16_pred_mode = h->mb.i_intra16x16_pred_mode;
-}
-
-void x264_visualize_close( x264_t *h )
-{
-    x264_free(h->visualize);
-}
-
-/* Display visualization (block types, MVs) of the encoded frame */
-/* FIXME: B-type MBs not handled yet properly */
-void x264_visualize_show( x264_t *h )
-{
-    static const stringlist_t mb_types[] =
-    {
-        /* Block types marked as NULL will not be drawn */
-        { I_4x4   , "red" },
-        { I_8x8   , "#ff5640" },
-        { I_16x16 , "#ff8060" },
-        { I_PCM   , "violet" },
-        { P_L0    , "SlateBlue" },
-        { P_8x8   , "blue" },
-        { P_SKIP  , "green" },
-        { B_DIRECT, "yellow" },
-        { B_L0_L0 , "yellow" },
-        { B_L0_L1 , "yellow" },
-        { B_L0_BI , "yellow" },
-        { B_L1_L0 , "yellow" },
-        { B_L1_L1 , "yellow" },
-        { B_L1_BI , "yellow" },
-        { B_BI_L0 , "yellow" },
-        { B_BI_L1 , "yellow" },
-        { B_BI_BI , "yellow" },
-        { B_8x8   , "yellow" },
-        { B_SKIP  , "yellow" },
-    };
-
-    static const int waitkey = 1;     /* Wait for enter after each frame */
-    static const int drawbox = 1;     /* Draw box around each block */
-    static const int borders = 0;     /* Display extrapolated borders outside frame */
-    static const int zoom = 2;        /* Zoom factor */
-
-    static const int pad = 32;
-    pixel *const frame = h->fdec->plane[0];
-    const int width = h->param.i_width;
-    const int height = h->param.i_height;
-    const int stride = h->fdec->i_stride[0];
-
-    if( borders )
-        disp_gray_zoom( 0, frame - pad*stride - pad, width+2*pad, height+2*pad, stride, "fdec", zoom );
-    else
-        disp_gray_zoom( 0, frame, width, height, stride, "fdec", zoom );
-
-    for( int mb_xy = 0; mb_xy < h->mb.i_mb_width * h->mb.i_mb_height; mb_xy++ )
-    {
-        visualize_t *const v = (visualize_t*)h->visualize + mb_xy;
-        const int mb_y = mb_xy / h->mb.i_mb_width;
-        const int mb_x = mb_xy % h->mb.i_mb_width;
-        char *const col = GET_STRING( mb_types, v->i_type );
-        int x = mb_x*16*zoom;
-        int y = mb_y*16*zoom;
-        int l = 0;
-
-        if( !col )
-            continue;
-
-        if( borders )
-        {
-            x += pad*zoom;
-            y += pad*zoom;
-        }
-
-        disp_setcolor( col );
-        if( drawbox ) disp_rect( 0, x, y, x+16*zoom-1, y+16*zoom-1 );
-
​

x264-snapshot-20130723-2245.tar.bz2/common/visualize.h Deleted

@@ -1,36 +0,0 @@
-/*****************************************************************************
- * visualize.h: visualization
- *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
- *
- * Authors: Tuukka Toivonen <tuukkat@ee.oulu.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
- *
- * This program is also available under a commercial proprietary license.
- * For more information, contact us at licensing@x264.com.
- *****************************************************************************/
-
-#ifndef X264_VISUALIZE_H
-#define X264_VISUALIZE_H
-
-#include "common/common.h"
-
-int  x264_visualize_init( x264_t *h );
-void x264_visualize_mb( x264_t *h );
-void x264_visualize_show( x264_t *h );
-void x264_visualize_close( x264_t *h );
-
-#endif

 
@@ -1,36 +0,0 @@
-/*****************************************************************************
- * visualize.h: visualization
- *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
- *
- * Authors: Tuukka Toivonen <tuukkat@ee.oulu.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
- *
- * This program is also available under a commercial proprietary license.
- * For more information, contact us at licensing@x264.com.
- *****************************************************************************/
-
-#ifndef X264_VISUALIZE_H
-#define X264_VISUALIZE_H
-
-#include "common/common.h"
-
-int  x264_visualize_init( x264_t *h );
-void x264_visualize_mb( x264_t *h );
-void x264_visualize_show( x264_t *h );
-void x264_visualize_close( x264_t *h );
-
-#endif
​

x264-snapshot-20130723-2245.tar.bz2/tools/xyuv.c Deleted

@@ -1,792 +0,0 @@
-/*****************************************************************************
- * xyuv.c: a SDL yuv 420 planer viewer.
- *****************************************************************************
- * Copyright (C) 2004 Laurent Aimar <fenrir@via.ecp.fr>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
- *****************************************************************************/
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <stdint.h>
-
-#include <SDL/SDL.h>
-
-#define YUV_MAX 20
-#define SDL_TITLE "xyuv: %s - %d/%d - %.2ffps"
-typedef struct
-{
-    /* globals */
-    int     i_width;
-    int     i_height;
-    int     i_frame_size;
-    int     i_frame;
-    int     i_frames;
-    float   f_fps;
-
-    float   f_y;
-
-    int     b_pause;
-    int     b_grid;
-    int     b_split;
-    int     b_diff;
-    int     i_join;
-
-    /* Constructed picture */
-    int     i_wall_width;   /* in picture count */
-
-    /* YUV files */
-    int     i_yuv;
-    struct
-    {
-        char    *name;
-        FILE    *f;         /* handles */
-        int     i_frames;   /* frames count */
-
-        /* Position in the whole picture */
-        int     x, y;
-    } yuv[YUV_MAX];
-
-    /* SDL */
-    int i_sdl_width;
-    int i_sdl_height;
-
-    int i_display_width;
-    int i_display_height;
-    char *title;
-
-    SDL_Surface *screen;
-    SDL_Overlay *overlay;
-
-    /* */
-    uint8_t *pic;
-
-} xyuv_t;
-
-xyuv_t xyuv = {
-    .i_width = 0,
-    .i_height = 0,
-    .i_frame  = 1,
-    .i_frames = 0,
-    .f_fps = 25.0,
-    .f_y = 0.0,
-    .i_wall_width = 0,
-
-    .i_yuv = 0,
-
-    .b_pause = 0,
-    .b_split = 0,
-    .b_diff = 0,
-    .i_join = -1,
-
-    .title = NULL,
-    .pic = NULL,
-};
-
-static void help( void )
-{
-    fprintf( stderr,
-             "Syntax: xyuv [options] file [file2 ...]\n"
-             "\n"
-             "      --help                  Print this help\n"
-             "\n"
-             "  -s, --size <WIDTHxHEIGHT>   Set input size\n"
-             "  -w, --width <integer>       Set width\n"
-             "  -h, --height <integer>      Set height\n"
-             "\n"
-             "  -S, --split                 Show splited Y/U/V planes\n"
-             "  -d, --diff                  Show difference (only 2 files) in split mode\n"
-             "  -j, --joint <integer>\n"
-             "\n"
-             "  -y <float>                  Set Y factor\n"
-             "\n"
-             "  -g, --grid                  Show a grid (macroblock 16x16)\n"
-             "  -W <integer>                Set wall width (in picture count)\n"
-             "  -f, --fps <float>           Set fps\n"
-             "\n" );
-}
-
-static void xyuv_count_frames( xyuv_t *xyuv );
-static void xyuv_detect( int *pi_width, int *pi_height );
-static void xyuv_display( xyuv_t *xyuv, int i_frame );
-
-int main( int argc, char **argv )
-{
-    int i;
-
-    /* Parse commande line */
-    for( i = 1; i < argc; i++ ) {
-        if( !strcasecmp( argv[i], "--help" ) ) {
-            help();
-            return 0;
-        }
-        if( !strcmp( argv[i], "-d" ) || !strcasecmp( argv[i], "--diff" ) ) {
-            xyuv.b_diff = 1;
-        } else if( !strcmp( argv[i], "-S" ) || !strcasecmp( argv[i], "--split" ) ) {
-            xyuv.b_split = 1;
-        } else if( !strcmp( argv[i], "-f" ) || !strcasecmp( argv[i], "--fps" ) ) {
-            if( i >= argc -1 ) goto err_missing_arg;
-            xyuv.f_fps = atof( argv[++i] );
-        } else if( !strcmp( argv[i], "-h" ) || !strcasecmp( argv[i], "--height" ) ) {
-            if( i >= argc -1 ) goto err_missing_arg;
-            xyuv.i_height = atoi( argv[++i] );
-        } else if( !strcmp( argv[i], "-w" ) || !strcasecmp( argv[i], "--width" ) ) {
-            if( i >= argc -1 ) goto err_missing_arg;
-            xyuv.i_width = atoi( argv[++i] );
-        } else if( !strcmp( argv[i], "-s" ) || !strcasecmp( argv[i], "--size" ) ) {
-            char *p;
-
-            if( i >= argc -1 ) goto err_missing_arg;
-
-            xyuv.i_width = strtol( argv[++i], &p, 0 );
-            p++;
-            xyuv.i_height = atoi( p );
-        } else if( !strcmp( argv[i], "-W" ) ) {
-            if( i >= argc -1 ) goto err_missing_arg;
-            xyuv.i_wall_width = atoi( argv[++i] );
-        } else if( !strcmp( argv[i], "-y" ) ) {
-            if( i >= argc -1 ) goto err_missing_arg;
-            xyuv.f_y = atof( argv[++i] );
-        } else if( !strcmp( argv[i], "-j" ) || !strcasecmp( argv[i], "--join" ) ) {
-            if( i >= argc -1 ) goto err_missing_arg;
-            xyuv.i_join = atoi( argv[++i] );
-        } else if( !strcmp( argv[i], "-g" ) || !strcasecmp( argv[i], "--grid" ) ) {
-            xyuv.b_grid = 1;
-        } else {
-            FILE *f = fopen( argv[i], "rb" );
-            if( !f ) {
-                fprintf( stderr, "cannot open YUV %s\n", argv[i] );
-            } else {
-                xyuv.yuv[xyuv.i_yuv].name = strdup( argv[i] );
-                xyuv.yuv[xyuv.i_yuv].f = f;
-                xyuv.yuv[xyuv.i_yuv].i_frames = 0;
-
-                xyuv.i_yuv++;
-            }
-        }
-    }
-
-    if( xyuv.i_yuv == 0 ) {
-        fprintf( stderr, "no file to display\n" );
-        return -1;
-    }
-    if( xyuv.i_width == 0 || xyuv.i_height == 0 ) {
-        char *psz = xyuv.yuv[0].name;
-        char *num;
-        char *x;
-        /* See if we find widthxheight in the file name */
-        for( ;; ) {
-            if( !( x = strchr( psz+1, 'x' ) ) ) {
-                break;
-            }
-            num = x;
-            while( num > psz && num[-1] >= '0' && num[-1] <= '9' )
-                num--;
-
-            if( num != x && x[1] >= '0' && x[1] <= '9' ) {

 
@@ -1,792 +0,0 @@
-/*****************************************************************************
- * xyuv.c: a SDL yuv 420 planer viewer.
- *****************************************************************************
- * Copyright (C) 2004 Laurent Aimar <fenrir@via.ecp.fr>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
- *****************************************************************************/
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <stdint.h>
-
-#include <SDL/SDL.h>
-
-#define YUV_MAX 20
-#define SDL_TITLE "xyuv: %s - %d/%d - %.2ffps"
-typedef struct
-{
-    /* globals */
-    int     i_width;
-    int     i_height;
-    int     i_frame_size;
-    int     i_frame;
-    int     i_frames;
-    float   f_fps;
-
-    float   f_y;
-
-    int     b_pause;
-    int     b_grid;
-    int     b_split;
-    int     b_diff;
-    int     i_join;
-
-    /* Constructed picture */
-    int     i_wall_width;   /* in picture count */
-
-    /* YUV files */
-    int     i_yuv;
-    struct
-    {
-        char    *name;
-        FILE    *f;         /* handles */
-        int     i_frames;   /* frames count */
-
-        /* Position in the whole picture */
-        int     x, y;
-    } yuv[YUV_MAX];
-
-    /* SDL */
-    int i_sdl_width;
-    int i_sdl_height;
-
-    int i_display_width;
-    int i_display_height;
-    char *title;
-
-    SDL_Surface *screen;
-    SDL_Overlay *overlay;
-
-    /* */
-    uint8_t *pic;
-
-} xyuv_t;
-
-xyuv_t xyuv = {
-    .i_width = 0,
-    .i_height = 0,
-    .i_frame  = 1,
-    .i_frames = 0,
-    .f_fps = 25.0,
-    .f_y = 0.0,
-    .i_wall_width = 0,
-
-    .i_yuv = 0,
-
-    .b_pause = 0,
-    .b_split = 0,
-    .b_diff = 0,
-    .i_join = -1,
-
-    .title = NULL,
-    .pic = NULL,
-};
-
-static void help( void )
-{
-    fprintf( stderr,
-             "Syntax: xyuv [options] file [file2 ...]\n"
-             "\n"
-             "      --help                  Print this help\n"
-             "\n"
-             "  -s, --size <WIDTHxHEIGHT>   Set input size\n"
-             "  -w, --width <integer>       Set width\n"
-             "  -h, --height <integer>      Set height\n"
-             "\n"
-             "  -S, --split                 Show splited Y/U/V planes\n"
-             "  -d, --diff                  Show difference (only 2 files) in split mode\n"
-             "  -j, --joint <integer>\n"
-             "\n"
-             "  -y <float>                  Set Y factor\n"
-             "\n"
-             "  -g, --grid                  Show a grid (macroblock 16x16)\n"
-             "  -W <integer>                Set wall width (in picture count)\n"
-             "  -f, --fps <float>           Set fps\n"
-             "\n" );
-}
-
-static void xyuv_count_frames( xyuv_t *xyuv );
-static void xyuv_detect( int *pi_width, int *pi_height );
-static void xyuv_display( xyuv_t *xyuv, int i_frame );
-
-int main( int argc, char **argv )
-{
-    int i;
-
-    /* Parse commande line */
-    for( i = 1; i < argc; i++ ) {
-        if( !strcasecmp( argv[i], "--help" ) ) {
-            help();
-            return 0;
-        }
-        if( !strcmp( argv[i], "-d" ) || !strcasecmp( argv[i], "--diff" ) ) {
-            xyuv.b_diff = 1;
-        } else if( !strcmp( argv[i], "-S" ) || !strcasecmp( argv[i], "--split" ) ) {
-            xyuv.b_split = 1;
-        } else if( !strcmp( argv[i], "-f" ) || !strcasecmp( argv[i], "--fps" ) ) {
-            if( i >= argc -1 ) goto err_missing_arg;
-            xyuv.f_fps = atof( argv[++i] );
-        } else if( !strcmp( argv[i], "-h" ) || !strcasecmp( argv[i], "--height" ) ) {
-            if( i >= argc -1 ) goto err_missing_arg;
-            xyuv.i_height = atoi( argv[++i] );
-        } else if( !strcmp( argv[i], "-w" ) || !strcasecmp( argv[i], "--width" ) ) {
-            if( i >= argc -1 ) goto err_missing_arg;
-            xyuv.i_width = atoi( argv[++i] );
-        } else if( !strcmp( argv[i], "-s" ) || !strcasecmp( argv[i], "--size" ) ) {
-            char *p;
-
-            if( i >= argc -1 ) goto err_missing_arg;
-
-            xyuv.i_width = strtol( argv[++i], &p, 0 );
-            p++;
-            xyuv.i_height = atoi( p );
-        } else if( !strcmp( argv[i], "-W" ) ) {
-            if( i >= argc -1 ) goto err_missing_arg;
-            xyuv.i_wall_width = atoi( argv[++i] );
-        } else if( !strcmp( argv[i], "-y" ) ) {
-            if( i >= argc -1 ) goto err_missing_arg;
-            xyuv.f_y = atof( argv[++i] );
-        } else if( !strcmp( argv[i], "-j" ) || !strcasecmp( argv[i], "--join" ) ) {
-            if( i >= argc -1 ) goto err_missing_arg;
-            xyuv.i_join = atoi( argv[++i] );
-        } else if( !strcmp( argv[i], "-g" ) || !strcasecmp( argv[i], "--grid" ) ) {
-            xyuv.b_grid = 1;
-        } else {
-            FILE *f = fopen( argv[i], "rb" );
-            if( !f ) {
-                fprintf( stderr, "cannot open YUV %s\n", argv[i] );
-            } else {
-                xyuv.yuv[xyuv.i_yuv].name = strdup( argv[i] );
-                xyuv.yuv[xyuv.i_yuv].f = f;
-                xyuv.yuv[xyuv.i_yuv].i_frames = 0;
-
-                xyuv.i_yuv++;
-            }
-        }
-    }
-
-    if( xyuv.i_yuv == 0 ) {
-        fprintf( stderr, "no file to display\n" );
-        return -1;
-    }
-    if( xyuv.i_width == 0 || xyuv.i_height == 0 ) {
-        char *psz = xyuv.yuv[0].name;
-        char *num;
-        char *x;
-        /* See if we find widthxheight in the file name */
-        for( ;; ) {
-            if( !( x = strchr( psz+1, 'x' ) ) ) {
-                break;
-            }
-            num = x;
-            while( num > psz && num[-1] >= '0' && num[-1] <= '9' )
-                num--;
-
-            if( num != x && x[1] >= '0' && x[1] <= '9' ) {
​

x264-snapshot-20130723-2245.tar.bz2/AUTHORS -> x264-snapshot-20140321-2245.tar.bz2/AUTHORS Changed

 
@@ -43,8 +43,8 @@
 S: Brittany, France
 
 N: Henrik Gramner
-E: hengar-6 AT student DOT ltu DOT se
-D: 4:2:2 chroma subsampling, x86 asm
+E: henrik AT gramner DOT com
+D: 4:2:2 chroma subsampling, x86 asm, Windows improvements, bugfixes
 S: Sweden
 
 N: Jason Garrett-Glaser
@@ -99,7 +99,3 @@
 E: radoslaw AT syskin DOT cjb DOT net
 D: Cached motion compensation
 
-N: Tuukka Toivonen
-E: tuukkat AT ee DOT oulu DOT fi
-D: Visualization
-
​

x264-snapshot-20130723-2245.tar.bz2/Makefile -> x264-snapshot-20140321-2245.tar.bz2/Makefile Changed

@@ -69,9 +69,8 @@
 SRCCLI += output/mp4.c
 endif
 
-# Visualization sources
-ifneq ($(findstring HAVE_VISUALIZE 1, $(CONFIG)),)
-SRCS   += common/visualize.c common/display-x11.c
+ifneq ($(findstring HAVE_LSMASH 1, $(CONFIG)),)
+SRCCLI += output/mp4_lsmash.c
 endif
 
 # MMX/SSE optims
@@ -247,29 +246,29 @@
 	rm -f config.mak x264_config.h config.h config.log x264.pc x264.def
 
 install-cli: cli
-	install -d $(DESTDIR)$(bindir)
-	install x264$(EXE) $(DESTDIR)$(bindir)
+	$(INSTALL) -d $(DESTDIR)$(bindir)
+	$(INSTALL) x264$(EXE) $(DESTDIR)$(bindir)
 
 install-lib-dev:
-	install -d $(DESTDIR)$(includedir)
-	install -d $(DESTDIR)$(libdir)
-	install -d $(DESTDIR)$(libdir)/pkgconfig
-	install -m 644 $(SRCPATH)/x264.h $(DESTDIR)$(includedir)
-	install -m 644 x264_config.h $(DESTDIR)$(includedir)
-	install -m 644 x264.pc $(DESTDIR)$(libdir)/pkgconfig
+	$(INSTALL) -d $(DESTDIR)$(includedir)
+	$(INSTALL) -d $(DESTDIR)$(libdir)
+	$(INSTALL) -d $(DESTDIR)$(libdir)/pkgconfig
+	$(INSTALL) -m 644 $(SRCPATH)/x264.h $(DESTDIR)$(includedir)
+	$(INSTALL) -m 644 x264_config.h $(DESTDIR)$(includedir)
+	$(INSTALL) -m 644 x264.pc $(DESTDIR)$(libdir)/pkgconfig
 
 install-lib-static: lib-static install-lib-dev
-	install -m 644 $(LIBX264) $(DESTDIR)$(libdir)
+	$(INSTALL) -m 644 $(LIBX264) $(DESTDIR)$(libdir)
 	$(if $(RANLIB), $(RANLIB) $(DESTDIR)$(libdir)/$(LIBX264))
 
 install-lib-shared: lib-shared install-lib-dev
 ifneq ($(IMPLIBNAME),)
-	install -d $(DESTDIR)$(bindir)
-	install -m 755 $(SONAME) $(DESTDIR)$(bindir)
-	install -m 644 $(IMPLIBNAME) $(DESTDIR)$(libdir)
+	$(INSTALL) -d $(DESTDIR)$(bindir)
+	$(INSTALL) -m 755 $(SONAME) $(DESTDIR)$(bindir)
+	$(INSTALL) -m 644 $(IMPLIBNAME) $(DESTDIR)$(libdir)
 else ifneq ($(SONAME),)
 	ln -f -s $(SONAME) $(DESTDIR)$(libdir)/libx264.$(SOSUFFIX)
-	install -m 755 $(SONAME) $(DESTDIR)$(libdir)
+	$(INSTALL) -m 755 $(SONAME) $(DESTDIR)$(libdir)
 endif
 
 uninstall:

 
@@ -69,9 +69,8 @@
 SRCCLI += output/mp4.c
 endif
 
-# Visualization sources
-ifneq ($(findstring HAVE_VISUALIZE 1, $(CONFIG)),)
-SRCS   += common/visualize.c common/display-x11.c
+ifneq ($(findstring HAVE_LSMASH 1, $(CONFIG)),)
+SRCCLI += output/mp4_lsmash.c
 endif
 
 # MMX/SSE optims
@@ -247,29 +246,29 @@
    rm -f config.mak x264_config.h config.h config.log x264.pc x264.def
 
 install-cli: cli
-   install -d $(DESTDIR)$(bindir)
-   install x264$(EXE) $(DESTDIR)$(bindir)
+   $(INSTALL) -d $(DESTDIR)$(bindir)
+   $(INSTALL) x264$(EXE) $(DESTDIR)$(bindir)
 
 install-lib-dev:
-   install -d $(DESTDIR)$(includedir)
-   install -d $(DESTDIR)$(libdir)
-   install -d $(DESTDIR)$(libdir)/pkgconfig
-   install -m 644 $(SRCPATH)/x264.h $(DESTDIR)$(includedir)
-   install -m 644 x264_config.h $(DESTDIR)$(includedir)
-   install -m 644 x264.pc $(DESTDIR)$(libdir)/pkgconfig
+   $(INSTALL) -d $(DESTDIR)$(includedir)
+   $(INSTALL) -d $(DESTDIR)$(libdir)
+   $(INSTALL) -d $(DESTDIR)$(libdir)/pkgconfig
+   $(INSTALL) -m 644 $(SRCPATH)/x264.h $(DESTDIR)$(includedir)
+   $(INSTALL) -m 644 x264_config.h $(DESTDIR)$(includedir)
+   $(INSTALL) -m 644 x264.pc $(DESTDIR)$(libdir)/pkgconfig
 
 install-lib-static: lib-static install-lib-dev
-   install -m 644 $(LIBX264) $(DESTDIR)$(libdir)
+   $(INSTALL) -m 644 $(LIBX264) $(DESTDIR)$(libdir)
    $(if $(RANLIB), $(RANLIB) $(DESTDIR)$(libdir)/$(LIBX264))
 
 install-lib-shared: lib-shared install-lib-dev
 ifneq ($(IMPLIBNAME),)
-   install -d $(DESTDIR)$(bindir)
-   install -m 755 $(SONAME) $(DESTDIR)$(bindir)
-   install -m 644 $(IMPLIBNAME) $(DESTDIR)$(libdir)
+   $(INSTALL) -d $(DESTDIR)$(bindir)
+   $(INSTALL) -m 755 $(SONAME) $(DESTDIR)$(bindir)
+   $(INSTALL) -m 644 $(IMPLIBNAME) $(DESTDIR)$(libdir)
 else ifneq ($(SONAME),)
    ln -f -s $(SONAME) $(DESTDIR)$(libdir)/libx264.$(SOSUFFIX)
-   install -m 755 $(SONAME) $(DESTDIR)$(libdir)
+   $(INSTALL) -m 755 $(SONAME) $(DESTDIR)$(libdir)
 endif
 
 uninstall:
​

x264-snapshot-20130723-2245.tar.bz2/common/arm/asm.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/asm.S Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * asm.S: arm utility macros
  *****************************************************************************
- * Copyright (C) 2008-2013 x264 project
+ * Copyright (C) 2008-2014 x264 project
  *
  * Authors: Mans Rullgard <mans@mansr.com>
  *          David Conrad <lessen42@gmail.com>
@@ -26,6 +26,8 @@
 
 #include "config.h"
 
+.syntax unified
+
 #ifdef PREFIX
 #   define EXTERN_ASM _
 #else
​

x264-snapshot-20130723-2245.tar.bz2/common/arm/cpu-a.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/cpu-a.S Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * cpu-a.S: arm cpu detection
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *
@@ -26,7 +26,7 @@
 #include "asm.S"
 
 .fpu neon
-.align
+.align 2
 
 // done in gas because .fpu neon overrides the refusal to assemble
 // instructions the selected -march/-mcpu doesn't support
@@ -95,7 +95,7 @@
     sub         r2, r2, r1
     cmpgt       r2, #30 << 3    // assume context switch if it took over 30 cycles
     addle       r3, r3, r2
-    subles      ip, ip, #1
+    subsle      ip, ip, #1
     bgt         average_loop
 
     // disable counters if we enabled them
​

x264-snapshot-20130723-2245.tar.bz2/common/arm/dct-a.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/dct-a.S Changed

 
@@ -1,7 +1,7 @@
 /****************************************************************************
  * dct-a.S: arm transform and zigzag
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/arm/dct.h -> x264-snapshot-20140321-2245.tar.bz2/common/arm/dct.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * dct.h: arm transform and zigzag
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/arm/deblock-a.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/deblock-a.S Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * deblock.S: arm deblocking
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: Mans Rullgard <mans@mansr.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/arm/mc-a.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/mc-a.S Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc.S: arm motion compensation
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *          Mans Rullgard <mans@mansr.com>
@@ -167,7 +167,7 @@
     ldr         ip, [sp, #8]
     push        {r4-r6,lr}
     cmp         ip, #32
-    ldrd        r4, [sp, #16]
+    ldrd        r4, r5, [sp, #16]
     mov         lr, #\h
     beq         x264_pixel_avg_w\w\()_neon
     rsbs        r6,  ip,  #64
@@ -447,7 +447,7 @@
 .ifc \type, full
     ldr         lr,  [r4, #32]      // denom
 .endif
-    ldrd        r4,  [r4, #32+4]    // scale, offset
+    ldrd        r4,  r5,  [r4, #32+4]    // scale, offset
     vdup.16     q0,  r4
     vdup.16     q1,  r5
 .ifc \type, full
@@ -818,8 +818,8 @@
 function x264_mc_chroma_neon
     push            {r4-r8, lr}
     vpush           {d8-d11}
-    ldrd            r4, [sp, #56]
-    ldrd            r6, [sp, #64]
+    ldrd            r4, r5, [sp, #56]
+    ldrd            r6, r7, [sp, #64]
 
     asr             lr, r6, #3
     mul             lr, r4, lr
@@ -1380,8 +1380,8 @@
 function x264_frame_init_lowres_core_neon
     push            {r4-r10,lr}
     vpush           {d8-d15}
-    ldrd            r4,  [sp, #96]
-    ldrd            r6,  [sp, #104]
+    ldrd            r4,  r5,  [sp, #96]
+    ldrd            r6,  r7,  [sp, #104]
     ldr             lr,  [sp, #112]
     sub             r10, r6,  r7            // dst_stride - width
     and             r10, r10, #~15

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc.S: arm motion compensation
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *          Mans Rullgard <mans@mansr.com>
@@ -167,7 +167,7 @@
     ldr         ip, [sp, #8]
     push        {r4-r6,lr}
     cmp         ip, #32
-    ldrd        r4, [sp, #16]
+    ldrd        r4, r5, [sp, #16]
     mov         lr, #\h
     beq         x264_pixel_avg_w\w\()_neon
     rsbs        r6,  ip,  #64
@@ -447,7 +447,7 @@
 .ifc \type, full
     ldr         lr,  [r4, #32]      // denom
 .endif
-    ldrd        r4,  [r4, #32+4]    // scale, offset
+    ldrd        r4,  r5,  [r4, #32+4]    // scale, offset
     vdup.16     q0,  r4
     vdup.16     q1,  r5
 .ifc \type, full
@@ -818,8 +818,8 @@
 function x264_mc_chroma_neon
     push            {r4-r8, lr}
     vpush           {d8-d11}
-    ldrd            r4, [sp, #56]
-    ldrd            r6, [sp, #64]
+    ldrd            r4, r5, [sp, #56]
+    ldrd            r6, r7, [sp, #64]
 
     asr             lr, r6, #3
     mul             lr, r4, lr
@@ -1380,8 +1380,8 @@
 function x264_frame_init_lowres_core_neon
     push            {r4-r10,lr}
     vpush           {d8-d15}
-    ldrd            r4,  [sp, #96]
-    ldrd            r6,  [sp, #104]
+    ldrd            r4,  r5,  [sp, #96]
+    ldrd            r6,  r7,  [sp, #104]
     ldr             lr,  [sp, #112]
     sub             r10, r6,  r7            // dst_stride - width
     and             r10, r10, #~15
​

x264-snapshot-20130723-2245.tar.bz2/common/arm/mc-c.c -> x264-snapshot-20140321-2245.tar.bz2/common/arm/mc-c.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc-c.c: arm motion compensation
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/arm/mc.h -> x264-snapshot-20140321-2245.tar.bz2/common/arm/mc.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc.h: arm motion compensation
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/arm/pixel-a.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/pixel-a.S Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * pixel.S: arm pixel metrics
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *
@@ -328,9 +328,9 @@
 function x264_pixel_sad_x\x\()_\w\()x\h\()_neon
     push        {r6-r7,lr}
 .if \x == 3
-    ldrd        r6,  [sp, #12]
+    ldrd        r6,  r7,  [sp, #12]
 .else
-    ldrd        r6,  [sp, #16]
+    ldrd        r6,  r7,  [sp, #16]
     ldr         r12, [sp, #12]
 .endif
     mov         lr,  #FENC_STRIDE
@@ -519,6 +519,38 @@
     b               x264_var_end
 .endfunc
 
+function x264_pixel_var_8x16_neon
+    vld1.64         {d16}, [r0,:64], r1
+    vld1.64         {d18}, [r0,:64], r1
+    vmull.u8        q1,  d16, d16
+    vmovl.u8        q0,  d16
+    vld1.64         {d20}, [r0,:64], r1
+    vmull.u8        q2,  d18, d18
+    vaddw.u8        q0,  q0,  d18
+
+    mov             ip,  #12
+
+    vld1.64         {d22}, [r0,:64], r1
+    VAR_SQR_SUM     q1,  q1,   q14,  d20, vpaddl.u16
+    vld1.64         {d16}, [r0,:64], r1
+    VAR_SQR_SUM     q2,  q2,   q15,  d22, vpaddl.u16
+
+1:  subs            ip,  ip,  #4
+    vld1.64         {d18}, [r0,:64], r1
+    VAR_SQR_SUM     q1,  q14,  q12, d16
+    vld1.64         {d20}, [r0,:64], r1
+    VAR_SQR_SUM     q2,  q15,  q13, d18
+    vld1.64         {d22}, [r0,:64], r1
+    VAR_SQR_SUM     q1,  q12,  q14, d20
+    beq             2f
+    vld1.64         {d16}, [r0,:64], r1
+    VAR_SQR_SUM     q2,  q13,  q15, d22
+    b               1b
+2:
+    VAR_SQR_SUM     q2,  q13,  q15, d22
+    b               x264_var_end
+.endfunc
+
 function x264_pixel_var_16x16_neon
     vld1.64         {d16-d17}, [r0,:128], r1
     vmull.u8        q12, d16, d16
@@ -596,13 +628,56 @@
     vadd.s32        d1,  d2,  d3
     vpadd.s32       d0,  d0,  d1
 
-    vmov.32         r0,  r1,  d0
+    vmov            r0,  r1,  d0
     vst1.32         {d0[1]}, [ip,:32]
     mul             r0,  r0,  r0
     sub             r0,  r1,  r0,  lsr #6
     bx              lr
 .endfunc
 
+function x264_pixel_var2_8x16_neon
+    vld1.64         {d16}, [r0,:64], r1
+    vld1.64         {d17}, [r2,:64], r3
+    vld1.64         {d18}, [r0,:64], r1
+    vld1.64         {d19}, [r2,:64], r3
+    vsubl.u8        q10, d16, d17
+    vsubl.u8        q11, d18, d19
+    SQR_ACC         q1,  d20, d21,  vmull.s16
+    vld1.64         {d16}, [r0,:64], r1
+    vadd.s16        q0,  q10, q11
+    vld1.64         {d17}, [r2,:64], r3
+    SQR_ACC         q2,  d22, d23,  vmull.s16
+    mov             ip,  #14
+1:  subs            ip,  ip,  #2
+    vld1.64         {d18}, [r0,:64], r1
+    vsubl.u8        q10, d16, d17
+    vld1.64         {d19}, [r2,:64], r3
+    vadd.s16        q0,  q0,  q10
+    SQR_ACC         q1,  d20, d21
+    vsubl.u8        q11, d18, d19
+    beq             2f
+    vld1.64         {d16}, [r0,:64], r1
+    vadd.s16        q0,  q0,  q11
+    vld1.64         {d17}, [r2,:64], r3
+    SQR_ACC         q2,  d22, d23
+    b               1b
+2:
+    vadd.s16        q0,  q0,  q11
+    SQR_ACC         q2,  d22, d23
+
+    ldr             ip,  [sp]
+    vadd.s16        d0,  d0,  d1
+    vadd.s32        q1,  q1,  q2
+    vpaddl.s16      d0,  d0
+    vadd.s32        d1,  d2,  d3
+    vpadd.s32       d0,  d0,  d1
+
+    vmov            r0,  r1,  d0
+    vst1.32         {d0[1]}, [ip,:32]
+    mul             r0,  r0,  r0
+    sub             r0,  r1,  r0,  lsr #7
+    bx              lr
+.endfunc
 
 .macro LOAD_DIFF_8x4 q0 q1 q2 q3
     vld1.32     {d1}, [r2], r3

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * pixel.S: arm pixel metrics
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *
@@ -328,9 +328,9 @@
 function x264_pixel_sad_x\x\()_\w\()x\h\()_neon
     push        {r6-r7,lr}
 .if \x == 3
-    ldrd        r6,  [sp, #12]
+    ldrd        r6,  r7,  [sp, #12]
 .else
-    ldrd        r6,  [sp, #16]
+    ldrd        r6,  r7,  [sp, #16]
     ldr         r12, [sp, #12]
 .endif
     mov         lr,  #FENC_STRIDE
@@ -519,6 +519,38 @@
     b               x264_var_end
 .endfunc
 
+function x264_pixel_var_8x16_neon
+    vld1.64         {d16}, [r0,:64], r1
+    vld1.64         {d18}, [r0,:64], r1
+    vmull.u8        q1,  d16, d16
+    vmovl.u8        q0,  d16
+    vld1.64         {d20}, [r0,:64], r1
+    vmull.u8        q2,  d18, d18
+    vaddw.u8        q0,  q0,  d18
+
+    mov             ip,  #12
+
+    vld1.64         {d22}, [r0,:64], r1
+    VAR_SQR_SUM     q1,  q1,   q14,  d20, vpaddl.u16
+    vld1.64         {d16}, [r0,:64], r1
+    VAR_SQR_SUM     q2,  q2,   q15,  d22, vpaddl.u16
+
+1:  subs            ip,  ip,  #4
+    vld1.64         {d18}, [r0,:64], r1
+    VAR_SQR_SUM     q1,  q14,  q12, d16
+    vld1.64         {d20}, [r0,:64], r1
+    VAR_SQR_SUM     q2,  q15,  q13, d18
+    vld1.64         {d22}, [r0,:64], r1
+    VAR_SQR_SUM     q1,  q12,  q14, d20
+    beq             2f
+    vld1.64         {d16}, [r0,:64], r1
+    VAR_SQR_SUM     q2,  q13,  q15, d22
+    b               1b
+2:
+    VAR_SQR_SUM     q2,  q13,  q15, d22
+    b               x264_var_end
+.endfunc
+
 function x264_pixel_var_16x16_neon
     vld1.64         {d16-d17}, [r0,:128], r1
     vmull.u8        q12, d16, d16
@@ -596,13 +628,56 @@
     vadd.s32        d1,  d2,  d3
     vpadd.s32       d0,  d0,  d1
 
-    vmov.32         r0,  r1,  d0
+    vmov            r0,  r1,  d0
     vst1.32         {d0[1]}, [ip,:32]
     mul             r0,  r0,  r0
     sub             r0,  r1,  r0,  lsr #6
     bx              lr
 .endfunc
 
+function x264_pixel_var2_8x16_neon
+    vld1.64         {d16}, [r0,:64], r1
+    vld1.64         {d17}, [r2,:64], r3
+    vld1.64         {d18}, [r0,:64], r1
+    vld1.64         {d19}, [r2,:64], r3
+    vsubl.u8        q10, d16, d17
+    vsubl.u8        q11, d18, d19
+    SQR_ACC         q1,  d20, d21,  vmull.s16
+    vld1.64         {d16}, [r0,:64], r1
+    vadd.s16        q0,  q10, q11
+    vld1.64         {d17}, [r2,:64], r3
+    SQR_ACC         q2,  d22, d23,  vmull.s16
+    mov             ip,  #14
+1:  subs            ip,  ip,  #2
+    vld1.64         {d18}, [r0,:64], r1
+    vsubl.u8        q10, d16, d17
+    vld1.64         {d19}, [r2,:64], r3
+    vadd.s16        q0,  q0,  q10
+    SQR_ACC         q1,  d20, d21
+    vsubl.u8        q11, d18, d19
+    beq             2f
+    vld1.64         {d16}, [r0,:64], r1
+    vadd.s16        q0,  q0,  q11
+    vld1.64         {d17}, [r2,:64], r3
+    SQR_ACC         q2,  d22, d23
+    b               1b
+2:
+    vadd.s16        q0,  q0,  q11
+    SQR_ACC         q2,  d22, d23
+
+    ldr             ip,  [sp]
+    vadd.s16        d0,  d0,  d1
+    vadd.s32        q1,  q1,  q2
+    vpaddl.s16      d0,  d0
+    vadd.s32        d1,  d2,  d3
+    vpadd.s32       d0,  d0,  d1
+
+    vmov            r0,  r1,  d0
+    vst1.32         {d0[1]}, [ip,:32]
+    mul             r0,  r0,  r0
+    sub             r0,  r1,  r0,  lsr #7
+    bx              lr
+.endfunc
 
 .macro LOAD_DIFF_8x4 q0 q1 q2 q3
     vld1.32     {d1}, [r2], r3
​

x264-snapshot-20130723-2245.tar.bz2/common/arm/pixel.h -> x264-snapshot-20140321-2245.tar.bz2/common/arm/pixel.h Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * pixel.h: arm pixel metrics
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *
@@ -56,8 +56,10 @@
 int x264_pixel_sa8d_16x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t );
 
 uint64_t x264_pixel_var_8x8_neon  ( uint8_t *, intptr_t );
+uint64_t x264_pixel_var_8x16_neon ( uint8_t *, intptr_t );
 uint64_t x264_pixel_var_16x16_neon( uint8_t *, intptr_t );
-int x264_pixel_var2_8x8_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int * );
+int x264_pixel_var2_8x8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, int * );
+int x264_pixel_var2_8x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int * );
 
 uint64_t x264_pixel_hadamard_ac_8x8_neon  ( uint8_t *, intptr_t );
 uint64_t x264_pixel_hadamard_ac_8x16_neon ( uint8_t *, intptr_t );

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * pixel.h: arm pixel metrics
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *
@@ -56,8 +56,10 @@
 int x264_pixel_sa8d_16x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t );
 
 uint64_t x264_pixel_var_8x8_neon  ( uint8_t *, intptr_t );
+uint64_t x264_pixel_var_8x16_neon ( uint8_t *, intptr_t );
 uint64_t x264_pixel_var_16x16_neon( uint8_t *, intptr_t );
-int x264_pixel_var2_8x8_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int * );
+int x264_pixel_var2_8x8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, int * );
+int x264_pixel_var2_8x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int * );
 
 uint64_t x264_pixel_hadamard_ac_8x8_neon  ( uint8_t *, intptr_t );
 uint64_t x264_pixel_hadamard_ac_8x16_neon ( uint8_t *, intptr_t );
​

x264-snapshot-20130723-2245.tar.bz2/common/arm/predict-a.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/predict-a.S Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * predict.S: arm intra prediction
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *          Mans Rullgard <mans@mansr.com>
@@ -181,9 +181,9 @@
 
 function x264_predict_8x8_dc_neon
     mov     ip, #0
-    ldrd    r2, [r1, #8]
+    ldrd    r2, r3, [r1, #8]
     push    {r4-r5,lr}
-    ldrd    r4, [r1, #16]
+    ldrd    r4, r5, [r1, #16]
     lsl     r3, r3, #8
     ldrb    lr, [r1, #7]
     usad8   r2, r2, ip
​

x264-snapshot-20130723-2245.tar.bz2/common/arm/predict-c.c -> x264-snapshot-20140321-2245.tar.bz2/common/arm/predict-c.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * predict.c: arm intra prediction
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/arm/predict.h -> x264-snapshot-20140321-2245.tar.bz2/common/arm/predict.h Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * predict.h: arm intra prediction
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *
@@ -26,6 +26,16 @@
 #ifndef X264_ARM_PREDICT_H
 #define X264_ARM_PREDICT_H
 
+void x264_predict_8x8_v_neon( pixel *src, pixel edge[36] );
+void x264_predict_8x8_h_neon( pixel *src, pixel edge[36] );
+void x264_predict_8x8_dc_neon( pixel *src, pixel edge[36] );
+void x264_predict_8x8c_dc_neon( pixel *src );
+void x264_predict_8x8c_h_neon( pixel *src );
+void x264_predict_8x8c_v_neon( pixel *src );
+void x264_predict_16x16_v_neon( pixel *src );
+void x264_predict_16x16_h_neon( pixel *src );
+void x264_predict_16x16_dc_neon( pixel *src );
+
 void x264_predict_4x4_init_arm( int cpu, x264_predict_t pf[12] );
 void x264_predict_8x8_init_arm( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter );
 void x264_predict_8x8c_init_arm( int cpu, x264_predict_t pf[7] );

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * predict.h: arm intra prediction
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *
@@ -26,6 +26,16 @@
 #ifndef X264_ARM_PREDICT_H
 #define X264_ARM_PREDICT_H
 
+void x264_predict_8x8_v_neon( pixel *src, pixel edge[36] );
+void x264_predict_8x8_h_neon( pixel *src, pixel edge[36] );
+void x264_predict_8x8_dc_neon( pixel *src, pixel edge[36] );
+void x264_predict_8x8c_dc_neon( pixel *src );
+void x264_predict_8x8c_h_neon( pixel *src );
+void x264_predict_8x8c_v_neon( pixel *src );
+void x264_predict_16x16_v_neon( pixel *src );
+void x264_predict_16x16_h_neon( pixel *src );
+void x264_predict_16x16_dc_neon( pixel *src );
+
 void x264_predict_4x4_init_arm( int cpu, x264_predict_t pf[12] );
 void x264_predict_8x8_init_arm( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter );
 void x264_predict_8x8c_init_arm( int cpu, x264_predict_t pf[7] );
​

x264-snapshot-20130723-2245.tar.bz2/common/arm/quant-a.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/quant-a.S Changed

@@ -1,7 +1,7 @@
 /****************************************************************************
  * quant.S: arm quantization and level-run
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *
@@ -312,7 +312,7 @@
 
 // int coeff_last( int16_t *l )
 function x264_coeff_last4_arm
-    ldrd        r2,  [r0]
+    ldrd        r2,  r3,  [r0]
     subs        r0,  r3,  #0
     movne       r0,  #2
     movne       r2,  r3
@@ -341,7 +341,7 @@
 
     subs        r1,  ip,  r1,  lsr #2
     addge       r0,  r1,  #\size - 8
-    sublts      r0,  r3,  r0,  lsr #2
+    subslt      r0,  r3,  r0,  lsr #2
     movlt       r0,  #0
     bx          lr
 .endfunc
@@ -390,7 +390,7 @@
 
     subs        r1,  ip,  r1
     addge       r0,  r1,  #32
-    sublts      r0,  ip,  r0
+    subslt      r0,  ip,  r0
     movlt       r0,  #0
     bx          lr
 .endfunc

 
@@ -1,7 +1,7 @@
 /****************************************************************************
  * quant.S: arm quantization and level-run
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *
@@ -312,7 +312,7 @@
 
 // int coeff_last( int16_t *l )
 function x264_coeff_last4_arm
-    ldrd        r2,  [r0]
+    ldrd        r2,  r3,  [r0]
     subs        r0,  r3,  #0
     movne       r0,  #2
     movne       r2,  r3
@@ -341,7 +341,7 @@
 
     subs        r1,  ip,  r1,  lsr #2
     addge       r0,  r1,  #\size - 8
-    sublts      r0,  r3,  r0,  lsr #2
+    subslt      r0,  r3,  r0,  lsr #2
     movlt       r0,  #0
     bx          lr
 .endfunc
@@ -390,7 +390,7 @@
 
     subs        r1,  ip,  r1
     addge       r0,  r1,  #32
-    sublts      r0,  ip,  r0
+    subslt      r0,  ip,  r0
     movlt       r0,  #0
     bx          lr
 .endfunc
​

x264-snapshot-20130723-2245.tar.bz2/common/arm/quant.h -> x264-snapshot-20140321-2245.tar.bz2/common/arm/quant.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * quant.h: arm quantization and level-run
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/bitstream.c -> x264-snapshot-20140321-2245.tar.bz2/common/bitstream.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * bitstream.c: bitstream writing
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
​

x264-snapshot-20130723-2245.tar.bz2/common/bitstream.h -> x264-snapshot-20140321-2245.tar.bz2/common/bitstream.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * bitstream.h: bitstream writing
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
​

x264-snapshot-20130723-2245.tar.bz2/common/cabac.c -> x264-snapshot-20140321-2245.tar.bz2/common/cabac.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * cabac.c: arithmetic coder
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
​

x264-snapshot-20130723-2245.tar.bz2/common/cabac.h -> x264-snapshot-20140321-2245.tar.bz2/common/cabac.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * cabac.h: arithmetic coder
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
​

x264-snapshot-20130723-2245.tar.bz2/common/common.c -> x264-snapshot-20140321-2245.tar.bz2/common/common.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * common.c: misc common functions
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -32,6 +32,9 @@
 #if HAVE_MALLOC_H
 #include <malloc.h>
 #endif
+#if HAVE_THP
+#include <sys/mman.h>
+#endif
 
 const int x264_bit_depth = BIT_DEPTH;
 
@@ -342,7 +345,7 @@
             param->analyse.i_luma_deadzone[1] = 6;
             param->rc.f_qcompress = 0.8;
         }
-        else if( !strncasecmp( s, "stillimage", 5 ) )
+        else if( !strncasecmp( s, "stillimage", 10 ) )
         {
             if( psy_tuning_used++ ) goto psy_failure;
             param->i_deblocking_filter_alphac0 = -3;
@@ -668,6 +671,8 @@
     }
     OPT("bluray-compat")
         p->b_bluray_compat = atobool(value);
+    OPT("avcintra-class")
+        p->i_avcintra_class = atoi(value);
     OPT("sar")
     {
         b_error = ( 2 != sscanf( value, "%d:%d", &p->vui.i_sar_width, &p->vui.i_sar_height ) &&
@@ -876,10 +881,6 @@
     }
     OPT("log")
         p->i_log_level = atoi(value);
-#if HAVE_VISUALIZE
-    OPT("visualize")
-        p->b_visualize = atobool(value);
-#endif
     OPT("dump-yuv")
         p->psz_dump_yuv = strdup(value);
     OPT2("analyse", "partitions")
@@ -1031,6 +1032,8 @@
         p->b_vfr_input = !atobool(value);
     OPT("nal-hrd")
         b_error |= parse_enum( value, x264_nal_hrd_names, &p->i_nal_hrd );
+    OPT("filler")
+        p->rc.b_filler = atobool(value);
     OPT("pic-struct")
         p->b_pic_struct = atobool(value);
     OPT("fake-interlaced")
@@ -1099,7 +1102,7 @@
             break;
     }
     fprintf( stderr, "x264 [%s]: ", psz_prefix );
-    vfprintf( stderr, psz_fmt, arg );
+    x264_vfprintf( stderr, psz_fmt, arg );
 }
 
 /****************************************************************************
@@ -1141,7 +1144,7 @@
     };
 
     int csp = i_csp & X264_CSP_MASK;
-    if( csp <= X264_CSP_NONE || csp >= X264_CSP_MAX )
+    if( csp <= X264_CSP_NONE || csp >= X264_CSP_MAX || csp == X264_CSP_V210 )
         return -1;
     x264_picture_init( pic );
     pic->img.i_csp = i_csp;
@@ -1183,7 +1186,25 @@
 {
     uint8_t *align_buf = NULL;
 #if HAVE_MALLOC_H
-    align_buf = memalign( NATIVE_ALIGN, i_size );
+#if HAVE_THP
+#define HUGE_PAGE_SIZE 2*1024*1024
+#define HUGE_PAGE_THRESHOLD HUGE_PAGE_SIZE*7/8 /* FIXME: Is this optimal? */
+    /* Attempt to allocate huge pages to reduce TLB misses. */
+    if( i_size >= HUGE_PAGE_THRESHOLD )
+    {
+        align_buf = memalign( HUGE_PAGE_SIZE, i_size );
+        if( align_buf )
+        {
+            /* Round up to the next huge page boundary if we are close enough. */
+            size_t madv_size = (i_size + HUGE_PAGE_SIZE - HUGE_PAGE_THRESHOLD) & ~(HUGE_PAGE_SIZE-1);
+            madvise( align_buf, madv_size, MADV_HUGEPAGE );
+        }
+    }
+    else
+#undef HUGE_PAGE_SIZE
+#undef HUGE_PAGE_THRESHOLD
+#endif
+        align_buf = memalign( NATIVE_ALIGN, i_size );
 #else
     uint8_t *buf = malloc( i_size + (NATIVE_ALIGN-1) + sizeof(void **) );
     if( buf )
@@ -1246,7 +1267,7 @@
     int b_error = 0;
     size_t i_size;
     char *buf;
-    FILE *fh = fopen( filename, "rb" );
+    FILE *fh = x264_fopen( filename, "rb" );
     if( !fh )
         return NULL;
     b_error |= fseek( fh, 0, SEEK_END ) < 0;
@@ -1383,7 +1404,7 @@
         s += sprintf( s, " qp=%d", p->rc.i_qp_constant );
 
     if( p->rc.i_vbv_buffer_size )
-        s += sprintf( s, " nal_hrd=%s", x264_nal_hrd_names[p->i_nal_hrd] );
+        s += sprintf( s, " nal_hrd=%s filler=%d", x264_nal_hrd_names[p->i_nal_hrd], p->rc.b_filler );
     if( p->crop_rect.i_left | p->crop_rect.i_top | p->crop_rect.i_right | p->crop_rect.i_bottom )
         s += sprintf( s, " crop_rect=%u,%u,%u,%u", p->crop_rect.i_left, p->crop_rect.i_top,
                                                    p->crop_rect.i_right, p->crop_rect.i_bottom );

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * common.c: misc common functions
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -32,6 +32,9 @@
 #if HAVE_MALLOC_H
 #include <malloc.h>
 #endif
+#if HAVE_THP
+#include <sys/mman.h>
+#endif
 
 const int x264_bit_depth = BIT_DEPTH;
 
@@ -342,7 +345,7 @@
             param->analyse.i_luma_deadzone[1] = 6;
             param->rc.f_qcompress = 0.8;
         }
-        else if( !strncasecmp( s, "stillimage", 5 ) )
+        else if( !strncasecmp( s, "stillimage", 10 ) )
         {
             if( psy_tuning_used++ ) goto psy_failure;
             param->i_deblocking_filter_alphac0 = -3;
@@ -668,6 +671,8 @@
     }
     OPT("bluray-compat")
         p->b_bluray_compat = atobool(value);
+    OPT("avcintra-class")
+        p->i_avcintra_class = atoi(value);
     OPT("sar")
     {
         b_error = ( 2 != sscanf( value, "%d:%d", &p->vui.i_sar_width, &p->vui.i_sar_height ) &&
@@ -876,10 +881,6 @@
     }
     OPT("log")
         p->i_log_level = atoi(value);
-#if HAVE_VISUALIZE
-    OPT("visualize")
-        p->b_visualize = atobool(value);
-#endif
     OPT("dump-yuv")
         p->psz_dump_yuv = strdup(value);
     OPT2("analyse", "partitions")
@@ -1031,6 +1032,8 @@
         p->b_vfr_input = !atobool(value);
     OPT("nal-hrd")
         b_error |= parse_enum( value, x264_nal_hrd_names, &p->i_nal_hrd );
+    OPT("filler")
+        p->rc.b_filler = atobool(value);
     OPT("pic-struct")
         p->b_pic_struct = atobool(value);
     OPT("fake-interlaced")
@@ -1099,7 +1102,7 @@
             break;
     }
     fprintf( stderr, "x264 [%s]: ", psz_prefix );
-    vfprintf( stderr, psz_fmt, arg );
+    x264_vfprintf( stderr, psz_fmt, arg );
 }
 
 /****************************************************************************
@@ -1141,7 +1144,7 @@
     };
 
     int csp = i_csp & X264_CSP_MASK;
-    if( csp <= X264_CSP_NONE || csp >= X264_CSP_MAX )
+    if( csp <= X264_CSP_NONE || csp >= X264_CSP_MAX || csp == X264_CSP_V210 )
         return -1;
     x264_picture_init( pic );
     pic->img.i_csp = i_csp;
@@ -1183,7 +1186,25 @@
 {
     uint8_t *align_buf = NULL;
 #if HAVE_MALLOC_H
-    align_buf = memalign( NATIVE_ALIGN, i_size );
+#if HAVE_THP
+#define HUGE_PAGE_SIZE 2*1024*1024
+#define HUGE_PAGE_THRESHOLD HUGE_PAGE_SIZE*7/8 /* FIXME: Is this optimal? */
+    /* Attempt to allocate huge pages to reduce TLB misses. */
+    if( i_size >= HUGE_PAGE_THRESHOLD )
+    {
+        align_buf = memalign( HUGE_PAGE_SIZE, i_size );
+        if( align_buf )
+        {
+            /* Round up to the next huge page boundary if we are close enough. */
+            size_t madv_size = (i_size + HUGE_PAGE_SIZE - HUGE_PAGE_THRESHOLD) & ~(HUGE_PAGE_SIZE-1);
+            madvise( align_buf, madv_size, MADV_HUGEPAGE );
+        }
+    }
+    else
+#undef HUGE_PAGE_SIZE
+#undef HUGE_PAGE_THRESHOLD
+#endif
+        align_buf = memalign( NATIVE_ALIGN, i_size );
 #else
     uint8_t *buf = malloc( i_size + (NATIVE_ALIGN-1) + sizeof(void **) );
     if( buf )
@@ -1246,7 +1267,7 @@
     int b_error = 0;
     size_t i_size;
     char *buf;
-    FILE *fh = fopen( filename, "rb" );
+    FILE *fh = x264_fopen( filename, "rb" );
     if( !fh )
         return NULL;
     b_error |= fseek( fh, 0, SEEK_END ) < 0;
@@ -1383,7 +1404,7 @@
         s += sprintf( s, " qp=%d", p->rc.i_qp_constant );
 
     if( p->rc.i_vbv_buffer_size )
-        s += sprintf( s, " nal_hrd=%s", x264_nal_hrd_names[p->i_nal_hrd] );
+        s += sprintf( s, " nal_hrd=%s filler=%d", x264_nal_hrd_names[p->i_nal_hrd], p->rc.b_filler );
     if( p->crop_rect.i_left | p->crop_rect.i_top | p->crop_rect.i_right | p->crop_rect.i_bottom )
         s += sprintf( s, " crop_rect=%u,%u,%u,%u", p->crop_rect.i_left, p->crop_rect.i_top,
                                                    p->crop_rect.i_right, p->crop_rect.i_bottom );
​

x264-snapshot-20130723-2245.tar.bz2/common/common.h -> x264-snapshot-20140321-2245.tar.bz2/common/common.h Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * common.h: misc common functions
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -54,6 +54,31 @@
     memset( var, 0, size );\
 } while( 0 )
 
+/* Macros for merging multiple allocations into a single large malloc, for improved
+ * use with huge pages. */
+
+/* Needs to be enough to contain any set of buffers that use combined allocations */
+#define PREALLOC_BUF_SIZE 1024
+
+#define PREALLOC_INIT\
+    int    prealloc_idx = 0;\
+    size_t prealloc_size = 0;\
+    uint8_t **preallocs[PREALLOC_BUF_SIZE];
+
+#define PREALLOC( var, size )\
+do {\
+    var = (void*)prealloc_size;\
+    preallocs[prealloc_idx++] = (uint8_t**)&var;\
+    prealloc_size += ALIGN(size, NATIVE_ALIGN);\
+} while(0)
+
+#define PREALLOC_END( ptr )\
+do {\
+    CHECKED_MALLOC( ptr, prealloc_size );\
+    while( prealloc_idx-- )\
+        *preallocs[prealloc_idx] += (intptr_t)ptr;\
+} while(0)
+
 #define ARRAY_SIZE(array)  (sizeof(array)/sizeof(array[0]))
 
 #define X264_BFRAME_MAX 16
@@ -84,6 +109,7 @@
 
 #define NALU_OVERHEAD 5 // startcode + NAL type costs 5 bytes per frame
 #define FILLER_OVERHEAD (NALU_OVERHEAD+1)
+#define SEI_OVERHEAD (NALU_OVERHEAD - (h->param.b_annexb && !h->param.i_avcintra_class && (h->out.i_nal-1)))
 
 /****************************************************************************
  * Includes
@@ -491,6 +517,9 @@
     uint8_t *nal_buffer;
     int      nal_buffer_size;
 
+    x264_t          *reconfig_h;
+    int             reconfig;
+
     /**** thread synchronization starts here ****/
 
     /* frame number/poc */
@@ -523,15 +552,15 @@
     int             (*dequant4_mf[4])[16];   /* [4][6][16] */
     int             (*dequant8_mf[4])[64];   /* [4][6][64] */
     /* quantization matrix for trellis, [cqm][qp][coef] */
-    int             (*unquant4_mf[4])[16];   /* [4][52][16] */
-    int             (*unquant8_mf[4])[64];   /* [4][52][64] */
+    int             (*unquant4_mf[4])[16];   /* [4][QP_MAX_SPEC+1][16] */
+    int             (*unquant8_mf[4])[64];   /* [4][QP_MAX_SPEC+1][64] */
     /* quantization matrix for deadzone */
-    udctcoef        (*quant4_mf[4])[16];     /* [4][52][16] */
-    udctcoef        (*quant8_mf[4])[64];     /* [4][52][64] */
-    udctcoef        (*quant4_bias[4])[16];   /* [4][52][16] */
-    udctcoef        (*quant8_bias[4])[64];   /* [4][52][64] */
-    udctcoef        (*quant4_bias0[4])[16];  /* [4][52][16] */
-    udctcoef        (*quant8_bias0[4])[64];  /* [4][52][64] */
+    udctcoef        (*quant4_mf[4])[16];     /* [4][QP_MAX_SPEC+1][16] */
+    udctcoef        (*quant8_mf[4])[64];     /* [4][QP_MAX_SPEC+1][64] */
+    udctcoef        (*quant4_bias[4])[16];   /* [4][QP_MAX_SPEC+1][16] */
+    udctcoef        (*quant8_bias[4])[64];   /* [4][QP_MAX_SPEC+1][64] */
+    udctcoef        (*quant4_bias0[4])[16];  /* [4][QP_MAX_SPEC+1][16] */
+    udctcoef        (*quant8_bias0[4])[64];  /* [4][QP_MAX_SPEC+1][64] */
     udctcoef        (*nr_offset_emergency)[4][64];
 
     /* mv/ref cost arrays. */
@@ -699,6 +728,7 @@
          * and won't be copied from one thread to another */
 
         /* mb table */
+        uint8_t *base;                      /* base pointer for all malloced data in this mb */
         int8_t  *type;                      /* mb type */
         uint8_t *partition;                 /* mb partition */
         int8_t  *qp;                        /* mb qp */
@@ -937,9 +967,6 @@
     x264_deblock_function_t loopf;
     x264_bitstream_function_t bsf;
 
-#if HAVE_VISUALIZE
-    struct visualize_t *visualize;
-#endif
     x264_lookahead_t *lookahead;
 
 #if HAVE_OPENCL

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * common.h: misc common functions
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -54,6 +54,31 @@
     memset( var, 0, size );\
 } while( 0 )
 
+/* Macros for merging multiple allocations into a single large malloc, for improved
+ * use with huge pages. */
+
+/* Needs to be enough to contain any set of buffers that use combined allocations */
+#define PREALLOC_BUF_SIZE 1024
+
+#define PREALLOC_INIT\
+    int    prealloc_idx = 0;\
+    size_t prealloc_size = 0;\
+    uint8_t **preallocs[PREALLOC_BUF_SIZE];
+
+#define PREALLOC( var, size )\
+do {\
+    var = (void*)prealloc_size;\
+    preallocs[prealloc_idx++] = (uint8_t**)&var;\
+    prealloc_size += ALIGN(size, NATIVE_ALIGN);\
+} while(0)
+
+#define PREALLOC_END( ptr )\
+do {\
+    CHECKED_MALLOC( ptr, prealloc_size );\
+    while( prealloc_idx-- )\
+        *preallocs[prealloc_idx] += (intptr_t)ptr;\
+} while(0)
+
 #define ARRAY_SIZE(array)  (sizeof(array)/sizeof(array[0]))
 
 #define X264_BFRAME_MAX 16
@@ -84,6 +109,7 @@
 
 #define NALU_OVERHEAD 5 // startcode + NAL type costs 5 bytes per frame
 #define FILLER_OVERHEAD (NALU_OVERHEAD+1)
+#define SEI_OVERHEAD (NALU_OVERHEAD - (h->param.b_annexb && !h->param.i_avcintra_class && (h->out.i_nal-1)))
 
 /****************************************************************************
  * Includes
@@ -491,6 +517,9 @@
     uint8_t *nal_buffer;
     int      nal_buffer_size;
 
+    x264_t          *reconfig_h;
+    int             reconfig;
+
     /**** thread synchronization starts here ****/
 
     /* frame number/poc */
@@ -523,15 +552,15 @@
     int             (*dequant4_mf[4])[16];   /* [4][6][16] */
     int             (*dequant8_mf[4])[64];   /* [4][6][64] */
     /* quantization matrix for trellis, [cqm][qp][coef] */
-    int             (*unquant4_mf[4])[16];   /* [4][52][16] */
-    int             (*unquant8_mf[4])[64];   /* [4][52][64] */
+    int             (*unquant4_mf[4])[16];   /* [4][QP_MAX_SPEC+1][16] */
+    int             (*unquant8_mf[4])[64];   /* [4][QP_MAX_SPEC+1][64] */
     /* quantization matrix for deadzone */
-    udctcoef        (*quant4_mf[4])[16];     /* [4][52][16] */
-    udctcoef        (*quant8_mf[4])[64];     /* [4][52][64] */
-    udctcoef        (*quant4_bias[4])[16];   /* [4][52][16] */
-    udctcoef        (*quant8_bias[4])[64];   /* [4][52][64] */
-    udctcoef        (*quant4_bias0[4])[16];  /* [4][52][16] */
-    udctcoef        (*quant8_bias0[4])[64];  /* [4][52][64] */
+    udctcoef        (*quant4_mf[4])[16];     /* [4][QP_MAX_SPEC+1][16] */
+    udctcoef        (*quant8_mf[4])[64];     /* [4][QP_MAX_SPEC+1][64] */
+    udctcoef        (*quant4_bias[4])[16];   /* [4][QP_MAX_SPEC+1][16] */
+    udctcoef        (*quant8_bias[4])[64];   /* [4][QP_MAX_SPEC+1][64] */
+    udctcoef        (*quant4_bias0[4])[16];  /* [4][QP_MAX_SPEC+1][16] */
+    udctcoef        (*quant8_bias0[4])[64];  /* [4][QP_MAX_SPEC+1][64] */
     udctcoef        (*nr_offset_emergency)[4][64];
 
     /* mv/ref cost arrays. */
@@ -699,6 +728,7 @@
          * and won't be copied from one thread to another */
 
         /* mb table */
+        uint8_t *base;                      /* base pointer for all malloced data in this mb */
         int8_t  *type;                      /* mb type */
         uint8_t *partition;                 /* mb partition */
         int8_t  *qp;                        /* mb qp */
@@ -937,9 +967,6 @@
     x264_deblock_function_t loopf;
     x264_bitstream_function_t bsf;
 
-#if HAVE_VISUALIZE
-    struct visualize_t *visualize;
-#endif
     x264_lookahead_t *lookahead;
 
 #if HAVE_OPENCL
​

x264-snapshot-20130723-2245.tar.bz2/common/cpu.c -> x264-snapshot-20140321-2245.tar.bz2/common/cpu.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * cpu.c: cpu detection
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -74,7 +74,6 @@
 #undef MMX2
     {"Cache32",         X264_CPU_CACHELINE_32},
     {"Cache64",         X264_CPU_CACHELINE_64},
-    {"SSEMisalign",     X264_CPU_SSE_MISALIGN},
     {"LZCNT",           X264_CPU_LZCNT},
     {"BMI1",            X264_CPU_BMI1},
     {"BMI2",            X264_CPU_BMI1|X264_CPU_BMI2},
@@ -123,7 +122,7 @@
     uint32_t cpu = 0;
     uint32_t eax, ebx, ecx, edx;
     uint32_t vendor[4] = {0};
-    uint32_t max_extended_cap;
+    uint32_t max_extended_cap, max_basic_cap;
     int cache;
 
 #if !ARCH_X86_64
@@ -132,7 +131,8 @@
 #endif
 
     x264_cpu_cpuid( 0, &eax, vendor+0, vendor+2, vendor+1 );
-    if( eax == 0 )
+    max_basic_cap = eax;
+    if( max_basic_cap == 0 )
         return 0;
 
     x264_cpu_cpuid( 1, &eax, &ebx, &ecx, &edx );
@@ -169,15 +169,18 @@
         }
     }
 
-    x264_cpu_cpuid( 7, &eax, &ebx, &ecx, &edx );
-    /* AVX2 requires OS support, but BMI1/2 don't. */
-    if( (cpu&X264_CPU_AVX) && (ebx&0x00000020) )
-        cpu |= X264_CPU_AVX2;
-    if( ebx&0x00000008 )
+    if( max_basic_cap >= 7 )
     {
-        cpu |= X264_CPU_BMI1;
-        if( ebx&0x00000100 )
-            cpu |= X264_CPU_BMI2;
+        x264_cpu_cpuid( 7, &eax, &ebx, &ecx, &edx );
+        /* AVX2 requires OS support, but BMI1/2 don't. */
+        if( (cpu&X264_CPU_AVX) && (ebx&0x00000020) )
+            cpu |= X264_CPU_AVX2;
+        if( ebx&0x00000008 )
+        {
+            cpu |= X264_CPU_BMI1;
+            if( ebx&0x00000100 )
+                cpu |= X264_CPU_BMI2;
+        }
     }
 
     if( cpu & X264_CPU_SSSE3 )
@@ -210,12 +213,6 @@
             }
         }
 
-        if( ecx&0x00000080 ) /* Misalign SSE */
-        {
-            cpu |= X264_CPU_SSE_MISALIGN;
-            x264_cpu_mask_misalign_sse();
-        }
-
         if( cpu & X264_CPU_AVX )
         {
             if( ecx&0x00000800 ) /* XOP */
@@ -274,7 +271,7 @@
             x264_cpu_cpuid( 0x80000006, &eax, &ebx, &ecx, &edx );
             cache = ecx&0xff; // cacheline size
         }
-        if( !cache )
+        if( !cache && max_basic_cap >= 2 )
         {
             // Cache and TLB Information
             static const char cache32_ids[] = { 0x0a, 0x0c, 0x41, 0x42, 0x43, 0x44, 0x45, 0x82, 0x83, 0x84, 0x85, 0 };
@@ -307,7 +304,7 @@
             x264_log( NULL, X264_LOG_WARNING, "unable to determine cacheline size\n" );
     }
 
-#if BROKEN_STACK_ALIGNMENT
+#if STACK_ALIGNMENT < 16
     cpu |= X264_CPU_STACK_MOD4;
 #endif
 
@@ -429,6 +426,10 @@
     return sysconf( _SC_NPROCESSORS_ONLN );
 
 #elif SYS_LINUX
+#ifdef __ANDROID__
+    // Android NDK does not expose sched_getaffinity
+    return sysconf( _SC_NPROCESSORS_CONF );
+#else
     cpu_set_t p_aff;
     memset( &p_aff, 0, sizeof(p_aff) );
     if( sched_getaffinity( 0, sizeof(p_aff), &p_aff ) )
@@ -441,6 +442,7 @@
         np += (((uint8_t *)&p_aff)[bit / 8] >> (bit % 8)) & 1;
     return np;
 #endif
+#endif
 
 #elif SYS_BEOS
     system_info info;

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * cpu.c: cpu detection
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -74,7 +74,6 @@
 #undef MMX2
     {"Cache32",         X264_CPU_CACHELINE_32},
     {"Cache64",         X264_CPU_CACHELINE_64},
-    {"SSEMisalign",     X264_CPU_SSE_MISALIGN},
     {"LZCNT",           X264_CPU_LZCNT},
     {"BMI1",            X264_CPU_BMI1},
     {"BMI2",            X264_CPU_BMI1|X264_CPU_BMI2},
@@ -123,7 +122,7 @@
     uint32_t cpu = 0;
     uint32_t eax, ebx, ecx, edx;
     uint32_t vendor[4] = {0};
-    uint32_t max_extended_cap;
+    uint32_t max_extended_cap, max_basic_cap;
     int cache;
 
 #if !ARCH_X86_64
@@ -132,7 +131,8 @@
 #endif
 
     x264_cpu_cpuid( 0, &eax, vendor+0, vendor+2, vendor+1 );
-    if( eax == 0 )
+    max_basic_cap = eax;
+    if( max_basic_cap == 0 )
         return 0;
 
     x264_cpu_cpuid( 1, &eax, &ebx, &ecx, &edx );
@@ -169,15 +169,18 @@
         }
     }
 
-    x264_cpu_cpuid( 7, &eax, &ebx, &ecx, &edx );
-    /* AVX2 requires OS support, but BMI1/2 don't. */
-    if( (cpu&X264_CPU_AVX) && (ebx&0x00000020) )
-        cpu |= X264_CPU_AVX2;
-    if( ebx&0x00000008 )
+    if( max_basic_cap >= 7 )
     {
-        cpu |= X264_CPU_BMI1;
-        if( ebx&0x00000100 )
-            cpu |= X264_CPU_BMI2;
+        x264_cpu_cpuid( 7, &eax, &ebx, &ecx, &edx );
+        /* AVX2 requires OS support, but BMI1/2 don't. */
+        if( (cpu&X264_CPU_AVX) && (ebx&0x00000020) )
+            cpu |= X264_CPU_AVX2;
+        if( ebx&0x00000008 )
+        {
+            cpu |= X264_CPU_BMI1;
+            if( ebx&0x00000100 )
+                cpu |= X264_CPU_BMI2;
+        }
     }
 
     if( cpu & X264_CPU_SSSE3 )
@@ -210,12 +213,6 @@
             }
         }
 
-        if( ecx&0x00000080 ) /* Misalign SSE */
-        {
-            cpu |= X264_CPU_SSE_MISALIGN;
-            x264_cpu_mask_misalign_sse();
-        }
-
         if( cpu & X264_CPU_AVX )
         {
             if( ecx&0x00000800 ) /* XOP */
@@ -274,7 +271,7 @@
             x264_cpu_cpuid( 0x80000006, &eax, &ebx, &ecx, &edx );
             cache = ecx&0xff; // cacheline size
         }
-        if( !cache )
+        if( !cache && max_basic_cap >= 2 )
         {
             // Cache and TLB Information
             static const char cache32_ids[] = { 0x0a, 0x0c, 0x41, 0x42, 0x43, 0x44, 0x45, 0x82, 0x83, 0x84, 0x85, 0 };
@@ -307,7 +304,7 @@
             x264_log( NULL, X264_LOG_WARNING, "unable to determine cacheline size\n" );
     }
 
-#if BROKEN_STACK_ALIGNMENT
+#if STACK_ALIGNMENT < 16
     cpu |= X264_CPU_STACK_MOD4;
 #endif
 
@@ -429,6 +426,10 @@
     return sysconf( _SC_NPROCESSORS_ONLN );
 
 #elif SYS_LINUX
+#ifdef __ANDROID__
+    // Android NDK does not expose sched_getaffinity
+    return sysconf( _SC_NPROCESSORS_CONF );
+#else
     cpu_set_t p_aff;
     memset( &p_aff, 0, sizeof(p_aff) );
     if( sched_getaffinity( 0, sizeof(p_aff), &p_aff ) )
@@ -441,6 +442,7 @@
         np += (((uint8_t *)&p_aff)[bit / 8] >> (bit % 8)) & 1;
     return np;
 #endif
+#endif
 
 #elif SYS_BEOS
     system_info info;
​

x264-snapshot-20130723-2245.tar.bz2/common/cpu.h -> x264-snapshot-20140321-2245.tar.bz2/common/cpu.h Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * cpu.h: cpu detection
  *****************************************************************************
- * Copyright (C) 2004-2013 x264 project
+ * Copyright (C) 2004-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *
@@ -45,7 +45,6 @@
 #define x264_emms()
 #endif
 #define x264_sfence x264_cpu_sfence
-void     x264_cpu_mask_misalign_sse( void );
 void     x264_safe_intel_cpu_indicator_init( void );
 
 /* kludge:
@@ -58,8 +57,8 @@
  * alignment between functions (osdep.h handles manual alignment of arrays
  * if it doesn't).
  */
-#if (ARCH_X86 || HAVE_32B_STACK_ALIGNMENT) && HAVE_MMX
-int x264_stack_align( void (*func)(), ... );
+#if (ARCH_X86 || STACK_ALIGNMENT > 16) && HAVE_MMX
+intptr_t x264_stack_align( void (*func)(), ... );
 #define x264_stack_align(func,...) x264_stack_align((void (*)())func, __VA_ARGS__)
 #else
 #define x264_stack_align(func,...) func(__VA_ARGS__)

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * cpu.h: cpu detection
  *****************************************************************************
- * Copyright (C) 2004-2013 x264 project
+ * Copyright (C) 2004-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *
@@ -45,7 +45,6 @@
 #define x264_emms()
 #endif
 #define x264_sfence x264_cpu_sfence
-void     x264_cpu_mask_misalign_sse( void );
 void     x264_safe_intel_cpu_indicator_init( void );
 
 /* kludge:
@@ -58,8 +57,8 @@
  * alignment between functions (osdep.h handles manual alignment of arrays
  * if it doesn't).
  */
-#if (ARCH_X86 || HAVE_32B_STACK_ALIGNMENT) && HAVE_MMX
-int x264_stack_align( void (*func)(), ... );
+#if (ARCH_X86 || STACK_ALIGNMENT > 16) && HAVE_MMX
+intptr_t x264_stack_align( void (*func)(), ... );
 #define x264_stack_align(func,...) x264_stack_align((void (*)())func, __VA_ARGS__)
 #else
 #define x264_stack_align(func,...) func(__VA_ARGS__)
​

x264-snapshot-20130723-2245.tar.bz2/common/dct.c -> x264-snapshot-20140321-2245.tar.bz2/common/dct.c Changed

 
@@ -1,11 +1,11 @@
 /*****************************************************************************
  * dct.c: transform and zigzag
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
- *          Henrik Gramner <hengar-6@student.ltu.se>
+ *          Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
​

x264-snapshot-20130723-2245.tar.bz2/common/dct.h -> x264-snapshot-20140321-2245.tar.bz2/common/dct.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * dct.h: transform and zigzag
  *****************************************************************************
- * Copyright (C) 2004-2013 x264 project
+ * Copyright (C) 2004-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/deblock.c -> x264-snapshot-20140321-2245.tar.bz2/common/deblock.c Changed

 
@@ -1,12 +1,12 @@
 /*****************************************************************************
  * deblock.c: deblocking
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
- *          Henrik Gramner <hengar-6@student.ltu.se>
+ *          Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
​

x264-snapshot-20130723-2245.tar.bz2/common/frame.c -> x264-snapshot-20140321-2245.tar.bz2/common/frame.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * frame.c: frame handling
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -53,6 +53,7 @@
         case X264_CSP_NV16:
         case X264_CSP_I422:
         case X264_CSP_YV16:
+        case X264_CSP_V210:
             return X264_CSP_NV16;
         case X264_CSP_I444:
         case X264_CSP_YV24:
@@ -86,6 +87,7 @@
 #endif
 
     CHECKED_MALLOCZERO( frame, sizeof(x264_frame_t) );
+    PREALLOC_INIT
 
     /* allocate frame data (+64 for extra data for me) */
     i_width  = h->mb.i_mb_width*16;
@@ -124,7 +126,7 @@
 
     for( int i = 0; i < h->param.i_bframe + 2; i++ )
         for( int j = 0; j < h->param.i_bframe + 2; j++ )
-            CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
+            PREALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
 
     frame->i_poc = -1;
     frame->i_type = X264_TYPE_AUTO;
@@ -149,13 +151,9 @@
     {
         int chroma_padv = i_padv >> (i_csp == X264_CSP_NV12);
         int chroma_plane_size = (frame->i_stride[1] * (frame->i_lines[1] + 2*chroma_padv));
-        CHECKED_MALLOC( frame->buffer[1], chroma_plane_size * sizeof(pixel) );
-        frame->plane[1] = frame->buffer[1] + frame->i_stride[1] * chroma_padv + PADH;
+        PREALLOC( frame->buffer[1], chroma_plane_size * sizeof(pixel) );
         if( PARAM_INTERLACED )
-        {
-            CHECKED_MALLOC( frame->buffer_fld[1], chroma_plane_size * sizeof(pixel) );
-            frame->plane_fld[1] = frame->buffer_fld[1] + frame->i_stride[1] * chroma_padv + PADH;
-        }
+            PREALLOC( frame->buffer_fld[1], chroma_plane_size * sizeof(pixel) );
     }
 
     /* all 4 luma planes allocated together, since the cacheline split code
@@ -167,24 +165,15 @@
         if( h->param.analyse.i_subpel_refine && b_fdec )
         {
             /* FIXME: Don't allocate both buffers in non-adaptive MBAFF. */
-            CHECKED_MALLOC( frame->buffer[p], 4*luma_plane_size * sizeof(pixel) );
+            PREALLOC( frame->buffer[p], 4*luma_plane_size * sizeof(pixel) );
             if( PARAM_INTERLACED )
-                CHECKED_MALLOC( frame->buffer_fld[p], 4*luma_plane_size * sizeof(pixel) );
-            for( int i = 0; i < 4; i++ )
-            {
-                frame->filtered[p][i] = frame->buffer[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH;
-                frame->filtered_fld[p][i] = frame->buffer_fld[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH;
-            }
-            frame->plane[p] = frame->filtered[p][0];
-            frame->plane_fld[p] = frame->filtered_fld[p][0];
+                PREALLOC( frame->buffer_fld[p], 4*luma_plane_size * sizeof(pixel) );
         }
         else
         {
-            CHECKED_MALLOC( frame->buffer[p], luma_plane_size * sizeof(pixel) );
+            PREALLOC( frame->buffer[p], luma_plane_size * sizeof(pixel) );
             if( PARAM_INTERLACED )
-                CHECKED_MALLOC( frame->buffer_fld[p], luma_plane_size * sizeof(pixel) );
-            frame->filtered[p][0] = frame->plane[p] = frame->buffer[p] + frame->i_stride[p] * i_padv + PADH;
-            frame->filtered_fld[p][0] = frame->plane_fld[p] = frame->buffer_fld[p] + frame->i_stride[p] * i_padv + PADH;
+                PREALLOC( frame->buffer_fld[p], luma_plane_size * sizeof(pixel) );
         }
     }
 
@@ -192,36 +181,30 @@
 
     if( b_fdec ) /* fdec frame */
     {
-        CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
-        CHECKED_MALLOC( frame->mb_partition, i_mb_count * sizeof(uint8_t));
-        CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
-        CHECKED_MALLOC( frame->mv16x16, 2*(i_mb_count+1) * sizeof(int16_t) );
-        M32( frame->mv16x16[0] ) = 0;
-        frame->mv16x16++;
-        CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
+        PREALLOC( frame->mb_type, i_mb_count * sizeof(int8_t) );
+        PREALLOC( frame->mb_partition, i_mb_count * sizeof(uint8_t) );
+        PREALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
+        PREALLOC( frame->mv16x16, 2*(i_mb_count+1) * sizeof(int16_t) );
+        PREALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
         if( h->param.i_bframe )
         {
-            CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
-            CHECKED_MALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) );
+            PREALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
+            PREALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) );
         }
         else
         {
             frame->mv[1]  = NULL;
             frame->ref[1] = NULL;
         }
-        CHECKED_MALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) );
-        CHECKED_MALLOC( frame->f_row_qp, i_lines/16 * sizeof(float) );
-        CHECKED_MALLOC( frame->f_row_qscale, i_lines/16 * sizeof(float) );
+        PREALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) );
+        PREALLOC( frame->f_row_qp, i_lines/16 * sizeof(float) );
+        PREALLOC( frame->f_row_qscale, i_lines/16 * sizeof(float) );
         if( h->param.analyse.i_me_method >= X264_ME_ESA )
-        {
-            CHECKED_MALLOC( frame->buffer[3],
-                            frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) << h->frames.b_have_sub8x8_esa );
-            frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
-        }
+            PREALLOC( frame->buffer[3], frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) << h->frames.b_have_sub8x8_esa );
         if( PARAM_INTERLACED )
-            CHECKED_MALLOC( frame->field, i_mb_count * sizeof(uint8_t) );
+            PREALLOC( frame->field, i_mb_count * sizeof(uint8_t) );
         if( h->param.analyse.b_mb_info )
-            CHECKED_MALLOC( frame->effective_qp, i_mb_count * sizeof(uint8_t) );
+            PREALLOC( frame->effective_qp, i_mb_count * sizeof(uint8_t) );
     }
     else /* fenc frame */
     {
@@ -229,30 +212,85 @@
         {
             int luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
 
-            CHECKED_MALLOC( frame->buffer_lowres[0], 4 * luma_plane_size * sizeof(pixel) );
-            for( int i = 0; i < 4; i++ )
-                frame->lowres[i] = frame->buffer_lowres[0] + (frame->i_stride_lowres * PADV + PADH) + i * luma_plane_size;
+            PREALLOC( frame->buffer_lowres[0], 4 * luma_plane_size * sizeof(pixel) );
 
             for( int j = 0; j <= !!h->param.i_bframe; j++ )
                 for( int i = 0; i <= h->param.i_bframe; i++ )
                 {
-                    CHECKED_MALLOCZERO( frame->lowres_mvs[j][i], 2*h->mb.i_mb_count*sizeof(int16_t) );
-                    CHECKED_MALLOC( frame->lowres_mv_costs[j][i], h->mb.i_mb_count*sizeof(int) );
+                    PREALLOC( frame->lowres_mvs[j][i], 2*h->mb.i_mb_count*sizeof(int16_t) );
+                    PREALLOC( frame->lowres_mv_costs[j][i], h->mb.i_mb_count*sizeof(int) );
                 }
-            CHECKED_MALLOC( frame->i_propagate_cost, (i_mb_count+7) * sizeof(uint16_t) );
+            PREALLOC( frame->i_propagate_cost, (i_mb_count+7) * sizeof(uint16_t) );
             for( int j = 0; j <= h->param.i_bframe+1; j++ )
                 for( int i = 0; i <= h->param.i_bframe+1; i++ )
-                    CHECKED_MALLOC( frame->lowres_costs[j][i], (i_mb_count+3) * sizeof(uint16_t) );
-            frame->i_intra_cost = frame->lowres_costs[0][0];
-            memset( frame->i_intra_cost, -1, (i_mb_count+3) * sizeof(uint16_t) );
+                    PREALLOC( frame->lowres_costs[j][i], (i_mb_count+3) * sizeof(uint16_t) );
+
         }
         if( h->param.rc.i_aq_mode )
         {
-            CHECKED_MALLOC( frame->f_qp_offset, h->mb.i_mb_count * sizeof(float) );
-            CHECKED_MALLOC( frame->f_qp_offset_aq, h->mb.i_mb_count * sizeof(float) );
+            PREALLOC( frame->f_qp_offset, h->mb.i_mb_count * sizeof(float) );
+            PREALLOC( frame->f_qp_offset_aq, h->mb.i_mb_count * sizeof(float) );
             if( h->frames.b_have_lowres )
+                PREALLOC( frame->i_inv_qscale_factor, (h->mb.i_mb_count+3) * sizeof(uint16_t) );
+        }
+    }
+
+    PREALLOC_END( frame->base );
+
+    if( i_csp == X264_CSP_NV12 || i_csp == X264_CSP_NV16 )
+    {
+        int chroma_padv = i_padv >> (i_csp == X264_CSP_NV12);
+        frame->plane[1] = frame->buffer[1] + frame->i_stride[1] * chroma_padv + PADH;
+        if( PARAM_INTERLACED )
+            frame->plane_fld[1] = frame->buffer_fld[1] + frame->i_stride[1] * chroma_padv + PADH;
+    }
+
+    for( int p = 0; p < luma_plane_count; p++ )
+    {
+        int luma_plane_size = align_plane_size( frame->i_stride[p] * (frame->i_lines[p] + 2*i_padv), disalign );
+        if( h->param.analyse.i_subpel_refine && b_fdec )
+        {
+            for( int i = 0; i < 4; i++ )
+            {
+                frame->filtered[p][i] = frame->buffer[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH;
+                frame->filtered_fld[p][i] = frame->buffer_fld[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH;
+            }
+            frame->plane[p] = frame->filtered[p][0];
+            frame->plane_fld[p] = frame->filtered_fld[p][0];
+        }
+        else
+        {
+            frame->filtered[p][0] = frame->plane[p] = frame->buffer[p] + frame->i_stride[p] * i_padv + PADH;
+            frame->filtered_fld[p][0] = frame->plane_fld[p] = frame->buffer_fld[p] + frame->i_stride[p] * i_padv + PADH;
+        }
+    }
+
+    if( b_fdec )
+    {
+        M32( frame->mv16x16[0] ) = 0;

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * frame.c: frame handling
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -53,6 +53,7 @@
         case X264_CSP_NV16:
         case X264_CSP_I422:
         case X264_CSP_YV16:
+        case X264_CSP_V210:
             return X264_CSP_NV16;
         case X264_CSP_I444:
         case X264_CSP_YV24:
@@ -86,6 +87,7 @@
 #endif
 
     CHECKED_MALLOCZERO( frame, sizeof(x264_frame_t) );
+    PREALLOC_INIT
 
     /* allocate frame data (+64 for extra data for me) */
     i_width  = h->mb.i_mb_width*16;
@@ -124,7 +126,7 @@
 
     for( int i = 0; i < h->param.i_bframe + 2; i++ )
         for( int j = 0; j < h->param.i_bframe + 2; j++ )
-            CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
+            PREALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
 
     frame->i_poc = -1;
     frame->i_type = X264_TYPE_AUTO;
@@ -149,13 +151,9 @@
     {
         int chroma_padv = i_padv >> (i_csp == X264_CSP_NV12);
         int chroma_plane_size = (frame->i_stride[1] * (frame->i_lines[1] + 2*chroma_padv));
-        CHECKED_MALLOC( frame->buffer[1], chroma_plane_size * sizeof(pixel) );
-        frame->plane[1] = frame->buffer[1] + frame->i_stride[1] * chroma_padv + PADH;
+        PREALLOC( frame->buffer[1], chroma_plane_size * sizeof(pixel) );
         if( PARAM_INTERLACED )
-        {
-            CHECKED_MALLOC( frame->buffer_fld[1], chroma_plane_size * sizeof(pixel) );
-            frame->plane_fld[1] = frame->buffer_fld[1] + frame->i_stride[1] * chroma_padv + PADH;
-        }
+            PREALLOC( frame->buffer_fld[1], chroma_plane_size * sizeof(pixel) );
     }
 
     /* all 4 luma planes allocated together, since the cacheline split code
@@ -167,24 +165,15 @@
         if( h->param.analyse.i_subpel_refine && b_fdec )
         {
             /* FIXME: Don't allocate both buffers in non-adaptive MBAFF. */
-            CHECKED_MALLOC( frame->buffer[p], 4*luma_plane_size * sizeof(pixel) );
+            PREALLOC( frame->buffer[p], 4*luma_plane_size * sizeof(pixel) );
             if( PARAM_INTERLACED )
-                CHECKED_MALLOC( frame->buffer_fld[p], 4*luma_plane_size * sizeof(pixel) );
-            for( int i = 0; i < 4; i++ )
-            {
-                frame->filtered[p][i] = frame->buffer[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH;
-                frame->filtered_fld[p][i] = frame->buffer_fld[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH;
-            }
-            frame->plane[p] = frame->filtered[p][0];
-            frame->plane_fld[p] = frame->filtered_fld[p][0];
+                PREALLOC( frame->buffer_fld[p], 4*luma_plane_size * sizeof(pixel) );
         }
         else
         {
-            CHECKED_MALLOC( frame->buffer[p], luma_plane_size * sizeof(pixel) );
+            PREALLOC( frame->buffer[p], luma_plane_size * sizeof(pixel) );
             if( PARAM_INTERLACED )
-                CHECKED_MALLOC( frame->buffer_fld[p], luma_plane_size * sizeof(pixel) );
-            frame->filtered[p][0] = frame->plane[p] = frame->buffer[p] + frame->i_stride[p] * i_padv + PADH;
-            frame->filtered_fld[p][0] = frame->plane_fld[p] = frame->buffer_fld[p] + frame->i_stride[p] * i_padv + PADH;
+                PREALLOC( frame->buffer_fld[p], luma_plane_size * sizeof(pixel) );
         }
     }
 
@@ -192,36 +181,30 @@
 
     if( b_fdec ) /* fdec frame */
     {
-        CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
-        CHECKED_MALLOC( frame->mb_partition, i_mb_count * sizeof(uint8_t));
-        CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
-        CHECKED_MALLOC( frame->mv16x16, 2*(i_mb_count+1) * sizeof(int16_t) );
-        M32( frame->mv16x16[0] ) = 0;
-        frame->mv16x16++;
-        CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
+        PREALLOC( frame->mb_type, i_mb_count * sizeof(int8_t) );
+        PREALLOC( frame->mb_partition, i_mb_count * sizeof(uint8_t) );
+        PREALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
+        PREALLOC( frame->mv16x16, 2*(i_mb_count+1) * sizeof(int16_t) );
+        PREALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
         if( h->param.i_bframe )
         {
-            CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
-            CHECKED_MALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) );
+            PREALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
+            PREALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) );
         }
         else
         {
             frame->mv[1]  = NULL;
             frame->ref[1] = NULL;
         }
-        CHECKED_MALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) );
-        CHECKED_MALLOC( frame->f_row_qp, i_lines/16 * sizeof(float) );
-        CHECKED_MALLOC( frame->f_row_qscale, i_lines/16 * sizeof(float) );
+        PREALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) );
+        PREALLOC( frame->f_row_qp, i_lines/16 * sizeof(float) );
+        PREALLOC( frame->f_row_qscale, i_lines/16 * sizeof(float) );
         if( h->param.analyse.i_me_method >= X264_ME_ESA )
-        {
-            CHECKED_MALLOC( frame->buffer[3],
-                            frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) << h->frames.b_have_sub8x8_esa );
-            frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
-        }
+            PREALLOC( frame->buffer[3], frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) << h->frames.b_have_sub8x8_esa );
         if( PARAM_INTERLACED )
-            CHECKED_MALLOC( frame->field, i_mb_count * sizeof(uint8_t) );
+            PREALLOC( frame->field, i_mb_count * sizeof(uint8_t) );
         if( h->param.analyse.b_mb_info )
-            CHECKED_MALLOC( frame->effective_qp, i_mb_count * sizeof(uint8_t) );
+            PREALLOC( frame->effective_qp, i_mb_count * sizeof(uint8_t) );
     }
     else /* fenc frame */
     {
@@ -229,30 +212,85 @@
         {
             int luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
 
-            CHECKED_MALLOC( frame->buffer_lowres[0], 4 * luma_plane_size * sizeof(pixel) );
-            for( int i = 0; i < 4; i++ )
-                frame->lowres[i] = frame->buffer_lowres[0] + (frame->i_stride_lowres * PADV + PADH) + i * luma_plane_size;
+            PREALLOC( frame->buffer_lowres[0], 4 * luma_plane_size * sizeof(pixel) );
 
             for( int j = 0; j <= !!h->param.i_bframe; j++ )
                 for( int i = 0; i <= h->param.i_bframe; i++ )
                 {
-                    CHECKED_MALLOCZERO( frame->lowres_mvs[j][i], 2*h->mb.i_mb_count*sizeof(int16_t) );
-                    CHECKED_MALLOC( frame->lowres_mv_costs[j][i], h->mb.i_mb_count*sizeof(int) );
+                    PREALLOC( frame->lowres_mvs[j][i], 2*h->mb.i_mb_count*sizeof(int16_t) );
+                    PREALLOC( frame->lowres_mv_costs[j][i], h->mb.i_mb_count*sizeof(int) );
                 }
-            CHECKED_MALLOC( frame->i_propagate_cost, (i_mb_count+7) * sizeof(uint16_t) );
+            PREALLOC( frame->i_propagate_cost, (i_mb_count+7) * sizeof(uint16_t) );
             for( int j = 0; j <= h->param.i_bframe+1; j++ )
                 for( int i = 0; i <= h->param.i_bframe+1; i++ )
-                    CHECKED_MALLOC( frame->lowres_costs[j][i], (i_mb_count+3) * sizeof(uint16_t) );
-            frame->i_intra_cost = frame->lowres_costs[0][0];
-            memset( frame->i_intra_cost, -1, (i_mb_count+3) * sizeof(uint16_t) );
+                    PREALLOC( frame->lowres_costs[j][i], (i_mb_count+3) * sizeof(uint16_t) );
+
         }
         if( h->param.rc.i_aq_mode )
         {
-            CHECKED_MALLOC( frame->f_qp_offset, h->mb.i_mb_count * sizeof(float) );
-            CHECKED_MALLOC( frame->f_qp_offset_aq, h->mb.i_mb_count * sizeof(float) );
+            PREALLOC( frame->f_qp_offset, h->mb.i_mb_count * sizeof(float) );
+            PREALLOC( frame->f_qp_offset_aq, h->mb.i_mb_count * sizeof(float) );
             if( h->frames.b_have_lowres )
+                PREALLOC( frame->i_inv_qscale_factor, (h->mb.i_mb_count+3) * sizeof(uint16_t) );
+        }
+    }
+
+    PREALLOC_END( frame->base );
+
+    if( i_csp == X264_CSP_NV12 || i_csp == X264_CSP_NV16 )
+    {
+        int chroma_padv = i_padv >> (i_csp == X264_CSP_NV12);
+        frame->plane[1] = frame->buffer[1] + frame->i_stride[1] * chroma_padv + PADH;
+        if( PARAM_INTERLACED )
+            frame->plane_fld[1] = frame->buffer_fld[1] + frame->i_stride[1] * chroma_padv + PADH;
+    }
+
+    for( int p = 0; p < luma_plane_count; p++ )
+    {
+        int luma_plane_size = align_plane_size( frame->i_stride[p] * (frame->i_lines[p] + 2*i_padv), disalign );
+        if( h->param.analyse.i_subpel_refine && b_fdec )
+        {
+            for( int i = 0; i < 4; i++ )
+            {
+                frame->filtered[p][i] = frame->buffer[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH;
+                frame->filtered_fld[p][i] = frame->buffer_fld[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH;
+            }
+            frame->plane[p] = frame->filtered[p][0];
+            frame->plane_fld[p] = frame->filtered_fld[p][0];
+        }
+        else
+        {
+            frame->filtered[p][0] = frame->plane[p] = frame->buffer[p] + frame->i_stride[p] * i_padv + PADH;
+            frame->filtered_fld[p][0] = frame->plane_fld[p] = frame->buffer_fld[p] + frame->i_stride[p] * i_padv + PADH;
+        }
+    }
+
+    if( b_fdec )
+    {
+        M32( frame->mv16x16[0] ) = 0;
​

x264-snapshot-20130723-2245.tar.bz2/common/frame.h -> x264-snapshot-20140321-2245.tar.bz2/common/frame.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * frame.h: frame handling
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -35,6 +35,7 @@
 typedef struct x264_frame
 {
     /* */
+    uint8_t *base;       /* Base pointer for all malloced data in this frame. */
     int     i_poc;
     int     i_delta_poc[2];
     int     i_type;
​

x264-snapshot-20130723-2245.tar.bz2/common/macroblock.c -> x264-snapshot-20140321-2245.tar.bz2/common/macroblock.c Changed

@@ -1,12 +1,12 @@
 /*****************************************************************************
  * macroblock.c: macroblock common functions
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Jason Garrett-Glaser <darkshikari@gmail.com>
  *          Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
- *          Henrik Gramner <hengar-6@student.ltu.se>
+ *          Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -256,25 +256,26 @@
 
     h->mb.b_interlaced = PARAM_INTERLACED;
 
-    CHECKED_MALLOC( h->mb.qp, i_mb_count * sizeof(int8_t) );
-    CHECKED_MALLOC( h->mb.cbp, i_mb_count * sizeof(int16_t) );
-    CHECKED_MALLOC( h->mb.mb_transform_size, i_mb_count * sizeof(int8_t) );
-    CHECKED_MALLOC( h->mb.slice_table, i_mb_count * sizeof(uint16_t) );
-    memset( h->mb.slice_table, -1, i_mb_count * sizeof(uint16_t) );
+    PREALLOC_INIT
+
+    PREALLOC( h->mb.qp, i_mb_count * sizeof(int8_t) );
+    PREALLOC( h->mb.cbp, i_mb_count * sizeof(int16_t) );
+    PREALLOC( h->mb.mb_transform_size, i_mb_count * sizeof(int8_t) );
+    PREALLOC( h->mb.slice_table, i_mb_count * sizeof(uint16_t) );
 
     /* 0 -> 3 top(4), 4 -> 6 : left(3) */
-    CHECKED_MALLOC( h->mb.intra4x4_pred_mode, i_mb_count * 8 * sizeof(int8_t) );
+    PREALLOC( h->mb.intra4x4_pred_mode, i_mb_count * 8 * sizeof(int8_t) );
 
     /* all coeffs */
-    CHECKED_MALLOC( h->mb.non_zero_count, i_mb_count * 48 * sizeof(uint8_t) );
+    PREALLOC( h->mb.non_zero_count, i_mb_count * 48 * sizeof(uint8_t) );
 
     if( h->param.b_cabac )
     {
-        CHECKED_MALLOC( h->mb.skipbp, i_mb_count * sizeof(int8_t) );
-        CHECKED_MALLOC( h->mb.chroma_pred_mode, i_mb_count * sizeof(int8_t) );
-        CHECKED_MALLOC( h->mb.mvd[0], i_mb_count * sizeof( **h->mb.mvd ) );
+        PREALLOC( h->mb.skipbp, i_mb_count * sizeof(int8_t) );
+        PREALLOC( h->mb.chroma_pred_mode, i_mb_count * sizeof(int8_t) );
+        PREALLOC( h->mb.mvd[0], i_mb_count * sizeof( **h->mb.mvd ) );
         if( h->param.i_bframe )
-            CHECKED_MALLOC( h->mb.mvd[1], i_mb_count * sizeof( **h->mb.mvd ) );
+            PREALLOC( h->mb.mvd[1], i_mb_count * sizeof( **h->mb.mvd ) );
     }
 
     for( int i = 0; i < 2; i++ )
@@ -284,11 +285,7 @@
             i_refs = X264_MIN(X264_REF_MAX, i_refs + 1 + (BIT_DEPTH == 8)); //smart weights add two duplicate frames, one in >8-bit
 
         for( int j = !i; j < i_refs; j++ )
-        {
-            CHECKED_MALLOC( h->mb.mvr[i][j], 2 * (i_mb_count + 1) * sizeof(int16_t) );
-            M32( h->mb.mvr[i][j][0] ) = 0;
-            h->mb.mvr[i][j]++;
-        }
+            PREALLOC( h->mb.mvr[i][j], 2 * (i_mb_count + 1) * sizeof(int16_t) );
     }
 
     if( h->param.analyse.i_weighted_pred )
@@ -325,7 +322,24 @@
         }
 
         for( int i = 0; i < numweightbuf; i++ )
-            CHECKED_MALLOC( h->mb.p_weight_buf[i], luma_plane_size * sizeof(pixel) );
+            PREALLOC( h->mb.p_weight_buf[i], luma_plane_size * sizeof(pixel) );
+    }
+
+    PREALLOC_END( h->mb.base );
+
+    memset( h->mb.slice_table, -1, i_mb_count * sizeof(uint16_t) );
+
+    for( int i = 0; i < 2; i++ )
+    {
+        int i_refs = X264_MIN(X264_REF_MAX, (i ? 1 + !!h->param.i_bframe_pyramid : h->param.i_frame_reference) ) << PARAM_INTERLACED;
+        if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
+            i_refs = X264_MIN(X264_REF_MAX, i_refs + 1 + (BIT_DEPTH == 8)); //smart weights add two duplicate frames, one in >8-bit
+
+        for( int j = !i; j < i_refs; j++ )
+        {
+            M32( h->mb.mvr[i][j][0] ) = 0;
+            h->mb.mvr[i][j]++;
+        }
     }
 
     return 0;
@@ -334,26 +348,7 @@
 }
 void x264_macroblock_cache_free( x264_t *h )
 {
-    for( int i = 0; i < 2; i++ )
-        for( int j = !i; j < X264_REF_MAX*2; j++ )
-            if( h->mb.mvr[i][j] )
-                x264_free( h->mb.mvr[i][j]-1 );
-    for( int i = 0; i < X264_REF_MAX; i++ )
-        x264_free( h->mb.p_weight_buf[i] );
-
-    if( h->param.b_cabac )
-    {
-        x264_free( h->mb.skipbp );
-        x264_free( h->mb.chroma_pred_mode );
-        x264_free( h->mb.mvd[0] );
-        x264_free( h->mb.mvd[1] );
-    }
-    x264_free( h->mb.slice_table );
-    x264_free( h->mb.intra4x4_pred_mode );
-    x264_free( h->mb.non_zero_count );
-    x264_free( h->mb.mb_transform_size );
-    x264_free( h->mb.cbp );
-    x264_free( h->mb.qp );
+    x264_free( h->mb.base );
 }
 
 int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
@@ -394,7 +389,7 @@
             ((me_range*2+24) * sizeof(int16_t) + (me_range+4) * (me_range+1) * 4 * sizeof(mvsad_t));
         scratch_size = X264_MAX3( buf_hpel, buf_ssim, buf_tesa );
     }
-    int buf_mbtree = h->param.rc.b_mb_tree * ((h->mb.i_mb_width+7)&~7) * sizeof(int);
+    int buf_mbtree = h->param.rc.b_mb_tree * ((h->mb.i_mb_width+7)&~7) * sizeof(int16_t);
     scratch_size = X264_MAX( scratch_size, buf_mbtree );
     if( scratch_size )
         CHECKED_MALLOC( h->scratch_buffer, scratch_size );
@@ -402,7 +397,9 @@
         h->scratch_buffer = NULL;
 
     int buf_lookahead_threads = (h->mb.i_mb_height + (4 + 32) * h->param.i_lookahead_threads) * sizeof(int) * 2;
-    CHECKED_MALLOC( h->scratch_buffer2, buf_lookahead_threads );
+    int buf_mbtree2 = buf_mbtree * 12; /* size of the internal propagate_list asm buffer */
+    scratch_size = X264_MAX( buf_lookahead_threads, buf_mbtree2 );
+    CHECKED_MALLOC( h->scratch_buffer2, scratch_size );
 
     return 0;
 fail:
@@ -1258,8 +1255,13 @@
         }
     }
 
-    if( b_mbaff && mb_x == 0 && !(mb_y&1) && mb_y > 0 )
-        h->mb.field_decoding_flag = h->mb.field[h->mb.i_mb_xy - h->mb.i_mb_stride];
+    if( b_mbaff && mb_x == 0 && !(mb_y&1) )
+    {
+        if( h->mb.i_mb_top_xy >= h->sh.i_first_mb )
+            h->mb.field_decoding_flag = h->mb.field[h->mb.i_mb_top_xy];
+        else
+            h->mb.field_decoding_flag = 0;
+    }
 
     /* Check whether skip here would cause decoder to predict interlace mode incorrectly.
      * FIXME: It might be better to change the interlace type rather than forcing a skip to be non-skip. */
@@ -1267,26 +1269,8 @@
     if( b_mbaff )
     {
         if( MB_INTERLACED != h->mb.field_decoding_flag &&
-            h->mb.i_mb_prev_xy >= 0 && IS_SKIP(h->mb.type[h->mb.i_mb_prev_xy]) )
+            (mb_y&1) && IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride]) )
             h->mb.b_allow_skip = 0;
-        if( (mb_y&1) && IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride]) )
-        {
-            if( h->mb.i_neighbour & MB_LEFT )
-            {
-                if( h->mb.field[h->mb.i_mb_xy - 1] != MB_INTERLACED )
-                    h->mb.b_allow_skip = 0;
-            }
-            else if( h->mb.i_neighbour & MB_TOP )
-            {
-                if( h->mb.field[h->mb.i_mb_top_xy] != MB_INTERLACED )
-                    h->mb.b_allow_skip = 0;
-            }
-            else // Frame mb pair is predicted
-            {
-                if( MB_INTERLACED )
-                    h->mb.b_allow_skip = 0;
-            }
-        }
     }
 
     if( h->param.b_cabac )

 
@@ -1,12 +1,12 @@
 /*****************************************************************************
  * macroblock.c: macroblock common functions
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Jason Garrett-Glaser <darkshikari@gmail.com>
  *          Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
- *          Henrik Gramner <hengar-6@student.ltu.se>
+ *          Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -256,25 +256,26 @@
 
     h->mb.b_interlaced = PARAM_INTERLACED;
 
-    CHECKED_MALLOC( h->mb.qp, i_mb_count * sizeof(int8_t) );
-    CHECKED_MALLOC( h->mb.cbp, i_mb_count * sizeof(int16_t) );
-    CHECKED_MALLOC( h->mb.mb_transform_size, i_mb_count * sizeof(int8_t) );
-    CHECKED_MALLOC( h->mb.slice_table, i_mb_count * sizeof(uint16_t) );
-    memset( h->mb.slice_table, -1, i_mb_count * sizeof(uint16_t) );
+    PREALLOC_INIT
+
+    PREALLOC( h->mb.qp, i_mb_count * sizeof(int8_t) );
+    PREALLOC( h->mb.cbp, i_mb_count * sizeof(int16_t) );
+    PREALLOC( h->mb.mb_transform_size, i_mb_count * sizeof(int8_t) );
+    PREALLOC( h->mb.slice_table, i_mb_count * sizeof(uint16_t) );
 
     /* 0 -> 3 top(4), 4 -> 6 : left(3) */
-    CHECKED_MALLOC( h->mb.intra4x4_pred_mode, i_mb_count * 8 * sizeof(int8_t) );
+    PREALLOC( h->mb.intra4x4_pred_mode, i_mb_count * 8 * sizeof(int8_t) );
 
     /* all coeffs */
-    CHECKED_MALLOC( h->mb.non_zero_count, i_mb_count * 48 * sizeof(uint8_t) );
+    PREALLOC( h->mb.non_zero_count, i_mb_count * 48 * sizeof(uint8_t) );
 
     if( h->param.b_cabac )
     {
-        CHECKED_MALLOC( h->mb.skipbp, i_mb_count * sizeof(int8_t) );
-        CHECKED_MALLOC( h->mb.chroma_pred_mode, i_mb_count * sizeof(int8_t) );
-        CHECKED_MALLOC( h->mb.mvd[0], i_mb_count * sizeof( **h->mb.mvd ) );
+        PREALLOC( h->mb.skipbp, i_mb_count * sizeof(int8_t) );
+        PREALLOC( h->mb.chroma_pred_mode, i_mb_count * sizeof(int8_t) );
+        PREALLOC( h->mb.mvd[0], i_mb_count * sizeof( **h->mb.mvd ) );
         if( h->param.i_bframe )
-            CHECKED_MALLOC( h->mb.mvd[1], i_mb_count * sizeof( **h->mb.mvd ) );
+            PREALLOC( h->mb.mvd[1], i_mb_count * sizeof( **h->mb.mvd ) );
     }
 
     for( int i = 0; i < 2; i++ )
@@ -284,11 +285,7 @@
             i_refs = X264_MIN(X264_REF_MAX, i_refs + 1 + (BIT_DEPTH == 8)); //smart weights add two duplicate frames, one in >8-bit
 
         for( int j = !i; j < i_refs; j++ )
-        {
-            CHECKED_MALLOC( h->mb.mvr[i][j], 2 * (i_mb_count + 1) * sizeof(int16_t) );
-            M32( h->mb.mvr[i][j][0] ) = 0;
-            h->mb.mvr[i][j]++;
-        }
+            PREALLOC( h->mb.mvr[i][j], 2 * (i_mb_count + 1) * sizeof(int16_t) );
     }
 
     if( h->param.analyse.i_weighted_pred )
@@ -325,7 +322,24 @@
         }
 
         for( int i = 0; i < numweightbuf; i++ )
-            CHECKED_MALLOC( h->mb.p_weight_buf[i], luma_plane_size * sizeof(pixel) );
+            PREALLOC( h->mb.p_weight_buf[i], luma_plane_size * sizeof(pixel) );
+    }
+
+    PREALLOC_END( h->mb.base );
+
+    memset( h->mb.slice_table, -1, i_mb_count * sizeof(uint16_t) );
+
+    for( int i = 0; i < 2; i++ )
+    {
+        int i_refs = X264_MIN(X264_REF_MAX, (i ? 1 + !!h->param.i_bframe_pyramid : h->param.i_frame_reference) ) << PARAM_INTERLACED;
+        if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
+            i_refs = X264_MIN(X264_REF_MAX, i_refs + 1 + (BIT_DEPTH == 8)); //smart weights add two duplicate frames, one in >8-bit
+
+        for( int j = !i; j < i_refs; j++ )
+        {
+            M32( h->mb.mvr[i][j][0] ) = 0;
+            h->mb.mvr[i][j]++;
+        }
     }
 
     return 0;
@@ -334,26 +348,7 @@
 }
 void x264_macroblock_cache_free( x264_t *h )
 {
-    for( int i = 0; i < 2; i++ )
-        for( int j = !i; j < X264_REF_MAX*2; j++ )
-            if( h->mb.mvr[i][j] )
-                x264_free( h->mb.mvr[i][j]-1 );
-    for( int i = 0; i < X264_REF_MAX; i++ )
-        x264_free( h->mb.p_weight_buf[i] );
-
-    if( h->param.b_cabac )
-    {
-        x264_free( h->mb.skipbp );
-        x264_free( h->mb.chroma_pred_mode );
-        x264_free( h->mb.mvd[0] );
-        x264_free( h->mb.mvd[1] );
-    }
-    x264_free( h->mb.slice_table );
-    x264_free( h->mb.intra4x4_pred_mode );
-    x264_free( h->mb.non_zero_count );
-    x264_free( h->mb.mb_transform_size );
-    x264_free( h->mb.cbp );
-    x264_free( h->mb.qp );
+    x264_free( h->mb.base );
 }
 
 int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
@@ -394,7 +389,7 @@
             ((me_range*2+24) * sizeof(int16_t) + (me_range+4) * (me_range+1) * 4 * sizeof(mvsad_t));
         scratch_size = X264_MAX3( buf_hpel, buf_ssim, buf_tesa );
     }
-    int buf_mbtree = h->param.rc.b_mb_tree * ((h->mb.i_mb_width+7)&~7) * sizeof(int);
+    int buf_mbtree = h->param.rc.b_mb_tree * ((h->mb.i_mb_width+7)&~7) * sizeof(int16_t);
     scratch_size = X264_MAX( scratch_size, buf_mbtree );
     if( scratch_size )
         CHECKED_MALLOC( h->scratch_buffer, scratch_size );
@@ -402,7 +397,9 @@
         h->scratch_buffer = NULL;
 
     int buf_lookahead_threads = (h->mb.i_mb_height + (4 + 32) * h->param.i_lookahead_threads) * sizeof(int) * 2;
-    CHECKED_MALLOC( h->scratch_buffer2, buf_lookahead_threads );
+    int buf_mbtree2 = buf_mbtree * 12; /* size of the internal propagate_list asm buffer */
+    scratch_size = X264_MAX( buf_lookahead_threads, buf_mbtree2 );
+    CHECKED_MALLOC( h->scratch_buffer2, scratch_size );
 
     return 0;
 fail:
@@ -1258,8 +1255,13 @@
         }
     }
 
-    if( b_mbaff && mb_x == 0 && !(mb_y&1) && mb_y > 0 )
-        h->mb.field_decoding_flag = h->mb.field[h->mb.i_mb_xy - h->mb.i_mb_stride];
+    if( b_mbaff && mb_x == 0 && !(mb_y&1) )
+    {
+        if( h->mb.i_mb_top_xy >= h->sh.i_first_mb )
+            h->mb.field_decoding_flag = h->mb.field[h->mb.i_mb_top_xy];
+        else
+            h->mb.field_decoding_flag = 0;
+    }
 
     /* Check whether skip here would cause decoder to predict interlace mode incorrectly.
      * FIXME: It might be better to change the interlace type rather than forcing a skip to be non-skip. */
@@ -1267,26 +1269,8 @@
     if( b_mbaff )
     {
         if( MB_INTERLACED != h->mb.field_decoding_flag &&
-            h->mb.i_mb_prev_xy >= 0 && IS_SKIP(h->mb.type[h->mb.i_mb_prev_xy]) )
+            (mb_y&1) && IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride]) )
             h->mb.b_allow_skip = 0;
-        if( (mb_y&1) && IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride]) )
-        {
-            if( h->mb.i_neighbour & MB_LEFT )
-            {
-                if( h->mb.field[h->mb.i_mb_xy - 1] != MB_INTERLACED )
-                    h->mb.b_allow_skip = 0;
-            }
-            else if( h->mb.i_neighbour & MB_TOP )
-            {
-                if( h->mb.field[h->mb.i_mb_top_xy] != MB_INTERLACED )
-                    h->mb.b_allow_skip = 0;
-            }
-            else // Frame mb pair is predicted
-            {
-                if( MB_INTERLACED )
-                    h->mb.b_allow_skip = 0;
-            }
-        }
     }
 
     if( h->param.b_cabac )
​

x264-snapshot-20130723-2245.tar.bz2/common/macroblock.h -> x264-snapshot-20140321-2245.tar.bz2/common/macroblock.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * macroblock.h: macroblock common functions
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
​

x264-snapshot-20130723-2245.tar.bz2/common/mc.c -> x264-snapshot-20140321-2245.tar.bz2/common/mc.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc.c: motion compensation
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -336,6 +336,34 @@
     }
 }
 
+void x264_plane_copy_deinterleave_v210_c( pixel *dsty, intptr_t i_dsty,
+                                          pixel *dstc, intptr_t i_dstc,
+                                          uint32_t *src, intptr_t i_src, int w, int h )
+{
+    for( int l = 0; l < h; l++ )
+    {
+        pixel *dsty0 = dsty;
+        pixel *dstc0 = dstc;
+        uint32_t *src0 = src;
+
+        for( int n = 0; n < w; n += 3 )
+        {
+            *(dstc0++) = *src0 & 0x03FF;
+            *(dsty0++) = ( *src0 >> 10 ) & 0x03FF;
+            *(dstc0++) = ( *src0 >> 20 ) & 0x03FF;
+            src0++;
+            *(dsty0++) = *src0 & 0x03FF;
+            *(dstc0++) = ( *src0 >> 10 ) & 0x03FF;
+            *(dsty0++) = ( *src0 >> 20 ) & 0x03FF;
+            src0++;
+        }
+
+        dsty += i_dsty;
+        dstc += i_dstc;
+        src  += i_src;
+    }
+}
+
 static void store_interleave_chroma( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height )
 {
     for( int y=0; y<height; y++, dst+=i_dst, srcu+=FDEC_STRIDE, srcv+=FDEC_STRIDE )
@@ -455,20 +483,97 @@
 
 /* Estimate the total amount of influence on future quality that could be had if we
  * were to improve the reference samples used to inter predict any given macroblock. */
-static void mbtree_propagate_cost( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
+static void mbtree_propagate_cost( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
                                    uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len )
 {
-    float fps = *fps_factor / 256.f;
+    float fps = *fps_factor;
     for( int i = 0; i < len; i++ )
     {
-        float intra_cost       = intra_costs[i] * inv_qscales[i];
-        float propagate_amount = propagate_in[i] + intra_cost*fps;
-        float propagate_num    = intra_costs[i] - (inter_costs[i] & LOWRES_COST_MASK);
-        float propagate_denom  = intra_costs[i];
-        dst[i] = (int)(propagate_amount * propagate_num / propagate_denom + 0.5f);
+        int intra_cost = intra_costs[i];
+        int inter_cost = X264_MIN(intra_costs[i], inter_costs[i] & LOWRES_COST_MASK);
+        float propagate_intra  = intra_cost * inv_qscales[i];
+        float propagate_amount = propagate_in[i] + propagate_intra*fps;
+        float propagate_num    = intra_cost - inter_cost;
+        float propagate_denom  = intra_cost;
+        dst[i] = X264_MIN((int)(propagate_amount * propagate_num / propagate_denom + 0.5f), 32767);
     }
 }
 
+static void mbtree_propagate_list( x264_t *h, uint16_t *ref_costs, int16_t (*mvs)[2],
+                                   int16_t *propagate_amount, uint16_t *lowres_costs,
+                                   int bipred_weight, int mb_y, int len, int list )
+{
+    unsigned stride = h->mb.i_mb_stride;
+    unsigned width = h->mb.i_mb_width;
+    unsigned height = h->mb.i_mb_height;
+
+    for( unsigned i = 0; i < len; i++ )
+    {
+#define CLIP_ADD(s,x) (s) = X264_MIN((s)+(x),(1<<15)-1)
+        int lists_used = lowres_costs[i]>>LOWRES_COST_SHIFT;
+
+        if( !(lists_used & (1 << list)) )
+            continue;
+
+        int listamount = propagate_amount[i];
+        /* Apply bipred weighting. */
+        if( lists_used == 3 )
+            listamount = (listamount * bipred_weight + 32) >> 6;
+
+        /* Early termination for simple case of mv0. */
+        if( !M32( mvs[i] ) )
+        {
+            CLIP_ADD( ref_costs[mb_y*stride + i], listamount );
+            continue;
+        }
+
+        int x = mvs[i][0];
+        int y = mvs[i][1];
+        unsigned mbx = (x>>5)+i;
+        unsigned mby = (y>>5)+mb_y;
+        unsigned idx0 = mbx + mby * stride;
+        unsigned idx2 = idx0 + stride;
+        x &= 31;
+        y &= 31;
+        int idx0weight = (32-y)*(32-x);
+        int idx1weight = (32-y)*x;
+        int idx2weight = y*(32-x);
+        int idx3weight = y*x;
+        idx0weight = (idx0weight * listamount + 512) >> 10;
+        idx1weight = (idx1weight * listamount + 512) >> 10;
+        idx2weight = (idx2weight * listamount + 512) >> 10;
+        idx3weight = (idx3weight * listamount + 512) >> 10;
+
+        if( mbx < width-1 && mby < height-1 )
+        {
+            CLIP_ADD( ref_costs[idx0+0], idx0weight );
+            CLIP_ADD( ref_costs[idx0+1], idx1weight );
+            CLIP_ADD( ref_costs[idx2+0], idx2weight );
+            CLIP_ADD( ref_costs[idx2+1], idx3weight );
+        }
+        else
+        {
+            /* Note: this takes advantage of unsigned representation to
+             * catch negative mbx/mby. */
+            if( mby < height )
+            {
+                if( mbx < width )
+                    CLIP_ADD( ref_costs[idx0+0], idx0weight );
+                if( mbx+1 < width )
+                    CLIP_ADD( ref_costs[idx0+1], idx1weight );
+            }
+            if( mby+1 < height )
+            {
+                if( mbx < width )
+                    CLIP_ADD( ref_costs[idx2+0], idx2weight );
+                if( mbx+1 < width )
+                    CLIP_ADD( ref_costs[idx2+1], idx3weight );
+            }
+        }
+    }
+#undef CLIP_ADD
+}
+
 void x264_mc_init( int cpu, x264_mc_functions_t *pf, int cpu_independent )
 {
     pf->mc_luma   = mc_luma;
@@ -507,6 +612,7 @@
     pf->plane_copy_interleave = x264_plane_copy_interleave_c;
     pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_c;
     pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_c;
+    pf->plane_copy_deinterleave_v210 = x264_plane_copy_deinterleave_v210_c;
 
     pf->hpel_filter = hpel_filter;
 
@@ -523,6 +629,7 @@
     pf->integral_init8v = integral_init8v;
 
     pf->mbtree_propagate_cost = mbtree_propagate_cost;
+    pf->mbtree_propagate_list = mbtree_propagate_list;
 
 #if HAVE_MMX
     x264_mc_init_mmx( cpu, pf );
@@ -536,7 +643,10 @@
 #endif
 
     if( cpu_independent )
+    {
         pf->mbtree_propagate_cost = mbtree_propagate_cost;
+        pf->mbtree_propagate_list = mbtree_propagate_list;
+    }
 }
 
 void x264_frame_filter( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc.c: motion compensation
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -336,6 +336,34 @@
     }
 }
 
+void x264_plane_copy_deinterleave_v210_c( pixel *dsty, intptr_t i_dsty,
+                                          pixel *dstc, intptr_t i_dstc,
+                                          uint32_t *src, intptr_t i_src, int w, int h )
+{
+    for( int l = 0; l < h; l++ )
+    {
+        pixel *dsty0 = dsty;
+        pixel *dstc0 = dstc;
+        uint32_t *src0 = src;
+
+        for( int n = 0; n < w; n += 3 )
+        {
+            *(dstc0++) = *src0 & 0x03FF;
+            *(dsty0++) = ( *src0 >> 10 ) & 0x03FF;
+            *(dstc0++) = ( *src0 >> 20 ) & 0x03FF;
+            src0++;
+            *(dsty0++) = *src0 & 0x03FF;
+            *(dstc0++) = ( *src0 >> 10 ) & 0x03FF;
+            *(dsty0++) = ( *src0 >> 20 ) & 0x03FF;
+            src0++;
+        }
+
+        dsty += i_dsty;
+        dstc += i_dstc;
+        src  += i_src;
+    }
+}
+
 static void store_interleave_chroma( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height )
 {
     for( int y=0; y<height; y++, dst+=i_dst, srcu+=FDEC_STRIDE, srcv+=FDEC_STRIDE )
@@ -455,20 +483,97 @@
 
 /* Estimate the total amount of influence on future quality that could be had if we
  * were to improve the reference samples used to inter predict any given macroblock. */
-static void mbtree_propagate_cost( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
+static void mbtree_propagate_cost( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
                                    uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len )
 {
-    float fps = *fps_factor / 256.f;
+    float fps = *fps_factor;
     for( int i = 0; i < len; i++ )
     {
-        float intra_cost       = intra_costs[i] * inv_qscales[i];
-        float propagate_amount = propagate_in[i] + intra_cost*fps;
-        float propagate_num    = intra_costs[i] - (inter_costs[i] & LOWRES_COST_MASK);
-        float propagate_denom  = intra_costs[i];
-        dst[i] = (int)(propagate_amount * propagate_num / propagate_denom + 0.5f);
+        int intra_cost = intra_costs[i];
+        int inter_cost = X264_MIN(intra_costs[i], inter_costs[i] & LOWRES_COST_MASK);
+        float propagate_intra  = intra_cost * inv_qscales[i];
+        float propagate_amount = propagate_in[i] + propagate_intra*fps;
+        float propagate_num    = intra_cost - inter_cost;
+        float propagate_denom  = intra_cost;
+        dst[i] = X264_MIN((int)(propagate_amount * propagate_num / propagate_denom + 0.5f), 32767);
     }
 }
 
+static void mbtree_propagate_list( x264_t *h, uint16_t *ref_costs, int16_t (*mvs)[2],
+                                   int16_t *propagate_amount, uint16_t *lowres_costs,
+                                   int bipred_weight, int mb_y, int len, int list )
+{
+    unsigned stride = h->mb.i_mb_stride;
+    unsigned width = h->mb.i_mb_width;
+    unsigned height = h->mb.i_mb_height;
+
+    for( unsigned i = 0; i < len; i++ )
+    {
+#define CLIP_ADD(s,x) (s) = X264_MIN((s)+(x),(1<<15)-1)
+        int lists_used = lowres_costs[i]>>LOWRES_COST_SHIFT;
+
+        if( !(lists_used & (1 << list)) )
+            continue;
+
+        int listamount = propagate_amount[i];
+        /* Apply bipred weighting. */
+        if( lists_used == 3 )
+            listamount = (listamount * bipred_weight + 32) >> 6;
+
+        /* Early termination for simple case of mv0. */
+        if( !M32( mvs[i] ) )
+        {
+            CLIP_ADD( ref_costs[mb_y*stride + i], listamount );
+            continue;
+        }
+
+        int x = mvs[i][0];
+        int y = mvs[i][1];
+        unsigned mbx = (x>>5)+i;
+        unsigned mby = (y>>5)+mb_y;
+        unsigned idx0 = mbx + mby * stride;
+        unsigned idx2 = idx0 + stride;
+        x &= 31;
+        y &= 31;
+        int idx0weight = (32-y)*(32-x);
+        int idx1weight = (32-y)*x;
+        int idx2weight = y*(32-x);
+        int idx3weight = y*x;
+        idx0weight = (idx0weight * listamount + 512) >> 10;
+        idx1weight = (idx1weight * listamount + 512) >> 10;
+        idx2weight = (idx2weight * listamount + 512) >> 10;
+        idx3weight = (idx3weight * listamount + 512) >> 10;
+
+        if( mbx < width-1 && mby < height-1 )
+        {
+            CLIP_ADD( ref_costs[idx0+0], idx0weight );
+            CLIP_ADD( ref_costs[idx0+1], idx1weight );
+            CLIP_ADD( ref_costs[idx2+0], idx2weight );
+            CLIP_ADD( ref_costs[idx2+1], idx3weight );
+        }
+        else
+        {
+            /* Note: this takes advantage of unsigned representation to
+             * catch negative mbx/mby. */
+            if( mby < height )
+            {
+                if( mbx < width )
+                    CLIP_ADD( ref_costs[idx0+0], idx0weight );
+                if( mbx+1 < width )
+                    CLIP_ADD( ref_costs[idx0+1], idx1weight );
+            }
+            if( mby+1 < height )
+            {
+                if( mbx < width )
+                    CLIP_ADD( ref_costs[idx2+0], idx2weight );
+                if( mbx+1 < width )
+                    CLIP_ADD( ref_costs[idx2+1], idx3weight );
+            }
+        }
+    }
+#undef CLIP_ADD
+}
+
 void x264_mc_init( int cpu, x264_mc_functions_t *pf, int cpu_independent )
 {
     pf->mc_luma   = mc_luma;
@@ -507,6 +612,7 @@
     pf->plane_copy_interleave = x264_plane_copy_interleave_c;
     pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_c;
     pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_c;
+    pf->plane_copy_deinterleave_v210 = x264_plane_copy_deinterleave_v210_c;
 
     pf->hpel_filter = hpel_filter;
 
@@ -523,6 +629,7 @@
     pf->integral_init8v = integral_init8v;
 
     pf->mbtree_propagate_cost = mbtree_propagate_cost;
+    pf->mbtree_propagate_list = mbtree_propagate_list;
 
 #if HAVE_MMX
     x264_mc_init_mmx( cpu, pf );
@@ -536,7 +643,10 @@
 #endif
 
     if( cpu_independent )
+    {
         pf->mbtree_propagate_cost = mbtree_propagate_cost;
+        pf->mbtree_propagate_list = mbtree_propagate_list;
+    }
 }
 
 void x264_frame_filter( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
​

x264-snapshot-20130723-2245.tar.bz2/common/mc.h -> x264-snapshot-20140321-2245.tar.bz2/common/mc.h Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc.h: motion compensation
  *****************************************************************************
- * Copyright (C) 2004-2013 x264 project
+ * Copyright (C) 2004-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *
@@ -93,6 +93,9 @@
                                      pixel *src,  intptr_t i_src, int w, int h );
     void (*plane_copy_deinterleave_rgb)( pixel *dsta, intptr_t i_dsta, pixel *dstb, intptr_t i_dstb,
                                          pixel *dstc, intptr_t i_dstc, pixel *src,  intptr_t i_src, int pw, int w, int h );
+    void (*plane_copy_deinterleave_v210)( pixel *dsty, intptr_t i_dsty,
+                                          pixel *dstc, intptr_t i_dstc,
+                                          uint32_t *src, intptr_t i_src, int w, int h );
     void (*hpel_filter)( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src,
                          intptr_t i_stride, int i_width, int i_height, int16_t *buf );
 
@@ -119,8 +122,12 @@
     weight_fn_t *offsetsub;
     void (*weight_cache)( x264_t *, x264_weight_t * );
 
-    void (*mbtree_propagate_cost)( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
+    void (*mbtree_propagate_cost)( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
                                    uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
+
+    void (*mbtree_propagate_list)( x264_t *h, uint16_t *ref_costs, int16_t (*mvs)[2],
+                                   int16_t *propagate_amount, uint16_t *lowres_costs,
+                                   int bipred_weight, int mb_y, int len, int list );
 } x264_mc_functions_t;
 
 void x264_mc_init( int cpu, x264_mc_functions_t *pf, int cpu_independent );

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc.h: motion compensation
  *****************************************************************************
- * Copyright (C) 2004-2013 x264 project
+ * Copyright (C) 2004-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *
@@ -93,6 +93,9 @@
                                      pixel *src,  intptr_t i_src, int w, int h );
     void (*plane_copy_deinterleave_rgb)( pixel *dsta, intptr_t i_dsta, pixel *dstb, intptr_t i_dstb,
                                          pixel *dstc, intptr_t i_dstc, pixel *src,  intptr_t i_src, int pw, int w, int h );
+    void (*plane_copy_deinterleave_v210)( pixel *dsty, intptr_t i_dsty,
+                                          pixel *dstc, intptr_t i_dstc,
+                                          uint32_t *src, intptr_t i_src, int w, int h );
     void (*hpel_filter)( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src,
                          intptr_t i_stride, int i_width, int i_height, int16_t *buf );
 
@@ -119,8 +122,12 @@
     weight_fn_t *offsetsub;
     void (*weight_cache)( x264_t *, x264_weight_t * );
 
-    void (*mbtree_propagate_cost)( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
+    void (*mbtree_propagate_cost)( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
                                    uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
+
+    void (*mbtree_propagate_list)( x264_t *h, uint16_t *ref_costs, int16_t (*mvs)[2],
+                                   int16_t *propagate_amount, uint16_t *lowres_costs,
+                                   int bipred_weight, int mb_y, int len, int list );
 } x264_mc_functions_t;
 
 void x264_mc_init( int cpu, x264_mc_functions_t *pf, int cpu_independent );
​

x264-snapshot-20130723-2245.tar.bz2/common/mvpred.c -> x264-snapshot-20140321-2245.tar.bz2/common/mvpred.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mvpred.c: motion vector prediction
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
​

x264-snapshot-20130723-2245.tar.bz2/common/opencl.c -> x264-snapshot-20140321-2245.tar.bz2/common/opencl.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * opencl.c: OpenCL initialization and kernel compilation
  *****************************************************************************
- * Copyright (C) 2012-2013 x264 project
+ * Copyright (C) 2012-2014 x264 project
  *
  * Authors: Steve Borho <sborho@multicorewareinc.com>
  *          Anton Mitrofanov <BugMaster@narod.ru>
@@ -28,7 +28,7 @@
 
 #ifdef _WIN32
 #include <windows.h>
-#define ocl_open LoadLibrary( "OpenCL" )
+#define ocl_open LoadLibraryW( L"OpenCL" )
 #define ocl_close FreeLibrary
 #define ocl_address GetProcAddress
 #else
@@ -119,10 +119,10 @@
 
 /* Try to load the cached compiled program binary, verify the device context is
  * still valid before reuse */
-static cl_program x264_opencl_cache_load( x264_t *h, char *dev_name, char *dev_vendor, char *driver_version )
+static cl_program x264_opencl_cache_load( x264_t *h, const char *dev_name, const char *dev_vendor, const char *driver_version )
 {
     /* try to load cached program binary */
-    FILE *fp = fopen( h->param.psz_clbin_file, "rb" );
+    FILE *fp = x264_fopen( h->param.psz_clbin_file, "rb" );
     if( !fp )
         return NULL;
 
@@ -167,9 +167,9 @@
 
 /* Save the compiled program binary to a file for later reuse.  Device context
  * is also saved in the cache file so we do not reuse stale binaries */
-static void x264_opencl_cache_save( x264_t *h, cl_program program, char *dev_name, char *dev_vendor, char *driver_version )
+static void x264_opencl_cache_save( x264_t *h, cl_program program, const char *dev_name, const char *dev_vendor, const char *driver_version )
 {
-    FILE *fp = fopen( h->param.psz_clbin_file, "wb" );
+    FILE *fp = x264_fopen( h->param.psz_clbin_file, "wb" );
     if( !fp )
     {
         x264_log( h, X264_LOG_INFO, "OpenCL: unable to open clbin file for write\n" );
@@ -304,7 +304,7 @@
         goto fail;
     }
 
-    FILE *log_file = fopen( "x264_kernel_build_log.txt", "w" );
+    FILE *log_file = x264_fopen( "x264_kernel_build_log.txt", "w" );
     if( !log_file )
     {
         x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to create file x264_kernel_build_log.txt\n" );
@@ -672,9 +672,9 @@
     int ret = 0;
 
 #ifdef _WIN32
-    hDLL = LoadLibrary( "atiadlxx.dll" );
+    hDLL = LoadLibraryW( L"atiadlxx.dll" );
     if( !hDLL )
-        hDLL = LoadLibrary( "atiadlxy.dll" );
+        hDLL = LoadLibraryW( L"atiadlxy.dll" );
 #else
     hDLL = dlopen( "libatiadlxx.so", RTLD_LAZY|RTLD_GLOBAL );
 #endif
@@ -685,7 +685,7 @@
     ADL_Main_Control_Destroy         = (ADL_MAIN_CONTROL_DESTROY)adl_address(hDLL, "ADL_Main_Control_Destroy");
     ADL_Adapter_NumberOfAdapters_Get = (ADL_ADAPTER_NUMBEROFADAPTERS_GET)adl_address(hDLL, "ADL_Adapter_NumberOfAdapters_Get");
     ADL_PowerXpress_Scheme_Get       = (ADL_POWERXPRESS_SCHEME_GET)adl_address(hDLL, "ADL_PowerXpress_Scheme_Get");
-    if( !ADL_Main_Control_Destroy || !ADL_Main_Control_Destroy || !ADL_Adapter_NumberOfAdapters_Get ||
+    if( !ADL_Main_Control_Create || !ADL_Main_Control_Destroy || !ADL_Adapter_NumberOfAdapters_Get ||
         !ADL_PowerXpress_Scheme_Get )
         goto fail1;

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * opencl.c: OpenCL initialization and kernel compilation
  *****************************************************************************
- * Copyright (C) 2012-2013 x264 project
+ * Copyright (C) 2012-2014 x264 project
  *
  * Authors: Steve Borho <sborho@multicorewareinc.com>
  *          Anton Mitrofanov <BugMaster@narod.ru>
@@ -28,7 +28,7 @@
 
 #ifdef _WIN32
 #include <windows.h>
-#define ocl_open LoadLibrary( "OpenCL" )
+#define ocl_open LoadLibraryW( L"OpenCL" )
 #define ocl_close FreeLibrary
 #define ocl_address GetProcAddress
 #else
@@ -119,10 +119,10 @@
 
 /* Try to load the cached compiled program binary, verify the device context is
  * still valid before reuse */
-static cl_program x264_opencl_cache_load( x264_t *h, char *dev_name, char *dev_vendor, char *driver_version )
+static cl_program x264_opencl_cache_load( x264_t *h, const char *dev_name, const char *dev_vendor, const char *driver_version )
 {
     /* try to load cached program binary */
-    FILE *fp = fopen( h->param.psz_clbin_file, "rb" );
+    FILE *fp = x264_fopen( h->param.psz_clbin_file, "rb" );
     if( !fp )
         return NULL;
 
@@ -167,9 +167,9 @@
 
 /* Save the compiled program binary to a file for later reuse.  Device context
  * is also saved in the cache file so we do not reuse stale binaries */
-static void x264_opencl_cache_save( x264_t *h, cl_program program, char *dev_name, char *dev_vendor, char *driver_version )
+static void x264_opencl_cache_save( x264_t *h, cl_program program, const char *dev_name, const char *dev_vendor, const char *driver_version )
 {
-    FILE *fp = fopen( h->param.psz_clbin_file, "wb" );
+    FILE *fp = x264_fopen( h->param.psz_clbin_file, "wb" );
     if( !fp )
     {
         x264_log( h, X264_LOG_INFO, "OpenCL: unable to open clbin file for write\n" );
@@ -304,7 +304,7 @@
         goto fail;
     }
 
-    FILE *log_file = fopen( "x264_kernel_build_log.txt", "w" );
+    FILE *log_file = x264_fopen( "x264_kernel_build_log.txt", "w" );
     if( !log_file )
     {
         x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to create file x264_kernel_build_log.txt\n" );
@@ -672,9 +672,9 @@
     int ret = 0;
 
 #ifdef _WIN32
-    hDLL = LoadLibrary( "atiadlxx.dll" );
+    hDLL = LoadLibraryW( L"atiadlxx.dll" );
     if( !hDLL )
-        hDLL = LoadLibrary( "atiadlxy.dll" );
+        hDLL = LoadLibraryW( L"atiadlxy.dll" );
 #else
     hDLL = dlopen( "libatiadlxx.so", RTLD_LAZY|RTLD_GLOBAL );
 #endif
@@ -685,7 +685,7 @@
     ADL_Main_Control_Destroy         = (ADL_MAIN_CONTROL_DESTROY)adl_address(hDLL, "ADL_Main_Control_Destroy");
     ADL_Adapter_NumberOfAdapters_Get = (ADL_ADAPTER_NUMBEROFADAPTERS_GET)adl_address(hDLL, "ADL_Adapter_NumberOfAdapters_Get");
     ADL_PowerXpress_Scheme_Get       = (ADL_POWERXPRESS_SCHEME_GET)adl_address(hDLL, "ADL_PowerXpress_Scheme_Get");
-    if( !ADL_Main_Control_Destroy || !ADL_Main_Control_Destroy || !ADL_Adapter_NumberOfAdapters_Get ||
+    if( !ADL_Main_Control_Create || !ADL_Main_Control_Destroy || !ADL_Adapter_NumberOfAdapters_Get ||
         !ADL_PowerXpress_Scheme_Get )
         goto fail1;
 
​

x264-snapshot-20130723-2245.tar.bz2/common/opencl.h -> x264-snapshot-20140321-2245.tar.bz2/common/opencl.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * opencl.h: OpenCL structures and defines
  *****************************************************************************
- * Copyright (C) 2012-2013 x264 project
+ * Copyright (C) 2012-2014 x264 project
  *
  * Authors: Steve Borho <sborho@multicorewareinc.com>
  *          Anton Mitrofanov <BugMaster@narod.ru>
​

x264-snapshot-20130723-2245.tar.bz2/common/osdep.c -> x264-snapshot-20140321-2245.tar.bz2/common/osdep.c Changed

@@ -1,10 +1,11 @@
 /*****************************************************************************
  * osdep.c: platform-specific code
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *          Laurent Aimar <fenrir@via.ecp.fr>
+ *          Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -26,6 +27,11 @@
 
 #include "common.h"
 
+#ifdef _WIN32
+#include <windows.h>
+#include <io.h>
+#endif
+
 #if SYS_WINDOWS
 #include <sys/types.h>
 #include <sys/timeb.h>
@@ -35,8 +41,6 @@
 #include <time.h>
 
 #if PTW32_STATIC_LIB
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
 /* this is a global in pthread-win32 to indicate if it has been initialized or not */
 extern int ptw32_processInitialized;
 #endif
@@ -134,3 +138,73 @@
 {}
 #endif
 #endif
+
+#ifdef _WIN32
+/* Functions for dealing with Unicode on Windows. */
+FILE *x264_fopen( const char *filename, const char *mode )
+{
+    wchar_t filename_utf16[MAX_PATH];
+    wchar_t mode_utf16[16];
+    if( utf8_to_utf16( filename, filename_utf16 ) && utf8_to_utf16( mode, mode_utf16 ) )
+        return _wfopen( filename_utf16, mode_utf16 );
+    return NULL;
+}
+
+int x264_rename( const char *oldname, const char *newname )
+{
+    wchar_t oldname_utf16[MAX_PATH];
+    wchar_t newname_utf16[MAX_PATH];
+    if( utf8_to_utf16( oldname, oldname_utf16 ) && utf8_to_utf16( newname, newname_utf16 ) )
+    {
+        /* POSIX says that rename() removes the destination, but Win32 doesn't. */
+        _wunlink( newname_utf16 );
+        return _wrename( oldname_utf16, newname_utf16 );
+    }
+    return -1;
+}
+
+int x264_stat( const char *path, x264_struct_stat *buf )
+{
+    wchar_t path_utf16[MAX_PATH];
+    if( utf8_to_utf16( path, path_utf16 ) )
+        return _wstati64( path_utf16, buf );
+    return -1;
+}
+
+int x264_vfprintf( FILE *stream, const char *format, va_list arg )
+{
+    HANDLE console = NULL;
+    DWORD mode;
+
+    if( stream == stdout )
+        console = GetStdHandle( STD_OUTPUT_HANDLE );
+    else if( stream == stderr )
+        console = GetStdHandle( STD_ERROR_HANDLE );
+
+    /* Only attempt to convert to UTF-16 when writing to a non-redirected console screen buffer. */
+    if( GetConsoleMode( console, &mode ) )
+    {
+        char buf[4096];
+        wchar_t buf_utf16[4096];
+
+        int length = vsnprintf( buf, sizeof(buf), format, arg );
+        if( length > 0 && length < sizeof(buf) )
+        {
+            /* WriteConsoleW is the most reliable way to output Unicode to a console. */
+            int length_utf16 = MultiByteToWideChar( CP_UTF8, 0, buf, length, buf_utf16, sizeof(buf_utf16)/sizeof(wchar_t) );
+            DWORD written;
+            WriteConsoleW( console, buf_utf16, length_utf16, &written, NULL );
+            return length;
+        }
+    }
+    return vfprintf( stream, format, arg );
+}
+
+int x264_is_pipe( const char *path )
+{
+    wchar_t path_utf16[MAX_PATH];
+    if( utf8_to_utf16( path, path_utf16 ) )
+        return WaitNamedPipeW( path_utf16, 0 );
+    return 0;
+}
+#endif

 
@@ -1,10 +1,11 @@
 /*****************************************************************************
  * osdep.c: platform-specific code
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *          Laurent Aimar <fenrir@via.ecp.fr>
+ *          Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -26,6 +27,11 @@
 
 #include "common.h"
 
+#ifdef _WIN32
+#include <windows.h>
+#include <io.h>
+#endif
+
 #if SYS_WINDOWS
 #include <sys/types.h>
 #include <sys/timeb.h>
@@ -35,8 +41,6 @@
 #include <time.h>
 
 #if PTW32_STATIC_LIB
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
 /* this is a global in pthread-win32 to indicate if it has been initialized or not */
 extern int ptw32_processInitialized;
 #endif
@@ -134,3 +138,73 @@
 {}
 #endif
 #endif
+
+#ifdef _WIN32
+/* Functions for dealing with Unicode on Windows. */
+FILE *x264_fopen( const char *filename, const char *mode )
+{
+    wchar_t filename_utf16[MAX_PATH];
+    wchar_t mode_utf16[16];
+    if( utf8_to_utf16( filename, filename_utf16 ) && utf8_to_utf16( mode, mode_utf16 ) )
+        return _wfopen( filename_utf16, mode_utf16 );
+    return NULL;
+}
+
+int x264_rename( const char *oldname, const char *newname )
+{
+    wchar_t oldname_utf16[MAX_PATH];
+    wchar_t newname_utf16[MAX_PATH];
+    if( utf8_to_utf16( oldname, oldname_utf16 ) && utf8_to_utf16( newname, newname_utf16 ) )
+    {
+        /* POSIX says that rename() removes the destination, but Win32 doesn't. */
+        _wunlink( newname_utf16 );
+        return _wrename( oldname_utf16, newname_utf16 );
+    }
+    return -1;
+}
+
+int x264_stat( const char *path, x264_struct_stat *buf )
+{
+    wchar_t path_utf16[MAX_PATH];
+    if( utf8_to_utf16( path, path_utf16 ) )
+        return _wstati64( path_utf16, buf );
+    return -1;
+}
+
+int x264_vfprintf( FILE *stream, const char *format, va_list arg )
+{
+    HANDLE console = NULL;
+    DWORD mode;
+
+    if( stream == stdout )
+        console = GetStdHandle( STD_OUTPUT_HANDLE );
+    else if( stream == stderr )
+        console = GetStdHandle( STD_ERROR_HANDLE );
+
+    /* Only attempt to convert to UTF-16 when writing to a non-redirected console screen buffer. */
+    if( GetConsoleMode( console, &mode ) )
+    {
+        char buf[4096];
+        wchar_t buf_utf16[4096];
+
+        int length = vsnprintf( buf, sizeof(buf), format, arg );
+        if( length > 0 && length < sizeof(buf) )
+        {
+            /* WriteConsoleW is the most reliable way to output Unicode to a console. */
+            int length_utf16 = MultiByteToWideChar( CP_UTF8, 0, buf, length, buf_utf16, sizeof(buf_utf16)/sizeof(wchar_t) );
+            DWORD written;
+            WriteConsoleW( console, buf_utf16, length_utf16, &written, NULL );
+            return length;
+        }
+    }
+    return vfprintf( stream, format, arg );
+}
+
+int x264_is_pipe( const char *path )
+{
+    wchar_t path_utf16[MAX_PATH];
+    if( utf8_to_utf16( path, path_utf16 ) )
+        return WaitNamedPipeW( path_utf16, 0 );
+    return 0;
+}
+#endif
​

x264-snapshot-20130723-2245.tar.bz2/common/osdep.h -> x264-snapshot-20140321-2245.tar.bz2/common/osdep.h Changed

@@ -1,10 +1,11 @@
 /*****************************************************************************
  * osdep.h: platform-specific code
  *****************************************************************************
- * Copyright (C) 2007-2013 x264 project
+ * Copyright (C) 2007-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
+ *          Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -32,19 +33,21 @@
 #include <stdio.h>
 #include <sys/stat.h>
 #include <inttypes.h>
+#include <stdarg.h>
 
 #include "config.h"
 
+#ifdef __INTEL_COMPILER
+#include <mathimf.h>
+#else
+#include <math.h>
+#endif
+
 #if !HAVE_LOG2F
 #define log2f(x) (logf(x)/0.693147180559945f)
 #define log2(x) (log(x)/0.693147180559945)
 #endif
 
-#ifdef _WIN32
-#include <io.h>    // _setmode()
-#include <fcntl.h> // _O_BINARY
-#endif
-
 #ifdef __ICL
 #define inline __inline
 #define strcasecmp _stricmp
@@ -54,12 +57,6 @@
 #define S_ISREG(x) (((x) & S_IFMT) == S_IFREG)
 #endif
 
-#ifdef __INTEL_COMPILER
-#include <mathimf.h>
-#else
-#include <math.h>
-#endif
-
 #if (defined(__GNUC__) || defined(__INTEL_COMPILER)) && (ARCH_X86 || ARCH_X86_64)
 #define HAVE_X86_INLINE_ASM 1
 #endif
@@ -67,11 +64,29 @@
 #if !defined(isfinite) && (SYS_OPENBSD || SYS_SunOS)
 #define isfinite finite
 #endif
+
 #ifdef _WIN32
-#define rename(src,dst) (unlink(dst), rename(src,dst)) // POSIX says that rename() removes the destination, but win32 doesn't.
 #ifndef strtok_r
 #define strtok_r(str,delim,save) strtok(str,delim)
 #endif
+
+#define utf8_to_utf16( utf8, utf16 )\
+    MultiByteToWideChar( CP_UTF8, MB_ERR_INVALID_CHARS, utf8, -1, utf16, sizeof(utf16)/sizeof(wchar_t) )
+FILE *x264_fopen( const char *filename, const char *mode );
+int x264_rename( const char *oldname, const char *newname );
+#define x264_struct_stat struct _stati64
+#define x264_fstat _fstati64
+int x264_stat( const char *path, x264_struct_stat *buf );
+int x264_vfprintf( FILE *stream, const char *format, va_list arg );
+int x264_is_pipe( const char *path );
+#else
+#define x264_fopen       fopen
+#define x264_rename      rename
+#define x264_struct_stat struct stat
+#define x264_fstat       fstat
+#define x264_stat        stat
+#define x264_vfprintf    vfprintf
+#define x264_is_pipe(x)  0
 #endif
 
 #ifdef __ICL
@@ -111,7 +126,7 @@
 
 #define EXPAND(x) x
 
-#if HAVE_32B_STACK_ALIGNMENT
+#if STACK_ALIGNMENT >= 32
 #define ALIGNED_ARRAY_32( type, name, sub1, ... )\
     ALIGNED_32( type name sub1 __VA_ARGS__ )
 #else
@@ -364,19 +379,19 @@
 #define x264_lower_thread_priority(p)
 #endif
 
-static inline uint8_t x264_is_regular_file( FILE *filehandle )
+static inline int x264_is_regular_file( FILE *filehandle )
 {
-    struct stat file_stat;
-    if( fstat( fileno( filehandle ), &file_stat ) )
-        return -1;
+    x264_struct_stat file_stat;
+    if( x264_fstat( fileno( filehandle ), &file_stat ) )
+        return 1;
     return S_ISREG( file_stat.st_mode );
 }
 
-static inline uint8_t x264_is_regular_file_path( const char *filename )
+static inline int x264_is_regular_file_path( const char *filename )
 {
-    struct stat file_stat;
-    if( stat( filename, &file_stat ) )
-        return -1;
+    x264_struct_stat file_stat;
+    if( x264_stat( filename, &file_stat ) )
+        return !x264_is_pipe( filename );
     return S_ISREG( file_stat.st_mode );
 }

 
@@ -1,10 +1,11 @@
 /*****************************************************************************
  * osdep.h: platform-specific code
  *****************************************************************************
- * Copyright (C) 2007-2013 x264 project
+ * Copyright (C) 2007-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
+ *          Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -32,19 +33,21 @@
 #include <stdio.h>
 #include <sys/stat.h>
 #include <inttypes.h>
+#include <stdarg.h>
 
 #include "config.h"
 
+#ifdef __INTEL_COMPILER
+#include <mathimf.h>
+#else
+#include <math.h>
+#endif
+
 #if !HAVE_LOG2F
 #define log2f(x) (logf(x)/0.693147180559945f)
 #define log2(x) (log(x)/0.693147180559945)
 #endif
 
-#ifdef _WIN32
-#include <io.h>    // _setmode()
-#include <fcntl.h> // _O_BINARY
-#endif
-
 #ifdef __ICL
 #define inline __inline
 #define strcasecmp _stricmp
@@ -54,12 +57,6 @@
 #define S_ISREG(x) (((x) & S_IFMT) == S_IFREG)
 #endif
 
-#ifdef __INTEL_COMPILER
-#include <mathimf.h>
-#else
-#include <math.h>
-#endif
-
 #if (defined(__GNUC__) || defined(__INTEL_COMPILER)) && (ARCH_X86 || ARCH_X86_64)
 #define HAVE_X86_INLINE_ASM 1
 #endif
@@ -67,11 +64,29 @@
 #if !defined(isfinite) && (SYS_OPENBSD || SYS_SunOS)
 #define isfinite finite
 #endif
+
 #ifdef _WIN32
-#define rename(src,dst) (unlink(dst), rename(src,dst)) // POSIX says that rename() removes the destination, but win32 doesn't.
 #ifndef strtok_r
 #define strtok_r(str,delim,save) strtok(str,delim)
 #endif
+
+#define utf8_to_utf16( utf8, utf16 )\
+    MultiByteToWideChar( CP_UTF8, MB_ERR_INVALID_CHARS, utf8, -1, utf16, sizeof(utf16)/sizeof(wchar_t) )
+FILE *x264_fopen( const char *filename, const char *mode );
+int x264_rename( const char *oldname, const char *newname );
+#define x264_struct_stat struct _stati64
+#define x264_fstat _fstati64
+int x264_stat( const char *path, x264_struct_stat *buf );
+int x264_vfprintf( FILE *stream, const char *format, va_list arg );
+int x264_is_pipe( const char *path );
+#else
+#define x264_fopen       fopen
+#define x264_rename      rename
+#define x264_struct_stat struct stat
+#define x264_fstat       fstat
+#define x264_stat        stat
+#define x264_vfprintf    vfprintf
+#define x264_is_pipe(x)  0
 #endif
 
 #ifdef __ICL
@@ -111,7 +126,7 @@
 
 #define EXPAND(x) x
 
-#if HAVE_32B_STACK_ALIGNMENT
+#if STACK_ALIGNMENT >= 32
 #define ALIGNED_ARRAY_32( type, name, sub1, ... )\
     ALIGNED_32( type name sub1 __VA_ARGS__ )
 #else
@@ -364,19 +379,19 @@
 #define x264_lower_thread_priority(p)
 #endif
 
-static inline uint8_t x264_is_regular_file( FILE *filehandle )
+static inline int x264_is_regular_file( FILE *filehandle )
 {
-    struct stat file_stat;
-    if( fstat( fileno( filehandle ), &file_stat ) )
-        return -1;
+    x264_struct_stat file_stat;
+    if( x264_fstat( fileno( filehandle ), &file_stat ) )
+        return 1;
     return S_ISREG( file_stat.st_mode );
 }
 
-static inline uint8_t x264_is_regular_file_path( const char *filename )
+static inline int x264_is_regular_file_path( const char *filename )
 {
-    struct stat file_stat;
-    if( stat( filename, &file_stat ) )
-        return -1;
+    x264_struct_stat file_stat;
+    if( x264_stat( filename, &file_stat ) )
+        return !x264_is_pipe( filename );
     return S_ISREG( file_stat.st_mode );
 }
 
​

x264-snapshot-20130723-2245.tar.bz2/common/pixel.c -> x264-snapshot-20140321-2245.tar.bz2/common/pixel.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * pixel.c: pixel metrics
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -36,6 +36,7 @@
 #endif
 #if ARCH_ARM
 #   include "arm/pixel.h"
+#   include "arm/predict.h"
 #endif
 #if ARCH_UltraSPARC
 #   include "sparc/pixel.h"
@@ -532,6 +533,10 @@
 INTRA_MBCMP_8x8( sad, _mmx2,  _c )
 INTRA_MBCMP_8x8(sa8d, _sse2,  _sse2 )
 #endif
+#if !HIGH_BIT_DEPTH && HAVE_ARMV6
+INTRA_MBCMP_8x8( sad, _neon, _neon )
+INTRA_MBCMP_8x8(sa8d, _neon, _neon )
+#endif
 
 #define INTRA_MBCMP( mbcmp, size, pred1, pred2, pred3, chroma, cpu, cpu2 )\
 void x264_intra_##mbcmp##_x3_##size##chroma##cpu( pixel *fenc, pixel *fdec, int res[3] )\
@@ -555,16 +560,26 @@
 
 #if HAVE_MMX
 #if HIGH_BIT_DEPTH
+#define x264_predict_8x8c_v_mmx2 x264_predict_8x8c_v_mmx
+#define x264_predict_8x16c_v_mmx2 x264_predict_8x16c_v_c
 #define x264_predict_8x8c_v_sse2 x264_predict_8x8c_v_sse
 #define x264_predict_8x16c_v_sse2 x264_predict_8x16c_v_sse
 #define x264_predict_16x16_v_sse2 x264_predict_16x16_v_sse
 INTRA_MBCMP( sad,  4x4,   v, h, dc,  , _mmx2, _c )
-INTRA_MBCMP( sad,  8x8,  dc, h,  v, c, _mmx2, _c )
+INTRA_MBCMP( sad,  8x8,  dc, h,  v, c, _mmx2, _mmx2 )
+INTRA_MBCMP( sad,  8x16, dc, h,  v, c, _mmx2, _mmx2 )
+INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _mmx2, _mmx2 )
 INTRA_MBCMP( sad, 16x16,  v, h, dc,  , _mmx2, _mmx2 )
 INTRA_MBCMP( sad,  8x8,  dc, h,  v, c, _sse2, _sse2 )
+INTRA_MBCMP( sad,  8x16, dc, h,  v, c, _sse2, _sse2 )
+INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _sse2, _sse2 )
 INTRA_MBCMP( sad, 16x16,  v, h, dc,  , _sse2, _sse2 )
 INTRA_MBCMP( sad,  8x8,  dc, h,  v, c, _ssse3, _sse2 )
+INTRA_MBCMP( sad,  8x16, dc, h,  v, c, _ssse3, _sse2 )
+INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _ssse3, _sse2 )
 INTRA_MBCMP( sad, 16x16,  v, h, dc,  , _ssse3, _sse2 )
+INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _sse4, _sse2 )
+INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _avx, _sse2 )
 #else
 #define x264_predict_8x16c_v_mmx2 x264_predict_8x16c_v_mmx
 INTRA_MBCMP( sad,  8x16, dc, h,  v, c, _mmx2, _mmx2 )
@@ -577,6 +592,16 @@
 INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _xop, _mmx2 )
 #endif
 #endif
+#if !HIGH_BIT_DEPTH && HAVE_ARMV6
+INTRA_MBCMP( sad,  4x4,   v, h, dc,  , _neon, _c )
+INTRA_MBCMP(satd,  4x4,   v, h, dc,  , _neon, _c )
+INTRA_MBCMP( sad,  8x8,  dc, h,  v, c, _neon, _neon )
+INTRA_MBCMP(satd,  8x8,  dc, h,  v, c, _neon, _neon )
+INTRA_MBCMP( sad,  8x16, dc, h,  v, c, _neon, _c )
+INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _neon, _c )
+INTRA_MBCMP( sad, 16x16,  v, h, dc,  , _neon, _neon )
+INTRA_MBCMP(satd, 16x16,  v, h, dc,  , _neon, _neon )
+#endif
 
 // No C implementation of intra_satd_x9. See checkasm for its behavior,
 // or see x264_mb_analyse_intra for the entirely different algorithm we
@@ -868,6 +893,8 @@
         pixf->intra_sad_x3_8x8    = x264_intra_sad_x3_8x8_mmx2;
         pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c_mmx2;
         pixf->intra_satd_x3_8x8c  = x264_intra_satd_x3_8x8c_mmx2;
+        pixf->intra_sad_x3_8x16c  = x264_intra_sad_x3_8x16c_mmx2;
+        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_mmx2;
         pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16_mmx2;
         pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_mmx2;
     }
@@ -909,6 +936,8 @@
         pixf->asd8 = x264_pixel_asd8_sse2;
         pixf->intra_sad_x3_8x8    = x264_intra_sad_x3_8x8_sse2;
         pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c_sse2;
+        pixf->intra_sad_x3_8x16c  = x264_intra_sad_x3_8x16c_sse2;
+        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_sse2;
         pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16_sse2;
     }
     if( cpu&X264_CPU_SSE2_IS_FAST )
@@ -948,6 +977,8 @@
         pixf->intra_sad_x3_4x4    = x264_intra_sad_x3_4x4_ssse3;
         pixf->intra_sad_x3_8x8    = x264_intra_sad_x3_8x8_ssse3;
         pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c_ssse3;
+        pixf->intra_sad_x3_8x16c  = x264_intra_sad_x3_8x16c_ssse3;
+        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_ssse3;
         pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16_ssse3;
     }
     if( cpu&X264_CPU_SSE4 )
@@ -963,6 +994,7 @@
 #if ARCH_X86_64
         pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_sse4;
 #endif
+        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_sse4;
     }
     if( cpu&X264_CPU_AVX )
     {
@@ -985,6 +1017,7 @@
 #if ARCH_X86_64
         pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_avx;
 #endif
+        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_avx;
     }
     if( cpu&X264_CPU_XOP )
     {
@@ -1119,12 +1152,6 @@
                pixf->sad_x4[PIXEL_8x16] = x264_pixel_sad_x4_8x16_cache64_sse2;
            }
         }
-
-        if( cpu&X264_CPU_SSE_MISALIGN )
-        {
-            INIT2( sad_x3, _sse2_misalign );
-            INIT2( sad_x4, _sse2_misalign );
-        }
     }
 
     if( cpu&X264_CPU_SSE2_IS_FAST && !(cpu&X264_CPU_CACHELINE_64) )
@@ -1201,9 +1228,8 @@
         }
         else
         {
-            pixf->sad_x4[PIXEL_8x4] = x264_pixel_sad_x4_8x4_ssse3;
-            pixf->sad_x4[PIXEL_8x8] = x264_pixel_sad_x4_8x8_ssse3;
-            pixf->sad_x4[PIXEL_8x16] = x264_pixel_sad_x4_8x16_ssse3;
+            INIT2( sad_x3, _ssse3 );
+            INIT5( sad_x4, _ssse3 );
         }
         if( (cpu&X264_CPU_SLOW_ATOM) || (cpu&X264_CPU_SLOW_SHUFFLE) )
         {
@@ -1237,6 +1263,8 @@
     if( cpu&X264_CPU_AVX )
     {
         INIT2_NAME( sad_aligned, sad, _sse2 ); /* AVX-capable CPUs doesn't benefit from an aligned version */
+        INIT2( sad_x3, _avx );
+        INIT2( sad_x4, _avx );
         INIT8( satd, _avx );
         INIT7( satd_x3, _avx );
         INIT7( satd_x4, _avx );
@@ -1334,8 +1362,21 @@
         pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_neon;
         pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_neon;
         pixf->var[PIXEL_8x8]    = x264_pixel_var_8x8_neon;
+        pixf->var[PIXEL_8x16]   = x264_pixel_var_8x16_neon;
         pixf->var[PIXEL_16x16]  = x264_pixel_var_16x16_neon;
         pixf->var2[PIXEL_8x8]   = x264_pixel_var2_8x8_neon;
+        pixf->var2[PIXEL_8x16]  = x264_pixel_var2_8x16_neon;
+
+        pixf->intra_sad_x3_4x4    = x264_intra_sad_x3_4x4_neon;
+        pixf->intra_satd_x3_4x4   = x264_intra_satd_x3_4x4_neon;
+        pixf->intra_sad_x3_8x8    = x264_intra_sad_x3_8x8_neon;
+        pixf->intra_sa8d_x3_8x8   = x264_intra_sa8d_x3_8x8_neon;
+        pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c_neon;
+        pixf->intra_satd_x3_8x8c  = x264_intra_satd_x3_8x8c_neon;
+        pixf->intra_sad_x3_8x16c  = x264_intra_sad_x3_8x16c_neon;
+        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_neon;
+        pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16_neon;
+        pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_neon;
 
         pixf->ssim_4x4x2_core   = x264_pixel_ssim_4x4x2_core_neon;
         pixf->ssim_end4         = x264_pixel_ssim_end4_neon;

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * pixel.c: pixel metrics
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -36,6 +36,7 @@
 #endif
 #if ARCH_ARM
 #   include "arm/pixel.h"
+#   include "arm/predict.h"
 #endif
 #if ARCH_UltraSPARC
 #   include "sparc/pixel.h"
@@ -532,6 +533,10 @@
 INTRA_MBCMP_8x8( sad, _mmx2,  _c )
 INTRA_MBCMP_8x8(sa8d, _sse2,  _sse2 )
 #endif
+#if !HIGH_BIT_DEPTH && HAVE_ARMV6
+INTRA_MBCMP_8x8( sad, _neon, _neon )
+INTRA_MBCMP_8x8(sa8d, _neon, _neon )
+#endif
 
 #define INTRA_MBCMP( mbcmp, size, pred1, pred2, pred3, chroma, cpu, cpu2 )\
 void x264_intra_##mbcmp##_x3_##size##chroma##cpu( pixel *fenc, pixel *fdec, int res[3] )\
@@ -555,16 +560,26 @@
 
 #if HAVE_MMX
 #if HIGH_BIT_DEPTH
+#define x264_predict_8x8c_v_mmx2 x264_predict_8x8c_v_mmx
+#define x264_predict_8x16c_v_mmx2 x264_predict_8x16c_v_c
 #define x264_predict_8x8c_v_sse2 x264_predict_8x8c_v_sse
 #define x264_predict_8x16c_v_sse2 x264_predict_8x16c_v_sse
 #define x264_predict_16x16_v_sse2 x264_predict_16x16_v_sse
 INTRA_MBCMP( sad,  4x4,   v, h, dc,  , _mmx2, _c )
-INTRA_MBCMP( sad,  8x8,  dc, h,  v, c, _mmx2, _c )
+INTRA_MBCMP( sad,  8x8,  dc, h,  v, c, _mmx2, _mmx2 )
+INTRA_MBCMP( sad,  8x16, dc, h,  v, c, _mmx2, _mmx2 )
+INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _mmx2, _mmx2 )
 INTRA_MBCMP( sad, 16x16,  v, h, dc,  , _mmx2, _mmx2 )
 INTRA_MBCMP( sad,  8x8,  dc, h,  v, c, _sse2, _sse2 )
+INTRA_MBCMP( sad,  8x16, dc, h,  v, c, _sse2, _sse2 )
+INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _sse2, _sse2 )
 INTRA_MBCMP( sad, 16x16,  v, h, dc,  , _sse2, _sse2 )
 INTRA_MBCMP( sad,  8x8,  dc, h,  v, c, _ssse3, _sse2 )
+INTRA_MBCMP( sad,  8x16, dc, h,  v, c, _ssse3, _sse2 )
+INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _ssse3, _sse2 )
 INTRA_MBCMP( sad, 16x16,  v, h, dc,  , _ssse3, _sse2 )
+INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _sse4, _sse2 )
+INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _avx, _sse2 )
 #else
 #define x264_predict_8x16c_v_mmx2 x264_predict_8x16c_v_mmx
 INTRA_MBCMP( sad,  8x16, dc, h,  v, c, _mmx2, _mmx2 )
@@ -577,6 +592,16 @@
 INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _xop, _mmx2 )
 #endif
 #endif
+#if !HIGH_BIT_DEPTH && HAVE_ARMV6
+INTRA_MBCMP( sad,  4x4,   v, h, dc,  , _neon, _c )
+INTRA_MBCMP(satd,  4x4,   v, h, dc,  , _neon, _c )
+INTRA_MBCMP( sad,  8x8,  dc, h,  v, c, _neon, _neon )
+INTRA_MBCMP(satd,  8x8,  dc, h,  v, c, _neon, _neon )
+INTRA_MBCMP( sad,  8x16, dc, h,  v, c, _neon, _c )
+INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _neon, _c )
+INTRA_MBCMP( sad, 16x16,  v, h, dc,  , _neon, _neon )
+INTRA_MBCMP(satd, 16x16,  v, h, dc,  , _neon, _neon )
+#endif
 
 // No C implementation of intra_satd_x9. See checkasm for its behavior,
 // or see x264_mb_analyse_intra for the entirely different algorithm we
@@ -868,6 +893,8 @@
         pixf->intra_sad_x3_8x8    = x264_intra_sad_x3_8x8_mmx2;
         pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c_mmx2;
         pixf->intra_satd_x3_8x8c  = x264_intra_satd_x3_8x8c_mmx2;
+        pixf->intra_sad_x3_8x16c  = x264_intra_sad_x3_8x16c_mmx2;
+        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_mmx2;
         pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16_mmx2;
         pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_mmx2;
     }
@@ -909,6 +936,8 @@
         pixf->asd8 = x264_pixel_asd8_sse2;
         pixf->intra_sad_x3_8x8    = x264_intra_sad_x3_8x8_sse2;
         pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c_sse2;
+        pixf->intra_sad_x3_8x16c  = x264_intra_sad_x3_8x16c_sse2;
+        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_sse2;
         pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16_sse2;
     }
     if( cpu&X264_CPU_SSE2_IS_FAST )
@@ -948,6 +977,8 @@
         pixf->intra_sad_x3_4x4    = x264_intra_sad_x3_4x4_ssse3;
         pixf->intra_sad_x3_8x8    = x264_intra_sad_x3_8x8_ssse3;
         pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c_ssse3;
+        pixf->intra_sad_x3_8x16c  = x264_intra_sad_x3_8x16c_ssse3;
+        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_ssse3;
         pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16_ssse3;
     }
     if( cpu&X264_CPU_SSE4 )
@@ -963,6 +994,7 @@
 #if ARCH_X86_64
         pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_sse4;
 #endif
+        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_sse4;
     }
     if( cpu&X264_CPU_AVX )
     {
@@ -985,6 +1017,7 @@
 #if ARCH_X86_64
         pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_avx;
 #endif
+        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_avx;
     }
     if( cpu&X264_CPU_XOP )
     {
@@ -1119,12 +1152,6 @@
                pixf->sad_x4[PIXEL_8x16] = x264_pixel_sad_x4_8x16_cache64_sse2;
            }
         }
-
-        if( cpu&X264_CPU_SSE_MISALIGN )
-        {
-            INIT2( sad_x3, _sse2_misalign );
-            INIT2( sad_x4, _sse2_misalign );
-        }
     }
 
     if( cpu&X264_CPU_SSE2_IS_FAST && !(cpu&X264_CPU_CACHELINE_64) )
@@ -1201,9 +1228,8 @@
         }
         else
         {
-            pixf->sad_x4[PIXEL_8x4] = x264_pixel_sad_x4_8x4_ssse3;
-            pixf->sad_x4[PIXEL_8x8] = x264_pixel_sad_x4_8x8_ssse3;
-            pixf->sad_x4[PIXEL_8x16] = x264_pixel_sad_x4_8x16_ssse3;
+            INIT2( sad_x3, _ssse3 );
+            INIT5( sad_x4, _ssse3 );
         }
         if( (cpu&X264_CPU_SLOW_ATOM) || (cpu&X264_CPU_SLOW_SHUFFLE) )
         {
@@ -1237,6 +1263,8 @@
     if( cpu&X264_CPU_AVX )
     {
         INIT2_NAME( sad_aligned, sad, _sse2 ); /* AVX-capable CPUs don't benefit from an aligned version */
+        INIT2( sad_x3, _avx );
+        INIT2( sad_x4, _avx );
         INIT8( satd, _avx );
         INIT7( satd_x3, _avx );
         INIT7( satd_x4, _avx );
@@ -1334,8 +1362,21 @@
         pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_neon;
         pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_neon;
         pixf->var[PIXEL_8x8]    = x264_pixel_var_8x8_neon;
+        pixf->var[PIXEL_8x16]   = x264_pixel_var_8x16_neon;
         pixf->var[PIXEL_16x16]  = x264_pixel_var_16x16_neon;
         pixf->var2[PIXEL_8x8]   = x264_pixel_var2_8x8_neon;
+        pixf->var2[PIXEL_8x16]  = x264_pixel_var2_8x16_neon;
+
+        pixf->intra_sad_x3_4x4    = x264_intra_sad_x3_4x4_neon;
+        pixf->intra_satd_x3_4x4   = x264_intra_satd_x3_4x4_neon;
+        pixf->intra_sad_x3_8x8    = x264_intra_sad_x3_8x8_neon;
+        pixf->intra_sa8d_x3_8x8   = x264_intra_sa8d_x3_8x8_neon;
+        pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c_neon;
+        pixf->intra_satd_x3_8x8c  = x264_intra_satd_x3_8x8c_neon;
+        pixf->intra_sad_x3_8x16c  = x264_intra_sad_x3_8x16c_neon;
+        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_neon;
+        pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16_neon;
+        pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_neon;
 
         pixf->ssim_4x4x2_core   = x264_pixel_ssim_4x4x2_core_neon;
         pixf->ssim_end4         = x264_pixel_ssim_end4_neon;
​

x264-snapshot-20130723-2245.tar.bz2/common/pixel.h -> x264-snapshot-20140321-2245.tar.bz2/common/pixel.h Changed

 
@@ -1,11 +1,11 @@
 /*****************************************************************************
  * pixel.h: pixel metrics
  *****************************************************************************
- * Copyright (C) 2004-2013 x264 project
+ * Copyright (C) 2004-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
-            Henrik Gramner <hengar-6@student.ltu.se>
+            Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
​

x264-snapshot-20130723-2245.tar.bz2/common/ppc/dct.c -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/dct.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * dct.c: ppc transform and zigzag
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Guillaume Poirier <gpoirier@mplayerhq.hu>
  *          Eric Petit <eric.petit@lapsus.org>
​

x264-snapshot-20130723-2245.tar.bz2/common/ppc/dct.h -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/dct.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * dct.h: ppc transform and zigzag
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Eric Petit <eric.petit@lapsus.org>
  *          Guillaume Poirier <gpoirier@mplayerhq.hu>
​

x264-snapshot-20130723-2245.tar.bz2/common/ppc/deblock.c -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/deblock.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * deblock.c: ppc deblocking
  *****************************************************************************
- * Copyright (C) 2007-2013 x264 project
+ * Copyright (C) 2007-2014 x264 project
  *
  * Authors: Guillaume Poirier <gpoirier@mplayerhq.hu>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/ppc/mc.c -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/mc.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc.c: ppc motion compensation
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Eric Petit <eric.petit@lapsus.org>
  *          Guillaume Poirier <gpoirier@mplayerhq.hu>
​

x264-snapshot-20130723-2245.tar.bz2/common/ppc/mc.h -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/mc.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc.h: ppc motion compensation
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Eric Petit <eric.petit@lapsus.org>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/ppc/pixel.c -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/pixel.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * pixel.c: ppc pixel metrics
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Eric Petit <eric.petit@lapsus.org>
  *          Guillaume Poirier <gpoirier@mplayerhq.hu>
​

x264-snapshot-20130723-2245.tar.bz2/common/ppc/pixel.h -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/pixel.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * pixel.h: ppc pixel metrics
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Eric Petit <eric.petit@lapsus.org>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/ppc/ppccommon.h -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/ppccommon.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * ppccommon.h: ppc utility macros
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Eric Petit <eric.petit@lapsus.org>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/ppc/predict.c -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/predict.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * predict.c: ppc intra prediction
  *****************************************************************************
- * Copyright (C) 2007-2013 x264 project
+ * Copyright (C) 2007-2014 x264 project
  *
  * Authors: Guillaume Poirier <gpoirier@mplayerhq.hu>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/ppc/predict.h -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/predict.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * predict.h: ppc intra prediction
  *****************************************************************************
- * Copyright (C) 2007-2013 x264 project
+ * Copyright (C) 2007-2014 x264 project
  *
  * Authors: Guillaume Poirier <gpoirier@mplayerhq.hu>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/ppc/quant.c -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/quant.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * quant.c: ppc quantization
  *****************************************************************************
- * Copyright (C) 2007-2013 x264 project
+ * Copyright (C) 2007-2014 x264 project
  *
  * Authors: Guillaume Poirier <gpoirier@mplayerhq.hu>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/ppc/quant.h -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/quant.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * quant.c: ppc quantization
  *****************************************************************************
- * Copyright (C) 2007-2013 x264 project
+ * Copyright (C) 2007-2014 x264 project
  *
  * Authors: Guillaume Poirier <gpoirier@mplayerhq.hu>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/predict.c -> x264-snapshot-20140321-2245.tar.bz2/common/predict.c Changed

 
@@ -1,12 +1,12 @@
 /*****************************************************************************
  * predict.c: intra prediction
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
- *          Henrik Gramner <hengar-6@student.ltu.se>
+ *          Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
​

x264-snapshot-20130723-2245.tar.bz2/common/predict.h -> x264-snapshot-20140321-2245.tar.bz2/common/predict.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * predict.h: intra prediction
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
​

x264-snapshot-20130723-2245.tar.bz2/common/quant.c -> x264-snapshot-20140321-2245.tar.bz2/common/quant.c Changed

 
@@ -1,12 +1,12 @@
 /*****************************************************************************
  * quant.c: quantization and level-run
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
  *          Christian Heine <sennindemokrit@gmx.net>
- *          Henrik Gramner <hengar-6@student.ltu.se>
+ *          Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
​

x264-snapshot-20130723-2245.tar.bz2/common/quant.h -> x264-snapshot-20140321-2245.tar.bz2/common/quant.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * quant.h: quantization and level-run
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
​

x264-snapshot-20130723-2245.tar.bz2/common/rectangle.c -> x264-snapshot-20140321-2245.tar.bz2/common/rectangle.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * rectangle.c: rectangle filling
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Jason Garrett-Glaser <darkshikari@gmail.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/rectangle.h -> x264-snapshot-20140321-2245.tar.bz2/common/rectangle.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * rectangle.h: rectangle filling
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Jason Garrett-Glaser <darkshikari@gmail.com>
  *          Loren Merritt <lorenm@u.washington.edu>
​

x264-snapshot-20130723-2245.tar.bz2/common/set.c -> x264-snapshot-20140321-2245.tar.bz2/common/set.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * set.c: quantization init
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *
@@ -105,9 +105,9 @@
         }\
         else\
         {\
-            CHECKED_MALLOC( h->  quant##w##_mf[i], (QP_MAX+1)*size*sizeof(udctcoef) );\
+            CHECKED_MALLOC( h->  quant##w##_mf[i], (QP_MAX_SPEC+1)*size*sizeof(udctcoef) );\
             CHECKED_MALLOC( h->dequant##w##_mf[i],  6*size*sizeof(int) );\
-            CHECKED_MALLOC( h->unquant##w##_mf[i], (QP_MAX+1)*size*sizeof(int) );\
+            CHECKED_MALLOC( h->unquant##w##_mf[i], (QP_MAX_SPEC+1)*size*sizeof(int) );\
         }\
         for( j = 0; j < i; j++ )\
             if( deadzone[j] == deadzone[i] &&\
@@ -120,8 +120,8 @@
         }\
         else\
         {\
-            CHECKED_MALLOC( h->quant##w##_bias[i], (QP_MAX+1)*size*sizeof(udctcoef) );\
-            CHECKED_MALLOC( h->quant##w##_bias0[i], (QP_MAX+1)*size*sizeof(udctcoef) );\
+            CHECKED_MALLOC( h->quant##w##_bias[i], (QP_MAX_SPEC+1)*size*sizeof(udctcoef) );\
+            CHECKED_MALLOC( h->quant##w##_bias0[i], (QP_MAX_SPEC+1)*size*sizeof(udctcoef) );\
         }\
     }
 
@@ -159,7 +159,7 @@
                      quant8_mf[i_list][q][i] = DIV(def_quant8[q][i] * 16, h->pps->scaling_list[4+i_list][i]);
             }
     }
-    for( int q = 0; q < QP_MAX+1; q++ )
+    for( int q = 0; q <= QP_MAX_SPEC; q++ )
     {
         int j;
         for( int i_list = 0; i_list < 4; i_list++ )

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * set.c: quantization init
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *
@@ -105,9 +105,9 @@
         }\
         else\
         {\
-            CHECKED_MALLOC( h->  quant##w##_mf[i], (QP_MAX+1)*size*sizeof(udctcoef) );\
+            CHECKED_MALLOC( h->  quant##w##_mf[i], (QP_MAX_SPEC+1)*size*sizeof(udctcoef) );\
             CHECKED_MALLOC( h->dequant##w##_mf[i],  6*size*sizeof(int) );\
-            CHECKED_MALLOC( h->unquant##w##_mf[i], (QP_MAX+1)*size*sizeof(int) );\
+            CHECKED_MALLOC( h->unquant##w##_mf[i], (QP_MAX_SPEC+1)*size*sizeof(int) );\
         }\
         for( j = 0; j < i; j++ )\
             if( deadzone[j] == deadzone[i] &&\
@@ -120,8 +120,8 @@
         }\
         else\
         {\
-            CHECKED_MALLOC( h->quant##w##_bias[i], (QP_MAX+1)*size*sizeof(udctcoef) );\
-            CHECKED_MALLOC( h->quant##w##_bias0[i], (QP_MAX+1)*size*sizeof(udctcoef) );\
+            CHECKED_MALLOC( h->quant##w##_bias[i], (QP_MAX_SPEC+1)*size*sizeof(udctcoef) );\
+            CHECKED_MALLOC( h->quant##w##_bias0[i], (QP_MAX_SPEC+1)*size*sizeof(udctcoef) );\
         }\
     }
 
@@ -159,7 +159,7 @@
                      quant8_mf[i_list][q][i] = DIV(def_quant8[q][i] * 16, h->pps->scaling_list[4+i_list][i]);
             }
     }
-    for( int q = 0; q < QP_MAX+1; q++ )
+    for( int q = 0; q <= QP_MAX_SPEC; q++ )
     {
         int j;
         for( int i_list = 0; i_list < 4; i_list++ )
​

x264-snapshot-20130723-2245.tar.bz2/common/set.h -> x264-snapshot-20140321-2245.tar.bz2/common/set.h Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * set.h: quantization init
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -248,6 +248,98 @@
     x264_cqm_jvt8i, x264_cqm_jvt8p
 };
 
+// 1080i25_avci50, 1080p25_avci50
+static const uint8_t x264_cqm_avci50_4ic[16] =
+{
+    16,22,28,40,
+    22,28,40,44,
+    28,40,44,48,
+    40,44,48,60
+};
+
+//  1080i25_avci50,
+static const uint8_t x264_cqm_avci50_1080i_8iy[64] =
+{
+    16,18,19,21,27,33,81,87,
+    18,19,21,24,30,33,81,87,
+    19,21,24,27,30,78,84,90,
+    21,24,27,30,33,78,84,90,
+    24,27,30,33,78,81,84,90,
+    24,27,30,33,78,81,84,93,
+    27,30,33,78,78,81,87,93,
+    30,33,33,78,81,84,87,96
+};
+
+//  1080p25_avci50, 720p25_avci50, 720p50_avci50
+static const uint8_t x264_cqm_avci50_p_8iy[64] =
+{
+    16,18,19,21,24,27,30,33,
+    18,19,21,24,27,30,33,78,
+    19,21,24,27,30,33,78,81,
+    21,24,27,30,33,78,81,84,
+    24,27,30,33,78,81,84,87,
+    27,30,33,78,81,84,87,90,
+    30,33,78,81,84,87,90,93,
+    33,78,81,84,87,90,93,96
+};
+
+//  1080i25_avci100, 1080p25_avci100
+static const uint8_t x264_cqm_avci100_1080_4ic[16] =
+{
+    16,20,26,32,
+    20,26,32,38,
+    26,32,38,44,
+    32,38,44,50
+};
+
+// 720p25_avci100, 720p50_avci100
+static const uint8_t x264_cqm_avci100_720p_4ic[16] =
+{
+    16,21,27,34,
+    21,27,34,41,
+    27,34,41,46,
+    34,41,46,54
+};
+
+//  1080i25_avci100,
+static const uint8_t x264_cqm_avci100_1080i_8iy[64] =
+{
+    16,19,20,23,24,26,32,42,
+    18,19,22,24,26,32,36,42,
+    18,20,23,24,26,32,36,63,
+    19,20,23,26,32,36,42,63,
+    20,22,24,26,32,36,59,63,
+    22,23,24,26,32,36,59,68,
+    22,23,24,26,32,42,59,68,
+    22,23,24,26,36,42,59,72
+};
+
+// 1080p25_avci100,
+static const uint8_t x264_cqm_avci100_1080p_8iy[64] =
+{
+    16,18,19,20,22,23,24,26,
+    18,19,20,22,23,24,26,32,
+    19,20,22,23,24,26,32,36,
+    20,22,23,24,26,32,36,42,
+    22,23,24,26,32,36,42,59,
+    23,24,26,32,36,42,59,63,
+    24,26,32,36,42,59,63,68,
+    26,32,36,42,59,63,68,72
+};
+
+// 720p25_avci100, 720p50_avci100
+static const uint8_t x264_cqm_avci100_720p_8iy[64] =
+{
+    16,18,19,21,22,24,26,32,
+    18,19,19,21,22,24,26,32,
+    19,19,21,22,22,24,26,32,
+    21,21,22,22,23,24,26,34,
+    22,22,22,23,24,25,26,34,
+    24,24,24,24,25,26,34,36,
+    26,26,26,26,26,34,36,38,
+    32,32,32,34,34,36,38,42
+};
+
 int  x264_cqm_init( x264_t *h );
 void x264_cqm_delete( x264_t *h );
 int  x264_cqm_parse_file( x264_t *h, const char *filename );

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * set.h: quantization init
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -248,6 +248,98 @@
     x264_cqm_jvt8i, x264_cqm_jvt8p
 };
 
+// 1080i25_avci50, 1080p25_avci50
+static const uint8_t x264_cqm_avci50_4ic[16] =
+{
+    16,22,28,40,
+    22,28,40,44,
+    28,40,44,48,
+    40,44,48,60
+};
+
+//  1080i25_avci50,
+static const uint8_t x264_cqm_avci50_1080i_8iy[64] =
+{
+    16,18,19,21,27,33,81,87,
+    18,19,21,24,30,33,81,87,
+    19,21,24,27,30,78,84,90,
+    21,24,27,30,33,78,84,90,
+    24,27,30,33,78,81,84,90,
+    24,27,30,33,78,81,84,93,
+    27,30,33,78,78,81,87,93,
+    30,33,33,78,81,84,87,96
+};
+
+//  1080p25_avci50, 720p25_avci50, 720p50_avci50
+static const uint8_t x264_cqm_avci50_p_8iy[64] =
+{
+    16,18,19,21,24,27,30,33,
+    18,19,21,24,27,30,33,78,
+    19,21,24,27,30,33,78,81,
+    21,24,27,30,33,78,81,84,
+    24,27,30,33,78,81,84,87,
+    27,30,33,78,81,84,87,90,
+    30,33,78,81,84,87,90,93,
+    33,78,81,84,87,90,93,96
+};
+
+//  1080i25_avci100, 1080p25_avci100
+static const uint8_t x264_cqm_avci100_1080_4ic[16] =
+{
+    16,20,26,32,
+    20,26,32,38,
+    26,32,38,44,
+    32,38,44,50
+};
+
+// 720p25_avci100, 720p50_avci100
+static const uint8_t x264_cqm_avci100_720p_4ic[16] =
+{
+    16,21,27,34,
+    21,27,34,41,
+    27,34,41,46,
+    34,41,46,54
+};
+
+//  1080i25_avci100,
+static const uint8_t x264_cqm_avci100_1080i_8iy[64] =
+{
+    16,19,20,23,24,26,32,42,
+    18,19,22,24,26,32,36,42,
+    18,20,23,24,26,32,36,63,
+    19,20,23,26,32,36,42,63,
+    20,22,24,26,32,36,59,63,
+    22,23,24,26,32,36,59,68,
+    22,23,24,26,32,42,59,68,
+    22,23,24,26,36,42,59,72
+};
+
+// 1080p25_avci100,
+static const uint8_t x264_cqm_avci100_1080p_8iy[64] =
+{
+    16,18,19,20,22,23,24,26,
+    18,19,20,22,23,24,26,32,
+    19,20,22,23,24,26,32,36,
+    20,22,23,24,26,32,36,42,
+    22,23,24,26,32,36,42,59,
+    23,24,26,32,36,42,59,63,
+    24,26,32,36,42,59,63,68,
+    26,32,36,42,59,63,68,72
+};
+
+// 720p25_avci100, 720p50_avci100
+static const uint8_t x264_cqm_avci100_720p_8iy[64] =
+{
+    16,18,19,21,22,24,26,32,
+    18,19,19,21,22,24,26,32,
+    19,19,21,22,22,24,26,32,
+    21,21,22,22,23,24,26,34,
+    22,22,22,23,24,25,26,34,
+    24,24,24,24,25,26,34,36,
+    26,26,26,26,26,34,36,38,
+    32,32,32,34,34,36,38,42
+};
+
 int  x264_cqm_init( x264_t *h );
 void x264_cqm_delete( x264_t *h );
 int  x264_cqm_parse_file( x264_t *h, const char *filename );
​

x264-snapshot-20130723-2245.tar.bz2/common/sparc/pixel.asm -> x264-snapshot-20140321-2245.tar.bz2/common/sparc/pixel.asm Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * pixel.asm: sparc pixel metrics
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: Phil Jensen <philj@csufresno.edu>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/sparc/pixel.h -> x264-snapshot-20140321-2245.tar.bz2/common/sparc/pixel.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * pixel.h: sparc pixel metrics
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: Phil Jensen <philj@csufresno.edu>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/threadpool.c -> x264-snapshot-20140321-2245.tar.bz2/common/threadpool.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * threadpool.c: thread pooling
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/threadpool.h -> x264-snapshot-20140321-2245.tar.bz2/common/threadpool.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * threadpool.h: thread pooling
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/vlc.c -> x264-snapshot-20140321-2245.tar.bz2/common/vlc.c Changed

 
@@ -1,11 +1,11 @@
 /*****************************************************************************
  * vlc.c : vlc tables
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
- *          Henrik Gramner <hengar-6@student.ltu.se>
+ *          Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
​

x264-snapshot-20130723-2245.tar.bz2/common/win32thread.c -> x264-snapshot-20140321-2245.tar.bz2/common/win32thread.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * win32thread.c: windows threading
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *          Pegasys Inc. <http://www.pegasys-inc.com>
@@ -261,7 +261,7 @@
 int x264_win32_threading_init( void )
 {
     /* find function pointers to API functions, if they exist */
-    HANDLE kernel_dll = GetModuleHandle( TEXT( "kernel32.dll" ) );
+    HANDLE kernel_dll = GetModuleHandleW( L"kernel32.dll" );
     thread_control.cond_init = (void*)GetProcAddress( kernel_dll, "InitializeConditionVariable" );
     if( thread_control.cond_init )
     {
@@ -288,7 +288,7 @@
      * On platforms that support processor grouping, use GetThreadGroupAffinity to get the current thread's affinity instead. */
 #if ARCH_X86_64
     /* find function pointers to API functions specific to x86_64 platforms, if they exist */
-    HANDLE kernel_dll = GetModuleHandle( TEXT( "kernel32.dll" ) );
+    HANDLE kernel_dll = GetModuleHandleW( L"kernel32.dll" );
     BOOL (*get_thread_affinity)( HANDLE thread, x264_group_affinity_t *group_affinity ) = (void*)GetProcAddress( kernel_dll, "GetThreadGroupAffinity" );
     if( get_thread_affinity )
     {

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * win32thread.c: windows threading
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *          Pegasys Inc. <http://www.pegasys-inc.com>
@@ -261,7 +261,7 @@
 int x264_win32_threading_init( void )
 {
     /* find function pointers to API functions, if they exist */
-    HANDLE kernel_dll = GetModuleHandle( TEXT( "kernel32.dll" ) );
+    HANDLE kernel_dll = GetModuleHandleW( L"kernel32.dll" );
     thread_control.cond_init = (void*)GetProcAddress( kernel_dll, "InitializeConditionVariable" );
     if( thread_control.cond_init )
     {
@@ -288,7 +288,7 @@
      * On platforms that support processor grouping, use GetThreadGroupAffinity to get the current thread's affinity instead. */
 #if ARCH_X86_64
     /* find function pointers to API functions specific to x86_64 platforms, if they exist */
-    HANDLE kernel_dll = GetModuleHandle( TEXT( "kernel32.dll" ) );
+    HANDLE kernel_dll = GetModuleHandleW( L"kernel32.dll" );
     BOOL (*get_thread_affinity)( HANDLE thread, x264_group_affinity_t *group_affinity ) = (void*)GetProcAddress( kernel_dll, "GetThreadGroupAffinity" );
     if( get_thread_affinity )
     {
​

x264-snapshot-20130723-2245.tar.bz2/common/win32thread.h -> x264-snapshot-20140321-2245.tar.bz2/common/win32thread.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * win32thread.h: windows threading
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
@@ -26,7 +26,6 @@
 #ifndef X264_WIN32THREAD_H
 #define X264_WIN32THREAD_H
 
-#define WIN32_LEAN_AND_MEAN
 #include <windows.h>
 /* the following macro is used within x264 */
 #undef ERROR
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/bitstream-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/bitstream-a.asm Changed

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* bitstream-a.asm: x86 bitstream functions
 ;*****************************************************************************
-;* Copyright (C) 2010-2013 x264 project
+;* Copyright (C) 2010-2014 x264 project
 ;*
 ;* Authors: Jason Garrett-Glaser <darkshikari@gmail.com>
 ;*          Henrik Gramner <henrik@gramner.com>
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/cabac-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/cabac-a.asm Changed

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* cabac-a.asm: x86 cabac
 ;*****************************************************************************
-;* Copyright (C) 2008-2013 x264 project
+;* Copyright (C) 2008-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Jason Garrett-Glaser <darkshikari@gmail.com>
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/const-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/const-a.asm Changed

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* const-a.asm: x86 global constants
 ;*****************************************************************************
-;* Copyright (C) 2010-2013 x264 project
+;* Copyright (C) 2010-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Jason Garrett-Glaser <darkshikari@gmail.com>
@@ -36,6 +36,7 @@
 const pw_512,      times 16 dw 512
 const pw_00ff,     times 16 dw 0x00ff
 const pw_pixel_max,times 16 dw ((1 << BIT_DEPTH)-1)
+const pw_0to15,    dw 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
 const pd_1,        times 8 dd 1
 const deinterleave_shufd, dd 0,4,1,5,2,6,3,7
 const pb_unpackbd1, times 2 db 0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/cpu-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/cpu-a.asm Changed

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* cpu-a.asm: x86 cpu utilities
 ;*****************************************************************************
-;* Copyright (C) 2003-2013 x264 project
+;* Copyright (C) 2003-2014 x264 project
 ;*
 ;* Authors: Laurent Aimar <fenrir@via.ecp.fr>
 ;*          Loren Merritt <lorenm@u.washington.edu>
@@ -146,17 +146,6 @@
     sfence
     ret
 
-;-----------------------------------------------------------------------------
-; void cpu_mask_misalign_sse( void )
-;-----------------------------------------------------------------------------
-cglobal cpu_mask_misalign_sse
-    sub   rsp, 4
-    stmxcsr [rsp]
-    or dword [rsp], 1<<17
-    ldmxcsr [rsp]
-    add   rsp, 4
-    ret
-
 cextern intel_cpu_indicator_init
 
 ;-----------------------------------------------------------------------------
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/dct-32.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/dct-32.asm Changed

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* dct-32.asm: x86_32 transform and zigzag
 ;*****************************************************************************
-;* Copyright (C) 2003-2013 x264 project
+;* Copyright (C) 2003-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Holger Lubitz <holger@lubitz.org>
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/dct-64.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/dct-64.asm Changed

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* dct-64.asm: x86_64 transform and zigzag
 ;*****************************************************************************
-;* Copyright (C) 2003-2013 x264 project
+;* Copyright (C) 2003-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Holger Lubitz <holger@lubitz.org>
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/dct-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/dct-a.asm Changed

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* dct-a.asm: x86 transform and zigzag
 ;*****************************************************************************
-;* Copyright (C) 2003-2013 x264 project
+;* Copyright (C) 2003-2014 x264 project
 ;*
 ;* Authors: Holger Lubitz <holger@lubitz.org>
 ;*          Loren Merritt <lorenm@u.washington.edu>
@@ -675,7 +675,7 @@
     mova        m6, [pw_pixel_max]
     mova        m7, [pd_32]
     pxor        m5, m5
-.loop
+.loop:
     mova        m3, [r1]
     paddd       m3, m7
     psrad       m3, 6         ; dc0   0 dc1   0 dc2   0 dc3   0
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/dct.h -> x264-snapshot-20140321-2245.tar.bz2/common/x86/dct.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * dct.h: x86 transform and zigzag
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/deblock-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/deblock-a.asm Changed

@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* deblock-a.asm: x86 deblocking
 ;*****************************************************************************
-;* Copyright (C) 2005-2013 x264 project
+;* Copyright (C) 2005-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Jason Garrett-Glaser <darkshikari@gmail.com>
@@ -621,7 +621,7 @@
     mov     r6, 2
     mova    m0, [pw_2]
     LOAD_AB aa, bb, r2d, r3d
-.loop
+.loop:
     mova    p2, [r4+r1]
     mova    p1, [r4+2*r1]
     mova    p0, [r4+r5]
@@ -671,7 +671,7 @@
     add     r4, r0     ; pix+4*stride
     mov     r6, 2
     mova    m0, [pw_2]
-.loop
+.loop:
     movu    q3, [r0-8]
     movu    q2, [r0+r1-8]
     movu    q1, [r0+r1*2-8]
@@ -804,35 +804,6 @@
 %define PASS8ROWS(base, base3, stride, stride3, offset) \
     PASS8ROWS(base+offset, base3+offset, stride, stride3)
 
-; in: 8 rows of 4 bytes in %4..%11
-; out: 4 rows of 8 bytes in m0..m3
-%macro TRANSPOSE4x8_LOAD 11
-    movh       m0, %4
-    movh       m2, %5
-    movh       m1, %6
-    movh       m3, %7
-    punpckl%1  m0, m2
-    punpckl%1  m1, m3
-    mova       m2, m0
-    punpckl%2  m0, m1
-    punpckh%2  m2, m1
-
-    movh       m4, %8
-    movh       m6, %9
-    movh       m5, %10
-    movh       m7, %11
-    punpckl%1  m4, m6
-    punpckl%1  m5, m7
-    mova       m6, m4
-    punpckl%2  m4, m5
-    punpckh%2  m6, m5
-
-    punpckh%3  m1, m0, m4
-    punpckh%3  m3, m2, m6
-    punpckl%3  m0, m4
-    punpckl%3  m2, m6
-%endmacro
-
 ; in: 4 rows of 8 bytes in m0..m3
 ; out: 8 rows of 4 bytes in %1..%8
 %macro TRANSPOSE8x4B_STORE 8
@@ -844,24 +815,24 @@
     punpcklbw  m2, m3
     punpcklwd  m1, m0, m2
     punpckhwd  m0, m2
-    movh       %1, m1
+    movd       %1, m1
     punpckhdq  m1, m1
-    movh       %2, m1
-    movh       %3, m0
+    movd       %2, m1
+    movd       %3, m0
     punpckhdq  m0, m0
-    movh       %4, m0
+    movd       %4, m0
 
     punpckhdq  m3, m3
     punpcklbw  m4, m5
     punpcklbw  m6, m3
     punpcklwd  m5, m4, m6
     punpckhwd  m4, m6
-    movh       %5, m5
+    movd       %5, m5
     punpckhdq  m5, m5
-    movh       %6, m5
-    movh       %7, m4
+    movd       %6, m5
+    movd       %7, m4
     punpckhdq  m4, m4
-    movh       %8, m4
+    movd       %8, m4
 %endmacro
 
 ; in: 8 rows of 4 bytes in %9..%10
@@ -877,34 +848,94 @@
     pextrd %8, %10, 3
 %endmacro
 
-%macro TRANSPOSE4x8B_LOAD 8
-    TRANSPOSE4x8_LOAD bw, wd, dq, %1, %2, %3, %4, %5, %6, %7, %8
-%endmacro
-
-%macro TRANSPOSE4x8W_LOAD 8
-%if mmsize==16
-    TRANSPOSE4x8_LOAD wd, dq, qdq, %1, %2, %3, %4, %5, %6, %7, %8
-%else
+; in: 4 rows of 4 words in %1..%4
+; out: 4 rows of 4 word in m0..m3
+; clobbers: m4
+%macro TRANSPOSE4x4W_LOAD 4-8
+%if mmsize==8
     SWAP  1, 4, 2, 3
-    mova  m0, [t5]
-    mova  m1, [t5+r1]
-    mova  m2, [t5+r1*2]
-    mova  m3, [t5+t6]
+    movq  m0, %1
+    movq  m1, %2
+    movq  m2, %3
+    movq  m3, %4
     TRANSPOSE4x4W 0, 1, 2, 3, 4
+%else
+    movq       m0, %1
+    movq       m2, %2
+    movq       m1, %3
+    movq       m3, %4
+    punpcklwd  m0, m2
+    punpcklwd  m1, m3
+    mova       m2, m0
+    punpckldq  m0, m1
+    punpckhdq  m2, m1
+    movhlps    m1, m0
+    movhlps    m3, m2
 %endif
 %endmacro
 
-%macro TRANSPOSE8x2W_STORE 8
+; in: 2 rows of 4 words in m1..m2
+; out: 4 rows of 2 words in %1..%4
+; clobbers: m0, m1
+%macro TRANSPOSE4x2W_STORE 4-8
+%if mmsize==8
     punpckhwd  m0, m1, m2
     punpcklwd  m1, m2
-%if mmsize==8
+%else
+    punpcklwd  m1, m2
+    movhlps    m0, m1
+%endif
     movd       %3, m0
     movd       %1, m1
     psrlq      m1, 32
     psrlq      m0, 32
     movd       %2, m1
     movd       %4, m0
+%endmacro
+
+; in: 4/8 rows of 4 words in %1..%8
+; out: 4 rows of 4/8 word in m0..m3
+; clobbers: m4, m5, m6, m7
+%macro TRANSPOSE4x8W_LOAD 8
+%if mmsize==8
+    TRANSPOSE4x4W_LOAD %1, %2, %3, %4
+%else
+    movq       m0, %1
+    movq       m2, %2
+    movq       m1, %3
+    movq       m3, %4
+    punpcklwd  m0, m2
+    punpcklwd  m1, m3
+    mova       m2, m0
+    punpckldq  m0, m1
+    punpckhdq  m2, m1
+
+    movq       m4, %5
+    movq       m6, %6
+    movq       m5, %7
+    movq       m7, %8
+    punpcklwd  m4, m6
+    punpcklwd  m5, m7
+    mova       m6, m4
+    punpckldq  m4, m5
+    punpckhdq  m6, m5
+
+    punpckhqdq m1, m0, m4
+    punpckhqdq m3, m2, m6
+    punpcklqdq m0, m4
+    punpcklqdq m2, m6
+%endif
+%endmacro
+
+; in: 2 rows of 4/8 words in m1..m2
+; out: 4/8 rows of 2 words in %1..%8
+; clobbers: m0, m1
+%macro TRANSPOSE8x2W_STORE 8
+%if mmsize==8
+    TRANSPOSE4x2W_STORE %1, %2, %3, %4
 %else

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* deblock-a.asm: x86 deblocking
 ;*****************************************************************************
-;* Copyright (C) 2005-2013 x264 project
+;* Copyright (C) 2005-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Jason Garrett-Glaser <darkshikari@gmail.com>
@@ -621,7 +621,7 @@
     mov     r6, 2
     mova    m0, [pw_2]
     LOAD_AB aa, bb, r2d, r3d
-.loop
+.loop:
     mova    p2, [r4+r1]
     mova    p1, [r4+2*r1]
     mova    p0, [r4+r5]
@@ -671,7 +671,7 @@
     add     r4, r0     ; pix+4*stride
     mov     r6, 2
     mova    m0, [pw_2]
-.loop
+.loop:
     movu    q3, [r0-8]
     movu    q2, [r0+r1-8]
     movu    q1, [r0+r1*2-8]
@@ -804,35 +804,6 @@
 %define PASS8ROWS(base, base3, stride, stride3, offset) \
     PASS8ROWS(base+offset, base3+offset, stride, stride3)
 
-; in: 8 rows of 4 bytes in %4..%11
-; out: 4 rows of 8 bytes in m0..m3
-%macro TRANSPOSE4x8_LOAD 11
-    movh       m0, %4
-    movh       m2, %5
-    movh       m1, %6
-    movh       m3, %7
-    punpckl%1  m0, m2
-    punpckl%1  m1, m3
-    mova       m2, m0
-    punpckl%2  m0, m1
-    punpckh%2  m2, m1
-
-    movh       m4, %8
-    movh       m6, %9
-    movh       m5, %10
-    movh       m7, %11
-    punpckl%1  m4, m6
-    punpckl%1  m5, m7
-    mova       m6, m4
-    punpckl%2  m4, m5
-    punpckh%2  m6, m5
-
-    punpckh%3  m1, m0, m4
-    punpckh%3  m3, m2, m6
-    punpckl%3  m0, m4
-    punpckl%3  m2, m6
-%endmacro
-
 ; in: 4 rows of 8 bytes in m0..m3
 ; out: 8 rows of 4 bytes in %1..%8
 %macro TRANSPOSE8x4B_STORE 8
@@ -844,24 +815,24 @@
     punpcklbw  m2, m3
     punpcklwd  m1, m0, m2
     punpckhwd  m0, m2
-    movh       %1, m1
+    movd       %1, m1
     punpckhdq  m1, m1
-    movh       %2, m1
-    movh       %3, m0
+    movd       %2, m1
+    movd       %3, m0
     punpckhdq  m0, m0
-    movh       %4, m0
+    movd       %4, m0
 
     punpckhdq  m3, m3
     punpcklbw  m4, m5
     punpcklbw  m6, m3
     punpcklwd  m5, m4, m6
     punpckhwd  m4, m6
-    movh       %5, m5
+    movd       %5, m5
     punpckhdq  m5, m5
-    movh       %6, m5
-    movh       %7, m4
+    movd       %6, m5
+    movd       %7, m4
     punpckhdq  m4, m4
-    movh       %8, m4
+    movd       %8, m4
 %endmacro
 
 ; in: 8 rows of 4 bytes in %9..%10
@@ -877,34 +848,94 @@
     pextrd %8, %10, 3
 %endmacro
 
-%macro TRANSPOSE4x8B_LOAD 8
-    TRANSPOSE4x8_LOAD bw, wd, dq, %1, %2, %3, %4, %5, %6, %7, %8
-%endmacro
-
-%macro TRANSPOSE4x8W_LOAD 8
-%if mmsize==16
-    TRANSPOSE4x8_LOAD wd, dq, qdq, %1, %2, %3, %4, %5, %6, %7, %8
-%else
+; in: 4 rows of 4 words in %1..%4
+; out: 4 rows of 4 word in m0..m3
+; clobbers: m4
+%macro TRANSPOSE4x4W_LOAD 4-8
+%if mmsize==8
     SWAP  1, 4, 2, 3
-    mova  m0, [t5]
-    mova  m1, [t5+r1]
-    mova  m2, [t5+r1*2]
-    mova  m3, [t5+t6]
+    movq  m0, %1
+    movq  m1, %2
+    movq  m2, %3
+    movq  m3, %4
     TRANSPOSE4x4W 0, 1, 2, 3, 4
+%else
+    movq       m0, %1
+    movq       m2, %2
+    movq       m1, %3
+    movq       m3, %4
+    punpcklwd  m0, m2
+    punpcklwd  m1, m3
+    mova       m2, m0
+    punpckldq  m0, m1
+    punpckhdq  m2, m1
+    movhlps    m1, m0
+    movhlps    m3, m2
 %endif
 %endmacro
 
-%macro TRANSPOSE8x2W_STORE 8
+; in: 2 rows of 4 words in m1..m2
+; out: 4 rows of 2 words in %1..%4
+; clobbers: m0, m1
+%macro TRANSPOSE4x2W_STORE 4-8
+%if mmsize==8
     punpckhwd  m0, m1, m2
     punpcklwd  m1, m2
-%if mmsize==8
+%else
+    punpcklwd  m1, m2
+    movhlps    m0, m1
+%endif
     movd       %3, m0
     movd       %1, m1
     psrlq      m1, 32
     psrlq      m0, 32
     movd       %2, m1
     movd       %4, m0
+%endmacro
+
+; in: 4/8 rows of 4 words in %1..%8
+; out: 4 rows of 4/8 word in m0..m3
+; clobbers: m4, m5, m6, m7
+%macro TRANSPOSE4x8W_LOAD 8
+%if mmsize==8
+    TRANSPOSE4x4W_LOAD %1, %2, %3, %4
+%else
+    movq       m0, %1
+    movq       m2, %2
+    movq       m1, %3
+    movq       m3, %4
+    punpcklwd  m0, m2
+    punpcklwd  m1, m3
+    mova       m2, m0
+    punpckldq  m0, m1
+    punpckhdq  m2, m1
+
+    movq       m4, %5
+    movq       m6, %6
+    movq       m5, %7
+    movq       m7, %8
+    punpcklwd  m4, m6
+    punpcklwd  m5, m7
+    mova       m6, m4
+    punpckldq  m4, m5
+    punpckhdq  m6, m5
+
+    punpckhqdq m1, m0, m4
+    punpckhqdq m3, m2, m6
+    punpcklqdq m0, m4
+    punpcklqdq m2, m6
+%endif
+%endmacro
+
+; in: 2 rows of 4/8 words in m1..m2
+; out: 4/8 rows of 2 words in %1..%8
+; clobbers: m0, m1
+%macro TRANSPOSE8x2W_STORE 8
+%if mmsize==8
+    TRANSPOSE4x2W_STORE %1, %2, %3, %4
 %else
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/mc-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/mc-a.asm Changed

@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* mc-a.asm: x86 motion compensation
 ;*****************************************************************************
-;* Copyright (C) 2003-2013 x264 project
+;* Copyright (C) 2003-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Jason Garrett-Glaser <darkshikari@gmail.com>
@@ -1029,59 +1029,48 @@
     jg     .height_loop
     RET
 
+INIT_XMM
 cglobal pixel_avg2_w16_sse2, 6,7
     sub    r4, r2
     lea    r6, [r4+r3]
 .height_loop:
-    movdqu xmm0, [r2]
-    movdqu xmm2, [r2+r3]
-    movdqu xmm1, [r2+r4]
-    movdqu xmm3, [r2+r6]
+    movu   m0, [r2]
+    movu   m2, [r2+r3]
+    movu   m1, [r2+r4]
+    movu   m3, [r2+r6]
     lea    r2, [r2+r3*2]
-    pavgb  xmm0, xmm1
-    pavgb  xmm2, xmm3
-    movdqa [r0], xmm0
-    movdqa [r0+r1], xmm2
+    pavgb  m0, m1
+    pavgb  m2, m3
+    mova [r0], m0
+    mova [r0+r1], m2
     lea    r0, [r0+r1*2]
-    sub    r5d, 2
-    jg     .height_loop
+    sub   r5d, 2
+    jg .height_loop
     RET
 
-%macro AVG2_W20 1
-cglobal pixel_avg2_w20_%1, 6,7
+cglobal pixel_avg2_w20_sse2, 6,7
     sub    r2, r4
     lea    r6, [r2+r3]
 .height_loop:
-    movdqu xmm0, [r4]
-    movdqu xmm2, [r4+r3]
-%ifidn %1, sse2_misalign
-    movd   mm4,  [r4+16]
-    movd   mm5,  [r4+r3+16]
-    pavgb  xmm0, [r4+r2]
-    pavgb  xmm2, [r4+r6]
-%else
-    movdqu xmm1, [r4+r2]
-    movdqu xmm3, [r4+r6]
-    movd   mm4,  [r4+16]
-    movd   mm5,  [r4+r3+16]
-    pavgb  xmm0, xmm1
-    pavgb  xmm2, xmm3
-%endif
-    pavgb  mm4,  [r4+r2+16]
-    pavgb  mm5,  [r4+r6+16]
+    movu   m0, [r4]
+    movu   m2, [r4+r3]
+    movu   m1, [r4+r2]
+    movu   m3, [r4+r6]
+    movd  mm4, [r4+16]
+    movd  mm5, [r4+r3+16]
+    pavgb  m0, m1
+    pavgb  m2, m3
+    pavgb mm4, [r4+r2+16]
+    pavgb mm5, [r4+r6+16]
     lea    r4, [r4+r3*2]
-    movdqa [r0], xmm0
-    movd   [r0+16], mm4
-    movdqa [r0+r1], xmm2
-    movd   [r0+r1+16], mm5
+    mova [r0], m0
+    mova [r0+r1], m2
+    movd [r0+16], mm4
+    movd [r0+r1+16], mm5
     lea    r0, [r0+r1*2]
-    sub    r5d, 2
-    jg     .height_loop
+    sub   r5d, 2
+    jg .height_loop
     RET
-%endmacro
-
-AVG2_W20 sse2
-AVG2_W20 sse2_misalign
 
 INIT_YMM avx2
 cglobal pixel_avg2_w20, 6,7
@@ -1524,7 +1513,7 @@
 %endmacro
 %else ; !HIGH_BIT_DEPTH
 %macro UNPACK_UNALIGNED 3
-%if mmsize == 8 || cpuflag(misalign)
+%if mmsize == 8
     punpcklwd  %1, %3
 %else
     movh       %2, %3
@@ -2130,8 +2119,6 @@
 %else ; !HIGH_BIT_DEPTH
 INIT_MMX mmx2
 MC_CHROMA
-INIT_XMM sse2, misalign
-MC_CHROMA
 INIT_XMM sse2
 MC_CHROMA
 INIT_XMM ssse3

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* mc-a.asm: x86 motion compensation
 ;*****************************************************************************
-;* Copyright (C) 2003-2013 x264 project
+;* Copyright (C) 2003-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Jason Garrett-Glaser <darkshikari@gmail.com>
@@ -1029,59 +1029,48 @@
     jg     .height_loop
     RET
 
+INIT_XMM
 cglobal pixel_avg2_w16_sse2, 6,7
     sub    r4, r2
     lea    r6, [r4+r3]
 .height_loop:
-    movdqu xmm0, [r2]
-    movdqu xmm2, [r2+r3]
-    movdqu xmm1, [r2+r4]
-    movdqu xmm3, [r2+r6]
+    movu   m0, [r2]
+    movu   m2, [r2+r3]
+    movu   m1, [r2+r4]
+    movu   m3, [r2+r6]
     lea    r2, [r2+r3*2]
-    pavgb  xmm0, xmm1
-    pavgb  xmm2, xmm3
-    movdqa [r0], xmm0
-    movdqa [r0+r1], xmm2
+    pavgb  m0, m1
+    pavgb  m2, m3
+    mova [r0], m0
+    mova [r0+r1], m2
     lea    r0, [r0+r1*2]
-    sub    r5d, 2
-    jg     .height_loop
+    sub   r5d, 2
+    jg .height_loop
     RET
 
-%macro AVG2_W20 1
-cglobal pixel_avg2_w20_%1, 6,7
+cglobal pixel_avg2_w20_sse2, 6,7
     sub    r2, r4
     lea    r6, [r2+r3]
 .height_loop:
-    movdqu xmm0, [r4]
-    movdqu xmm2, [r4+r3]
-%ifidn %1, sse2_misalign
-    movd   mm4,  [r4+16]
-    movd   mm5,  [r4+r3+16]
-    pavgb  xmm0, [r4+r2]
-    pavgb  xmm2, [r4+r6]
-%else
-    movdqu xmm1, [r4+r2]
-    movdqu xmm3, [r4+r6]
-    movd   mm4,  [r4+16]
-    movd   mm5,  [r4+r3+16]
-    pavgb  xmm0, xmm1
-    pavgb  xmm2, xmm3
-%endif
-    pavgb  mm4,  [r4+r2+16]
-    pavgb  mm5,  [r4+r6+16]
+    movu   m0, [r4]
+    movu   m2, [r4+r3]
+    movu   m1, [r4+r2]
+    movu   m3, [r4+r6]
+    movd  mm4, [r4+16]
+    movd  mm5, [r4+r3+16]
+    pavgb  m0, m1
+    pavgb  m2, m3
+    pavgb mm4, [r4+r2+16]
+    pavgb mm5, [r4+r6+16]
     lea    r4, [r4+r3*2]
-    movdqa [r0], xmm0
-    movd   [r0+16], mm4
-    movdqa [r0+r1], xmm2
-    movd   [r0+r1+16], mm5
+    mova [r0], m0
+    mova [r0+r1], m2
+    movd [r0+16], mm4
+    movd [r0+r1+16], mm5
     lea    r0, [r0+r1*2]
-    sub    r5d, 2
-    jg     .height_loop
+    sub   r5d, 2
+    jg .height_loop
     RET
-%endmacro
-
-AVG2_W20 sse2
-AVG2_W20 sse2_misalign
 
 INIT_YMM avx2
 cglobal pixel_avg2_w20, 6,7
@@ -1524,7 +1513,7 @@
 %endmacro
 %else ; !HIGH_BIT_DEPTH
 %macro UNPACK_UNALIGNED 3
-%if mmsize == 8 || cpuflag(misalign)
+%if mmsize == 8
     punpcklwd  %1, %3
 %else
     movh       %2, %3
@@ -2130,8 +2119,6 @@
 %else ; !HIGH_BIT_DEPTH
 INIT_MMX mmx2
 MC_CHROMA
-INIT_XMM sse2, misalign
-MC_CHROMA
 INIT_XMM sse2
 MC_CHROMA
 INIT_XMM ssse3
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/mc-a2.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/mc-a2.asm Changed

@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* mc-a2.asm: x86 motion compensation
 ;*****************************************************************************
-;* Copyright (C) 2005-2013 x264 project
+;* Copyright (C) 2005-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Jason Garrett-Glaser <darkshikari@gmail.com>
@@ -32,6 +32,7 @@
 
 SECTION_RODATA 32
 
+pw_1024: times 16 dw 1024
 filt_mul20: times 32 db 20
 filt_mul15: times 16 db 1, -5
 filt_mul51: times 16 db -5, 1
@@ -39,17 +40,25 @@
 deinterleave_shuf: times 2 db 0,2,4,6,8,10,12,14,1,3,5,7,9,11,13,15
 
 %if HIGH_BIT_DEPTH
+v210_mask: times 4 dq 0xc00ffc003ff003ff
+v210_luma_shuf: times 2 db 1,2,4,5,6,7,9,10,12,13,14,15,12,13,14,15
+v210_chroma_shuf: times 2 db 0,1,2,3,5,6,8,9,10,11,13,14,10,11,13,14
+; vpermd indices {0,1,2,4,5,7,_,_} merged in the 3 lsb of each dword to save a register
+v210_mult: dw 0x2000,0x7fff,0x0801,0x2000,0x7ffa,0x0800,0x7ffc,0x0800
+           dw 0x1ffd,0x7fff,0x07ff,0x2000,0x7fff,0x0800,0x7fff,0x0800
+
 deinterleave_shuf32a: SHUFFLE_MASK_W 0,2,4,6,8,10,12,14
 deinterleave_shuf32b: SHUFFLE_MASK_W 1,3,5,7,9,11,13,15
 %else
+deinterleave_rgb_shuf: db 0,3,6,9,1,4,7,10,2,5,8,11,-1,-1,-1,-1
+                       db 0,4,8,12,1,5,9,13,2,6,10,14,-1,-1,-1,-1
+
 deinterleave_shuf32a: db 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30
 deinterleave_shuf32b: db 1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31
-%endif
-pw_1024: times 16 dw 1024
+%endif ; !HIGH_BIT_DEPTH
 
 pd_16: times 4 dd 16
 pd_0f: times 4 dd 0xffff
-pf_inv256: times 8 dd 0.00390625
 
 pad10: times 8 dw    10*PIXEL_MAX
 pad20: times 8 dw    20*PIXEL_MAX
@@ -60,16 +69,22 @@
 tap2: times 4 dw 20, 20
 tap3: times 4 dw -5,  1
 
+pw_0xc000: times 8 dw 0xc000
+pw_31: times 8 dw 31
+pd_4: times 4 dd 4
+
 SECTION .text
 
 cextern pb_0
 cextern pw_1
+cextern pw_8
 cextern pw_16
 cextern pw_32
 cextern pw_512
 cextern pw_00ff
 cextern pw_3fff
 cextern pw_pixel_max
+cextern pw_0to15
 cextern pd_ffff
 
 %macro LOAD_ADD 4
@@ -482,7 +497,7 @@
     %define pw_rnd [pw_32]
 %endif
 ; This doesn't seem to be faster (with AVX) on Sandy Bridge or Bulldozer...
-%if cpuflag(misalign) || mmsize==32
+%if mmsize==32
 .loop:
     movu    m4, [src-4]
     movu    m5, [src-2]
@@ -630,8 +645,6 @@
 HPEL_V 0
 INIT_XMM sse2
 HPEL_V 8
-INIT_XMM sse2, misalign
-HPEL_C
 %if ARCH_X86_64 == 0
 INIT_XMM sse2
 HPEL_C
@@ -1197,6 +1210,163 @@
     RET
 %endmacro ; PLANE_DEINTERLEAVE
 
+%macro PLANE_DEINTERLEAVE_RGB_CORE 9 ; pw, i_dsta, i_dstb, i_dstc, i_src, w, h, tmp1, tmp2
+%if cpuflag(ssse3)
+    mova        m3, [deinterleave_rgb_shuf+(%1-3)*16]
+%endif
+%%loopy:
+    mov         %8, r6
+    mov         %9, %6
+%%loopx:
+    movu        m0, [%8]
+    movu        m1, [%8+%1*mmsize/4]
+%if cpuflag(ssse3)
+    pshufb      m0, m3        ; b0 b1 b2 b3 g0 g1 g2 g3 r0 r1 r2 r3
+    pshufb      m1, m3        ; b4 b5 b6 b7 g4 g5 g6 g7 r4 r5 r6 r7
+%elif %1 == 3
+    psrldq      m2, m0, 6
+    punpcklqdq  m0, m1        ; b0 g0 r0 b1 g1 r1 __ __ b4 g4 r4 b5 g5 r5
+    psrldq      m1, 6
+    punpcklqdq  m2, m1        ; b2 g2 r2 b3 g3 r3 __ __ b6 g6 r6 b7 g7 r7
+    psrlq       m3, m0, 24
+    psrlq       m4, m2, 24
+    punpckhbw   m1, m0, m3    ; b4 b5 g4 g5 r4 r5
+    punpcklbw   m0, m3        ; b0 b1 g0 g1 r0 r1
+    punpckhbw   m3, m2, m4    ; b6 b7 g6 g7 r6 r7
+    punpcklbw   m2, m4        ; b2 b3 g2 g3 r2 r3
+    punpcklwd   m0, m2        ; b0 b1 b2 b3 g0 g1 g2 g3 r0 r1 r2 r3
+    punpcklwd   m1, m3        ; b4 b5 b6 b7 g4 g5 g6 g7 r4 r5 r6 r7
+%else
+    pshufd      m3, m0, q2301
+    pshufd      m4, m1, q2301
+    punpckhbw   m2, m0, m3    ; b2 b3 g2 g3 r2 r3
+    punpcklbw   m0, m3        ; b0 b1 g0 g1 r0 r1
+    punpckhbw   m3, m1, m4    ; b6 b7 g6 g7 r6 r7
+    punpcklbw   m1, m4        ; b4 b5 g4 g5 r4 r5
+    punpcklwd   m0, m2        ; b0 b1 b2 b3 g0 g1 g2 g3 r0 r1 r2 r3
+    punpcklwd   m1, m3        ; b4 b5 b6 b7 g4 g5 g6 g7 r4 r5 r6 r7
+%endif
+    punpckldq   m2, m0, m1    ; b0 b1 b2 b3 b4 b5 b6 b7 g0 g1 g2 g3 g4 g5 g6 g7
+    punpckhdq   m0, m1        ; r0 r1 r2 r3 r4 r5 r6 r7
+    movh   [r0+%9], m2
+    movhps [r2+%9], m2
+    movh   [r4+%9], m0
+    add         %8, %1*mmsize/2
+    add         %9, mmsize/2
+    jl %%loopx
+    add         r0, %2
+    add         r2, %3
+    add         r4, %4
+    add         r6, %5
+    dec        %7d
+    jg %%loopy
+%endmacro
+
+%macro PLANE_DEINTERLEAVE_RGB 0
+;-----------------------------------------------------------------------------
+; void x264_plane_copy_deinterleave_rgb( pixel *dsta, intptr_t i_dsta,
+;                                        pixel *dstb, intptr_t i_dstb,
+;                                        pixel *dstc, intptr_t i_dstc,
+;                                        pixel *src,  intptr_t i_src, int pw, int w, int h )
+;-----------------------------------------------------------------------------
+%if ARCH_X86_64
+cglobal plane_copy_deinterleave_rgb, 8,12
+    %define %%args r1, r3, r5, r7, r8, r9, r10, r11
+    mov        r8d, r9m
+    mov        r9d, r10m
+    add         r0, r8
+    add         r2, r8
+    add         r4, r8
+    neg         r8
+%else
+cglobal plane_copy_deinterleave_rgb, 1,7
+    %define %%args r1m, r3m, r5m, r7m, r9m, r1, r3, r5
+    mov         r1, r9m
+    mov         r2, r2m
+    mov         r4, r4m
+    mov         r6, r6m
+    add         r0, r1
+    add         r2, r1
+    add         r4, r1
+    neg         r1
+    mov        r9m, r1
+    mov         r1, r10m
+%endif
+    cmp  dword r8m, 4
+    je .pw4
+    PLANE_DEINTERLEAVE_RGB_CORE 3, %%args ; BGR
+    jmp .ret
+.pw4:
+    PLANE_DEINTERLEAVE_RGB_CORE 4, %%args ; BGRA
+.ret:
+    REP_RET
+%endmacro
+
+%if HIGH_BIT_DEPTH == 0
+INIT_XMM sse2
+PLANE_DEINTERLEAVE_RGB
+INIT_XMM ssse3
+PLANE_DEINTERLEAVE_RGB
+%endif ; !HIGH_BIT_DEPTH
+
+%macro PLANE_DEINTERLEAVE_V210 0
+;-----------------------------------------------------------------------------
+; void x264_plane_copy_deinterleave_v210( uint16_t *dsty, intptr_t i_dsty,
+;                                         uint16_t *dstc, intptr_t i_dstc,
+;                                         uint32_t *src, intptr_t i_src, int w, int h )
+;-----------------------------------------------------------------------------
+%if ARCH_X86_64
+cglobal plane_copy_deinterleave_v210, 8,10,7
+%define src   r8
+%define org_w r9

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* mc-a2.asm: x86 motion compensation
 ;*****************************************************************************
-;* Copyright (C) 2005-2013 x264 project
+;* Copyright (C) 2005-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Jason Garrett-Glaser <darkshikari@gmail.com>
@@ -32,6 +32,7 @@
 
 SECTION_RODATA 32
 
+pw_1024: times 16 dw 1024
 filt_mul20: times 32 db 20
 filt_mul15: times 16 db 1, -5
 filt_mul51: times 16 db -5, 1
@@ -39,17 +40,25 @@
 deinterleave_shuf: times 2 db 0,2,4,6,8,10,12,14,1,3,5,7,9,11,13,15
 
 %if HIGH_BIT_DEPTH
+v210_mask: times 4 dq 0xc00ffc003ff003ff
+v210_luma_shuf: times 2 db 1,2,4,5,6,7,9,10,12,13,14,15,12,13,14,15
+v210_chroma_shuf: times 2 db 0,1,2,3,5,6,8,9,10,11,13,14,10,11,13,14
+; vpermd indices {0,1,2,4,5,7,_,_} merged in the 3 lsb of each dword to save a register
+v210_mult: dw 0x2000,0x7fff,0x0801,0x2000,0x7ffa,0x0800,0x7ffc,0x0800
+           dw 0x1ffd,0x7fff,0x07ff,0x2000,0x7fff,0x0800,0x7fff,0x0800
+
 deinterleave_shuf32a: SHUFFLE_MASK_W 0,2,4,6,8,10,12,14
 deinterleave_shuf32b: SHUFFLE_MASK_W 1,3,5,7,9,11,13,15
 %else
+deinterleave_rgb_shuf: db 0,3,6,9,1,4,7,10,2,5,8,11,-1,-1,-1,-1
+                       db 0,4,8,12,1,5,9,13,2,6,10,14,-1,-1,-1,-1
+
 deinterleave_shuf32a: db 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30
 deinterleave_shuf32b: db 1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31
-%endif
-pw_1024: times 16 dw 1024
+%endif ; !HIGH_BIT_DEPTH
 
 pd_16: times 4 dd 16
 pd_0f: times 4 dd 0xffff
-pf_inv256: times 8 dd 0.00390625
 
 pad10: times 8 dw    10*PIXEL_MAX
 pad20: times 8 dw    20*PIXEL_MAX
@@ -60,16 +69,22 @@
 tap2: times 4 dw 20, 20
 tap3: times 4 dw -5,  1
 
+pw_0xc000: times 8 dw 0xc000
+pw_31: times 8 dw 31
+pd_4: times 4 dd 4
+
 SECTION .text
 
 cextern pb_0
 cextern pw_1
+cextern pw_8
 cextern pw_16
 cextern pw_32
 cextern pw_512
 cextern pw_00ff
 cextern pw_3fff
 cextern pw_pixel_max
+cextern pw_0to15
 cextern pd_ffff
 
 %macro LOAD_ADD 4
@@ -482,7 +497,7 @@
     %define pw_rnd [pw_32]
 %endif
 ; This doesn't seem to be faster (with AVX) on Sandy Bridge or Bulldozer...
-%if cpuflag(misalign) || mmsize==32
+%if mmsize==32
 .loop:
     movu    m4, [src-4]
     movu    m5, [src-2]
@@ -630,8 +645,6 @@
 HPEL_V 0
 INIT_XMM sse2
 HPEL_V 8
-INIT_XMM sse2, misalign
-HPEL_C
 %if ARCH_X86_64 == 0
 INIT_XMM sse2
 HPEL_C
@@ -1197,6 +1210,163 @@
     RET
 %endmacro ; PLANE_DEINTERLEAVE
 
+%macro PLANE_DEINTERLEAVE_RGB_CORE 9 ; pw, i_dsta, i_dstb, i_dstc, i_src, w, h, tmp1, tmp2
+%if cpuflag(ssse3)
+    mova        m3, [deinterleave_rgb_shuf+(%1-3)*16]
+%endif
+%%loopy:
+    mov         %8, r6
+    mov         %9, %6
+%%loopx:
+    movu        m0, [%8]
+    movu        m1, [%8+%1*mmsize/4]
+%if cpuflag(ssse3)
+    pshufb      m0, m3        ; b0 b1 b2 b3 g0 g1 g2 g3 r0 r1 r2 r3
+    pshufb      m1, m3        ; b4 b5 b6 b7 g4 g5 g6 g7 r4 r5 r6 r7
+%elif %1 == 3
+    psrldq      m2, m0, 6
+    punpcklqdq  m0, m1        ; b0 g0 r0 b1 g1 r1 __ __ b4 g4 r4 b5 g5 r5
+    psrldq      m1, 6
+    punpcklqdq  m2, m1        ; b2 g2 r2 b3 g3 r3 __ __ b6 g6 r6 b7 g7 r7
+    psrlq       m3, m0, 24
+    psrlq       m4, m2, 24
+    punpckhbw   m1, m0, m3    ; b4 b5 g4 g5 r4 r5
+    punpcklbw   m0, m3        ; b0 b1 g0 g1 r0 r1
+    punpckhbw   m3, m2, m4    ; b6 b7 g6 g7 r6 r7
+    punpcklbw   m2, m4        ; b2 b3 g2 g3 r2 r3
+    punpcklwd   m0, m2        ; b0 b1 b2 b3 g0 g1 g2 g3 r0 r1 r2 r3
+    punpcklwd   m1, m3        ; b4 b5 b6 b7 g4 g5 g6 g7 r4 r5 r6 r7
+%else
+    pshufd      m3, m0, q2301
+    pshufd      m4, m1, q2301
+    punpckhbw   m2, m0, m3    ; b2 b3 g2 g3 r2 r3
+    punpcklbw   m0, m3        ; b0 b1 g0 g1 r0 r1
+    punpckhbw   m3, m1, m4    ; b6 b7 g6 g7 r6 r7
+    punpcklbw   m1, m4        ; b4 b5 g4 g5 r4 r5
+    punpcklwd   m0, m2        ; b0 b1 b2 b3 g0 g1 g2 g3 r0 r1 r2 r3
+    punpcklwd   m1, m3        ; b4 b5 b6 b7 g4 g5 g6 g7 r4 r5 r6 r7
+%endif
+    punpckldq   m2, m0, m1    ; b0 b1 b2 b3 b4 b5 b6 b7 g0 g1 g2 g3 g4 g5 g6 g7
+    punpckhdq   m0, m1        ; r0 r1 r2 r3 r4 r5 r6 r7
+    movh   [r0+%9], m2
+    movhps [r2+%9], m2
+    movh   [r4+%9], m0
+    add         %8, %1*mmsize/2
+    add         %9, mmsize/2
+    jl %%loopx
+    add         r0, %2
+    add         r2, %3
+    add         r4, %4
+    add         r6, %5
+    dec        %7d
+    jg %%loopy
+%endmacro
+
+%macro PLANE_DEINTERLEAVE_RGB 0
+;-----------------------------------------------------------------------------
+; void x264_plane_copy_deinterleave_rgb( pixel *dsta, intptr_t i_dsta,
+;                                        pixel *dstb, intptr_t i_dstb,
+;                                        pixel *dstc, intptr_t i_dstc,
+;                                        pixel *src,  intptr_t i_src, int pw, int w, int h )
+;-----------------------------------------------------------------------------
+%if ARCH_X86_64
+cglobal plane_copy_deinterleave_rgb, 8,12
+    %define %%args r1, r3, r5, r7, r8, r9, r10, r11
+    mov        r8d, r9m
+    mov        r9d, r10m
+    add         r0, r8
+    add         r2, r8
+    add         r4, r8
+    neg         r8
+%else
+cglobal plane_copy_deinterleave_rgb, 1,7
+    %define %%args r1m, r3m, r5m, r7m, r9m, r1, r3, r5
+    mov         r1, r9m
+    mov         r2, r2m
+    mov         r4, r4m
+    mov         r6, r6m
+    add         r0, r1
+    add         r2, r1
+    add         r4, r1
+    neg         r1
+    mov        r9m, r1
+    mov         r1, r10m
+%endif
+    cmp  dword r8m, 4
+    je .pw4
+    PLANE_DEINTERLEAVE_RGB_CORE 3, %%args ; BGR
+    jmp .ret
+.pw4:
+    PLANE_DEINTERLEAVE_RGB_CORE 4, %%args ; BGRA
+.ret:
+    REP_RET
+%endmacro
+
+%if HIGH_BIT_DEPTH == 0
+INIT_XMM sse2
+PLANE_DEINTERLEAVE_RGB
+INIT_XMM ssse3
+PLANE_DEINTERLEAVE_RGB
+%endif ; !HIGH_BIT_DEPTH
+
+%macro PLANE_DEINTERLEAVE_V210 0
+;-----------------------------------------------------------------------------
+; void x264_plane_copy_deinterleave_v210( uint16_t *dsty, intptr_t i_dsty,
+;                                         uint16_t *dstc, intptr_t i_dstc,
+;                                         uint32_t *src, intptr_t i_src, int w, int h )
+;-----------------------------------------------------------------------------
+%if ARCH_X86_64
+cglobal plane_copy_deinterleave_v210, 8,10,7
+%define src   r8
+%define org_w r9
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/mc-c.c -> x264-snapshot-20140321-2245.tar.bz2/common/x86/mc-c.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc-c.c: x86 motion compensation
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -116,6 +116,23 @@
 void x264_plane_copy_deinterleave_avx( uint16_t *dstu, intptr_t i_dstu,
                                        uint16_t *dstv, intptr_t i_dstv,
                                        uint16_t *src,  intptr_t i_src, int w, int h );
+void x264_plane_copy_deinterleave_rgb_sse2 ( pixel *dsta, intptr_t i_dsta,
+                                             pixel *dstb, intptr_t i_dstb,
+                                             pixel *dstc, intptr_t i_dstc,
+                                             pixel *src,  intptr_t i_src, int pw, int w, int h );
+void x264_plane_copy_deinterleave_rgb_ssse3( pixel *dsta, intptr_t i_dsta,
+                                             pixel *dstb, intptr_t i_dstb,
+                                             pixel *dstc, intptr_t i_dstc,
+                                             pixel *src,  intptr_t i_src, int pw, int w, int h );
+void x264_plane_copy_deinterleave_v210_ssse3( uint16_t *dstu, intptr_t i_dstu,
+                                              uint16_t *dstv, intptr_t i_dstv,
+                                              uint32_t *src,  intptr_t i_src, int w, int h );
+void x264_plane_copy_deinterleave_v210_avx  ( uint16_t *dstu, intptr_t i_dstu,
+                                              uint16_t *dstv, intptr_t i_dstv,
+                                              uint32_t *src,  intptr_t i_src, int w, int h );
+void x264_plane_copy_deinterleave_v210_avx2 ( uint16_t *dstu, intptr_t i_dstu,
+                                              uint16_t *dstv, intptr_t i_dstv,
+                                              uint32_t *src,  intptr_t i_src, int w, int h );
 void x264_store_interleave_chroma_mmx2( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
 void x264_store_interleave_chroma_sse2( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
 void x264_store_interleave_chroma_avx ( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
@@ -144,13 +161,13 @@
 void x264_integral_init8v_mmx ( uint16_t *sum8, intptr_t stride );
 void x264_integral_init8v_sse2( uint16_t *sum8, intptr_t stride );
 void x264_integral_init8v_avx2( uint16_t *sum8, intptr_t stride );
-void x264_mbtree_propagate_cost_sse2( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
+void x264_mbtree_propagate_cost_sse2( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
                                       uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
-void x264_mbtree_propagate_cost_avx ( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
+void x264_mbtree_propagate_cost_avx ( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
                                       uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
-void x264_mbtree_propagate_cost_fma4( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
+void x264_mbtree_propagate_cost_fma4( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
                                       uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
-void x264_mbtree_propagate_cost_avx2_fma3( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
+void x264_mbtree_propagate_cost_avx2_fma3( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
                                            uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
 
 #define MC_CHROMA(cpu)\
@@ -158,7 +175,6 @@
                            int dx, int dy, int i_width, int i_height );
 MC_CHROMA(mmx2)
 MC_CHROMA(sse2)
-MC_CHROMA(sse2_misalign)
 MC_CHROMA(ssse3)
 MC_CHROMA(ssse3_cache64)
 MC_CHROMA(avx)
@@ -186,7 +202,6 @@
 PIXEL_AVG_WALL(cache64_mmx2)
 PIXEL_AVG_WALL(cache64_sse2)
 PIXEL_AVG_WALL(sse2)
-PIXEL_AVG_WALL(sse2_misalign)
 PIXEL_AVG_WALL(cache64_ssse3)
 PIXEL_AVG_WALL(avx2)
 
@@ -227,7 +242,6 @@
 PIXEL_AVG_WTAB(cache64_mmx2, mmx2, cache64_mmx2, cache64_mmx2, cache64_mmx2, cache64_mmx2)
 #endif
 PIXEL_AVG_WTAB(sse2, mmx2, mmx2, sse2, sse2, sse2)
-PIXEL_AVG_WTAB(sse2_misalign, mmx2, mmx2, sse2, sse2, sse2_misalign)
 PIXEL_AVG_WTAB(cache64_sse2, mmx2, cache64_mmx2, cache64_sse2, cache64_sse2, cache64_sse2)
 PIXEL_AVG_WTAB(cache64_ssse3, mmx2, cache64_mmx2, cache64_ssse3, cache64_ssse3, cache64_sse2)
 PIXEL_AVG_WTAB(cache64_ssse3_atom, mmx2, mmx2, cache64_ssse3, cache64_ssse3, sse2)
@@ -429,7 +443,6 @@
 GET_REF(cache32_mmx2)
 GET_REF(cache64_mmx2)
 #endif
-GET_REF(sse2_misalign)
 GET_REF(cache64_sse2)
 GET_REF(cache64_ssse3)
 GET_REF(cache64_ssse3_atom)
@@ -477,7 +490,6 @@
 HPEL(16, avx, avx, avx, avx)
 HPEL(32, avx2, avx2, avx2, avx2)
 #endif
-HPEL(16, sse2_misalign, sse2, sse2_misalign, sse2)
 #endif // HIGH_BIT_DEPTH
 
 static void x264_plane_copy_mmx2( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h )
@@ -521,6 +533,113 @@
 PLANE_INTERLEAVE(avx)
 #endif
 
+#if HAVE_X86_INLINE_ASM
+#define CLIP_ADD(s,x)\
+do\
+{\
+    int temp;\
+    asm("movd       %0, %%xmm0     \n"\
+        "movd       %2, %%xmm1     \n"\
+        "paddsw %%xmm1, %%xmm0     \n"\
+        "movd   %%xmm0, %1         \n"\
+        :"+m"(s), "=&r"(temp)\
+        :"m"(x)\
+    );\
+    s = temp;\
+} while(0)
+
+#define CLIP_ADD2(s,x)\
+do\
+{\
+    asm("movd       %0, %%xmm0     \n"\
+        "movd       %1, %%xmm1     \n"\
+        "paddsw %%xmm1, %%xmm0     \n"\
+        "movd   %%xmm0, %0         \n"\
+        :"+m"(M32(s))\
+        :"m"(M32(x))\
+    );\
+} while(0)
+#else
+#define CLIP_ADD(s,x) (s) = X264_MIN((s)+(x),(1<<15)-1)
+#define CLIP_ADD2(s,x)\
+do\
+{\
+    CLIP_ADD((s)[0], (x)[0]);\
+    CLIP_ADD((s)[1], (x)[1]);\
+} while(0)
+#endif
+
+#define PROPAGATE_LIST(cpu)\
+void x264_mbtree_propagate_list_internal_##cpu( int16_t (*mvs)[2], int16_t *propagate_amount,\
+                                                uint16_t *lowres_costs, int16_t *output,\
+                                                int bipred_weight, int mb_y, int len );\
+\
+static void x264_mbtree_propagate_list_##cpu( x264_t *h, uint16_t *ref_costs, int16_t (*mvs)[2],\
+                                              int16_t *propagate_amount, uint16_t *lowres_costs,\
+                                              int bipred_weight, int mb_y, int len, int list )\
+{\
+    int16_t *current = h->scratch_buffer2;\
+\
+    x264_mbtree_propagate_list_internal_##cpu( mvs, propagate_amount, lowres_costs,\
+                                               current, bipred_weight, mb_y, len );\
+\
+    unsigned stride = h->mb.i_mb_stride;\
+    unsigned width = h->mb.i_mb_width;\
+    unsigned height = h->mb.i_mb_height;\
+\
+    for( unsigned i = 0; i < len; current += 32 )\
+    {\
+        int end = X264_MIN( i+8, len );\
+        for( ; i < end; i++, current += 2 )\
+        {\
+            if( !(lowres_costs[i] & (1 << (list+LOWRES_COST_SHIFT))) )\
+                continue;\
+\
+            unsigned mbx = current[0];\
+            unsigned mby = current[1];\
+            unsigned idx0 = mbx + mby * stride;\
+            unsigned idx2 = idx0 + stride;\
+\
+            /* Shortcut for the simple/common case of zero MV */\
+            if( !M32( mvs[i] ) )\
+            {\
+                CLIP_ADD( ref_costs[idx0], current[16] );\
+                continue;\
+            }\
+\
+            if( mbx < width-1 && mby < height-1 )\
+            {\
+                CLIP_ADD2( ref_costs+idx0, current+16 );\
+                CLIP_ADD2( ref_costs+idx2, current+32 );\
+            }\
+            else\
+            {\
+                /* Note: this takes advantage of unsigned representation to\
+                 * catch negative mbx/mby. */\
+                if( mby < height )\
+                {\
+                    if( mbx < width )\
+                        CLIP_ADD( ref_costs[idx0+0], current[16] );\
+                    if( mbx+1 < width )\
+                        CLIP_ADD( ref_costs[idx0+1], current[17] );\
+                }\
+                if( mby+1 < height )\
+                {\
+                    if( mbx < width )\
+                        CLIP_ADD( ref_costs[idx2+0], current[32] );\
+                    if( mbx+1 < width )\
+                        CLIP_ADD( ref_costs[idx2+1], current[33] );\
+                }\
+            }\
+        }\
+    }\
+}
+
+PROPAGATE_LIST(ssse3)
+PROPAGATE_LIST(avx)
+#undef CLIP_ADD

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc-c.c: x86 motion compensation
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -116,6 +116,23 @@
 void x264_plane_copy_deinterleave_avx( uint16_t *dstu, intptr_t i_dstu,
                                        uint16_t *dstv, intptr_t i_dstv,
                                        uint16_t *src,  intptr_t i_src, int w, int h );
+void x264_plane_copy_deinterleave_rgb_sse2 ( pixel *dsta, intptr_t i_dsta,
+                                             pixel *dstb, intptr_t i_dstb,
+                                             pixel *dstc, intptr_t i_dstc,
+                                             pixel *src,  intptr_t i_src, int pw, int w, int h );
+void x264_plane_copy_deinterleave_rgb_ssse3( pixel *dsta, intptr_t i_dsta,
+                                             pixel *dstb, intptr_t i_dstb,
+                                             pixel *dstc, intptr_t i_dstc,
+                                             pixel *src,  intptr_t i_src, int pw, int w, int h );
+void x264_plane_copy_deinterleave_v210_ssse3( uint16_t *dstu, intptr_t i_dstu,
+                                              uint16_t *dstv, intptr_t i_dstv,
+                                              uint32_t *src,  intptr_t i_src, int w, int h );
+void x264_plane_copy_deinterleave_v210_avx  ( uint16_t *dstu, intptr_t i_dstu,
+                                              uint16_t *dstv, intptr_t i_dstv,
+                                              uint32_t *src,  intptr_t i_src, int w, int h );
+void x264_plane_copy_deinterleave_v210_avx2 ( uint16_t *dstu, intptr_t i_dstu,
+                                              uint16_t *dstv, intptr_t i_dstv,
+                                              uint32_t *src,  intptr_t i_src, int w, int h );
 void x264_store_interleave_chroma_mmx2( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
 void x264_store_interleave_chroma_sse2( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
 void x264_store_interleave_chroma_avx ( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
@@ -144,13 +161,13 @@
 void x264_integral_init8v_mmx ( uint16_t *sum8, intptr_t stride );
 void x264_integral_init8v_sse2( uint16_t *sum8, intptr_t stride );
 void x264_integral_init8v_avx2( uint16_t *sum8, intptr_t stride );
-void x264_mbtree_propagate_cost_sse2( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
+void x264_mbtree_propagate_cost_sse2( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
                                       uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
-void x264_mbtree_propagate_cost_avx ( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
+void x264_mbtree_propagate_cost_avx ( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
                                       uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
-void x264_mbtree_propagate_cost_fma4( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
+void x264_mbtree_propagate_cost_fma4( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
                                       uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
-void x264_mbtree_propagate_cost_avx2_fma3( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
+void x264_mbtree_propagate_cost_avx2_fma3( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
                                            uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
 
 #define MC_CHROMA(cpu)\
@@ -158,7 +175,6 @@
                            int dx, int dy, int i_width, int i_height );
 MC_CHROMA(mmx2)
 MC_CHROMA(sse2)
-MC_CHROMA(sse2_misalign)
 MC_CHROMA(ssse3)
 MC_CHROMA(ssse3_cache64)
 MC_CHROMA(avx)
@@ -186,7 +202,6 @@
 PIXEL_AVG_WALL(cache64_mmx2)
 PIXEL_AVG_WALL(cache64_sse2)
 PIXEL_AVG_WALL(sse2)
-PIXEL_AVG_WALL(sse2_misalign)
 PIXEL_AVG_WALL(cache64_ssse3)
 PIXEL_AVG_WALL(avx2)
 
@@ -227,7 +242,6 @@
 PIXEL_AVG_WTAB(cache64_mmx2, mmx2, cache64_mmx2, cache64_mmx2, cache64_mmx2, cache64_mmx2)
 #endif
 PIXEL_AVG_WTAB(sse2, mmx2, mmx2, sse2, sse2, sse2)
-PIXEL_AVG_WTAB(sse2_misalign, mmx2, mmx2, sse2, sse2, sse2_misalign)
 PIXEL_AVG_WTAB(cache64_sse2, mmx2, cache64_mmx2, cache64_sse2, cache64_sse2, cache64_sse2)
 PIXEL_AVG_WTAB(cache64_ssse3, mmx2, cache64_mmx2, cache64_ssse3, cache64_ssse3, cache64_sse2)
 PIXEL_AVG_WTAB(cache64_ssse3_atom, mmx2, mmx2, cache64_ssse3, cache64_ssse3, sse2)
@@ -429,7 +443,6 @@
 GET_REF(cache32_mmx2)
 GET_REF(cache64_mmx2)
 #endif
-GET_REF(sse2_misalign)
 GET_REF(cache64_sse2)
 GET_REF(cache64_ssse3)
 GET_REF(cache64_ssse3_atom)
@@ -477,7 +490,6 @@
 HPEL(16, avx, avx, avx, avx)
 HPEL(32, avx2, avx2, avx2, avx2)
 #endif
-HPEL(16, sse2_misalign, sse2, sse2_misalign, sse2)
 #endif // HIGH_BIT_DEPTH
 
 static void x264_plane_copy_mmx2( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h )
@@ -521,6 +533,113 @@
 PLANE_INTERLEAVE(avx)
 #endif
 
+#if HAVE_X86_INLINE_ASM
+#define CLIP_ADD(s,x)\
+do\
+{\
+    int temp;\
+    asm("movd       %0, %%xmm0     \n"\
+        "movd       %2, %%xmm1     \n"\
+        "paddsw %%xmm1, %%xmm0     \n"\
+        "movd   %%xmm0, %1         \n"\
+        :"+m"(s), "=&r"(temp)\
+        :"m"(x)\
+    );\
+    s = temp;\
+} while(0)
+
+#define CLIP_ADD2(s,x)\
+do\
+{\
+    asm("movd       %0, %%xmm0     \n"\
+        "movd       %1, %%xmm1     \n"\
+        "paddsw %%xmm1, %%xmm0     \n"\
+        "movd   %%xmm0, %0         \n"\
+        :"+m"(M32(s))\
+        :"m"(M32(x))\
+    );\
+} while(0)
+#else
+#define CLIP_ADD(s,x) (s) = X264_MIN((s)+(x),(1<<15)-1)
+#define CLIP_ADD2(s,x)\
+do\
+{\
+    CLIP_ADD((s)[0], (x)[0]);\
+    CLIP_ADD((s)[1], (x)[1]);\
+} while(0)
+#endif
+
+#define PROPAGATE_LIST(cpu)\
+void x264_mbtree_propagate_list_internal_##cpu( int16_t (*mvs)[2], int16_t *propagate_amount,\
+                                                uint16_t *lowres_costs, int16_t *output,\
+                                                int bipred_weight, int mb_y, int len );\
+\
+static void x264_mbtree_propagate_list_##cpu( x264_t *h, uint16_t *ref_costs, int16_t (*mvs)[2],\
+                                              int16_t *propagate_amount, uint16_t *lowres_costs,\
+                                              int bipred_weight, int mb_y, int len, int list )\
+{\
+    int16_t *current = h->scratch_buffer2;\
+\
+    x264_mbtree_propagate_list_internal_##cpu( mvs, propagate_amount, lowres_costs,\
+                                               current, bipred_weight, mb_y, len );\
+\
+    unsigned stride = h->mb.i_mb_stride;\
+    unsigned width = h->mb.i_mb_width;\
+    unsigned height = h->mb.i_mb_height;\
+\
+    for( unsigned i = 0; i < len; current += 32 )\
+    {\
+        int end = X264_MIN( i+8, len );\
+        for( ; i < end; i++, current += 2 )\
+        {\
+            if( !(lowres_costs[i] & (1 << (list+LOWRES_COST_SHIFT))) )\
+                continue;\
+\
+            unsigned mbx = current[0];\
+            unsigned mby = current[1];\
+            unsigned idx0 = mbx + mby * stride;\
+            unsigned idx2 = idx0 + stride;\
+\
+            /* Shortcut for the simple/common case of zero MV */\
+            if( !M32( mvs[i] ) )\
+            {\
+                CLIP_ADD( ref_costs[idx0], current[16] );\
+                continue;\
+            }\
+\
+            if( mbx < width-1 && mby < height-1 )\
+            {\
+                CLIP_ADD2( ref_costs+idx0, current+16 );\
+                CLIP_ADD2( ref_costs+idx2, current+32 );\
+            }\
+            else\
+            {\
+                /* Note: this takes advantage of unsigned representation to\
+                 * catch negative mbx/mby. */\
+                if( mby < height )\
+                {\
+                    if( mbx < width )\
+                        CLIP_ADD( ref_costs[idx0+0], current[16] );\
+                    if( mbx+1 < width )\
+                        CLIP_ADD( ref_costs[idx0+1], current[17] );\
+                }\
+                if( mby+1 < height )\
+                {\
+                    if( mbx < width )\
+                        CLIP_ADD( ref_costs[idx2+0], current[32] );\
+                    if( mbx+1 < width )\
+                        CLIP_ADD( ref_costs[idx2+1], current[33] );\
+                }\
+            }\
+        }\
+    }\
+}
+
+PROPAGATE_LIST(ssse3)
+PROPAGATE_LIST(avx)
+#undef CLIP_ADD
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/mc.h -> x264-snapshot-20140321-2245.tar.bz2/common/x86/mc.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc.h: x86 motion compensation
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/pixel-32.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/pixel-32.asm Changed

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* pixel-32.asm: x86_32 pixel metrics
 ;*****************************************************************************
-;* Copyright (C) 2003-2013 x264 project
+;* Copyright (C) 2003-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Laurent Aimar <fenrir@via.ecp.fr>
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/pixel-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/pixel-a.asm Changed

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* pixel.asm: x86 pixel metrics
 ;*****************************************************************************
-;* Copyright (C) 2003-2013 x264 project
+;* Copyright (C) 2003-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Holger Lubitz <holger@lubitz.org>
@@ -205,7 +205,7 @@
     mov    r4d, %%n
 %endif
     pxor    m0, m0
-.loop
+.loop:
     mova    m1, [r0]
     mova    m2, [r0+offset0_1]
     mova    m3, [r0+offset0_2]
@@ -1265,7 +1265,7 @@
 ; clobber: m3..m7
 ; out: %1 = satd
 %macro SATD_4x4_MMX 3
-    %xdefine %%n n%1
+    %xdefine %%n nn%1
     %assign offset %2*SIZEOF_PIXEL
     LOAD_DIFF m4, m3, none, [r0+     offset], [r2+     offset]
     LOAD_DIFF m5, m3, none, [r0+  r1+offset], [r2+  r3+offset]
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/pixel.h -> x264-snapshot-20140321-2245.tar.bz2/common/x86/pixel.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * pixel.h: x86 pixel metrics
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -47,7 +47,6 @@
 
 DECL_X1( sad, mmx2 )
 DECL_X1( sad, sse2 )
-DECL_X4( sad, sse2_misalign )
 DECL_X1( sad, sse3 )
 DECL_X1( sad, sse2_aligned )
 DECL_X1( sad, ssse3 )
@@ -57,6 +56,7 @@
 DECL_X4( sad, sse2 )
 DECL_X4( sad, sse3 )
 DECL_X4( sad, ssse3 )
+DECL_X4( sad, avx )
 DECL_X4( sad, avx2 )
 DECL_X1( ssd, mmx )
 DECL_X1( ssd, mmx2 )
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/predict-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/predict-a.asm Changed

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* predict-a.asm: x86 intra prediction
 ;*****************************************************************************
-;* Copyright (C) 2005-2013 x264 project
+;* Copyright (C) 2005-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Holger Lubitz <holger@lubitz.org>
@@ -31,7 +31,6 @@
 
 SECTION_RODATA 32
 
-pw_0to15:    dw 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
 pw_43210123: times 2 dw -3, -2, -1, 0, 1, 2, 3, 4
 pw_m3:       times 16 dw -3
 pw_m7:       times 16 dw -7
@@ -56,6 +55,7 @@
 cextern pw_16
 cextern pw_00ff
 cextern pw_pixel_max
+cextern pw_0to15
 
 %macro STORE8 1
     mova [r0+0*FDEC_STRIDEB], %1
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/predict-c.c -> x264-snapshot-20140321-2245.tar.bz2/common/x86/predict-c.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * predict-c.c: intra prediction
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/predict.h -> x264-snapshot-20140321-2245.tar.bz2/common/x86/predict.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * predict.h: x86 intra prediction
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/quant-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/quant-a.asm Changed

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* quant-a.asm: x86 quantization and level-run
 ;*****************************************************************************
-;* Copyright (C) 2005-2013 x264 project
+;* Copyright (C) 2005-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Jason Garrett-Glaser <darkshikari@gmail.com>
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/quant.h -> x264-snapshot-20140321-2245.tar.bz2/common/x86/quant.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * quant.h: x86 quantization and level-run
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/sad-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/sad-a.asm Changed

@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* sad-a.asm: x86 sad functions
 ;*****************************************************************************
-;* Copyright (C) 2003-2013 x264 project
+;* Copyright (C) 2003-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Jason Garrett-Glaser <darkshikari@gmail.com>
@@ -32,7 +32,6 @@
 SECTION_RODATA 32
 
 pb_shuf8x8c2: times 2 db 0,0,0,0,8,8,8,8,-1,-1,-1,-1,-1,-1,-1,-1
-deinterleave_sadx4: dd 0,4,2,6
 hpred_shuf: db 0,0,2,2,8,8,10,10,1,1,3,3,9,9,11,11
 
 SECTION .text
@@ -1009,62 +1008,56 @@
 ;=============================================================================
 
 %macro SAD_X3_START_1x16P_SSE2 0
-%if cpuflag(misalign)
-    mova   xmm2, [r0]
-    movu   xmm0, [r1]
-    movu   xmm1, [r2]
-    psadbw xmm0, xmm2
-    psadbw xmm1, xmm2
-    psadbw xmm2, [r3]
+    mova     m2, [r0]
+%if cpuflag(avx)
+    psadbw   m0, m2, [r1]
+    psadbw   m1, m2, [r2]
+    psadbw   m2, [r3]
 %else
-    mova   xmm3, [r0]
-    movu   xmm0, [r1]
-    movu   xmm1, [r2]
-    movu   xmm2, [r3]
-    psadbw xmm0, xmm3
-    psadbw xmm1, xmm3
-    psadbw xmm2, xmm3
+    movu     m0, [r1]
+    movu     m1, [r2]
+    movu     m3, [r3]
+    psadbw   m0, m2
+    psadbw   m1, m2
+    psadbw   m2, m3
 %endif
 %endmacro
 
 %macro SAD_X3_1x16P_SSE2 2
-%if cpuflag(misalign)
-    mova   xmm3, [r0+%1]
-    movu   xmm4, [r1+%2]
-    movu   xmm5, [r2+%2]
-    psadbw xmm4, xmm3
-    psadbw xmm5, xmm3
-    psadbw xmm3, [r3+%2]
-    paddw  xmm0, xmm4
-    paddw  xmm1, xmm5
-    paddw  xmm2, xmm3
+    mova     m3, [r0+%1]
+%if cpuflag(avx)
+    psadbw   m4, m3, [r1+%2]
+    psadbw   m5, m3, [r2+%2]
+    psadbw   m3, [r3+%2]
 %else
-    mova   xmm3, [r0+%1]
-    movu   xmm4, [r1+%2]
-    movu   xmm5, [r2+%2]
-    movu   xmm6, [r3+%2]
-    psadbw xmm4, xmm3
-    psadbw xmm5, xmm3
-    psadbw xmm6, xmm3
-    paddw  xmm0, xmm4
-    paddw  xmm1, xmm5
-    paddw  xmm2, xmm6
+    movu     m4, [r1+%2]
+    movu     m5, [r2+%2]
+    movu     m6, [r3+%2]
+    psadbw   m4, m3
+    psadbw   m5, m3
+    psadbw   m3, m6
 %endif
+    paddw    m0, m4
+    paddw    m1, m5
+    paddw    m2, m3
 %endmacro
 
+%if ARCH_X86_64
+    DECLARE_REG_TMP 6
+%else
+    DECLARE_REG_TMP 5
+%endif
+
 %macro SAD_X3_4x16P_SSE2 2
 %if %1==0
-%if UNIX64
-    mov  r6, r5
-%endif
-    lea  r5, [r4*3]
+    lea  t0, [r4*3]
     SAD_X3_START_1x16P_SSE2
 %else
     SAD_X3_1x16P_SSE2 FENC_STRIDE*(0+(%1&1)*4), r4*0
 %endif
     SAD_X3_1x16P_SSE2 FENC_STRIDE*(1+(%1&1)*4), r4*1
     SAD_X3_1x16P_SSE2 FENC_STRIDE*(2+(%1&1)*4), r4*2
-    SAD_X3_1x16P_SSE2 FENC_STRIDE*(3+(%1&1)*4), r5
+    SAD_X3_1x16P_SSE2 FENC_STRIDE*(3+(%1&1)*4), t0
 %if %1 != %2-1
 %if (%1&1) != 0
     add  r0, 8*FENC_STRIDE
@@ -1076,156 +1069,117 @@
 %endmacro
 
 %macro SAD_X3_START_2x8P_SSE2 0
-    movq    xmm7, [r0]
-    movq    xmm0, [r1]
-    movq    xmm1, [r2]
-    movq    xmm2, [r3]
-    movhps  xmm7, [r0+FENC_STRIDE]
-    movhps  xmm0, [r1+r4]
-    movhps  xmm1, [r2+r4]
-    movhps  xmm2, [r3+r4]
-    psadbw  xmm0, xmm7
-    psadbw  xmm1, xmm7
-    psadbw  xmm2, xmm7
+    movq     m3, [r0]
+    movq     m0, [r1]
+    movq     m1, [r2]
+    movq     m2, [r3]
+    movhps   m3, [r0+FENC_STRIDE]
+    movhps   m0, [r1+r4]
+    movhps   m1, [r2+r4]
+    movhps   m2, [r3+r4]
+    psadbw   m0, m3
+    psadbw   m1, m3
+    psadbw   m2, m3
 %endmacro
 
 %macro SAD_X3_2x8P_SSE2 4
-    movq    xmm7, [r0+%1]
-    movq    xmm3, [r1+%2]
-    movq    xmm4, [r2+%2]
-    movq    xmm5, [r3+%2]
-    movhps  xmm7, [r0+%3]
-    movhps  xmm3, [r1+%4]
-    movhps  xmm4, [r2+%4]
-    movhps  xmm5, [r3+%4]
-    psadbw  xmm3, xmm7
-    psadbw  xmm4, xmm7
-    psadbw  xmm5, xmm7
-    paddw   xmm0, xmm3
-    paddw   xmm1, xmm4
-    paddw   xmm2, xmm5
+    movq     m6, [r0+%1]
+    movq     m3, [r1+%2]
+    movq     m4, [r2+%2]
+    movq     m5, [r3+%2]
+    movhps   m6, [r0+%3]
+    movhps   m3, [r1+%4]
+    movhps   m4, [r2+%4]
+    movhps   m5, [r3+%4]
+    psadbw   m3, m6
+    psadbw   m4, m6
+    psadbw   m5, m6
+    paddw    m0, m3
+    paddw    m1, m4
+    paddw    m2, m5
 %endmacro
 
 %macro SAD_X4_START_2x8P_SSE2 0
-    movq    xmm7, [r0]
-    movq    xmm0, [r1]
-    movq    xmm1, [r2]
-    movq    xmm2, [r3]
-    movq    xmm3, [r4]
-    movhps  xmm7, [r0+FENC_STRIDE]
-    movhps  xmm0, [r1+r5]
-    movhps  xmm1, [r2+r5]
-    movhps  xmm2, [r3+r5]
-    movhps  xmm3, [r4+r5]
-    psadbw  xmm0, xmm7
-    psadbw  xmm1, xmm7
-    psadbw  xmm2, xmm7
-    psadbw  xmm3, xmm7
+    movq     m4, [r0]
+    movq     m0, [r1]
+    movq     m1, [r2]
+    movq     m2, [r3]
+    movq     m3, [r4]
+    movhps   m4, [r0+FENC_STRIDE]
+    movhps   m0, [r1+r5]
+    movhps   m1, [r2+r5]
+    movhps   m2, [r3+r5]
+    movhps   m3, [r4+r5]
+    psadbw   m0, m4
+    psadbw   m1, m4
+    psadbw   m2, m4

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* sad-a.asm: x86 sad functions
 ;*****************************************************************************
-;* Copyright (C) 2003-2013 x264 project
+;* Copyright (C) 2003-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Jason Garrett-Glaser <darkshikari@gmail.com>
@@ -32,7 +32,6 @@
 SECTION_RODATA 32
 
 pb_shuf8x8c2: times 2 db 0,0,0,0,8,8,8,8,-1,-1,-1,-1,-1,-1,-1,-1
-deinterleave_sadx4: dd 0,4,2,6
 hpred_shuf: db 0,0,2,2,8,8,10,10,1,1,3,3,9,9,11,11
 
 SECTION .text
@@ -1009,62 +1008,56 @@
 ;=============================================================================
 
 %macro SAD_X3_START_1x16P_SSE2 0
-%if cpuflag(misalign)
-    mova   xmm2, [r0]
-    movu   xmm0, [r1]
-    movu   xmm1, [r2]
-    psadbw xmm0, xmm2
-    psadbw xmm1, xmm2
-    psadbw xmm2, [r3]
+    mova     m2, [r0]
+%if cpuflag(avx)
+    psadbw   m0, m2, [r1]
+    psadbw   m1, m2, [r2]
+    psadbw   m2, [r3]
 %else
-    mova   xmm3, [r0]
-    movu   xmm0, [r1]
-    movu   xmm1, [r2]
-    movu   xmm2, [r3]
-    psadbw xmm0, xmm3
-    psadbw xmm1, xmm3
-    psadbw xmm2, xmm3
+    movu     m0, [r1]
+    movu     m1, [r2]
+    movu     m3, [r3]
+    psadbw   m0, m2
+    psadbw   m1, m2
+    psadbw   m2, m3
 %endif
 %endmacro
 
 %macro SAD_X3_1x16P_SSE2 2
-%if cpuflag(misalign)
-    mova   xmm3, [r0+%1]
-    movu   xmm4, [r1+%2]
-    movu   xmm5, [r2+%2]
-    psadbw xmm4, xmm3
-    psadbw xmm5, xmm3
-    psadbw xmm3, [r3+%2]
-    paddw  xmm0, xmm4
-    paddw  xmm1, xmm5
-    paddw  xmm2, xmm3
+    mova     m3, [r0+%1]
+%if cpuflag(avx)
+    psadbw   m4, m3, [r1+%2]
+    psadbw   m5, m3, [r2+%2]
+    psadbw   m3, [r3+%2]
 %else
-    mova   xmm3, [r0+%1]
-    movu   xmm4, [r1+%2]
-    movu   xmm5, [r2+%2]
-    movu   xmm6, [r3+%2]
-    psadbw xmm4, xmm3
-    psadbw xmm5, xmm3
-    psadbw xmm6, xmm3
-    paddw  xmm0, xmm4
-    paddw  xmm1, xmm5
-    paddw  xmm2, xmm6
+    movu     m4, [r1+%2]
+    movu     m5, [r2+%2]
+    movu     m6, [r3+%2]
+    psadbw   m4, m3
+    psadbw   m5, m3
+    psadbw   m3, m6
 %endif
+    paddw    m0, m4
+    paddw    m1, m5
+    paddw    m2, m3
 %endmacro
 
+%if ARCH_X86_64
+    DECLARE_REG_TMP 6
+%else
+    DECLARE_REG_TMP 5
+%endif
+
 %macro SAD_X3_4x16P_SSE2 2
 %if %1==0
-%if UNIX64
-    mov  r6, r5
-%endif
-    lea  r5, [r4*3]
+    lea  t0, [r4*3]
     SAD_X3_START_1x16P_SSE2
 %else
     SAD_X3_1x16P_SSE2 FENC_STRIDE*(0+(%1&1)*4), r4*0
 %endif
     SAD_X3_1x16P_SSE2 FENC_STRIDE*(1+(%1&1)*4), r4*1
     SAD_X3_1x16P_SSE2 FENC_STRIDE*(2+(%1&1)*4), r4*2
-    SAD_X3_1x16P_SSE2 FENC_STRIDE*(3+(%1&1)*4), r5
+    SAD_X3_1x16P_SSE2 FENC_STRIDE*(3+(%1&1)*4), t0
 %if %1 != %2-1
 %if (%1&1) != 0
     add  r0, 8*FENC_STRIDE
@@ -1076,156 +1069,117 @@
 %endmacro
 
 %macro SAD_X3_START_2x8P_SSE2 0
-    movq    xmm7, [r0]
-    movq    xmm0, [r1]
-    movq    xmm1, [r2]
-    movq    xmm2, [r3]
-    movhps  xmm7, [r0+FENC_STRIDE]
-    movhps  xmm0, [r1+r4]
-    movhps  xmm1, [r2+r4]
-    movhps  xmm2, [r3+r4]
-    psadbw  xmm0, xmm7
-    psadbw  xmm1, xmm7
-    psadbw  xmm2, xmm7
+    movq     m3, [r0]
+    movq     m0, [r1]
+    movq     m1, [r2]
+    movq     m2, [r3]
+    movhps   m3, [r0+FENC_STRIDE]
+    movhps   m0, [r1+r4]
+    movhps   m1, [r2+r4]
+    movhps   m2, [r3+r4]
+    psadbw   m0, m3
+    psadbw   m1, m3
+    psadbw   m2, m3
 %endmacro
 
 %macro SAD_X3_2x8P_SSE2 4
-    movq    xmm7, [r0+%1]
-    movq    xmm3, [r1+%2]
-    movq    xmm4, [r2+%2]
-    movq    xmm5, [r3+%2]
-    movhps  xmm7, [r0+%3]
-    movhps  xmm3, [r1+%4]
-    movhps  xmm4, [r2+%4]
-    movhps  xmm5, [r3+%4]
-    psadbw  xmm3, xmm7
-    psadbw  xmm4, xmm7
-    psadbw  xmm5, xmm7
-    paddw   xmm0, xmm3
-    paddw   xmm1, xmm4
-    paddw   xmm2, xmm5
+    movq     m6, [r0+%1]
+    movq     m3, [r1+%2]
+    movq     m4, [r2+%2]
+    movq     m5, [r3+%2]
+    movhps   m6, [r0+%3]
+    movhps   m3, [r1+%4]
+    movhps   m4, [r2+%4]
+    movhps   m5, [r3+%4]
+    psadbw   m3, m6
+    psadbw   m4, m6
+    psadbw   m5, m6
+    paddw    m0, m3
+    paddw    m1, m4
+    paddw    m2, m5
 %endmacro
 
 %macro SAD_X4_START_2x8P_SSE2 0
-    movq    xmm7, [r0]
-    movq    xmm0, [r1]
-    movq    xmm1, [r2]
-    movq    xmm2, [r3]
-    movq    xmm3, [r4]
-    movhps  xmm7, [r0+FENC_STRIDE]
-    movhps  xmm0, [r1+r5]
-    movhps  xmm1, [r2+r5]
-    movhps  xmm2, [r3+r5]
-    movhps  xmm3, [r4+r5]
-    psadbw  xmm0, xmm7
-    psadbw  xmm1, xmm7
-    psadbw  xmm2, xmm7
-    psadbw  xmm3, xmm7
+    movq     m4, [r0]
+    movq     m0, [r1]
+    movq     m1, [r2]
+    movq     m2, [r3]
+    movq     m3, [r4]
+    movhps   m4, [r0+FENC_STRIDE]
+    movhps   m0, [r1+r5]
+    movhps   m1, [r2+r5]
+    movhps   m2, [r3+r5]
+    movhps   m3, [r4+r5]
+    psadbw   m0, m4
+    psadbw   m1, m4
+    psadbw   m2, m4
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/sad16-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/sad16-a.asm Changed

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* sad16-a.asm: x86 high depth sad functions
 ;*****************************************************************************
-;* Copyright (C) 2010-2013 x264 project
+;* Copyright (C) 2010-2014 x264 project
 ;*
 ;* Authors: Oskar Arvidsson <oskar@irock.se>
 ;*          Henrik Gramner <henrik@gramner.com>
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/trellis-64.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/trellis-64.asm Changed

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* trellis-64.asm: x86_64 trellis quantization
 ;*****************************************************************************
-;* Copyright (C) 2012-2013 x264 project
+;* Copyright (C) 2012-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/util.h -> x264-snapshot-20140321-2245.tar.bz2/common/x86/util.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * util.h: x86 inline asm
  *****************************************************************************
- * Copyright (C) 2008-2013 x264 project
+ * Copyright (C) 2008-2014 x264 project
  *
  * Authors: Jason Garrett-Glaser <darkshikari@gmail.com>
  *          Loren Merritt <lorenm@u.washington.edu>
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/x86inc.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/x86inc.asm Changed

@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* x86inc.asm: x264asm abstraction layer
 ;*****************************************************************************
-;* Copyright (C) 2005-2013 x264 project
+;* Copyright (C) 2005-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Anton Mitrofanov <BugMaster@narod.ru>
@@ -42,6 +42,14 @@
     %define public_prefix private_prefix
 %endif
 
+%ifndef STACK_ALIGNMENT
+    %if ARCH_X86_64
+        %define STACK_ALIGNMENT 16
+    %else
+        %define STACK_ALIGNMENT 4
+    %endif
+%endif
+
 %define WIN64  0
 %define UNIX64 0
 %if ARCH_X86_64
@@ -49,6 +57,8 @@
         %define WIN64  1
     %elifidn __OUTPUT_FORMAT__,win64
         %define WIN64  1
+    %elifidn __OUTPUT_FORMAT__,x64
+        %define WIN64  1
     %else
         %define UNIX64 1
     %endif
@@ -92,8 +102,9 @@
 ; %1 = number of arguments. loads them from stack if needed.
 ; %2 = number of registers used. pushes callee-saved regs if needed.
 ; %3 = number of xmm registers used. pushes callee-saved xmm regs if needed.
-; %4 = (optional) stack size to be allocated. If not aligned (x86-32 ICC 10.x,
-;      MSVC or YMM), the stack will be manually aligned (to 16 or 32 bytes),
+; %4 = (optional) stack size to be allocated. The stack will be aligned before
+;      allocating the specified stack size. If the required stack alignment is
+;      larger than the known stack alignment the stack will be manually aligned
 ;      and an extra register will be allocated to hold the original stack
 ;      pointer (to not invalidate r0m etc.). To prevent the use of an extra
 ;      register as stack pointer, request a negative stack size.
@@ -101,8 +112,10 @@
 ; PROLOGUE can also be invoked by adding the same options to cglobal
 
 ; e.g.
-; cglobal foo, 2,3,0, dst, src, tmp
-; declares a function (foo), taking two args (dst and src) and one local variable (tmp)
+; cglobal foo, 2,3,7,0x40, dst, src, tmp
+; declares a function (foo) that automatically loads two arguments (dst and
+; src) into registers, uses one additional register (tmp) plus 7 vector
+; registers (m0-m6) and allocates 0x40 bytes of stack space.
 
 ; TODO Some functions can use some args directly from the stack. If they're the
 ; last args then you can just not declare them, but if they're in the middle
@@ -302,26 +315,28 @@
     %assign n_arg_names %0
 %endmacro
 
+%define required_stack_alignment ((mmsize + 15) & ~15)
+
 %macro ALLOC_STACK 1-2 0 ; stack_size, n_xmm_regs (for win64 only)
     %ifnum %1
         %if %1 != 0
-            %assign %%stack_alignment ((mmsize + 15) & ~15)
+            %assign %%pad 0
             %assign stack_size %1
             %if stack_size < 0
                 %assign stack_size -stack_size
             %endif
-            %assign stack_size_padded stack_size
             %if WIN64
-                %assign stack_size_padded stack_size_padded + 32 ; reserve 32 bytes for shadow space
+                %assign %%pad %%pad + 32 ; shadow space
                 %if mmsize != 8
                     %assign xmm_regs_used %2
                     %if xmm_regs_used > 8
-                        %assign stack_size_padded stack_size_padded + (xmm_regs_used-8)*16
+                        %assign %%pad %%pad + (xmm_regs_used-8)*16 ; callee-saved xmm registers
                     %endif
                 %endif
             %endif
-            %if mmsize <= 16 && HAVE_ALIGNED_STACK
-                %assign stack_size_padded stack_size_padded + %%stack_alignment - gprsize - (stack_offset & (%%stack_alignment - 1))
+            %if required_stack_alignment <= STACK_ALIGNMENT
+                ; maintain the current stack alignment
+                %assign stack_size_padded stack_size + %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1))
                 SUB rsp, stack_size_padded
             %else
                 %assign %%reg_num (regs_used - 1)
@@ -330,17 +345,17 @@
                 ; it, i.e. in [rsp+stack_size_padded], so we can restore the
                 ; stack in a single instruction (i.e. mov rsp, rstk or mov
                 ; rsp, [rsp+stack_size_padded])
-                mov  rstk, rsp
                 %if %1 < 0 ; need to store rsp on stack
-                    sub  rsp, gprsize+stack_size_padded
-                    and  rsp, ~(%%stack_alignment-1)
-                    %xdefine rstkm [rsp+stack_size_padded]
-                    mov rstkm, rstk
+                    %xdefine rstkm [rsp + stack_size + %%pad]
+                    %assign %%pad %%pad + gprsize
                 %else ; can keep rsp in rstk during whole function
-                    sub  rsp, stack_size_padded
-                    and  rsp, ~(%%stack_alignment-1)
                     %xdefine rstkm rstk
                 %endif
+                %assign stack_size_padded stack_size + ((%%pad + required_stack_alignment-1) & ~(required_stack_alignment-1))
+                mov rstk, rsp
+                and rsp, ~(required_stack_alignment-1)
+                sub rsp, stack_size_padded
+                movifnidn rstkm, rstk
             %endif
             WIN64_PUSH_XMM
         %endif
@@ -349,7 +364,7 @@
 
 %macro SETUP_STACK_POINTER 1
     %ifnum %1
-        %if %1 != 0 && (HAVE_ALIGNED_STACK == 0 || mmsize == 32)
+        %if %1 != 0 && required_stack_alignment > STACK_ALIGNMENT
             %if %1 > 0
                 %assign regs_used (regs_used + 1)
             %elif ARCH_X86_64 && regs_used == num_args && num_args <= 4 + UNIX64 * 2
@@ -423,7 +438,9 @@
     %assign xmm_regs_used %1
     ASSERT xmm_regs_used <= 16
     %if xmm_regs_used > 8
-        %assign stack_size_padded (xmm_regs_used-8)*16 + (~stack_offset&8) + 32
+        ; Allocate stack space for callee-saved xmm registers plus shadow space and align the stack.
+        %assign %%pad (xmm_regs_used-8)*16 + 32
+        %assign stack_size_padded %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1))
         SUB rsp, stack_size_padded
     %endif
     WIN64_PUSH_XMM
@@ -439,7 +456,7 @@
         %endrep
     %endif
     %if stack_size_padded > 0
-        %if stack_size > 0 && (mmsize == 32 || HAVE_ALIGNED_STACK == 0)
+        %if stack_size > 0 && required_stack_alignment > STACK_ALIGNMENT
             mov rsp, rstkm
         %else
             add %1, stack_size_padded
@@ -505,7 +522,7 @@
 
 %macro RET 0
 %if stack_size_padded > 0
-%if mmsize == 32 || HAVE_ALIGNED_STACK == 0
+%if required_stack_alignment > STACK_ALIGNMENT
     mov rsp, rstkm
 %else
     add rsp, stack_size_padded
@@ -561,7 +578,7 @@
 
 %macro RET 0
 %if stack_size_padded > 0
-%if mmsize == 32 || HAVE_ALIGNED_STACK == 0
+%if required_stack_alignment > STACK_ALIGNMENT
     mov rsp, rstkm
 %else
     add rsp, stack_size_padded
@@ -731,11 +748,10 @@
 %assign cpuflags_cache64  (1<<17)
 %assign cpuflags_slowctz  (1<<18)
 %assign cpuflags_lzcnt    (1<<19)
-%assign cpuflags_misalign (1<<20)
-%assign cpuflags_aligned  (1<<21) ; not a cpu feature, but a function variant
-%assign cpuflags_atom     (1<<22)
-%assign cpuflags_bmi1     (1<<23)|cpuflags_lzcnt
-%assign cpuflags_bmi2     (1<<24)|cpuflags_bmi1
+%assign cpuflags_aligned  (1<<20) ; not a cpu feature, but a function variant
+%assign cpuflags_atom     (1<<21)
+%assign cpuflags_bmi1     (1<<22)|cpuflags_lzcnt
+%assign cpuflags_bmi2     (1<<23)|cpuflags_bmi1
 
 %define    cpuflag(x) ((cpuflags & (cpuflags_ %+ x)) == (cpuflags_ %+ x))
 %define notcpuflag(x) ((cpuflags & (cpuflags_ %+ x)) != (cpuflags_ %+ x))
@@ -777,9 +793,9 @@
 %endmacro
 
 ; Merge mmx and sse*
-; m# is a simd regsiter of the currently selected size
-; xm# is the corresponding xmmreg (if selcted xmm or ymm size), or mmreg (if selected mmx)
-; ym# is the corresponding ymmreg (if selcted xmm or ymm size), or mmreg (if selected mmx)
+; m# is a simd register of the currently selected size
+; xm# is the corresponding xmm register if mmsize >= 16, otherwise the same as m#
+; ym# is the corresponding ymm register if mmsize >= 32, otherwise the same as m#
 ; (All 3 remain in sync through SWAP.)
 
 %macro CAT_XDEFINE 3
@@ -802,12 +818,12 @@
     %assign %%i 0
     %rep 8
     CAT_XDEFINE m, %%i, mm %+ %%i
-    CAT_XDEFINE nmm, %%i, %%i
+    CAT_XDEFINE nnmm, %%i, %%i

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* x86inc.asm: x264asm abstraction layer
 ;*****************************************************************************
-;* Copyright (C) 2005-2013 x264 project
+;* Copyright (C) 2005-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Anton Mitrofanov <BugMaster@narod.ru>
@@ -42,6 +42,14 @@
     %define public_prefix private_prefix
 %endif
 
+%ifndef STACK_ALIGNMENT
+    %if ARCH_X86_64
+        %define STACK_ALIGNMENT 16
+    %else
+        %define STACK_ALIGNMENT 4
+    %endif
+%endif
+
 %define WIN64  0
 %define UNIX64 0
 %if ARCH_X86_64
@@ -49,6 +57,8 @@
         %define WIN64  1
     %elifidn __OUTPUT_FORMAT__,win64
         %define WIN64  1
+    %elifidn __OUTPUT_FORMAT__,x64
+        %define WIN64  1
     %else
         %define UNIX64 1
     %endif
@@ -92,8 +102,9 @@
 ; %1 = number of arguments. loads them from stack if needed.
 ; %2 = number of registers used. pushes callee-saved regs if needed.
 ; %3 = number of xmm registers used. pushes callee-saved xmm regs if needed.
-; %4 = (optional) stack size to be allocated. If not aligned (x86-32 ICC 10.x,
-;      MSVC or YMM), the stack will be manually aligned (to 16 or 32 bytes),
+; %4 = (optional) stack size to be allocated. The stack will be aligned before
+;      allocating the specified stack size. If the required stack alignment is
+;      larger than the known stack alignment the stack will be manually aligned
 ;      and an extra register will be allocated to hold the original stack
 ;      pointer (to not invalidate r0m etc.). To prevent the use of an extra
 ;      register as stack pointer, request a negative stack size.
@@ -101,8 +112,10 @@
 ; PROLOGUE can also be invoked by adding the same options to cglobal
 
 ; e.g.
-; cglobal foo, 2,3,0, dst, src, tmp
-; declares a function (foo), taking two args (dst and src) and one local variable (tmp)
+; cglobal foo, 2,3,7,0x40, dst, src, tmp
+; declares a function (foo) that automatically loads two arguments (dst and
+; src) into registers, uses one additional register (tmp) plus 7 vector
+; registers (m0-m6) and allocates 0x40 bytes of stack space.
 
 ; TODO Some functions can use some args directly from the stack. If they're the
 ; last args then you can just not declare them, but if they're in the middle
@@ -302,26 +315,28 @@
     %assign n_arg_names %0
 %endmacro
 
+%define required_stack_alignment ((mmsize + 15) & ~15)
+
 %macro ALLOC_STACK 1-2 0 ; stack_size, n_xmm_regs (for win64 only)
     %ifnum %1
         %if %1 != 0
-            %assign %%stack_alignment ((mmsize + 15) & ~15)
+            %assign %%pad 0
             %assign stack_size %1
             %if stack_size < 0
                 %assign stack_size -stack_size
             %endif
-            %assign stack_size_padded stack_size
             %if WIN64
-                %assign stack_size_padded stack_size_padded + 32 ; reserve 32 bytes for shadow space
+                %assign %%pad %%pad + 32 ; shadow space
                 %if mmsize != 8
                     %assign xmm_regs_used %2
                     %if xmm_regs_used > 8
-                        %assign stack_size_padded stack_size_padded + (xmm_regs_used-8)*16
+                        %assign %%pad %%pad + (xmm_regs_used-8)*16 ; callee-saved xmm registers
                     %endif
                 %endif
             %endif
-            %if mmsize <= 16 && HAVE_ALIGNED_STACK
-                %assign stack_size_padded stack_size_padded + %%stack_alignment - gprsize - (stack_offset & (%%stack_alignment - 1))
+            %if required_stack_alignment <= STACK_ALIGNMENT
+                ; maintain the current stack alignment
+                %assign stack_size_padded stack_size + %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1))
                 SUB rsp, stack_size_padded
             %else
                 %assign %%reg_num (regs_used - 1)
@@ -330,17 +345,17 @@
                 ; it, i.e. in [rsp+stack_size_padded], so we can restore the
                 ; stack in a single instruction (i.e. mov rsp, rstk or mov
                 ; rsp, [rsp+stack_size_padded])
-                mov  rstk, rsp
                 %if %1 < 0 ; need to store rsp on stack
-                    sub  rsp, gprsize+stack_size_padded
-                    and  rsp, ~(%%stack_alignment-1)
-                    %xdefine rstkm [rsp+stack_size_padded]
-                    mov rstkm, rstk
+                    %xdefine rstkm [rsp + stack_size + %%pad]
+                    %assign %%pad %%pad + gprsize
                 %else ; can keep rsp in rstk during whole function
-                    sub  rsp, stack_size_padded
-                    and  rsp, ~(%%stack_alignment-1)
                     %xdefine rstkm rstk
                 %endif
+                %assign stack_size_padded stack_size + ((%%pad + required_stack_alignment-1) & ~(required_stack_alignment-1))
+                mov rstk, rsp
+                and rsp, ~(required_stack_alignment-1)
+                sub rsp, stack_size_padded
+                movifnidn rstkm, rstk
             %endif
             WIN64_PUSH_XMM
         %endif
@@ -349,7 +364,7 @@
 
 %macro SETUP_STACK_POINTER 1
     %ifnum %1
-        %if %1 != 0 && (HAVE_ALIGNED_STACK == 0 || mmsize == 32)
+        %if %1 != 0 && required_stack_alignment > STACK_ALIGNMENT
             %if %1 > 0
                 %assign regs_used (regs_used + 1)
             %elif ARCH_X86_64 && regs_used == num_args && num_args <= 4 + UNIX64 * 2
@@ -423,7 +438,9 @@
     %assign xmm_regs_used %1
     ASSERT xmm_regs_used <= 16
     %if xmm_regs_used > 8
-        %assign stack_size_padded (xmm_regs_used-8)*16 + (~stack_offset&8) + 32
+        ; Allocate stack space for callee-saved xmm registers plus shadow space and align the stack.
+        %assign %%pad (xmm_regs_used-8)*16 + 32
+        %assign stack_size_padded %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1))
         SUB rsp, stack_size_padded
     %endif
     WIN64_PUSH_XMM
@@ -439,7 +456,7 @@
         %endrep
     %endif
     %if stack_size_padded > 0
-        %if stack_size > 0 && (mmsize == 32 || HAVE_ALIGNED_STACK == 0)
+        %if stack_size > 0 && required_stack_alignment > STACK_ALIGNMENT
             mov rsp, rstkm
         %else
             add %1, stack_size_padded
@@ -505,7 +522,7 @@
 
 %macro RET 0
 %if stack_size_padded > 0
-%if mmsize == 32 || HAVE_ALIGNED_STACK == 0
+%if required_stack_alignment > STACK_ALIGNMENT
     mov rsp, rstkm
 %else
     add rsp, stack_size_padded
@@ -561,7 +578,7 @@
 
 %macro RET 0
 %if stack_size_padded > 0
-%if mmsize == 32 || HAVE_ALIGNED_STACK == 0
+%if required_stack_alignment > STACK_ALIGNMENT
     mov rsp, rstkm
 %else
     add rsp, stack_size_padded
@@ -731,11 +748,10 @@
 %assign cpuflags_cache64  (1<<17)
 %assign cpuflags_slowctz  (1<<18)
 %assign cpuflags_lzcnt    (1<<19)
-%assign cpuflags_misalign (1<<20)
-%assign cpuflags_aligned  (1<<21) ; not a cpu feature, but a function variant
-%assign cpuflags_atom     (1<<22)
-%assign cpuflags_bmi1     (1<<23)|cpuflags_lzcnt
-%assign cpuflags_bmi2     (1<<24)|cpuflags_bmi1
+%assign cpuflags_aligned  (1<<20) ; not a cpu feature, but a function variant
+%assign cpuflags_atom     (1<<21)
+%assign cpuflags_bmi1     (1<<22)|cpuflags_lzcnt
+%assign cpuflags_bmi2     (1<<23)|cpuflags_bmi1
 
 %define    cpuflag(x) ((cpuflags & (cpuflags_ %+ x)) == (cpuflags_ %+ x))
 %define notcpuflag(x) ((cpuflags & (cpuflags_ %+ x)) != (cpuflags_ %+ x))
@@ -777,9 +793,9 @@
 %endmacro
 
 ; Merge mmx and sse*
-; m# is a simd regsiter of the currently selected size
-; xm# is the corresponding xmmreg (if selcted xmm or ymm size), or mmreg (if selected mmx)
-; ym# is the corresponding ymmreg (if selcted xmm or ymm size), or mmreg (if selected mmx)
+; m# is a simd register of the currently selected size
+; xm# is the corresponding xmm register if mmsize >= 16, otherwise the same as m#
+; ym# is the corresponding ymm register if mmsize >= 32, otherwise the same as m#
 ; (All 3 remain in sync through SWAP.)
 
 %macro CAT_XDEFINE 3
@@ -802,12 +818,12 @@
     %assign %%i 0
     %rep 8
     CAT_XDEFINE m, %%i, mm %+ %%i
-    CAT_XDEFINE nmm, %%i, %%i
+    CAT_XDEFINE nnmm, %%i, %%i
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/x86util.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/x86util.asm Changed

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* x86util.asm: x86 utility macros
 ;*****************************************************************************
-;* Copyright (C) 2008-2013 x264 project
+;* Copyright (C) 2008-2014 x264 project
 ;*
 ;* Authors: Holger Lubitz <holger@lubitz.org>
 ;*          Loren Merritt <lorenm@u.washington.edu>
​

x264-snapshot-20130723-2245.tar.bz2/configure -> x264-snapshot-20140321-2245.tar.bz2/configure Changed

@@ -30,7 +30,6 @@
   --disable-thread         disable multithreaded encoding
   --enable-win32thread     use win32threads (windows only)
   --disable-interlaced     disable interlaced encoding support
-  --enable-visualize       enable visualization (X11 only)
   --bit-depth=BIT_DEPTH    set output bit depth (8-10) [8]
   --chroma-format=FORMAT   output chroma format (420, 422, 444, all) [all]
 
@@ -52,6 +51,7 @@
   --disable-lavf           disable libavformat support
   --disable-ffms           disable ffmpegsource support
   --disable-gpac           disable gpac support
+  --disable-lsmash         disable lsmash support
 
 EOF
 exit 1
@@ -264,6 +264,8 @@
 lavf="auto"
 ffms="auto"
 gpac="auto"
+lsmash="auto"
+mp4="no"
 gpl="yes"
 thread="auto"
 swscale="auto"
@@ -273,7 +275,6 @@
 gprof="no"
 strip="no"
 pic="no"
-vis="no"
 bit_depth="8"
 chroma_format="all"
 compiler="GNU"
@@ -290,7 +291,8 @@
 EXE=""
 
 # list of all preprocessor HAVE values we can define
-CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F VISUALIZE SWSCALE LAVF FFMS GPAC GF_MALLOC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL"
+CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F SWSCALE \
+             LAVF FFMS GPAC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL THP LSMASH"
 
 # parse options
 
@@ -342,6 +344,9 @@
         --disable-gpac)
             gpac="no"
             ;;
+        --disable-lsmash)
+            lsmash="no"
+            ;;
         --disable-gpl)
             gpl="no"
             ;;
@@ -380,9 +385,6 @@
         --enable-pic)
             pic="yes"
             ;;
-        --enable-visualize)
-            vis="yes"
-            ;;
         --host=*)
             host="$optarg"
             ;;
@@ -423,6 +425,7 @@
 AR="${AR-${cross_prefix}ar}"
 RANLIB="${RANLIB-${cross_prefix}ranlib}"
 STRIP="${STRIP-${cross_prefix}strip}"
+INSTALL="${INSTALL-install}"
 
 if [ "x$host" = x ]; then
     host=`${SRCPATH}/config.guess`
@@ -503,12 +506,13 @@
             CFLAGS="$CFLAGS -mno-cygwin"
             LDFLAGS="$LDFLAGS -mno-cygwin"
         fi
-        if cpp_check "" "" "defined(__CYGWIN32__)" ; then
+        if cpp_check "" "" "defined(__CYGWIN__)" ; then
             define HAVE_MALLOC_H
             SYS="CYGWIN"
         else
             SYS="WINDOWS"
             DEVNULL="NUL"
+            LDFLAGSCLI="$LDFLAGSCLI -lshell32"
             RC="${RC-${cross_prefix}windres}"
         fi
         ;;
@@ -516,6 +520,7 @@
         SYS="WINDOWS"
         EXE=".exe"
         DEVNULL="NUL"
+        LDFLAGSCLI="$LDFLAGSCLI -lshell32"
         [ $compiler = ICL ] && RC="${RC-rc}" || RC="${RC-${cross_prefix}windres}"
         ;;
     sunos*|solaris*)
@@ -527,6 +532,15 @@
         else
             LDFLAGS="$LDFLAGS /usr/lib/values-xpg6.o"
         fi
+        if test -x /usr/ucb/install ; then
+            INSTALL=/usr/ucb/install
+        elif test -x /usr/bin/ginstall ; then
+            # OpenSolaris
+            INSTALL=/usr/bin/ginstall
+        elif test -x /usr/gnu/bin/install ; then
+            # OpenSolaris
+            INSTALL=/usr/gnu/bin/install
+        fi
         HAVE_GETOPT_LONG=0
         ;;
     *qnx*)
@@ -543,7 +557,7 @@
 
 LDFLAGS="$LDFLAGS $libm"
 
-aligned_stack=1
+stack_alignment=16
 case $host_cpu in
     i*86)
         ARCH="X86"
@@ -563,8 +577,7 @@
             if [ $SYS = LINUX ]; then
                 # < 11 is completely incapable of keeping a mod16 stack
                 if cpp_check "" "" "__INTEL_COMPILER < 1100" ; then
-                    define BROKEN_STACK_ALIGNMENT
-                    aligned_stack=0
+                    stack_alignment=4
                 # 11 <= x < 12 is capable of keeping a mod16 stack, but defaults to not doing so.
                 elif cpp_check "" "" "__INTEL_COMPILER < 1200" ; then
                     CFLAGS="$CFLAGS -falign-stack=assume-16-byte"
@@ -572,7 +585,7 @@
                 # >= 12 defaults to a mod16 stack
             fi
             # icl on windows has no mod16 stack support
-            [ $SYS = WINDOWS ] && define BROKEN_STACK_ALIGNMENT && aligned_stack=0
+            [ $SYS = WINDOWS ] && stack_alignment=4
         fi
         if [ "$SYS" = MACOSX ]; then
             ASFLAGS="$ASFLAGS -f macho -DPREFIX"
@@ -595,7 +608,7 @@
                 CFLAGS="$CFLAGS -arch x86_64"
                 LDFLAGS="$LDFLAGS -arch x86_64"
             fi
-        elif [ "$SYS" = WINDOWS ]; then
+        elif [ "$SYS" = WINDOWS -o "$SYS" = CYGWIN ]; then
             ASFLAGS="$ASFLAGS -f win32 -m amd64"
             # only the GNU toolchain is inconsistent in prefixing function names with _
             [ $compiler = GNU ] && cc_check "" "-S" && grep -q "_main:" conftest && ASFLAGS="$ASFLAGS -DPREFIX"
@@ -667,7 +680,6 @@
         ARCH="$(echo $host_cpu | tr a-z A-Z)"
         ;;
 esac
-ASFLAGS="$ASFLAGS -DHAVE_ALIGNED_STACK=${aligned_stack}"
 
 if [ $SYS = WINDOWS ]; then
     if ! rc_check "0 RCDATA {0}" ; then
@@ -719,10 +731,11 @@
         echo "If you really want to compile without asm, configure with --disable-asm."
         exit 1
     fi
+    ASFLAGS="$ASFLAGS -Worphan-labels"
     define HAVE_MMX
-    if cc_check '' -mpreferred-stack-boundary=5 ; then
+    if [ $compiler = GNU ] && cc_check '' -mpreferred-stack-boundary=5 ; then
         CFLAGS="$CFLAGS -mpreferred-stack-boundary=5"
-        define HAVE_32B_STACK_ALIGNMENT
+        stack_alignment=32
     fi
 fi
 
@@ -747,6 +760,9 @@
 define ARCH_$ARCH
 define SYS_$SYS
 
+define STACK_ALIGNMENT $stack_alignment
+ASFLAGS="$ASFLAGS -DSTACK_ALIGNMENT=$stack_alignment"
+
 # skip endianness check for Intel Compiler, as all supported platforms are little. the -ipo flag will also cause the check to fail
 if [ $compiler = GNU ]; then
     echo "int i[2] = {0x42494745,0}; double f[2] = {0x1.0656e6469616ep+102,0};" > conftest.c
@@ -792,10 +808,15 @@
             fi
             ;;
         QNX)
-            cc_check pthread.h -lc && thread="posix" && libpthread="-lc"
+            cc_check pthread.h -lc "pthread_create(0,0,0,0);" && thread="posix" && libpthread="-lc"
             ;;
         *)
-            cc_check pthread.h -lpthread && thread="posix" && libpthread="-lpthread"
+            if cc_check pthread.h -lpthread "pthread_create(0,0,0,0);" ; then
+               thread="posix"
+               libpthread="-lpthread"
+            else
+                cc_check pthread.h "" "pthread_create(0,0,0,0);" && thread="posix" && libpthread=""
+            fi
             ;;
     esac
 fi
@@ -820,16 +841,8 @@
     define HAVE_LOG2F
 fi

 
@@ -30,7 +30,6 @@
   --disable-thread         disable multithreaded encoding
   --enable-win32thread     use win32threads (windows only)
   --disable-interlaced     disable interlaced encoding support
-  --enable-visualize       enable visualization (X11 only)
   --bit-depth=BIT_DEPTH    set output bit depth (8-10) [8]
   --chroma-format=FORMAT   output chroma format (420, 422, 444, all) [all]
 
@@ -52,6 +51,7 @@
   --disable-lavf           disable libavformat support
   --disable-ffms           disable ffmpegsource support
   --disable-gpac           disable gpac support
+  --disable-lsmash         disable lsmash support
 
 EOF
 exit 1
@@ -264,6 +264,8 @@
 lavf="auto"
 ffms="auto"
 gpac="auto"
+lsmash="auto"
+mp4="no"
 gpl="yes"
 thread="auto"
 swscale="auto"
@@ -273,7 +275,6 @@
 gprof="no"
 strip="no"
 pic="no"
-vis="no"
 bit_depth="8"
 chroma_format="all"
 compiler="GNU"
@@ -290,7 +291,8 @@
 EXE=""
 
 # list of all preprocessor HAVE values we can define
-CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F VISUALIZE SWSCALE LAVF FFMS GPAC GF_MALLOC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL"
+CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F SWSCALE \
+             LAVF FFMS GPAC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL THP LSMASH"
 
 # parse options
 
@@ -342,6 +344,9 @@
         --disable-gpac)
             gpac="no"
             ;;
+        --disable-lsmash)
+            lsmash="no"
+            ;;
         --disable-gpl)
             gpl="no"
             ;;
@@ -380,9 +385,6 @@
         --enable-pic)
             pic="yes"
             ;;
-        --enable-visualize)
-            vis="yes"
-            ;;
         --host=*)
             host="$optarg"
             ;;
@@ -423,6 +425,7 @@
 AR="${AR-${cross_prefix}ar}"
 RANLIB="${RANLIB-${cross_prefix}ranlib}"
 STRIP="${STRIP-${cross_prefix}strip}"
+INSTALL="${INSTALL-install}"
 
 if [ "x$host" = x ]; then
     host=`${SRCPATH}/config.guess`
@@ -503,12 +506,13 @@
             CFLAGS="$CFLAGS -mno-cygwin"
             LDFLAGS="$LDFLAGS -mno-cygwin"
         fi
-        if cpp_check "" "" "defined(__CYGWIN32__)" ; then
+        if cpp_check "" "" "defined(__CYGWIN__)" ; then
             define HAVE_MALLOC_H
             SYS="CYGWIN"
         else
             SYS="WINDOWS"
             DEVNULL="NUL"
+            LDFLAGSCLI="$LDFLAGSCLI -lshell32"
             RC="${RC-${cross_prefix}windres}"
         fi
         ;;
@@ -516,6 +520,7 @@
         SYS="WINDOWS"
         EXE=".exe"
         DEVNULL="NUL"
+        LDFLAGSCLI="$LDFLAGSCLI -lshell32"
         [ $compiler = ICL ] && RC="${RC-rc}" || RC="${RC-${cross_prefix}windres}"
         ;;
     sunos*|solaris*)
@@ -527,6 +532,15 @@
         else
             LDFLAGS="$LDFLAGS /usr/lib/values-xpg6.o"
         fi
+        if test -x /usr/ucb/install ; then
+            INSTALL=/usr/ucb/install
+        elif test -x /usr/bin/ginstall ; then
+            # OpenSolaris
+            INSTALL=/usr/bin/ginstall
+        elif test -x /usr/gnu/bin/install ; then
+            # OpenSolaris
+            INSTALL=/usr/gnu/bin/install
+        fi
         HAVE_GETOPT_LONG=0
         ;;
     *qnx*)
@@ -543,7 +557,7 @@
 
 LDFLAGS="$LDFLAGS $libm"
 
-aligned_stack=1
+stack_alignment=16
 case $host_cpu in
     i*86)
         ARCH="X86"
@@ -563,8 +577,7 @@
             if [ $SYS = LINUX ]; then
                 # < 11 is completely incapable of keeping a mod16 stack
                 if cpp_check "" "" "__INTEL_COMPILER < 1100" ; then
-                    define BROKEN_STACK_ALIGNMENT
-                    aligned_stack=0
+                    stack_alignment=4
                 # 11 <= x < 12 is capable of keeping a mod16 stack, but defaults to not doing so.
                 elif cpp_check "" "" "__INTEL_COMPILER < 1200" ; then
                     CFLAGS="$CFLAGS -falign-stack=assume-16-byte"
@@ -572,7 +585,7 @@
                 # >= 12 defaults to a mod16 stack
             fi
             # icl on windows has no mod16 stack support
-            [ $SYS = WINDOWS ] && define BROKEN_STACK_ALIGNMENT && aligned_stack=0
+            [ $SYS = WINDOWS ] && stack_alignment=4
         fi
         if [ "$SYS" = MACOSX ]; then
             ASFLAGS="$ASFLAGS -f macho -DPREFIX"
@@ -595,7 +608,7 @@
                 CFLAGS="$CFLAGS -arch x86_64"
                 LDFLAGS="$LDFLAGS -arch x86_64"
             fi
-        elif [ "$SYS" = WINDOWS ]; then
+        elif [ "$SYS" = WINDOWS -o "$SYS" = CYGWIN ]; then
             ASFLAGS="$ASFLAGS -f win32 -m amd64"
             # only the GNU toolchain is inconsistent in prefixing function names with _
             [ $compiler = GNU ] && cc_check "" "-S" && grep -q "_main:" conftest && ASFLAGS="$ASFLAGS -DPREFIX"
@@ -667,7 +680,6 @@
         ARCH="$(echo $host_cpu | tr a-z A-Z)"
         ;;
 esac
-ASFLAGS="$ASFLAGS -DHAVE_ALIGNED_STACK=${aligned_stack}"
 
 if [ $SYS = WINDOWS ]; then
     if ! rc_check "0 RCDATA {0}" ; then
@@ -719,10 +731,11 @@
         echo "If you really want to compile without asm, configure with --disable-asm."
         exit 1
     fi
+    ASFLAGS="$ASFLAGS -Worphan-labels"
     define HAVE_MMX
-    if cc_check '' -mpreferred-stack-boundary=5 ; then
+    if [ $compiler = GNU ] && cc_check '' -mpreferred-stack-boundary=5 ; then
         CFLAGS="$CFLAGS -mpreferred-stack-boundary=5"
-        define HAVE_32B_STACK_ALIGNMENT
+        stack_alignment=32
     fi
 fi
 
@@ -747,6 +760,9 @@
 define ARCH_$ARCH
 define SYS_$SYS
 
+define STACK_ALIGNMENT $stack_alignment
+ASFLAGS="$ASFLAGS -DSTACK_ALIGNMENT=$stack_alignment"
+
 # skip endianness check for Intel Compiler, as all supported platforms are little. the -ipo flag will also cause the check to fail
 if [ $compiler = GNU ]; then
     echo "int i[2] = {0x42494745,0}; double f[2] = {0x1.0656e6469616ep+102,0};" > conftest.c
@@ -792,10 +808,15 @@
             fi
             ;;
         QNX)
-            cc_check pthread.h -lc && thread="posix" && libpthread="-lc"
+            cc_check pthread.h -lc "pthread_create(0,0,0,0);" && thread="posix" && libpthread="-lc"
             ;;
         *)
-            cc_check pthread.h -lpthread && thread="posix" && libpthread="-lpthread"
+            if cc_check pthread.h -lpthread "pthread_create(0,0,0,0);" ; then
+               thread="posix"
+               libpthread="-lpthread"
+            else
+                cc_check pthread.h "" "pthread_create(0,0,0,0);" && thread="posix" && libpthread=""
+            fi
             ;;
     esac
 fi
@@ -820,16 +841,8 @@
     define HAVE_LOG2F
 fi
​

x264-snapshot-20130723-2245.tar.bz2/encoder/analyse.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/analyse.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * analyse.c: macroblock analysis
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -436,7 +436,7 @@
     /* non-RD PCM decision is inaccurate (as is psy-rd), so don't do it.
      * PCM cost can overflow with high lambda2, so cap it at COST_MAX. */
     uint64_t pcm_cost = ((uint64_t)X264_PCM_COST*a->i_lambda2 + 128) >> 8;
-    a->i_satd_pcm = !h->mb.i_psy_rd && a->i_mbrd && pcm_cost < COST_MAX ? pcm_cost : COST_MAX;
+    a->i_satd_pcm = !h->param.i_avcintra_class && !h->mb.i_psy_rd && a->i_mbrd && pcm_cost < COST_MAX ? pcm_cost : COST_MAX;
 
     a->b_fast_intra = 0;
     a->b_avoid_topright = 0;
@@ -618,6 +618,24 @@
     {I_PRED_CHROMA_V, I_PRED_CHROMA_H, I_PRED_CHROMA_DC, I_PRED_CHROMA_P, -1},
 };
 
+static const int8_t i8x8_mode_available[2][5][10] =
+{
+    {
+        {I_PRED_4x4_DC_128, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {I_PRED_4x4_DC_LEFT, I_PRED_4x4_H, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1},
+        {I_PRED_4x4_DC_TOP, I_PRED_4x4_V, I_PRED_4x4_DDL, I_PRED_4x4_VL, -1, -1, -1, -1, -1, -1},
+        {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDL, I_PRED_4x4_VL, I_PRED_4x4_HU, -1, -1, -1, -1},
+        {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDL, I_PRED_4x4_DDR, I_PRED_4x4_VR, I_PRED_4x4_HD, I_PRED_4x4_VL, I_PRED_4x4_HU, -1},
+    },
+    {
+        {I_PRED_4x4_DC_128, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {I_PRED_4x4_DC_LEFT, I_PRED_4x4_H, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1},
+        {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {I_PRED_4x4_H, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1, -1},
+        {I_PRED_4x4_H, I_PRED_4x4_HD, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1},
+    }
+};
+
 static const int8_t i4x4_mode_available[2][5][10] =
 {
     {
@@ -632,7 +650,7 @@
         {I_PRED_4x4_DC_LEFT, I_PRED_4x4_H, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1},
         {I_PRED_4x4_DC_TOP, I_PRED_4x4_V, -1, -1, -1, -1, -1, -1, -1, -1},
         {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1},
-        {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDR, I_PRED_4x4_VR, I_PRED_4x4_HD, I_PRED_4x4_HU, -1},
+        {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDR, I_PRED_4x4_VR, I_PRED_4x4_HD, I_PRED_4x4_HU, -1, -1, -1},
     }
 };
 
@@ -655,7 +673,7 @@
     int avoid_topright = force_intra && (i&1);
     int idx = i_neighbour & (MB_TOP|MB_LEFT|MB_TOPLEFT);
     idx = (idx == (MB_TOP|MB_LEFT|MB_TOPLEFT)) ? 4 : idx & (MB_TOP|MB_LEFT);
-    return i4x4_mode_available[avoid_topright][idx];
+    return i8x8_mode_available[avoid_topright][idx];
 }
 
 static ALWAYS_INLINE const int8_t *predict_4x4_mode_available( int force_intra, int i_neighbour, int i )
@@ -793,58 +811,60 @@
     int lambda = a->i_lambda;
 
     /*---------------- Try all mode and calculate their score ---------------*/
+    /* Disabled i16x16 for AVC-Intra compat */
+    if( !h->param.i_avcintra_class )
+    {
+        const int8_t *predict_mode = predict_16x16_mode_available( h->mb.i_neighbour_intra );
 
-    /* 16x16 prediction selection */
-    const int8_t *predict_mode = predict_16x16_mode_available( h->mb.i_neighbour_intra );
+        /* Not heavily tuned */
+        static const uint8_t i16x16_thresh_lut[11] = { 2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4 };
+        int i16x16_thresh = a->b_fast_intra ? (i16x16_thresh_lut[h->mb.i_subpel_refine]*i_satd_inter)>>1 : COST_MAX;
 
-    /* Not heavily tuned */
-    static const uint8_t i16x16_thresh_lut[11] = { 2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4 };
-    int i16x16_thresh = a->b_fast_intra ? (i16x16_thresh_lut[h->mb.i_subpel_refine]*i_satd_inter)>>1 : COST_MAX;
-
-    if( !h->mb.b_lossless && predict_mode[3] >= 0 )
-    {
-        h->pixf.intra_mbcmp_x3_16x16( p_src, p_dst, a->i_satd_i16x16_dir );
-        a->i_satd_i16x16_dir[0] += lambda * bs_size_ue(0);
-        a->i_satd_i16x16_dir[1] += lambda * bs_size_ue(1);
-        a->i_satd_i16x16_dir[2] += lambda * bs_size_ue(2);
-        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[0], a->i_predict16x16, 0 );
-        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[1], a->i_predict16x16, 1 );
-        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[2], a->i_predict16x16, 2 );
-
-        /* Plane is expensive, so don't check it unless one of the previous modes was useful. */
-        if( a->i_satd_i16x16 <= i16x16_thresh )
-        {
-            h->predict_16x16[I_PRED_16x16_P]( p_dst );
-            a->i_satd_i16x16_dir[I_PRED_16x16_P] = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE );
-            a->i_satd_i16x16_dir[I_PRED_16x16_P] += lambda * bs_size_ue(3);
-            COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[I_PRED_16x16_P], a->i_predict16x16, 3 );
+        if( !h->mb.b_lossless && predict_mode[3] >= 0 )
+        {
+            h->pixf.intra_mbcmp_x3_16x16( p_src, p_dst, a->i_satd_i16x16_dir );
+            a->i_satd_i16x16_dir[0] += lambda * bs_size_ue(0);
+            a->i_satd_i16x16_dir[1] += lambda * bs_size_ue(1);
+            a->i_satd_i16x16_dir[2] += lambda * bs_size_ue(2);
+            COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[0], a->i_predict16x16, 0 );
+            COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[1], a->i_predict16x16, 1 );
+            COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[2], a->i_predict16x16, 2 );
+
+            /* Plane is expensive, so don't check it unless one of the previous modes was useful. */
+            if( a->i_satd_i16x16 <= i16x16_thresh )
+            {
+                h->predict_16x16[I_PRED_16x16_P]( p_dst );
+                a->i_satd_i16x16_dir[I_PRED_16x16_P] = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE );
+                a->i_satd_i16x16_dir[I_PRED_16x16_P] += lambda * bs_size_ue(3);
+                COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[I_PRED_16x16_P], a->i_predict16x16, 3 );
+            }
         }
-    }
-    else
-    {
-        for( ; *predict_mode >= 0; predict_mode++ )
+        else
         {
-            int i_satd;
-            int i_mode = *predict_mode;
+            for( ; *predict_mode >= 0; predict_mode++ )
+            {
+                int i_satd;
+                int i_mode = *predict_mode;
 
-            if( h->mb.b_lossless )
-                x264_predict_lossless_16x16( h, 0, i_mode );
-            else
-                h->predict_16x16[i_mode]( p_dst );
+                if( h->mb.b_lossless )
+                    x264_predict_lossless_16x16( h, 0, i_mode );
+                else
+                    h->predict_16x16[i_mode]( p_dst );
 
-            i_satd = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE ) +
-                     lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] );
-            COPY2_IF_LT( a->i_satd_i16x16, i_satd, a->i_predict16x16, i_mode );
-            a->i_satd_i16x16_dir[i_mode] = i_satd;
+                i_satd = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE ) +
+                         lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] );
+                COPY2_IF_LT( a->i_satd_i16x16, i_satd, a->i_predict16x16, i_mode );
+                a->i_satd_i16x16_dir[i_mode] = i_satd;
+            }
         }
-    }
 
-    if( h->sh.i_type == SLICE_TYPE_B )
-        /* cavlc mb type prefix */
-        a->i_satd_i16x16 += lambda * i_mb_b_cost_table[I_16x16];
+        if( h->sh.i_type == SLICE_TYPE_B )
+            /* cavlc mb type prefix */
+            a->i_satd_i16x16 += lambda * i_mb_b_cost_table[I_16x16];
 
-    if( a->i_satd_i16x16 > i16x16_thresh )
-        return;
+        if( a->i_satd_i16x16 > i16x16_thresh )
+            return;
+    }
 
     uint16_t *cost_i4x4_mode = (uint16_t*)ALIGN((intptr_t)x264_cost_i4x4_mode,64) + a->i_qp*32 + 8;
     /* 8x8 prediction selection */
@@ -870,7 +890,7 @@
             int i_best = COST_MAX;
             int i_pred_mode = x264_mb_predict_intra4x4_mode( h, 4*idx );
 
-            predict_mode = predict_8x8_mode_available( a->b_avoid_topright, h->mb.i_neighbour8[idx], idx );
+            const int8_t *predict_mode = predict_8x8_mode_available( a->b_avoid_topright, h->mb.i_neighbour8[idx], idx );
             h->predict_8x8_filter( p_dst_by, edge, h->mb.i_neighbour8[idx], ALL_NEIGHBORS );
 
             if( h->pixf.intra_mbcmp_x9_8x8 && predict_mode[8] >= 0 )
@@ -985,7 +1005,7 @@
             int i_best = COST_MAX;
             int i_pred_mode = x264_mb_predict_intra4x4_mode( h, idx );
 
-            predict_mode = predict_4x4_mode_available( a->b_avoid_topright, h->mb.i_neighbour4[idx], idx );
+            const int8_t *predict_mode = predict_4x4_mode_available( a->b_avoid_topright, h->mb.i_neighbour4[idx], idx );
 
             if( (h->mb.i_neighbour4[idx] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )
                 /* emulate missing topright samples */
@@ -2101,7 +2121,7 @@
         int cost00 = h->pixf.mbcmp[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE, pix0, 16 )
                    + ref_costs + l0_mv_cost + l1_mv_cost;
 
-        if( h->mb.b_chroma_me )
+        if( h->mb.b_chroma_me && cost00 < a->i_cost16x16bi )
         {
             ALIGNED_ARRAY_16( pixel, bi, [16*FENC_STRIDE] );

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * analyse.c: macroblock analysis
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -436,7 +436,7 @@
     /* non-RD PCM decision is inaccurate (as is psy-rd), so don't do it.
      * PCM cost can overflow with high lambda2, so cap it at COST_MAX. */
     uint64_t pcm_cost = ((uint64_t)X264_PCM_COST*a->i_lambda2 + 128) >> 8;
-    a->i_satd_pcm = !h->mb.i_psy_rd && a->i_mbrd && pcm_cost < COST_MAX ? pcm_cost : COST_MAX;
+    a->i_satd_pcm = !h->param.i_avcintra_class && !h->mb.i_psy_rd && a->i_mbrd && pcm_cost < COST_MAX ? pcm_cost : COST_MAX;
 
     a->b_fast_intra = 0;
     a->b_avoid_topright = 0;
@@ -618,6 +618,24 @@
     {I_PRED_CHROMA_V, I_PRED_CHROMA_H, I_PRED_CHROMA_DC, I_PRED_CHROMA_P, -1},
 };
 
+static const int8_t i8x8_mode_available[2][5][10] =
+{
+    {
+        {I_PRED_4x4_DC_128, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {I_PRED_4x4_DC_LEFT, I_PRED_4x4_H, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1},
+        {I_PRED_4x4_DC_TOP, I_PRED_4x4_V, I_PRED_4x4_DDL, I_PRED_4x4_VL, -1, -1, -1, -1, -1, -1},
+        {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDL, I_PRED_4x4_VL, I_PRED_4x4_HU, -1, -1, -1, -1},
+        {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDL, I_PRED_4x4_DDR, I_PRED_4x4_VR, I_PRED_4x4_HD, I_PRED_4x4_VL, I_PRED_4x4_HU, -1},
+    },
+    {
+        {I_PRED_4x4_DC_128, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {I_PRED_4x4_DC_LEFT, I_PRED_4x4_H, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1},
+        {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {I_PRED_4x4_H, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1, -1},
+        {I_PRED_4x4_H, I_PRED_4x4_HD, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1},
+    }
+};
+
 static const int8_t i4x4_mode_available[2][5][10] =
 {
     {
@@ -632,7 +650,7 @@
         {I_PRED_4x4_DC_LEFT, I_PRED_4x4_H, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1},
         {I_PRED_4x4_DC_TOP, I_PRED_4x4_V, -1, -1, -1, -1, -1, -1, -1, -1},
         {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1},
-        {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDR, I_PRED_4x4_VR, I_PRED_4x4_HD, I_PRED_4x4_HU, -1},
+        {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDR, I_PRED_4x4_VR, I_PRED_4x4_HD, I_PRED_4x4_HU, -1, -1, -1},
     }
 };
 
@@ -655,7 +673,7 @@
     int avoid_topright = force_intra && (i&1);
     int idx = i_neighbour & (MB_TOP|MB_LEFT|MB_TOPLEFT);
     idx = (idx == (MB_TOP|MB_LEFT|MB_TOPLEFT)) ? 4 : idx & (MB_TOP|MB_LEFT);
-    return i4x4_mode_available[avoid_topright][idx];
+    return i8x8_mode_available[avoid_topright][idx];
 }
 
 static ALWAYS_INLINE const int8_t *predict_4x4_mode_available( int force_intra, int i_neighbour, int i )
@@ -793,58 +811,60 @@
     int lambda = a->i_lambda;
 
     /*---------------- Try all mode and calculate their score ---------------*/
+    /* Disabled i16x16 for AVC-Intra compat */
+    if( !h->param.i_avcintra_class )
+    {
+        const int8_t *predict_mode = predict_16x16_mode_available( h->mb.i_neighbour_intra );
 
-    /* 16x16 prediction selection */
-    const int8_t *predict_mode = predict_16x16_mode_available( h->mb.i_neighbour_intra );
+        /* Not heavily tuned */
+        static const uint8_t i16x16_thresh_lut[11] = { 2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4 };
+        int i16x16_thresh = a->b_fast_intra ? (i16x16_thresh_lut[h->mb.i_subpel_refine]*i_satd_inter)>>1 : COST_MAX;
 
-    /* Not heavily tuned */
-    static const uint8_t i16x16_thresh_lut[11] = { 2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4 };
-    int i16x16_thresh = a->b_fast_intra ? (i16x16_thresh_lut[h->mb.i_subpel_refine]*i_satd_inter)>>1 : COST_MAX;
-
-    if( !h->mb.b_lossless && predict_mode[3] >= 0 )
-    {
-        h->pixf.intra_mbcmp_x3_16x16( p_src, p_dst, a->i_satd_i16x16_dir );
-        a->i_satd_i16x16_dir[0] += lambda * bs_size_ue(0);
-        a->i_satd_i16x16_dir[1] += lambda * bs_size_ue(1);
-        a->i_satd_i16x16_dir[2] += lambda * bs_size_ue(2);
-        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[0], a->i_predict16x16, 0 );
-        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[1], a->i_predict16x16, 1 );
-        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[2], a->i_predict16x16, 2 );
-
-        /* Plane is expensive, so don't check it unless one of the previous modes was useful. */
-        if( a->i_satd_i16x16 <= i16x16_thresh )
-        {
-            h->predict_16x16[I_PRED_16x16_P]( p_dst );
-            a->i_satd_i16x16_dir[I_PRED_16x16_P] = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE );
-            a->i_satd_i16x16_dir[I_PRED_16x16_P] += lambda * bs_size_ue(3);
-            COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[I_PRED_16x16_P], a->i_predict16x16, 3 );
+        if( !h->mb.b_lossless && predict_mode[3] >= 0 )
+        {
+            h->pixf.intra_mbcmp_x3_16x16( p_src, p_dst, a->i_satd_i16x16_dir );
+            a->i_satd_i16x16_dir[0] += lambda * bs_size_ue(0);
+            a->i_satd_i16x16_dir[1] += lambda * bs_size_ue(1);
+            a->i_satd_i16x16_dir[2] += lambda * bs_size_ue(2);
+            COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[0], a->i_predict16x16, 0 );
+            COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[1], a->i_predict16x16, 1 );
+            COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[2], a->i_predict16x16, 2 );
+
+            /* Plane is expensive, so don't check it unless one of the previous modes was useful. */
+            if( a->i_satd_i16x16 <= i16x16_thresh )
+            {
+                h->predict_16x16[I_PRED_16x16_P]( p_dst );
+                a->i_satd_i16x16_dir[I_PRED_16x16_P] = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE );
+                a->i_satd_i16x16_dir[I_PRED_16x16_P] += lambda * bs_size_ue(3);
+                COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[I_PRED_16x16_P], a->i_predict16x16, 3 );
+            }
         }
-    }
-    else
-    {
-        for( ; *predict_mode >= 0; predict_mode++ )
+        else
         {
-            int i_satd;
-            int i_mode = *predict_mode;
+            for( ; *predict_mode >= 0; predict_mode++ )
+            {
+                int i_satd;
+                int i_mode = *predict_mode;
 
-            if( h->mb.b_lossless )
-                x264_predict_lossless_16x16( h, 0, i_mode );
-            else
-                h->predict_16x16[i_mode]( p_dst );
+                if( h->mb.b_lossless )
+                    x264_predict_lossless_16x16( h, 0, i_mode );
+                else
+                    h->predict_16x16[i_mode]( p_dst );
 
-            i_satd = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE ) +
-                     lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] );
-            COPY2_IF_LT( a->i_satd_i16x16, i_satd, a->i_predict16x16, i_mode );
-            a->i_satd_i16x16_dir[i_mode] = i_satd;
+                i_satd = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE ) +
+                         lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] );
+                COPY2_IF_LT( a->i_satd_i16x16, i_satd, a->i_predict16x16, i_mode );
+                a->i_satd_i16x16_dir[i_mode] = i_satd;
+            }
         }
-    }
 
-    if( h->sh.i_type == SLICE_TYPE_B )
-        /* cavlc mb type prefix */
-        a->i_satd_i16x16 += lambda * i_mb_b_cost_table[I_16x16];
+        if( h->sh.i_type == SLICE_TYPE_B )
+            /* cavlc mb type prefix */
+            a->i_satd_i16x16 += lambda * i_mb_b_cost_table[I_16x16];
 
-    if( a->i_satd_i16x16 > i16x16_thresh )
-        return;
+        if( a->i_satd_i16x16 > i16x16_thresh )
+            return;
+    }
 
     uint16_t *cost_i4x4_mode = (uint16_t*)ALIGN((intptr_t)x264_cost_i4x4_mode,64) + a->i_qp*32 + 8;
     /* 8x8 prediction selection */
@@ -870,7 +890,7 @@
             int i_best = COST_MAX;
             int i_pred_mode = x264_mb_predict_intra4x4_mode( h, 4*idx );
 
-            predict_mode = predict_8x8_mode_available( a->b_avoid_topright, h->mb.i_neighbour8[idx], idx );
+            const int8_t *predict_mode = predict_8x8_mode_available( a->b_avoid_topright, h->mb.i_neighbour8[idx], idx );
             h->predict_8x8_filter( p_dst_by, edge, h->mb.i_neighbour8[idx], ALL_NEIGHBORS );
 
             if( h->pixf.intra_mbcmp_x9_8x8 && predict_mode[8] >= 0 )
@@ -985,7 +1005,7 @@
             int i_best = COST_MAX;
             int i_pred_mode = x264_mb_predict_intra4x4_mode( h, idx );
 
-            predict_mode = predict_4x4_mode_available( a->b_avoid_topright, h->mb.i_neighbour4[idx], idx );
+            const int8_t *predict_mode = predict_4x4_mode_available( a->b_avoid_topright, h->mb.i_neighbour4[idx], idx );
 
             if( (h->mb.i_neighbour4[idx] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )
                 /* emulate missing topright samples */
@@ -2101,7 +2121,7 @@
         int cost00 = h->pixf.mbcmp[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE, pix0, 16 )
                    + ref_costs + l0_mv_cost + l1_mv_cost;
 
-        if( h->mb.b_chroma_me )
+        if( h->mb.b_chroma_me && cost00 < a->i_cost16x16bi )
         {
             ALIGNED_ARRAY_16( pixel, bi, [16*FENC_STRIDE] );
 
​

x264-snapshot-20130723-2245.tar.bz2/encoder/analyse.h -> x264-snapshot-20140321-2245.tar.bz2/encoder/analyse.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * analyse.h: macroblock analysis
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
​

x264-snapshot-20130723-2245.tar.bz2/encoder/cabac.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/cabac.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * cabac.c: cabac bitstream writing
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
​

x264-snapshot-20130723-2245.tar.bz2/encoder/cavlc.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/cavlc.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * cavlc.c: cavlc bitstream writing
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -500,6 +500,9 @@
         && (!(h->mb.i_mb_y & 1) || IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride])) )
     {
         bs_write1( s, MB_INTERLACED );
+#if !RDO_SKIP_BS
+        h->mb.field_decoding_flag = MB_INTERLACED;
+#endif
     }
 
 #if !RDO_SKIP_BS
​

x264-snapshot-20130723-2245.tar.bz2/encoder/encoder.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/encoder.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * encoder.c: top-level encoder functions
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -33,10 +33,6 @@
 #include "macroblock.h"
 #include "me.h"
 
-#if HAVE_VISUALIZE
-#include "common/visualize.h"
-#endif
-
 //#define DEBUG_MB_TYPE
 
 #define bs_write_ue bs_write_ue_big
@@ -82,7 +78,7 @@
 
 static void x264_frame_dump( x264_t *h )
 {
-    FILE *f = fopen( h->param.psz_dump_yuv, "r+b" );
+    FILE *f = x264_fopen( h->param.psz_dump_yuv, "r+b" );
     if( !f )
         return;
 
@@ -403,21 +399,6 @@
 {
     if( h->param.i_sync_lookahead )
         x264_lower_thread_priority( 10 );
-
-#if HAVE_MMX
-    /* Misalign mask has to be set separately for each thread. */
-    if( h->param.cpu&X264_CPU_SSE_MISALIGN )
-        x264_cpu_mask_misalign_sse();
-#endif
-}
-
-static void x264_lookahead_thread_init( x264_t *h )
-{
-#if HAVE_MMX
-    /* Misalign mask has to be set separately for each thread. */
-    if( h->param.cpu&X264_CPU_SSE_MISALIGN )
-        x264_cpu_mask_misalign_sse();
-#endif
 }
 #endif
 
@@ -486,7 +467,7 @@
         x264_log( h, X264_LOG_ERROR, "not compiled with 4:2:0 support\n" );
         return -1;
     }
-    else if( CHROMA_FORMAT != CHROMA_422 && i_csp >= X264_CSP_I422 && i_csp <= X264_CSP_NV16 )
+    else if( CHROMA_FORMAT != CHROMA_422 && i_csp >= X264_CSP_I422 && i_csp <= X264_CSP_V210 )
     {
         x264_log( h, X264_LOG_ERROR, "not compiled with 4:2:2 support\n" );
         return -1;
@@ -532,6 +513,12 @@
         return -1;
     }
 
+    if( h->param.vui.i_sar_width <= 0 || h->param.vui.i_sar_height <= 0 )
+    {
+        h->param.vui.i_sar_width = 0;
+        h->param.vui.i_sar_height = 0;
+    }
+
     if( h->param.i_threads == X264_THREADS_AUTO )
         h->param.i_threads = x264_cpu_num_processors() * (h->param.b_sliced_threads?2:3)/2;
     int max_sliced_threads = X264_MAX( 1, (h->param.i_height+15)/16 / 4 );
@@ -583,6 +570,8 @@
     {
         h->param.b_intra_refresh = 0;
         h->param.analyse.i_weighted_pred = 0;
+        h->param.i_frame_reference = 1;
+        h->param.i_dpb_size = 1;
     }
 
     h->param.i_frame_packing = x264_clip3( h->param.i_frame_packing, -1, 5 );
@@ -616,6 +605,188 @@
         x264_log( h, X264_LOG_ERROR, "no ratecontrol method specified\n" );
         return -1;
     }
+
+    if( PARAM_INTERLACED )
+        h->param.b_pic_struct = 1;
+
+    if( h->param.i_avcintra_class )
+    {
+        if( BIT_DEPTH != 10 )
+        {
+            x264_log( h, X264_LOG_ERROR, "%2d-bit AVC-Intra is not widely compatible\n", BIT_DEPTH );
+            x264_log( h, X264_LOG_ERROR, "10-bit x264 is required to encode AVC-Intra\n" );
+            return -1;
+        }
+
+        int type = h->param.i_avcintra_class == 200 ? 2 :
+                   h->param.i_avcintra_class == 100 ? 1 :
+                   h->param.i_avcintra_class == 50 ? 0 : -1;
+        if( type < 0 )
+        {
+            x264_log( h, X264_LOG_ERROR, "Invalid AVC-Intra class\n" );
+            return -1;
+        }
+
+        /* [50/100/200][res][fps] */
+        static const struct
+        {
+            uint16_t fps_num;
+            uint16_t fps_den;
+            uint8_t interlaced;
+            uint16_t frame_size;
+            const uint8_t *cqm_4ic;
+            const uint8_t *cqm_8iy;
+        } avcintra_lut[3][2][7] =
+        {
+            {{{ 60000, 1001, 0,  912, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
+              {    50,    1, 0, 1100, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
+              { 30000, 1001, 0,  912, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
+              {    25,    1, 0, 1100, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
+              { 24000, 1001, 0,  912, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy }},
+             {{ 30000, 1001, 1, 1820, x264_cqm_avci50_4ic, x264_cqm_avci50_1080i_8iy },
+              {    25,    1, 1, 2196, x264_cqm_avci50_4ic, x264_cqm_avci50_1080i_8iy },
+              { 60000, 1001, 0, 1820, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
+              { 30000, 1001, 0, 1820, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
+              {    50,    1, 0, 2196, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
+              {    25,    1, 0, 2196, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
+              { 24000, 1001, 0, 1820, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy }}},
+            {{{ 60000, 1001, 0, 1848, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy },
+              {    50,    1, 0, 2224, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy },
+              { 30000, 1001, 0, 1848, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy },
+              {    25,    1, 0, 2224, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy },
+              { 24000, 1001, 0, 1848, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy }},
+             {{ 30000, 1001, 1, 3692, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080i_8iy },
+              {    25,    1, 1, 4444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080i_8iy },
+              { 60000, 1001, 0, 3692, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
+              { 30000, 1001, 0, 3692, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
+              {    50,    1, 0, 4444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
+              {    25,    1, 0, 4444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
+              { 24000, 1001, 0, 3692, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy }}},
+            {{{ 60000, 1001, 0, 3724, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy },
+              {    50,    1, 0, 4472, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy }},
+             {{ 30000, 1001, 1, 7444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080i_8iy },
+              {    25,    1, 1, 8940, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080i_8iy },
+              { 60000, 1001, 0, 7444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
+              { 30000, 1001, 0, 7444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
+              {    50,    1, 0, 8940, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
+              {    25,    1, 0, 8940, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
+              { 24000, 1001, 0, 7444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy }}}
+        };
+
+        int res = -1;
+        if( i_csp >= X264_CSP_I420 && i_csp < X264_CSP_I422 && !type )
+        {
+            if(      h->param.i_width == 1440 && h->param.i_height == 1080 ) res =  1;
+            else if( h->param.i_width ==  960 && h->param.i_height ==  720 ) res =  0;
+        }
+        else if( i_csp >= X264_CSP_I422 && i_csp < X264_CSP_I444 && type )
+        {
+            if(      h->param.i_width == 1920 && h->param.i_height == 1080 ) res =  1;
+            else if( h->param.i_width == 1280 && h->param.i_height ==  720 ) res =  0;
+        }
+        else
+        {
+            x264_log( h, X264_LOG_ERROR, "Invalid colorspace for AVC-Intra %d\n", h->param.i_avcintra_class );
+            return -1;
+        }
+
+        if( res < 0 )
+        {
+            x264_log( h, X264_LOG_ERROR, "Resolution %dx%d invalid for AVC-Intra %d\n",
+                      h->param.i_width, h->param.i_height, h->param.i_avcintra_class );
+            return -1;
+        }
+
+        if( h->param.nalu_process )
+        {
+            x264_log( h, X264_LOG_ERROR, "nalu_process is not supported in AVC-Intra mode\n" );
+            return -1;
+        }
+
+        if( !h->param.b_repeat_headers )
+        {
+            x264_log( h, X264_LOG_ERROR, "Separate headers not supported in AVC-Intra mode\n" );
+            return -1;
+        }
+
+        int i;
+        uint32_t fps_num = h->param.i_fps_num, fps_den = h->param.i_fps_den;
+        x264_reduce_fraction( &fps_num, &fps_den );
+        for( i = 0; i < 7; i++ )
+        {
+            if( avcintra_lut[type][res][i].fps_num == fps_num &&
+                avcintra_lut[type][res][i].fps_den == fps_den &&
+                avcintra_lut[type][res][i].interlaced == PARAM_INTERLACED )
+            {
+                break;

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * encoder.c: top-level encoder functions
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -33,10 +33,6 @@
 #include "macroblock.h"
 #include "me.h"
 
-#if HAVE_VISUALIZE
-#include "common/visualize.h"
-#endif
-
 //#define DEBUG_MB_TYPE
 
 #define bs_write_ue bs_write_ue_big
@@ -82,7 +78,7 @@
 
 static void x264_frame_dump( x264_t *h )
 {
-    FILE *f = fopen( h->param.psz_dump_yuv, "r+b" );
+    FILE *f = x264_fopen( h->param.psz_dump_yuv, "r+b" );
     if( !f )
         return;
 
@@ -403,21 +399,6 @@
 {
     if( h->param.i_sync_lookahead )
         x264_lower_thread_priority( 10 );
-
-#if HAVE_MMX
-    /* Misalign mask has to be set separately for each thread. */
-    if( h->param.cpu&X264_CPU_SSE_MISALIGN )
-        x264_cpu_mask_misalign_sse();
-#endif
-}
-
-static void x264_lookahead_thread_init( x264_t *h )
-{
-#if HAVE_MMX
-    /* Misalign mask has to be set separately for each thread. */
-    if( h->param.cpu&X264_CPU_SSE_MISALIGN )
-        x264_cpu_mask_misalign_sse();
-#endif
 }
 #endif
 
@@ -486,7 +467,7 @@
         x264_log( h, X264_LOG_ERROR, "not compiled with 4:2:0 support\n" );
         return -1;
     }
-    else if( CHROMA_FORMAT != CHROMA_422 && i_csp >= X264_CSP_I422 && i_csp <= X264_CSP_NV16 )
+    else if( CHROMA_FORMAT != CHROMA_422 && i_csp >= X264_CSP_I422 && i_csp <= X264_CSP_V210 )
     {
         x264_log( h, X264_LOG_ERROR, "not compiled with 4:2:2 support\n" );
         return -1;
@@ -532,6 +513,12 @@
         return -1;
     }
 
+    if( h->param.vui.i_sar_width <= 0 || h->param.vui.i_sar_height <= 0 )
+    {
+        h->param.vui.i_sar_width = 0;
+        h->param.vui.i_sar_height = 0;
+    }
+
     if( h->param.i_threads == X264_THREADS_AUTO )
         h->param.i_threads = x264_cpu_num_processors() * (h->param.b_sliced_threads?2:3)/2;
     int max_sliced_threads = X264_MAX( 1, (h->param.i_height+15)/16 / 4 );
@@ -583,6 +570,8 @@
     {
         h->param.b_intra_refresh = 0;
         h->param.analyse.i_weighted_pred = 0;
+        h->param.i_frame_reference = 1;
+        h->param.i_dpb_size = 1;
     }
 
     h->param.i_frame_packing = x264_clip3( h->param.i_frame_packing, -1, 5 );
@@ -616,6 +605,188 @@
         x264_log( h, X264_LOG_ERROR, "no ratecontrol method specified\n" );
         return -1;
     }
+
+    if( PARAM_INTERLACED )
+        h->param.b_pic_struct = 1;
+
+    if( h->param.i_avcintra_class )
+    {
+        if( BIT_DEPTH != 10 )
+        {
+            x264_log( h, X264_LOG_ERROR, "%2d-bit AVC-Intra is not widely compatible\n", BIT_DEPTH );
+            x264_log( h, X264_LOG_ERROR, "10-bit x264 is required to encode AVC-Intra\n" );
+            return -1;
+        }
+
+        int type = h->param.i_avcintra_class == 200 ? 2 :
+                   h->param.i_avcintra_class == 100 ? 1 :
+                   h->param.i_avcintra_class == 50 ? 0 : -1;
+        if( type < 0 )
+        {
+            x264_log( h, X264_LOG_ERROR, "Invalid AVC-Intra class\n" );
+            return -1;
+        }
+
+        /* [50/100/200][res][fps] */
+        static const struct
+        {
+            uint16_t fps_num;
+            uint16_t fps_den;
+            uint8_t interlaced;
+            uint16_t frame_size;
+            const uint8_t *cqm_4ic;
+            const uint8_t *cqm_8iy;
+        } avcintra_lut[3][2][7] =
+        {
+            {{{ 60000, 1001, 0,  912, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
+              {    50,    1, 0, 1100, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
+              { 30000, 1001, 0,  912, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
+              {    25,    1, 0, 1100, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
+              { 24000, 1001, 0,  912, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy }},
+             {{ 30000, 1001, 1, 1820, x264_cqm_avci50_4ic, x264_cqm_avci50_1080i_8iy },
+              {    25,    1, 1, 2196, x264_cqm_avci50_4ic, x264_cqm_avci50_1080i_8iy },
+              { 60000, 1001, 0, 1820, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
+              { 30000, 1001, 0, 1820, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
+              {    50,    1, 0, 2196, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
+              {    25,    1, 0, 2196, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
+              { 24000, 1001, 0, 1820, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy }}},
+            {{{ 60000, 1001, 0, 1848, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy },
+              {    50,    1, 0, 2224, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy },
+              { 30000, 1001, 0, 1848, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy },
+              {    25,    1, 0, 2224, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy },
+              { 24000, 1001, 0, 1848, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy }},
+             {{ 30000, 1001, 1, 3692, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080i_8iy },
+              {    25,    1, 1, 4444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080i_8iy },
+              { 60000, 1001, 0, 3692, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
+              { 30000, 1001, 0, 3692, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
+              {    50,    1, 0, 4444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
+              {    25,    1, 0, 4444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
+              { 24000, 1001, 0, 3692, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy }}},
+            {{{ 60000, 1001, 0, 3724, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy },
+              {    50,    1, 0, 4472, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy }},
+             {{ 30000, 1001, 1, 7444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080i_8iy },
+              {    25,    1, 1, 8940, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080i_8iy },
+              { 60000, 1001, 0, 7444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
+              { 30000, 1001, 0, 7444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
+              {    50,    1, 0, 8940, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
+              {    25,    1, 0, 8940, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
+              { 24000, 1001, 0, 7444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy }}}
+        };
+
+        int res = -1;
+        if( i_csp >= X264_CSP_I420 && i_csp < X264_CSP_I422 && !type )
+        {
+            if(      h->param.i_width == 1440 && h->param.i_height == 1080 ) res =  1;
+            else if( h->param.i_width ==  960 && h->param.i_height ==  720 ) res =  0;
+        }
+        else if( i_csp >= X264_CSP_I422 && i_csp < X264_CSP_I444 && type )
+        {
+            if(      h->param.i_width == 1920 && h->param.i_height == 1080 ) res =  1;
+            else if( h->param.i_width == 1280 && h->param.i_height ==  720 ) res =  0;
+        }
+        else
+        {
+            x264_log( h, X264_LOG_ERROR, "Invalid colorspace for AVC-Intra %d\n", h->param.i_avcintra_class );
+            return -1;
+        }
+
+        if( res < 0 )
+        {
+            x264_log( h, X264_LOG_ERROR, "Resolution %dx%d invalid for AVC-Intra %d\n",
+                      h->param.i_width, h->param.i_height, h->param.i_avcintra_class );
+            return -1;
+        }
+
+        if( h->param.nalu_process )
+        {
+            x264_log( h, X264_LOG_ERROR, "nalu_process is not supported in AVC-Intra mode\n" );
+            return -1;
+        }
+
+        if( !h->param.b_repeat_headers )
+        {
+            x264_log( h, X264_LOG_ERROR, "Separate headers not supported in AVC-Intra mode\n" );
+            return -1;
+        }
+
+        int i;
+        uint32_t fps_num = h->param.i_fps_num, fps_den = h->param.i_fps_den;
+        x264_reduce_fraction( &fps_num, &fps_den );
+        for( i = 0; i < 7; i++ )
+        {
+            if( avcintra_lut[type][res][i].fps_num == fps_num &&
+                avcintra_lut[type][res][i].fps_den == fps_den &&
+                avcintra_lut[type][res][i].interlaced == PARAM_INTERLACED )
+            {
+                break;
​

x264-snapshot-20130723-2245.tar.bz2/encoder/lookahead.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/lookahead.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * lookahead.c: high-level lookahead functions
  *****************************************************************************
- * Copyright (C) 2010-2013 Avail Media and x264 project
+ * Copyright (C) 2010-2014 Avail Media and x264 project
  *
  * Authors: Michael Kazmier <mkazmier@availmedia.com>
  *          Alex Giladi <agiladi@availmedia.com>
@@ -89,16 +89,11 @@
 
 static void *x264_lookahead_thread( x264_t *h )
 {
-    int shift;
-#if HAVE_MMX
-    if( h->param.cpu&X264_CPU_SSE_MISALIGN )
-        x264_cpu_mask_misalign_sse();
-#endif
     while( !h->lookahead->b_exit_thread )
     {
         x264_pthread_mutex_lock( &h->lookahead->ifbuf.mutex );
         x264_pthread_mutex_lock( &h->lookahead->next.mutex );
-        shift = X264_MIN( h->lookahead->next.i_max_size - h->lookahead->next.i_size, h->lookahead->ifbuf.i_size );
+        int shift = X264_MIN( h->lookahead->next.i_max_size - h->lookahead->next.i_size, h->lookahead->ifbuf.i_size );
         x264_lookahead_shift( &h->lookahead->next, &h->lookahead->ifbuf, shift );
         x264_pthread_mutex_unlock( &h->lookahead->next.mutex );
         if( h->lookahead->next.i_size <= h->lookahead->i_slicetype_length + h->param.b_vfr_input )

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * lookahead.c: high-level lookahead functions
  *****************************************************************************
- * Copyright (C) 2010-2013 Avail Media and x264 project
+ * Copyright (C) 2010-2014 Avail Media and x264 project
  *
  * Authors: Michael Kazmier <mkazmier@availmedia.com>
  *          Alex Giladi <agiladi@availmedia.com>
@@ -89,16 +89,11 @@
 
 static void *x264_lookahead_thread( x264_t *h )
 {
-    int shift;
-#if HAVE_MMX
-    if( h->param.cpu&X264_CPU_SSE_MISALIGN )
-        x264_cpu_mask_misalign_sse();
-#endif
     while( !h->lookahead->b_exit_thread )
     {
         x264_pthread_mutex_lock( &h->lookahead->ifbuf.mutex );
         x264_pthread_mutex_lock( &h->lookahead->next.mutex );
-        shift = X264_MIN( h->lookahead->next.i_max_size - h->lookahead->next.i_size, h->lookahead->ifbuf.i_size );
+        int shift = X264_MIN( h->lookahead->next.i_max_size - h->lookahead->next.i_size, h->lookahead->ifbuf.i_size );
         x264_lookahead_shift( &h->lookahead->next, &h->lookahead->ifbuf, shift );
         x264_pthread_mutex_unlock( &h->lookahead->next.mutex );
         if( h->lookahead->next.i_size <= h->lookahead->i_slicetype_length + h->param.b_vfr_input )
​

x264-snapshot-20130723-2245.tar.bz2/encoder/macroblock.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/macroblock.c Changed

@@ -1,12 +1,12 @@
 /*****************************************************************************
  * macroblock.c: macroblock encoding
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
- *          Henrik Gramner <hengar-6@student.ltu.se>
+ *          Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -157,10 +157,7 @@
         return;
     }
 
-    M32( &h->mb.cache.non_zero_count[x264_scan8[ 0+p*16]] ) = 0;
-    M32( &h->mb.cache.non_zero_count[x264_scan8[ 2+p*16]] ) = 0;
-    M32( &h->mb.cache.non_zero_count[x264_scan8[ 8+p*16]] ) = 0;
-    M32( &h->mb.cache.non_zero_count[x264_scan8[10+p*16]] ) = 0;
+    CLEAR_16x16_NNZ( p );
 
     h->dctf.sub16x16_dct( dct4x4, p_src, p_dst );

 
@@ -1,12 +1,12 @@
 /*****************************************************************************
  * macroblock.c: macroblock encoding
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
- *          Henrik Gramner <hengar-6@student.ltu.se>
+ *          Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -157,10 +157,7 @@
         return;
     }
 
-    M32( &h->mb.cache.non_zero_count[x264_scan8[ 0+p*16]] ) = 0;
-    M32( &h->mb.cache.non_zero_count[x264_scan8[ 2+p*16]] ) = 0;
-    M32( &h->mb.cache.non_zero_count[x264_scan8[ 8+p*16]] ) = 0;
-    M32( &h->mb.cache.non_zero_count[x264_scan8[10+p*16]] ) = 0;
+    CLEAR_16x16_NNZ( p );
 
     h->dctf.sub16x16_dct( dct4x4, p_src, p_dst );
 
​

x264-snapshot-20130723-2245.tar.bz2/encoder/macroblock.h -> x264-snapshot-20140321-2245.tar.bz2/encoder/macroblock.h Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * macroblock.h: macroblock encoding
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -98,10 +98,10 @@
 #define CLEAR_16x16_NNZ( p ) \
 do\
 {\
-    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p+ 0]] ) = 0;\
-    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p+ 2]] ) = 0;\
-    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p+ 8]] ) = 0;\
-    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p+10]] ) = 0;\
+    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p] + 0*8] ) = 0;\
+    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p] + 1*8] ) = 0;\
+    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p] + 2*8] ) = 0;\
+    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p] + 3*8] ) = 0;\
 } while(0)
 
 /* A special for loop that iterates branchlessly over each set

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * macroblock.h: macroblock encoding
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -98,10 +98,10 @@
 #define CLEAR_16x16_NNZ( p ) \
 do\
 {\
-    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p+ 0]] ) = 0;\
-    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p+ 2]] ) = 0;\
-    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p+ 8]] ) = 0;\
-    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p+10]] ) = 0;\
+    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p] + 0*8] ) = 0;\
+    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p] + 1*8] ) = 0;\
+    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p] + 2*8] ) = 0;\
+    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p] + 3*8] ) = 0;\
 } while(0)
 
 /* A special for loop that iterates branchlessly over each set
​

x264-snapshot-20130723-2245.tar.bz2/encoder/me.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/me.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * me.c: motion estimation
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -365,14 +365,14 @@
 
             /* hexagon */
             COST_MV_X3_DIR( -2,0, -1, 2,  1, 2, costs   );
-            COST_MV_X3_DIR(  2,0,  1,-2, -1,-2, costs+3 );
+            COST_MV_X3_DIR(  2,0,  1,-2, -1,-2, costs+4 ); /* +4 for 16-byte alignment */
             bcost <<= 3;
             COPY1_IF_LT( bcost, (costs[0]<<3)+2 );
             COPY1_IF_LT( bcost, (costs[1]<<3)+3 );
             COPY1_IF_LT( bcost, (costs[2]<<3)+4 );
-            COPY1_IF_LT( bcost, (costs[3]<<3)+5 );
-            COPY1_IF_LT( bcost, (costs[4]<<3)+6 );
-            COPY1_IF_LT( bcost, (costs[5]<<3)+7 );
+            COPY1_IF_LT( bcost, (costs[4]<<3)+5 );
+            COPY1_IF_LT( bcost, (costs[5]<<3)+6 );
+            COPY1_IF_LT( bcost, (costs[6]<<3)+7 );
 
             if( bcost&7 )
             {
@@ -671,7 +671,7 @@
                     for( i = 0; i < xn-2; i += 3 )
                     {
                         pixel *ref = p_fref_w+min_x+my*stride;
-                        int sads[3];
+                        ALIGNED_ARRAY_16( int, sads,[4] ); /* padded to [4] for asm */
                         h->pixf.sad_x3[i_pixel]( p_fenc, ref+xs[i], ref+xs[i+1], ref+xs[i+2], stride, sads );
                         for( int j = 0; j < 3; j++ )
                         {

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * me.c: motion estimation
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -365,14 +365,14 @@
 
             /* hexagon */
             COST_MV_X3_DIR( -2,0, -1, 2,  1, 2, costs   );
-            COST_MV_X3_DIR(  2,0,  1,-2, -1,-2, costs+3 );
+            COST_MV_X3_DIR(  2,0,  1,-2, -1,-2, costs+4 ); /* +4 for 16-byte alignment */
             bcost <<= 3;
             COPY1_IF_LT( bcost, (costs[0]<<3)+2 );
             COPY1_IF_LT( bcost, (costs[1]<<3)+3 );
             COPY1_IF_LT( bcost, (costs[2]<<3)+4 );
-            COPY1_IF_LT( bcost, (costs[3]<<3)+5 );
-            COPY1_IF_LT( bcost, (costs[4]<<3)+6 );
-            COPY1_IF_LT( bcost, (costs[5]<<3)+7 );
+            COPY1_IF_LT( bcost, (costs[4]<<3)+5 );
+            COPY1_IF_LT( bcost, (costs[5]<<3)+6 );
+            COPY1_IF_LT( bcost, (costs[6]<<3)+7 );
 
             if( bcost&7 )
             {
@@ -671,7 +671,7 @@
                     for( i = 0; i < xn-2; i += 3 )
                     {
                         pixel *ref = p_fref_w+min_x+my*stride;
-                        int sads[3];
+                        ALIGNED_ARRAY_16( int, sads,[4] ); /* padded to [4] for asm */
                         h->pixf.sad_x3[i_pixel]( p_fenc, ref+xs[i], ref+xs[i+1], ref+xs[i+2], stride, sads );
                         for( int j = 0; j < 3; j++ )
                         {
​

x264-snapshot-20130723-2245.tar.bz2/encoder/me.h -> x264-snapshot-20140321-2245.tar.bz2/encoder/me.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * me.h: motion estimation
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
​

x264-snapshot-20130723-2245.tar.bz2/encoder/ratecontrol.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/ratecontrol.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * ratecontrol.c: ratecontrol
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Michael Niedermayer <michaelni@gmx.at>
@@ -101,7 +101,7 @@
     double vbv_max_rate;        /* # of bits added to buffer_fill per second */
     predictor_t *pred;          /* predict frame size from satd */
     int single_frame_vbv;
-    double rate_factor_max_increment; /* Don't allow RF above (CRF + this value). */
+    float rate_factor_max_increment; /* Don't allow RF above (CRF + this value). */
 
     /* ABR stuff */
     int    last_satd;
@@ -653,8 +653,9 @@
                       h->param.rc.i_vbv_buffer_size );
         }
 
-        int vbv_buffer_size = h->param.rc.i_vbv_buffer_size * 1000;
-        int vbv_max_bitrate = h->param.rc.i_vbv_max_bitrate * 1000;
+        int kilobit_size = h->param.i_avcintra_class ? 1024 : 1000;
+        int vbv_buffer_size = h->param.rc.i_vbv_buffer_size * kilobit_size;
+        int vbv_max_bitrate = h->param.rc.i_vbv_max_bitrate * kilobit_size;
 
         /* Init HRD */
         if( h->param.i_nal_hrd && b_init )
@@ -666,15 +667,12 @@
             #define BR_SHIFT  6
             #define CPB_SHIFT 4
 
-            int bitrate = 1000*h->param.rc.i_vbv_max_bitrate;
-            int bufsize = 1000*h->param.rc.i_vbv_buffer_size;
-
             // normalize HRD size and rate to the value / scale notation
-            h->sps->vui.hrd.i_bit_rate_scale = x264_clip3( x264_ctz( bitrate ) - BR_SHIFT, 0, 15 );
-            h->sps->vui.hrd.i_bit_rate_value = bitrate >> ( h->sps->vui.hrd.i_bit_rate_scale + BR_SHIFT );
+            h->sps->vui.hrd.i_bit_rate_scale = x264_clip3( x264_ctz( vbv_max_bitrate ) - BR_SHIFT, 0, 15 );
+            h->sps->vui.hrd.i_bit_rate_value = vbv_max_bitrate >> ( h->sps->vui.hrd.i_bit_rate_scale + BR_SHIFT );
             h->sps->vui.hrd.i_bit_rate_unscaled = h->sps->vui.hrd.i_bit_rate_value << ( h->sps->vui.hrd.i_bit_rate_scale + BR_SHIFT );
-            h->sps->vui.hrd.i_cpb_size_scale = x264_clip3( x264_ctz( bufsize ) - CPB_SHIFT, 0, 15 );
-            h->sps->vui.hrd.i_cpb_size_value = bufsize >> ( h->sps->vui.hrd.i_cpb_size_scale + CPB_SHIFT );
+            h->sps->vui.hrd.i_cpb_size_scale = x264_clip3( x264_ctz( vbv_buffer_size ) - CPB_SHIFT, 0, 15 );
+            h->sps->vui.hrd.i_cpb_size_value = vbv_buffer_size >> ( h->sps->vui.hrd.i_cpb_size_scale + CPB_SHIFT );
             h->sps->vui.hrd.i_cpb_size_unscaled = h->sps->vui.hrd.i_cpb_size_value << ( h->sps->vui.hrd.i_cpb_size_scale + CPB_SHIFT );
 
             #undef CPB_SHIFT
@@ -705,7 +703,7 @@
         h->sps->vui.hrd.i_cpb_size_unscaled = vbv_buffer_size;
 
         if( rc->b_vbv_min_rate )
-            rc->bitrate = h->param.rc.i_bitrate * 1000.;
+            rc->bitrate = (double)h->param.rc.i_bitrate * kilobit_size;
         rc->buffer_rate = vbv_max_bitrate / rc->fps;
         rc->vbv_max_rate = vbv_max_bitrate;
         rc->buffer_size = vbv_buffer_size;
@@ -761,7 +759,7 @@
     else
         rc->qcompress = h->param.rc.f_qcompress;
 
-    rc->bitrate = h->param.rc.i_bitrate * 1000.;
+    rc->bitrate = h->param.rc.i_bitrate * (h->param.i_avcintra_class ? 1024. : 1000.);
     rc->rate_tolerance = h->param.rc.f_rate_tolerance;
     rc->nmb = h->mb.i_mb_count;
     rc->last_non_b_pict_type = -1;
@@ -872,7 +870,7 @@
             char *mbtree_stats_in = x264_strcat_filename( h->param.rc.psz_stat_in, ".mbtree" );
             if( !mbtree_stats_in )
                 return -1;
-            rc->p_mbtree_stat_file_in = fopen( mbtree_stats_in, "rb" );
+            rc->p_mbtree_stat_file_in = x264_fopen( mbtree_stats_in, "rb" );
             x264_free( mbtree_stats_in );
             if( !rc->p_mbtree_stat_file_in )
             {
@@ -913,7 +911,7 @@
              * so we'll at least try to roughly approximate this effect. */
             res_factor_bits = powf( res_factor, 0.7 );
 
-            if( ( p = strstr( opts, "timebase=" ) ) && sscanf( p, "timebase=%u/%u", &k, &l ) != 2 )
+            if( !( p = strstr( opts, "timebase=" ) ) || sscanf( p, "timebase=%u/%u", &k, &l ) != 2 )
             {
                 x264_log( h, X264_LOG_ERROR, "timebase specified in stats file not valid\n" );
                 return -1;
@@ -1140,7 +1138,7 @@
         if( !rc->psz_stat_file_tmpname )
             return -1;
 
-        rc->p_stat_file_out = fopen( rc->psz_stat_file_tmpname, "wb" );
+        rc->p_stat_file_out = x264_fopen( rc->psz_stat_file_tmpname, "wb" );
         if( rc->p_stat_file_out == NULL )
         {
             x264_log( h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n" );
@@ -1158,7 +1156,7 @@
             if( !rc->psz_mbtree_stat_file_tmpname || !rc->psz_mbtree_stat_file_name )
                 return -1;
 
-            rc->p_mbtree_stat_file_out = fopen( rc->psz_mbtree_stat_file_tmpname, "wb" );
+            rc->p_mbtree_stat_file_out = x264_fopen( rc->psz_mbtree_stat_file_tmpname, "wb" );
             if( rc->p_mbtree_stat_file_out == NULL )
             {
                 x264_log( h, X264_LOG_ERROR, "ratecontrol_init: can't open mbtree stats file\n" );
@@ -1338,7 +1336,7 @@
         b_regular_file = x264_is_regular_file( rc->p_stat_file_out );
         fclose( rc->p_stat_file_out );
         if( h->i_frame >= rc->num_entries && b_regular_file )
-            if( rename( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out ) != 0 )
+            if( x264_rename( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out ) != 0 )
             {
                 x264_log( h, X264_LOG_ERROR, "failed to rename \"%s\" to \"%s\"\n",
                           rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out );
@@ -1350,7 +1348,7 @@
         b_regular_file = x264_is_regular_file( rc->p_mbtree_stat_file_out );
         fclose( rc->p_mbtree_stat_file_out );
         if( h->i_frame >= rc->num_entries && b_regular_file )
-            if( rename( rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name ) != 0 )
+            if( x264_rename( rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name ) != 0 )
             {
                 x264_log( h, X264_LOG_ERROR, "failed to rename \"%s\" to \"%s\"\n",
                           rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name );
@@ -1398,7 +1396,7 @@
     x264_emms();
 
     if( zone && (!rc->prev_zone || zone->param != rc->prev_zone->param) )
-        x264_encoder_reconfig( h, zone->param );
+        x264_encoder_reconfig_apply( h, zone->param );
     rc->prev_zone = zone;
 
     if( h->param.rc.b_stat_read )
@@ -2108,15 +2106,25 @@
     rct->buffer_fill_final -= (uint64_t)bits * h->sps->vui.i_time_scale;
 
     if( rct->buffer_fill_final < 0 )
-        x264_log( h, X264_LOG_WARNING, "VBV underflow (frame %d, %.0f bits)\n", h->i_frame, (double)rct->buffer_fill_final / h->sps->vui.i_time_scale );
+    {
+        double underflow = (double)rct->buffer_fill_final / h->sps->vui.i_time_scale;
+        if( rcc->rate_factor_max_increment && rcc->qpm >= rcc->qp_novbv + rcc->rate_factor_max_increment )
+            x264_log( h, X264_LOG_DEBUG, "VBV underflow due to CRF-max (frame %d, %.0f bits)\n", h->i_frame, underflow );
+        else
+            x264_log( h, X264_LOG_WARNING, "VBV underflow (frame %d, %.0f bits)\n", h->i_frame, underflow );
+    }
     rct->buffer_fill_final = X264_MAX( rct->buffer_fill_final, 0 );
-    rct->buffer_fill_final += (uint64_t)bitrate * h->sps->vui.i_num_units_in_tick * h->fenc->i_cpb_duration;
 
-    if( h->sps->vui.hrd.b_cbr_hrd && rct->buffer_fill_final > buffer_size )
+    if( h->param.i_avcintra_class )
+        rct->buffer_fill_final += buffer_size;
+    else
+        rct->buffer_fill_final += (uint64_t)bitrate * h->sps->vui.i_num_units_in_tick * h->fenc->i_cpb_duration;
+
+    if( h->param.rc.b_filler && rct->buffer_fill_final > buffer_size )
     {
         int64_t scale = (int64_t)h->sps->vui.i_time_scale * 8;
         filler = (rct->buffer_fill_final - buffer_size + scale - 1) / scale;
-        bits = X264_MAX( (FILLER_OVERHEAD - h->param.b_annexb), filler ) * 8;
+        bits = h->param.i_avcintra_class ? filler * 8 : X264_MAX( (FILLER_OVERHEAD - h->param.b_annexb), filler ) * 8;
         rct->buffer_fill_final -= (uint64_t)bits * h->sps->vui.i_time_scale;
     }
     else
@@ -2719,7 +2727,7 @@
      * we're adding or removing bits), and starting on the earliest frame that
      * can influence the buffer fill of that end frame. */
     x264_ratecontrol_t *rcc = h->rc;
-    const double buffer_min = (over ? .1 : .1) * rcc->buffer_size;
+    const double buffer_min = .1 * rcc->buffer_size;
     const double buffer_max = .9 * rcc->buffer_size;
     double fill = fills[*t0-1];
     double parity = over ? 1. : -1.;

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * ratecontrol.c: ratecontrol
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Michael Niedermayer <michaelni@gmx.at>
@@ -101,7 +101,7 @@
     double vbv_max_rate;        /* # of bits added to buffer_fill per second */
     predictor_t *pred;          /* predict frame size from satd */
     int single_frame_vbv;
-    double rate_factor_max_increment; /* Don't allow RF above (CRF + this value). */
+    float rate_factor_max_increment; /* Don't allow RF above (CRF + this value). */
 
     /* ABR stuff */
     int    last_satd;
@@ -653,8 +653,9 @@
                       h->param.rc.i_vbv_buffer_size );
         }
 
-        int vbv_buffer_size = h->param.rc.i_vbv_buffer_size * 1000;
-        int vbv_max_bitrate = h->param.rc.i_vbv_max_bitrate * 1000;
+        int kilobit_size = h->param.i_avcintra_class ? 1024 : 1000;
+        int vbv_buffer_size = h->param.rc.i_vbv_buffer_size * kilobit_size;
+        int vbv_max_bitrate = h->param.rc.i_vbv_max_bitrate * kilobit_size;
 
         /* Init HRD */
         if( h->param.i_nal_hrd && b_init )
@@ -666,15 +667,12 @@
             #define BR_SHIFT  6
             #define CPB_SHIFT 4
 
-            int bitrate = 1000*h->param.rc.i_vbv_max_bitrate;
-            int bufsize = 1000*h->param.rc.i_vbv_buffer_size;
-
             // normalize HRD size and rate to the value / scale notation
-            h->sps->vui.hrd.i_bit_rate_scale = x264_clip3( x264_ctz( bitrate ) - BR_SHIFT, 0, 15 );
-            h->sps->vui.hrd.i_bit_rate_value = bitrate >> ( h->sps->vui.hrd.i_bit_rate_scale + BR_SHIFT );
+            h->sps->vui.hrd.i_bit_rate_scale = x264_clip3( x264_ctz( vbv_max_bitrate ) - BR_SHIFT, 0, 15 );
+            h->sps->vui.hrd.i_bit_rate_value = vbv_max_bitrate >> ( h->sps->vui.hrd.i_bit_rate_scale + BR_SHIFT );
             h->sps->vui.hrd.i_bit_rate_unscaled = h->sps->vui.hrd.i_bit_rate_value << ( h->sps->vui.hrd.i_bit_rate_scale + BR_SHIFT );
-            h->sps->vui.hrd.i_cpb_size_scale = x264_clip3( x264_ctz( bufsize ) - CPB_SHIFT, 0, 15 );
-            h->sps->vui.hrd.i_cpb_size_value = bufsize >> ( h->sps->vui.hrd.i_cpb_size_scale + CPB_SHIFT );
+            h->sps->vui.hrd.i_cpb_size_scale = x264_clip3( x264_ctz( vbv_buffer_size ) - CPB_SHIFT, 0, 15 );
+            h->sps->vui.hrd.i_cpb_size_value = vbv_buffer_size >> ( h->sps->vui.hrd.i_cpb_size_scale + CPB_SHIFT );
             h->sps->vui.hrd.i_cpb_size_unscaled = h->sps->vui.hrd.i_cpb_size_value << ( h->sps->vui.hrd.i_cpb_size_scale + CPB_SHIFT );
 
             #undef CPB_SHIFT
@@ -705,7 +703,7 @@
         h->sps->vui.hrd.i_cpb_size_unscaled = vbv_buffer_size;
 
         if( rc->b_vbv_min_rate )
-            rc->bitrate = h->param.rc.i_bitrate * 1000.;
+            rc->bitrate = (double)h->param.rc.i_bitrate * kilobit_size;
         rc->buffer_rate = vbv_max_bitrate / rc->fps;
         rc->vbv_max_rate = vbv_max_bitrate;
         rc->buffer_size = vbv_buffer_size;
@@ -761,7 +759,7 @@
     else
         rc->qcompress = h->param.rc.f_qcompress;
 
-    rc->bitrate = h->param.rc.i_bitrate * 1000.;
+    rc->bitrate = h->param.rc.i_bitrate * (h->param.i_avcintra_class ? 1024. : 1000.);
     rc->rate_tolerance = h->param.rc.f_rate_tolerance;
     rc->nmb = h->mb.i_mb_count;
     rc->last_non_b_pict_type = -1;
@@ -872,7 +870,7 @@
             char *mbtree_stats_in = x264_strcat_filename( h->param.rc.psz_stat_in, ".mbtree" );
             if( !mbtree_stats_in )
                 return -1;
-            rc->p_mbtree_stat_file_in = fopen( mbtree_stats_in, "rb" );
+            rc->p_mbtree_stat_file_in = x264_fopen( mbtree_stats_in, "rb" );
             x264_free( mbtree_stats_in );
             if( !rc->p_mbtree_stat_file_in )
             {
@@ -913,7 +911,7 @@
              * so we'll at least try to roughly approximate this effect. */
             res_factor_bits = powf( res_factor, 0.7 );
 
-            if( ( p = strstr( opts, "timebase=" ) ) && sscanf( p, "timebase=%u/%u", &k, &l ) != 2 )
+            if( !( p = strstr( opts, "timebase=" ) ) || sscanf( p, "timebase=%u/%u", &k, &l ) != 2 )
             {
                 x264_log( h, X264_LOG_ERROR, "timebase specified in stats file not valid\n" );
                 return -1;
@@ -1140,7 +1138,7 @@
         if( !rc->psz_stat_file_tmpname )
             return -1;
 
-        rc->p_stat_file_out = fopen( rc->psz_stat_file_tmpname, "wb" );
+        rc->p_stat_file_out = x264_fopen( rc->psz_stat_file_tmpname, "wb" );
         if( rc->p_stat_file_out == NULL )
         {
             x264_log( h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n" );
@@ -1158,7 +1156,7 @@
             if( !rc->psz_mbtree_stat_file_tmpname || !rc->psz_mbtree_stat_file_name )
                 return -1;
 
-            rc->p_mbtree_stat_file_out = fopen( rc->psz_mbtree_stat_file_tmpname, "wb" );
+            rc->p_mbtree_stat_file_out = x264_fopen( rc->psz_mbtree_stat_file_tmpname, "wb" );
             if( rc->p_mbtree_stat_file_out == NULL )
             {
                 x264_log( h, X264_LOG_ERROR, "ratecontrol_init: can't open mbtree stats file\n" );
@@ -1338,7 +1336,7 @@
         b_regular_file = x264_is_regular_file( rc->p_stat_file_out );
         fclose( rc->p_stat_file_out );
         if( h->i_frame >= rc->num_entries && b_regular_file )
-            if( rename( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out ) != 0 )
+            if( x264_rename( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out ) != 0 )
             {
                 x264_log( h, X264_LOG_ERROR, "failed to rename \"%s\" to \"%s\"\n",
                           rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out );
@@ -1350,7 +1348,7 @@
         b_regular_file = x264_is_regular_file( rc->p_mbtree_stat_file_out );
         fclose( rc->p_mbtree_stat_file_out );
         if( h->i_frame >= rc->num_entries && b_regular_file )
-            if( rename( rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name ) != 0 )
+            if( x264_rename( rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name ) != 0 )
             {
                 x264_log( h, X264_LOG_ERROR, "failed to rename \"%s\" to \"%s\"\n",
                           rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name );
@@ -1398,7 +1396,7 @@
     x264_emms();
 
     if( zone && (!rc->prev_zone || zone->param != rc->prev_zone->param) )
-        x264_encoder_reconfig( h, zone->param );
+        x264_encoder_reconfig_apply( h, zone->param );
     rc->prev_zone = zone;
 
     if( h->param.rc.b_stat_read )
@@ -2108,15 +2106,25 @@
     rct->buffer_fill_final -= (uint64_t)bits * h->sps->vui.i_time_scale;
 
     if( rct->buffer_fill_final < 0 )
-        x264_log( h, X264_LOG_WARNING, "VBV underflow (frame %d, %.0f bits)\n", h->i_frame, (double)rct->buffer_fill_final / h->sps->vui.i_time_scale );
+    {
+        double underflow = (double)rct->buffer_fill_final / h->sps->vui.i_time_scale;
+        if( rcc->rate_factor_max_increment && rcc->qpm >= rcc->qp_novbv + rcc->rate_factor_max_increment )
+            x264_log( h, X264_LOG_DEBUG, "VBV underflow due to CRF-max (frame %d, %.0f bits)\n", h->i_frame, underflow );
+        else
+            x264_log( h, X264_LOG_WARNING, "VBV underflow (frame %d, %.0f bits)\n", h->i_frame, underflow );
+    }
     rct->buffer_fill_final = X264_MAX( rct->buffer_fill_final, 0 );
-    rct->buffer_fill_final += (uint64_t)bitrate * h->sps->vui.i_num_units_in_tick * h->fenc->i_cpb_duration;
 
-    if( h->sps->vui.hrd.b_cbr_hrd && rct->buffer_fill_final > buffer_size )
+    if( h->param.i_avcintra_class )
+        rct->buffer_fill_final += buffer_size;
+    else
+        rct->buffer_fill_final += (uint64_t)bitrate * h->sps->vui.i_num_units_in_tick * h->fenc->i_cpb_duration;
+
+    if( h->param.rc.b_filler && rct->buffer_fill_final > buffer_size )
     {
         int64_t scale = (int64_t)h->sps->vui.i_time_scale * 8;
         filler = (rct->buffer_fill_final - buffer_size + scale - 1) / scale;
-        bits = X264_MAX( (FILLER_OVERHEAD - h->param.b_annexb), filler ) * 8;
+        bits = h->param.i_avcintra_class ? filler * 8 : X264_MAX( (FILLER_OVERHEAD - h->param.b_annexb), filler ) * 8;
         rct->buffer_fill_final -= (uint64_t)bits * h->sps->vui.i_time_scale;
     }
     else
@@ -2719,7 +2727,7 @@
      * we're adding or removing bits), and starting on the earliest frame that
      * can influence the buffer fill of that end frame. */
     x264_ratecontrol_t *rcc = h->rc;
-    const double buffer_min = (over ? .1 : .1) * rcc->buffer_size;
+    const double buffer_min = .1 * rcc->buffer_size;
     const double buffer_max = .9 * rcc->buffer_size;
     double fill = fills[*t0-1];
     double parity = over ? 1. : -1.;
​

x264-snapshot-20130723-2245.tar.bz2/encoder/ratecontrol.h -> x264-snapshot-20140321-2245.tar.bz2/encoder/ratecontrol.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * ratecontrol.h: ratecontrol
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -43,6 +43,7 @@
 void x264_ratecontrol_delete( x264_t * );
 
 void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init );
+int x264_encoder_reconfig_apply( x264_t *h, x264_param_t *param );
 
 void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame, float *quant_offsets );
 int  x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame, float *quant_offsets );
​

x264-snapshot-20130723-2245.tar.bz2/encoder/rdo.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/rdo.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * rdo.c: rate-distortion optimization
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
​

x264-snapshot-20130723-2245.tar.bz2/encoder/set.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/set.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * set: header writing
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -31,6 +31,7 @@
 
 // Indexed by pic_struct values
 static const uint8_t num_clock_ts[10] = { 0, 1, 1, 1, 2, 2, 3, 3, 2, 3 };
+const static uint8_t avcintra_uuid[] = {0xF7, 0x49, 0x3E, 0xB3, 0xD4, 0x00, 0x47, 0x96, 0x86, 0x86, 0xC9, 0x70, 0x7B, 0x64, 0x37, 0x2A};
 
 static void transpose( uint8_t *buf, int w )
 {
@@ -91,7 +92,7 @@
     bs_write( s, 8, payload_size-i );
 
     for( i = 0; i < payload_size; i++ )
-        bs_write(s, 8, payload[i] );
+        bs_write( s, 8, payload[i] );
 
     bs_rbsp_trailing( s );
     bs_flush( s );
@@ -227,7 +228,8 @@
     }
 
     /* FIXME: not sufficient for interlaced video */
-    sps->vui.b_chroma_loc_info_present = param->vui.i_chroma_loc > 0 && param->vui.i_chroma_loc <= 5;
+    sps->vui.b_chroma_loc_info_present = param->vui.i_chroma_loc > 0 && param->vui.i_chroma_loc <= 5 &&
+                                         sps->i_chroma_format_idc == CHROMA_420;
     if( sps->vui.b_chroma_loc_info_present )
     {
         sps->vui.i_chroma_loc_top = param->vui.i_chroma_loc;
@@ -249,7 +251,7 @@
 
     // NOTE: HRD related parts of the SPS are initialised in x264_ratecontrol_init_reconfigurable
 
-    sps->vui.b_bitstream_restriction = 1;
+    sps->vui.b_bitstream_restriction = param->i_keyint_max > 1;
     if( sps->vui.b_bitstream_restriction )
     {
         sps->vui.b_motion_vectors_over_pic_boundaries = 1;
@@ -421,7 +423,7 @@
     pps->i_sps_id = sps->i_id;
     pps->b_cabac = param->b_cabac;
 
-    pps->b_pic_order = param->b_interlaced;
+    pps->b_pic_order = !param->i_avcintra_class && param->b_interlaced;
     pps->i_num_slice_groups = 1;
 
     pps->i_num_ref_idx_l0_default_active = param->i_frame_reference;
@@ -575,7 +577,7 @@
 
     memcpy( payload, uuid, 16 );
     sprintf( payload+16, "x264 - core %d%s - H.264/MPEG-4 AVC codec - "
-             "Copy%s 2003-2013 - http://www.videolan.org/x264.html - options: %s",
+             "Copy%s 2003-2014 - http://www.videolan.org/x264.html - options: %s",
              X264_BUILD, X264_VERSION, HAVE_GPL?"left":"right", opts );
     length = strlen(payload)+1;
 
@@ -725,6 +727,49 @@
     x264_sei_write( s, tmp_buf, bs_pos( &q ) / 8, SEI_DEC_REF_PIC_MARKING );
 }
 
+int x264_sei_avcintra_umid_write( x264_t *h, bs_t *s )
+{
+    uint8_t data[512];
+    const char *msg = "UMID";
+    const int len = 497;
+
+    memset( data, 0xff, len );
+    memcpy( data, avcintra_uuid, sizeof(avcintra_uuid) );
+    memcpy( data+16, msg, strlen(msg) );
+
+    data[20] = 0x13;
+    /* These bytes appear to be some sort of frame/seconds counter in certain applications,
+     * but others jump around, so leave them as zero for now */
+    data[21] = data[22] = 0;
+
+    data[28] = 0x14;
+    data[36] = 0x60;
+    data[41] = 0x22; /* Believed to be some sort of end of basic UMID identifier */
+
+    x264_sei_write( &h->out.bs, data, len, SEI_USER_DATA_UNREGISTERED );
+
+    return 0;
+}
+
+int x264_sei_avcintra_vanc_write( x264_t *h, bs_t *s, int len )
+{
+    uint8_t data[6000];
+    const char *msg = "VANC";
+    if( len > sizeof(data) )
+    {
+        x264_log( h, X264_LOG_ERROR, "AVC-Intra SEI is too large (%d)\n", len );
+        return -1;
+    }
+
+    memset( data, 0xff, len );
+    memcpy( data, avcintra_uuid, sizeof(avcintra_uuid) );
+    memcpy( data+16, msg, strlen(msg) );
+
+    x264_sei_write( &h->out.bs, data, len, SEI_USER_DATA_UNREGISTERED );
+
+    return 0;
+}
+
 const x264_level_t x264_levels[] =
 {
     { 10,    1485,    99,    396,     64,    175,  64, 64,  0, 2, 0, 0, 1 },

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * set: header writing
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -31,6 +31,7 @@
 
 // Indexed by pic_struct values
 static const uint8_t num_clock_ts[10] = { 0, 1, 1, 1, 2, 2, 3, 3, 2, 3 };
+const static uint8_t avcintra_uuid[] = {0xF7, 0x49, 0x3E, 0xB3, 0xD4, 0x00, 0x47, 0x96, 0x86, 0x86, 0xC9, 0x70, 0x7B, 0x64, 0x37, 0x2A};
 
 static void transpose( uint8_t *buf, int w )
 {
@@ -91,7 +92,7 @@
     bs_write( s, 8, payload_size-i );
 
     for( i = 0; i < payload_size; i++ )
-        bs_write(s, 8, payload[i] );
+        bs_write( s, 8, payload[i] );
 
     bs_rbsp_trailing( s );
     bs_flush( s );
@@ -227,7 +228,8 @@
     }
 
     /* FIXME: not sufficient for interlaced video */
-    sps->vui.b_chroma_loc_info_present = param->vui.i_chroma_loc > 0 && param->vui.i_chroma_loc <= 5;
+    sps->vui.b_chroma_loc_info_present = param->vui.i_chroma_loc > 0 && param->vui.i_chroma_loc <= 5 &&
+                                         sps->i_chroma_format_idc == CHROMA_420;
     if( sps->vui.b_chroma_loc_info_present )
     {
         sps->vui.i_chroma_loc_top = param->vui.i_chroma_loc;
@@ -249,7 +251,7 @@
 
     // NOTE: HRD related parts of the SPS are initialised in x264_ratecontrol_init_reconfigurable
 
-    sps->vui.b_bitstream_restriction = 1;
+    sps->vui.b_bitstream_restriction = param->i_keyint_max > 1;
     if( sps->vui.b_bitstream_restriction )
     {
         sps->vui.b_motion_vectors_over_pic_boundaries = 1;
@@ -421,7 +423,7 @@
     pps->i_sps_id = sps->i_id;
     pps->b_cabac = param->b_cabac;
 
-    pps->b_pic_order = param->b_interlaced;
+    pps->b_pic_order = !param->i_avcintra_class && param->b_interlaced;
     pps->i_num_slice_groups = 1;
 
     pps->i_num_ref_idx_l0_default_active = param->i_frame_reference;
@@ -575,7 +577,7 @@
 
     memcpy( payload, uuid, 16 );
     sprintf( payload+16, "x264 - core %d%s - H.264/MPEG-4 AVC codec - "
-             "Copy%s 2003-2013 - http://www.videolan.org/x264.html - options: %s",
+             "Copy%s 2003-2014 - http://www.videolan.org/x264.html - options: %s",
              X264_BUILD, X264_VERSION, HAVE_GPL?"left":"right", opts );
     length = strlen(payload)+1;
 
@@ -725,6 +727,49 @@
     x264_sei_write( s, tmp_buf, bs_pos( &q ) / 8, SEI_DEC_REF_PIC_MARKING );
 }
 
+int x264_sei_avcintra_umid_write( x264_t *h, bs_t *s )
+{
+    uint8_t data[512];
+    const char *msg = "UMID";
+    const int len = 497;
+
+    memset( data, 0xff, len );
+    memcpy( data, avcintra_uuid, sizeof(avcintra_uuid) );
+    memcpy( data+16, msg, strlen(msg) );
+
+    data[20] = 0x13;
+    /* These bytes appear to be some sort of frame/seconds counter in certain applications,
+     * but others jump around, so leave them as zero for now */
+    data[21] = data[22] = 0;
+
+    data[28] = 0x14;
+    data[36] = 0x60;
+    data[41] = 0x22; /* Believed to be some sort of end of basic UMID identifier */
+
+    x264_sei_write( &h->out.bs, data, len, SEI_USER_DATA_UNREGISTERED );
+
+    return 0;
+}
+
+int x264_sei_avcintra_vanc_write( x264_t *h, bs_t *s, int len )
+{
+    uint8_t data[6000];
+    const char *msg = "VANC";
+    if( len > sizeof(data) )
+    {
+        x264_log( h, X264_LOG_ERROR, "AVC-Intra SEI is too large (%d)\n", len );
+        return -1;
+    }
+
+    memset( data, 0xff, len );
+    memcpy( data, avcintra_uuid, sizeof(avcintra_uuid) );
+    memcpy( data+16, msg, strlen(msg) );
+
+    x264_sei_write( &h->out.bs, data, len, SEI_USER_DATA_UNREGISTERED );
+
+    return 0;
+}
+
 const x264_level_t x264_levels[] =
 {
     { 10,    1485,    99,    396,     64,    175,  64, 64,  0, 2, 0, 0, 1 },
​

x264-snapshot-20130723-2245.tar.bz2/encoder/set.h -> x264-snapshot-20140321-2245.tar.bz2/encoder/set.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * set.h: header writing
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -38,6 +38,8 @@
 void x264_sei_pic_timing_write( x264_t *h, bs_t *s );
 void x264_sei_dec_ref_pic_marking_write( x264_t *h, bs_t *s );
 void x264_sei_frame_packing_write( x264_t *h, bs_t *s );
+int  x264_sei_avcintra_umid_write( x264_t *h, bs_t *s );
+int  x264_sei_avcintra_vanc_write( x264_t *h, bs_t *s, int len );
 void x264_sei_write( bs_t *s, uint8_t *payload, int payload_size, int payload_type );
 void x264_filler_write( x264_t *h, bs_t *s, int filler );
 
​

x264-snapshot-20130723-2245.tar.bz2/encoder/slicetype-cl.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/slicetype-cl.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * slicetype-cl.c: OpenCL slicetype decision code (lowres lookahead)
  *****************************************************************************
- * Copyright (C) 2012-2013 x264 project
+ * Copyright (C) 2012-2014 x264 project
  *
  * Authors: Steve Borho <sborho@multicorewareinc.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/encoder/slicetype.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/slicetype.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * slicetype.c: lookahead analysis
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: Jason Garrett-Glaser <darkshikari@gmail.com>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -1022,9 +1022,12 @@
     return i_score;
 }
 
+/* Trade off precision in mbtree for increased range */
+#define MBTREE_PRECISION 0.5f
+
 static void x264_macroblock_tree_finish( x264_t *h, x264_frame_t *frame, float average_duration, int ref0_distance )
 {
-    int fps_factor = round( CLIP_DURATION(average_duration) / CLIP_DURATION(frame->f_duration) * 256 );
+    int fps_factor = round( CLIP_DURATION(average_duration) / CLIP_DURATION(frame->f_duration) * 256 / MBTREE_PRECISION );
     float weightdelta = 0.0;
     if( ref0_distance && frame->f_weighted_cost_delta[ref0_distance-1] > 0 )
         weightdelta = (1.0 - frame->f_weighted_cost_delta[ref0_distance-1]);
@@ -1051,11 +1054,12 @@
     int i_bipred_weight = h->param.analyse.b_weighted_bipred ? 64 - (dist_scale_factor>>2) : 32;
     int16_t (*mvs[2])[2] = { frames[b]->lowres_mvs[0][b-p0-1], frames[b]->lowres_mvs[1][p1-b-1] };
     int bipred_weights[2] = {i_bipred_weight, 64 - i_bipred_weight};
-    int *buf = h->scratch_buffer;
+    int16_t *buf = h->scratch_buffer;
     uint16_t *propagate_cost = frames[b]->i_propagate_cost;
+    uint16_t *lowres_costs = frames[b]->lowres_costs[b-p0][p1-b];
 
     x264_emms();
-    float fps_factor = CLIP_DURATION(frames[b]->f_duration) / CLIP_DURATION(average_duration);
+    float fps_factor = CLIP_DURATION(frames[b]->f_duration) / (CLIP_DURATION(average_duration) * 256.0f) * MBTREE_PRECISION;
 
     /* For non-reffed frames the source costs are always zero, so just memset one row and re-use it. */
     if( !referenced )
@@ -1065,72 +1069,17 @@
     {
         int mb_index = h->mb.i_mb_y*h->mb.i_mb_stride;
         h->mc.mbtree_propagate_cost( buf, propagate_cost,
-            frames[b]->i_intra_cost+mb_index, frames[b]->lowres_costs[b-p0][p1-b]+mb_index,
+            frames[b]->i_intra_cost+mb_index, lowres_costs+mb_index,
             frames[b]->i_inv_qscale_factor+mb_index, &fps_factor, h->mb.i_mb_width );
         if( referenced )
             propagate_cost += h->mb.i_mb_width;
-        for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->mb.i_mb_width; h->mb.i_mb_x++, mb_index++ )
+
+        h->mc.mbtree_propagate_list( h, ref_costs[0], &mvs[0][mb_index], buf, &lowres_costs[mb_index],
+                                     bipred_weights[0], h->mb.i_mb_y, h->mb.i_mb_width, 0 );
+        if( b != p1 )
         {
-            int propagate_amount = buf[h->mb.i_mb_x];
-            /* Don't propagate for an intra block. */
-            if( propagate_amount > 0 )
-            {
-                /* Access width-2 bitfield. */
-                int lists_used = frames[b]->lowres_costs[b-p0][p1-b][mb_index] >> LOWRES_COST_SHIFT;
-                /* Follow the MVs to the previous frame(s). */
-                for( int list = 0; list < 2; list++ )
-                    if( (lists_used >> list)&1 )
-                    {
-#define CLIP_ADD(s,x) (s) = X264_MIN((s)+(x),(1<<16)-1)
-                        int listamount = propagate_amount;
-                        /* Apply bipred weighting. */
-                        if( lists_used == 3 )
-                            listamount = (listamount * bipred_weights[list] + 32) >> 6;
-
-                        /* Early termination for simple case of mv0. */
-                        if( !M32( mvs[list][mb_index] ) )
-                        {
-                            CLIP_ADD( ref_costs[list][mb_index], listamount );
-                            continue;
-                        }
-
-                        int x = mvs[list][mb_index][0];
-                        int y = mvs[list][mb_index][1];
-                        int mbx = (x>>5)+h->mb.i_mb_x;
-                        int mby = (y>>5)+h->mb.i_mb_y;
-                        int idx0 = mbx + mby * h->mb.i_mb_stride;
-                        int idx1 = idx0 + 1;
-                        int idx2 = idx0 + h->mb.i_mb_stride;
-                        int idx3 = idx0 + h->mb.i_mb_stride + 1;
-                        x &= 31;
-                        y &= 31;
-                        int idx0weight = (32-y)*(32-x);
-                        int idx1weight = (32-y)*x;
-                        int idx2weight = y*(32-x);
-                        int idx3weight = y*x;
-
-                        /* We could just clip the MVs, but pixels that lie outside the frame probably shouldn't
-                         * be counted. */
-                        if( mbx < h->mb.i_mb_width-1 && mby < h->mb.i_mb_height-1 && mbx >= 0 && mby >= 0 )
-                        {
-                            CLIP_ADD( ref_costs[list][idx0], (listamount*idx0weight+512)>>10 );
-                            CLIP_ADD( ref_costs[list][idx1], (listamount*idx1weight+512)>>10 );
-                            CLIP_ADD( ref_costs[list][idx2], (listamount*idx2weight+512)>>10 );
-                            CLIP_ADD( ref_costs[list][idx3], (listamount*idx3weight+512)>>10 );
-                        }
-                        else /* Check offsets individually */
-                        {
-                            if( mbx < h->mb.i_mb_width && mby < h->mb.i_mb_height && mbx >= 0 && mby >= 0 )
-                                CLIP_ADD( ref_costs[list][idx0], (listamount*idx0weight+512)>>10 );
-                            if( mbx+1 < h->mb.i_mb_width && mby < h->mb.i_mb_height && mbx+1 >= 0 && mby >= 0 )
-                                CLIP_ADD( ref_costs[list][idx1], (listamount*idx1weight+512)>>10 );
-                            if( mbx < h->mb.i_mb_width && mby+1 < h->mb.i_mb_height && mbx >= 0 && mby+1 >= 0 )
-                                CLIP_ADD( ref_costs[list][idx2], (listamount*idx2weight+512)>>10 );
-                            if( mbx+1 < h->mb.i_mb_width && mby+1 < h->mb.i_mb_height && mbx+1 >= 0 && mby+1 >= 0 )
-                                CLIP_ADD( ref_costs[list][idx3], (listamount*idx3weight+512)>>10 );
-                        }
-                    }
-            }
+            h->mc.mbtree_propagate_list( h, ref_costs[1], &mvs[1][mb_index], buf, &lowres_costs[mb_index],
+                                         bipred_weights[1], h->mb.i_mb_y, h->mb.i_mb_width, 1 );
         }
     }

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * slicetype.c: lookahead analysis
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: Jason Garrett-Glaser <darkshikari@gmail.com>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -1022,9 +1022,12 @@
     return i_score;
 }
 
+/* Trade off precision in mbtree for increased range */
+#define MBTREE_PRECISION 0.5f
+
 static void x264_macroblock_tree_finish( x264_t *h, x264_frame_t *frame, float average_duration, int ref0_distance )
 {
-    int fps_factor = round( CLIP_DURATION(average_duration) / CLIP_DURATION(frame->f_duration) * 256 );
+    int fps_factor = round( CLIP_DURATION(average_duration) / CLIP_DURATION(frame->f_duration) * 256 / MBTREE_PRECISION );
     float weightdelta = 0.0;
     if( ref0_distance && frame->f_weighted_cost_delta[ref0_distance-1] > 0 )
         weightdelta = (1.0 - frame->f_weighted_cost_delta[ref0_distance-1]);
@@ -1051,11 +1054,12 @@
     int i_bipred_weight = h->param.analyse.b_weighted_bipred ? 64 - (dist_scale_factor>>2) : 32;
     int16_t (*mvs[2])[2] = { frames[b]->lowres_mvs[0][b-p0-1], frames[b]->lowres_mvs[1][p1-b-1] };
     int bipred_weights[2] = {i_bipred_weight, 64 - i_bipred_weight};
-    int *buf = h->scratch_buffer;
+    int16_t *buf = h->scratch_buffer;
     uint16_t *propagate_cost = frames[b]->i_propagate_cost;
+    uint16_t *lowres_costs = frames[b]->lowres_costs[b-p0][p1-b];
 
     x264_emms();
-    float fps_factor = CLIP_DURATION(frames[b]->f_duration) / CLIP_DURATION(average_duration);
+    float fps_factor = CLIP_DURATION(frames[b]->f_duration) / (CLIP_DURATION(average_duration) * 256.0f) * MBTREE_PRECISION;
 
     /* For non-reffed frames the source costs are always zero, so just memset one row and re-use it. */
     if( !referenced )
@@ -1065,72 +1069,17 @@
     {
         int mb_index = h->mb.i_mb_y*h->mb.i_mb_stride;
         h->mc.mbtree_propagate_cost( buf, propagate_cost,
-            frames[b]->i_intra_cost+mb_index, frames[b]->lowres_costs[b-p0][p1-b]+mb_index,
+            frames[b]->i_intra_cost+mb_index, lowres_costs+mb_index,
             frames[b]->i_inv_qscale_factor+mb_index, &fps_factor, h->mb.i_mb_width );
         if( referenced )
             propagate_cost += h->mb.i_mb_width;
-        for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->mb.i_mb_width; h->mb.i_mb_x++, mb_index++ )
+
+        h->mc.mbtree_propagate_list( h, ref_costs[0], &mvs[0][mb_index], buf, &lowres_costs[mb_index],
+                                     bipred_weights[0], h->mb.i_mb_y, h->mb.i_mb_width, 0 );
+        if( b != p1 )
         {
-            int propagate_amount = buf[h->mb.i_mb_x];
-            /* Don't propagate for an intra block. */
-            if( propagate_amount > 0 )
-            {
-                /* Access width-2 bitfield. */
-                int lists_used = frames[b]->lowres_costs[b-p0][p1-b][mb_index] >> LOWRES_COST_SHIFT;
-                /* Follow the MVs to the previous frame(s). */
-                for( int list = 0; list < 2; list++ )
-                    if( (lists_used >> list)&1 )
-                    {
-#define CLIP_ADD(s,x) (s) = X264_MIN((s)+(x),(1<<16)-1)
-                        int listamount = propagate_amount;
-                        /* Apply bipred weighting. */
-                        if( lists_used == 3 )
-                            listamount = (listamount * bipred_weights[list] + 32) >> 6;
-
-                        /* Early termination for simple case of mv0. */
-                        if( !M32( mvs[list][mb_index] ) )
-                        {
-                            CLIP_ADD( ref_costs[list][mb_index], listamount );
-                            continue;
-                        }
-
-                        int x = mvs[list][mb_index][0];
-                        int y = mvs[list][mb_index][1];
-                        int mbx = (x>>5)+h->mb.i_mb_x;
-                        int mby = (y>>5)+h->mb.i_mb_y;
-                        int idx0 = mbx + mby * h->mb.i_mb_stride;
-                        int idx1 = idx0 + 1;
-                        int idx2 = idx0 + h->mb.i_mb_stride;
-                        int idx3 = idx0 + h->mb.i_mb_stride + 1;
-                        x &= 31;
-                        y &= 31;
-                        int idx0weight = (32-y)*(32-x);
-                        int idx1weight = (32-y)*x;
-                        int idx2weight = y*(32-x);
-                        int idx3weight = y*x;
-
-                        /* We could just clip the MVs, but pixels that lie outside the frame probably shouldn't
-                         * be counted. */
-                        if( mbx < h->mb.i_mb_width-1 && mby < h->mb.i_mb_height-1 && mbx >= 0 && mby >= 0 )
-                        {
-                            CLIP_ADD( ref_costs[list][idx0], (listamount*idx0weight+512)>>10 );
-                            CLIP_ADD( ref_costs[list][idx1], (listamount*idx1weight+512)>>10 );
-                            CLIP_ADD( ref_costs[list][idx2], (listamount*idx2weight+512)>>10 );
-                            CLIP_ADD( ref_costs[list][idx3], (listamount*idx3weight+512)>>10 );
-                        }
-                        else /* Check offsets individually */
-                        {
-                            if( mbx < h->mb.i_mb_width && mby < h->mb.i_mb_height && mbx >= 0 && mby >= 0 )
-                                CLIP_ADD( ref_costs[list][idx0], (listamount*idx0weight+512)>>10 );
-                            if( mbx+1 < h->mb.i_mb_width && mby < h->mb.i_mb_height && mbx+1 >= 0 && mby >= 0 )
-                                CLIP_ADD( ref_costs[list][idx1], (listamount*idx1weight+512)>>10 );
-                            if( mbx < h->mb.i_mb_width && mby+1 < h->mb.i_mb_height && mbx >= 0 && mby+1 >= 0 )
-                                CLIP_ADD( ref_costs[list][idx2], (listamount*idx2weight+512)>>10 );
-                            if( mbx+1 < h->mb.i_mb_width && mby+1 < h->mb.i_mb_height && mbx+1 >= 0 && mby+1 >= 0 )
-                                CLIP_ADD( ref_costs[list][idx3], (listamount*idx3weight+512)>>10 );
-                        }
-                    }
-            }
+            h->mc.mbtree_propagate_list( h, ref_costs[1], &mvs[1][mb_index], buf, &lowres_costs[mb_index],
+                                         bipred_weights[1], h->mb.i_mb_y, h->mb.i_mb_width, 1 );
         }
     }
 
​

x264-snapshot-20130723-2245.tar.bz2/filters/filters.c -> x264-snapshot-20140321-2245.tar.bz2/filters/filters.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * filters.c: common filter functions
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Diogo Franco <diogomfranco@gmail.com>
  *          Steven Walters <kemuri9@gmail.com>
​

x264-snapshot-20130723-2245.tar.bz2/filters/filters.h -> x264-snapshot-20140321-2245.tar.bz2/filters/filters.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * filters.h: common filter functions
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Diogo Franco <diogomfranco@gmail.com>
  *          Steven Walters <kemuri9@gmail.com>
​

x264-snapshot-20130723-2245.tar.bz2/filters/video/cache.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/cache.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * cache.c: cache video filter
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/filters/video/crop.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/crop.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * crop.c: crop video filter
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *          James Darnley <james.darnley@gmail.com>
@@ -105,8 +105,7 @@
     for( int i = 0; i < output->img.planes; i++ )
     {
         intptr_t offset = output->img.stride[i] * h->dims[1] * h->csp->height[i];
-        offset += h->dims[0] * h->csp->width[i];
-        offset *= x264_cli_csp_depth_factor( output->img.csp );
+        offset += h->dims[0] * h->csp->width[i] * x264_cli_csp_depth_factor( output->img.csp );
         output->img.plane[i] += offset;
     }
     return 0;
​

x264-snapshot-20130723-2245.tar.bz2/filters/video/depth.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/depth.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * depth.c: bit-depth conversion video filter
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Oskar Arvidsson <oskar@irock.se>
  *
​

x264-snapshot-20130723-2245.tar.bz2/filters/video/fix_vfr_pts.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/fix_vfr_pts.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * fix_vfr_pts.c: vfr pts fixing video filter
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/filters/video/internal.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/internal.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * internal.c: video filter utilities
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/filters/video/internal.h -> x264-snapshot-20140321-2245.tar.bz2/filters/video/internal.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * internal.h: video filter utilities
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/filters/video/resize.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/resize.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * resize.c: resize video filter
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
@@ -45,8 +45,8 @@
 #include <libavutil/opt.h>
 #include <libavutil/pixdesc.h>
 
-#ifndef PIX_FMT_BGRA64
-#define PIX_FMT_BGRA64 PIX_FMT_NONE
+#ifndef AV_PIX_FMT_BGRA64
+#define AV_PIX_FMT_BGRA64 AV_PIX_FMT_NONE
 #endif
 
 typedef struct
@@ -94,9 +94,12 @@
 
     for( int i = X264_CSP_NONE+1; i < X264_CSP_CLI_MAX; i++ )
     {
-        printf( "%s", x264_cli_csps[i].name );
-        if( i+1 < X264_CSP_CLI_MAX )
-            printf( ", " );
+        if( x264_cli_csps[i].name )
+        {
+            printf( "%s", x264_cli_csps[i].name );
+            if( i+1 < X264_CSP_CLI_MAX )
+                printf( ", " );
+        }
     }
     printf( "\n"
             "               - depth: 8 or 16 bits per pixel [keep current]\n"
@@ -143,19 +146,19 @@
     switch( csp&X264_CSP_MASK )
     {
         case X264_CSP_YV12: /* specially handled via swapping chroma */
-        case X264_CSP_I420: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV420P16 : PIX_FMT_YUV420P;
+        case X264_CSP_I420: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_YUV420P16 : AV_PIX_FMT_YUV420P;
         case X264_CSP_YV16: /* specially handled via swapping chroma */
-        case X264_CSP_I422: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV422P16 : PIX_FMT_YUV422P;
+        case X264_CSP_I422: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_YUV422P16 : AV_PIX_FMT_YUV422P;
         case X264_CSP_YV24: /* specially handled via swapping chroma */
-        case X264_CSP_I444: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV444P16 : PIX_FMT_YUV444P;
-        case X264_CSP_RGB:  return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_RGB48     : PIX_FMT_RGB24;
-        case X264_CSP_BGR:  return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_BGR48     : PIX_FMT_BGR24;
-        case X264_CSP_BGRA: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_BGRA64    : PIX_FMT_BGRA;
+        case X264_CSP_I444: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_YUV444P;
+        case X264_CSP_RGB:  return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_RGB48     : AV_PIX_FMT_RGB24;
+        case X264_CSP_BGR:  return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_BGR48     : AV_PIX_FMT_BGR24;
+        case X264_CSP_BGRA: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_BGRA64    : AV_PIX_FMT_BGRA;
         /* the next csp has no equivalent 16bit depth in swscale */
-        case X264_CSP_NV12: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_NONE      : PIX_FMT_NV12;
+        case X264_CSP_NV12: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_NONE      : AV_PIX_FMT_NV12;
         /* the next csp is no supported by swscale at all */
         case X264_CSP_NV16:
-        default:            return PIX_FMT_NONE;
+        default:            return AV_PIX_FMT_NONE;
     }
 }
 
@@ -175,12 +178,12 @@
     int pix_fmt = convert_csp_to_pix_fmt( csp );
     // first determine the base csp
     int ret = X264_CSP_NONE;
-    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_descriptors+pix_fmt;
-    if( (unsigned)pix_fmt >= PIX_FMT_NB || !pix_desc->name )
+    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get( pix_fmt );
+    if( !pix_desc || !pix_desc->name )
         return ret;
 
     const char *pix_fmt_name = pix_desc->name;
-    int is_rgb = pix_desc->flags & (PIX_FMT_RGB | PIX_FMT_PAL);
+    int is_rgb = pix_desc->flags & (AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_PAL);
     int is_bgr = !!strstr( pix_fmt_name, "bgr" );
     if( is_bgr || is_rgb )
     {
@@ -243,8 +246,11 @@
         if( strlen( str_csp ) == 0 )
             csp = info->csp & X264_CSP_MASK;
         else
-            for( csp = X264_CSP_CLI_MAX-1; x264_cli_csps[csp].name && strcasecmp( x264_cli_csps[csp].name, str_csp ); )
-                csp--;
+            for( csp = X264_CSP_CLI_MAX-1; csp > X264_CSP_NONE; csp-- )
+            {
+                if( x264_cli_csps[csp].name && !strcasecmp( x264_cli_csps[csp].name, str_csp ) )
+                    break;
+            }
         FAIL_IF_ERROR( csp == X264_CSP_NONE, "unsupported colorspace `%s'\n", str_csp );
         h->dst_csp = csp;
         if( depth == 16 )
@@ -392,7 +398,7 @@
     h->scale = input_prop;
     if( !h->buffer_allocated )
     {
-        if( x264_cli_pic_alloc( &h->buffer, h->dst_csp, h->dst.width, h->dst.height ) )
+        if( x264_cli_pic_alloc_aligned( &h->buffer, h->dst_csp, h->dst.width, h->dst.height ) )
             return -1;
         h->buffer_allocated = 1;
     }
@@ -462,11 +468,11 @@
     int dst_pix_fmt_inv = convert_csp_to_pix_fmt( h->dst_csp ^ X264_CSP_HIGH_DEPTH );
 
     /* confirm swscale can support this conversion */
-    FAIL_IF_ERROR( src_pix_fmt == PIX_FMT_NONE && src_pix_fmt_inv != PIX_FMT_NONE,
+    FAIL_IF_ERROR( src_pix_fmt == AV_PIX_FMT_NONE && src_pix_fmt_inv != AV_PIX_FMT_NONE,
                    "input colorspace %s with bit depth %d is not supported\n", av_get_pix_fmt_name( src_pix_fmt_inv ),
                    info->csp & X264_CSP_HIGH_DEPTH ? 16 : 8 );
     FAIL_IF_ERROR( !sws_isSupportedInput( src_pix_fmt ), "input colorspace %s is not supported\n", av_get_pix_fmt_name( src_pix_fmt ) )
-    FAIL_IF_ERROR( h->dst.pix_fmt == PIX_FMT_NONE && dst_pix_fmt_inv != PIX_FMT_NONE,
+    FAIL_IF_ERROR( h->dst.pix_fmt == AV_PIX_FMT_NONE && dst_pix_fmt_inv != AV_PIX_FMT_NONE,
                    "input colorspace %s with bit depth %d is not supported\n", av_get_pix_fmt_name( dst_pix_fmt_inv ),
                    h->dst_csp & X264_CSP_HIGH_DEPTH ? 16 : 8 );
     FAIL_IF_ERROR( !sws_isSupportedOutput( h->dst.pix_fmt ), "output colorspace %s is not supported\n", av_get_pix_fmt_name( h->dst.pix_fmt ) )

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * resize.c: resize video filter
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
@@ -45,8 +45,8 @@
 #include <libavutil/opt.h>
 #include <libavutil/pixdesc.h>
 
-#ifndef PIX_FMT_BGRA64
-#define PIX_FMT_BGRA64 PIX_FMT_NONE
+#ifndef AV_PIX_FMT_BGRA64
+#define AV_PIX_FMT_BGRA64 AV_PIX_FMT_NONE
 #endif
 
 typedef struct
@@ -94,9 +94,12 @@
 
     for( int i = X264_CSP_NONE+1; i < X264_CSP_CLI_MAX; i++ )
     {
-        printf( "%s", x264_cli_csps[i].name );
-        if( i+1 < X264_CSP_CLI_MAX )
-            printf( ", " );
+        if( x264_cli_csps[i].name )
+        {
+            printf( "%s", x264_cli_csps[i].name );
+            if( i+1 < X264_CSP_CLI_MAX )
+                printf( ", " );
+        }
     }
     printf( "\n"
             "               - depth: 8 or 16 bits per pixel [keep current]\n"
@@ -143,19 +146,19 @@
     switch( csp&X264_CSP_MASK )
     {
         case X264_CSP_YV12: /* specially handled via swapping chroma */
-        case X264_CSP_I420: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV420P16 : PIX_FMT_YUV420P;
+        case X264_CSP_I420: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_YUV420P16 : AV_PIX_FMT_YUV420P;
         case X264_CSP_YV16: /* specially handled via swapping chroma */
-        case X264_CSP_I422: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV422P16 : PIX_FMT_YUV422P;
+        case X264_CSP_I422: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_YUV422P16 : AV_PIX_FMT_YUV422P;
         case X264_CSP_YV24: /* specially handled via swapping chroma */
-        case X264_CSP_I444: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV444P16 : PIX_FMT_YUV444P;
-        case X264_CSP_RGB:  return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_RGB48     : PIX_FMT_RGB24;
-        case X264_CSP_BGR:  return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_BGR48     : PIX_FMT_BGR24;
-        case X264_CSP_BGRA: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_BGRA64    : PIX_FMT_BGRA;
+        case X264_CSP_I444: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_YUV444P;
+        case X264_CSP_RGB:  return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_RGB48     : AV_PIX_FMT_RGB24;
+        case X264_CSP_BGR:  return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_BGR48     : AV_PIX_FMT_BGR24;
+        case X264_CSP_BGRA: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_BGRA64    : AV_PIX_FMT_BGRA;
         /* the next csp has no equivalent 16bit depth in swscale */
-        case X264_CSP_NV12: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_NONE      : PIX_FMT_NV12;
+        case X264_CSP_NV12: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_NONE      : AV_PIX_FMT_NV12;
         /* the next csp is no supported by swscale at all */
         case X264_CSP_NV16:
-        default:            return PIX_FMT_NONE;
+        default:            return AV_PIX_FMT_NONE;
     }
 }
 
@@ -175,12 +178,12 @@
     int pix_fmt = convert_csp_to_pix_fmt( csp );
     // first determine the base csp
     int ret = X264_CSP_NONE;
-    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_descriptors+pix_fmt;
-    if( (unsigned)pix_fmt >= PIX_FMT_NB || !pix_desc->name )
+    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get( pix_fmt );
+    if( !pix_desc || !pix_desc->name )
         return ret;
 
     const char *pix_fmt_name = pix_desc->name;
-    int is_rgb = pix_desc->flags & (PIX_FMT_RGB | PIX_FMT_PAL);
+    int is_rgb = pix_desc->flags & (AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_PAL);
     int is_bgr = !!strstr( pix_fmt_name, "bgr" );
     if( is_bgr || is_rgb )
     {
@@ -243,8 +246,11 @@
         if( strlen( str_csp ) == 0 )
             csp = info->csp & X264_CSP_MASK;
         else
-            for( csp = X264_CSP_CLI_MAX-1; x264_cli_csps[csp].name && strcasecmp( x264_cli_csps[csp].name, str_csp ); )
-                csp--;
+            for( csp = X264_CSP_CLI_MAX-1; csp > X264_CSP_NONE; csp-- )
+            {
+                if( x264_cli_csps[csp].name && !strcasecmp( x264_cli_csps[csp].name, str_csp ) )
+                    break;
+            }
         FAIL_IF_ERROR( csp == X264_CSP_NONE, "unsupported colorspace `%s'\n", str_csp );
         h->dst_csp = csp;
         if( depth == 16 )
@@ -392,7 +398,7 @@
     h->scale = input_prop;
     if( !h->buffer_allocated )
     {
-        if( x264_cli_pic_alloc( &h->buffer, h->dst_csp, h->dst.width, h->dst.height ) )
+        if( x264_cli_pic_alloc_aligned( &h->buffer, h->dst_csp, h->dst.width, h->dst.height ) )
             return -1;
         h->buffer_allocated = 1;
     }
@@ -462,11 +468,11 @@
     int dst_pix_fmt_inv = convert_csp_to_pix_fmt( h->dst_csp ^ X264_CSP_HIGH_DEPTH );
 
     /* confirm swscale can support this conversion */
-    FAIL_IF_ERROR( src_pix_fmt == PIX_FMT_NONE && src_pix_fmt_inv != PIX_FMT_NONE,
+    FAIL_IF_ERROR( src_pix_fmt == AV_PIX_FMT_NONE && src_pix_fmt_inv != AV_PIX_FMT_NONE,
                    "input colorspace %s with bit depth %d is not supported\n", av_get_pix_fmt_name( src_pix_fmt_inv ),
                    info->csp & X264_CSP_HIGH_DEPTH ? 16 : 8 );
     FAIL_IF_ERROR( !sws_isSupportedInput( src_pix_fmt ), "input colorspace %s is not supported\n", av_get_pix_fmt_name( src_pix_fmt ) )
-    FAIL_IF_ERROR( h->dst.pix_fmt == PIX_FMT_NONE && dst_pix_fmt_inv != PIX_FMT_NONE,
+    FAIL_IF_ERROR( h->dst.pix_fmt == AV_PIX_FMT_NONE && dst_pix_fmt_inv != AV_PIX_FMT_NONE,
                    "input colorspace %s with bit depth %d is not supported\n", av_get_pix_fmt_name( dst_pix_fmt_inv ),
                    h->dst_csp & X264_CSP_HIGH_DEPTH ? 16 : 8 );
     FAIL_IF_ERROR( !sws_isSupportedOutput( h->dst.pix_fmt ), "output colorspace %s is not supported\n", av_get_pix_fmt_name( h->dst.pix_fmt ) )
​

x264-snapshot-20130723-2245.tar.bz2/filters/video/select_every.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/select_every.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * select_every.c: select-every video filter
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/filters/video/source.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/source.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * source.c: source video filter
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/filters/video/video.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/video.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * video.c: video filters
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/filters/video/video.h -> x264-snapshot-20140321-2245.tar.bz2/filters/video/video.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * video.h: video filters
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/input/avs.c -> x264-snapshot-20140321-2245.tar.bz2/input/avs.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * avs.c: avisynth input
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
@@ -35,7 +35,7 @@
 #define avs_address dlsym
 #else
 #include <windows.h>
-#define avs_open LoadLibrary( "avisynth" )
+#define avs_open LoadLibraryW( L"avisynth" )
 #define avs_close FreeLibrary
 #define avs_address GetProcAddress
 #endif
@@ -172,7 +172,7 @@
 
 static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, cli_input_opt_t *opt )
 {
-    FILE *fh = fopen( psz_filename, "r" );
+    FILE *fh = x264_fopen( psz_filename, "r" );
     if( !fh )
         return -1;
     FAIL_IF_ERROR( !x264_is_regular_file( fh ), "AVS input is incompatible with non-regular file `%s'\n", psz_filename );
@@ -192,7 +192,16 @@
     if( avs_version <= 0 )
         return -1;
     x264_cli_log( "avs", X264_LOG_DEBUG, "using avisynth version %.2f\n", avs_version );
+
+#ifdef _WIN32
+    /* Avisynth doesn't support Unicode filenames. */
+    char ansi_filename[MAX_PATH];
+    FAIL_IF_ERROR( !x264_ansi_filename( psz_filename, ansi_filename, MAX_PATH, 0 ), "invalid ansi filename\n" );
+    AVS_Value arg = avs_new_value_string( ansi_filename );
+#else
     AVS_Value arg = avs_new_value_string( psz_filename );
+#endif
+
     AVS_Value res;
     char *filename_ext = get_filename_extension( psz_filename );
 
@@ -329,11 +338,11 @@
         info->csp = X264_CSP_I420;
 #if HAVE_SWSCALE
     else if( avs_is_yuy2( vi ) )
-        info->csp = PIX_FMT_YUYV422 | X264_CSP_OTHER;
+        info->csp = AV_PIX_FMT_YUYV422 | X264_CSP_OTHER;
     else if( avs_is_yv411( vi ) )
-        info->csp = PIX_FMT_YUV411P | X264_CSP_OTHER;
+        info->csp = AV_PIX_FMT_YUV411P | X264_CSP_OTHER;
     else if( avs_is_y8( vi ) )
-        info->csp = PIX_FMT_GRAY8 | X264_CSP_OTHER;
+        info->csp = AV_PIX_FMT_GRAY8 | X264_CSP_OTHER;
 #endif
     else
         info->csp = X264_CSP_NONE;
@@ -352,7 +361,7 @@
     if( cli_csp )
         pic->img.planes = cli_csp->planes;
 #if HAVE_SWSCALE
-    else if( csp == (PIX_FMT_YUV411P | X264_CSP_OTHER) )
+    else if( csp == (AV_PIX_FMT_YUV411P | X264_CSP_OTHER) )
         pic->img.planes = 3;
     else
         pic->img.planes = 1; //y8 and yuy2 are one plane

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * avs.c: avisynth input
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
@@ -35,7 +35,7 @@
 #define avs_address dlsym
 #else
 #include <windows.h>
-#define avs_open LoadLibrary( "avisynth" )
+#define avs_open LoadLibraryW( L"avisynth" )
 #define avs_close FreeLibrary
 #define avs_address GetProcAddress
 #endif
@@ -172,7 +172,7 @@
 
 static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, cli_input_opt_t *opt )
 {
-    FILE *fh = fopen( psz_filename, "r" );
+    FILE *fh = x264_fopen( psz_filename, "r" );
     if( !fh )
         return -1;
     FAIL_IF_ERROR( !x264_is_regular_file( fh ), "AVS input is incompatible with non-regular file `%s'\n", psz_filename );
@@ -192,7 +192,16 @@
     if( avs_version <= 0 )
         return -1;
     x264_cli_log( "avs", X264_LOG_DEBUG, "using avisynth version %.2f\n", avs_version );
+
+#ifdef _WIN32
+    /* Avisynth doesn't support Unicode filenames. */
+    char ansi_filename[MAX_PATH];
+    FAIL_IF_ERROR( !x264_ansi_filename( psz_filename, ansi_filename, MAX_PATH, 0 ), "invalid ansi filename\n" );
+    AVS_Value arg = avs_new_value_string( ansi_filename );
+#else
     AVS_Value arg = avs_new_value_string( psz_filename );
+#endif
+
     AVS_Value res;
     char *filename_ext = get_filename_extension( psz_filename );
 
@@ -329,11 +338,11 @@
         info->csp = X264_CSP_I420;
 #if HAVE_SWSCALE
     else if( avs_is_yuy2( vi ) )
-        info->csp = PIX_FMT_YUYV422 | X264_CSP_OTHER;
+        info->csp = AV_PIX_FMT_YUYV422 | X264_CSP_OTHER;
     else if( avs_is_yv411( vi ) )
-        info->csp = PIX_FMT_YUV411P | X264_CSP_OTHER;
+        info->csp = AV_PIX_FMT_YUV411P | X264_CSP_OTHER;
     else if( avs_is_y8( vi ) )
-        info->csp = PIX_FMT_GRAY8 | X264_CSP_OTHER;
+        info->csp = AV_PIX_FMT_GRAY8 | X264_CSP_OTHER;
 #endif
     else
         info->csp = X264_CSP_NONE;
@@ -352,7 +361,7 @@
     if( cli_csp )
         pic->img.planes = cli_csp->planes;
 #if HAVE_SWSCALE
-    else if( csp == (PIX_FMT_YUV411P | X264_CSP_OTHER) )
+    else if( csp == (AV_PIX_FMT_YUV411P | X264_CSP_OTHER) )
         pic->img.planes = 3;
     else
         pic->img.planes = 1; //y8 and yuy2 are one plane
​

x264-snapshot-20130723-2245.tar.bz2/input/ffms.c -> x264-snapshot-20140321-2245.tar.bz2/input/ffms.c Changed

@@ -1,10 +1,11 @@
 /*****************************************************************************
  * ffms.c: ffmpegsource input
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: Mike Gurlitz <mike.gurlitz@gmail.com>
  *          Steven Walters <kemuri9@gmail.com>
+ *          Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -34,8 +35,6 @@
 
 #ifdef _WIN32
 #include <windows.h>
-#else
-#define SetConsoleTitle(t)
 #endif
 
 typedef struct
@@ -60,7 +59,7 @@
     char buf[200];
     sprintf( buf, "ffms [info]: indexing input file [%.1f%%]", 100.0 * current / total );
     fprintf( stderr, "%s  \r", buf+5 );
-    SetConsoleTitle( buf );
+    x264_cli_set_console_title( buf );
     fflush( stderr );
     return 0;
 }
@@ -70,9 +69,9 @@
 {
     switch( csp )
     {
-        case PIX_FMT_YUVJ420P: *fullrange = 1; return PIX_FMT_YUV420P;
-        case PIX_FMT_YUVJ422P: *fullrange = 1; return PIX_FMT_YUV422P;
-        case PIX_FMT_YUVJ444P: *fullrange = 1; return PIX_FMT_YUV444P;
+        case AV_PIX_FMT_YUVJ420P: *fullrange = 1; return AV_PIX_FMT_YUV420P;
+        case AV_PIX_FMT_YUVJ422P: *fullrange = 1; return AV_PIX_FMT_YUV422P;
+        case AV_PIX_FMT_YUVJ444P: *fullrange = 1; return AV_PIX_FMT_YUV444P;
         default:                               return csp;
     }
 }
@@ -82,7 +81,21 @@
     ffms_hnd_t *h = calloc( 1, sizeof(ffms_hnd_t) );
     if( !h )
         return -1;
+
+#ifdef __MINGW32__
+    /* FFMS supports UTF-8 filenames, but it uses std::fstream internally which is broken with Unicode in MinGW. */
     FFMS_Init( 0, 0 );
+    char src_filename[MAX_PATH];
+    char idx_filename[MAX_PATH];
+    FAIL_IF_ERROR( !x264_ansi_filename( psz_filename, src_filename, MAX_PATH, 0 ), "invalid ansi filename\n" );
+    if( opt->index_file )
+        FAIL_IF_ERROR( !x264_ansi_filename( opt->index_file, idx_filename, MAX_PATH, 1 ), "invalid ansi filename\n" );
+#else
+    FFMS_Init( 0, 1 );
+    char *src_filename = psz_filename;
+    char *idx_filename = opt->index_file;
+#endif
+
     FFMS_ErrorInfo e;
     e.BufferSize = 0;
     int seekmode = opt->seek ? FFMS_SEEK_NORMAL : FFMS_SEEK_LINEAR_NO_RW;
@@ -90,29 +103,29 @@
     FFMS_Index *idx = NULL;
     if( opt->index_file )
     {
-        struct stat index_s, input_s;
-        if( !stat( opt->index_file, &index_s ) && !stat( psz_filename, &input_s ) &&
-            input_s.st_mtime < index_s.st_mtime )
-            idx = FFMS_ReadIndex( opt->index_file, &e );
+        x264_struct_stat index_s, input_s;
+        if( !x264_stat( opt->index_file, &index_s ) && !x264_stat( psz_filename, &input_s ) &&
+            input_s.st_mtime < index_s.st_mtime && index_s.st_size )
+            idx = FFMS_ReadIndex( idx_filename, &e );
     }
     if( !idx )
     {
         if( opt->progress )
         {
-            idx = FFMS_MakeIndex( psz_filename, 0, 0, NULL, NULL, 0, update_progress, &h->time, &e );
+            idx = FFMS_MakeIndex( src_filename, 0, 0, NULL, NULL, 0, update_progress, &h->time, &e );
             fprintf( stderr, "                                            \r" );
         }
         else
-            idx = FFMS_MakeIndex( psz_filename, 0, 0, NULL, NULL, 0, NULL, NULL, &e );
+            idx = FFMS_MakeIndex( src_filename, 0, 0, NULL, NULL, 0, NULL, NULL, &e );
         FAIL_IF_ERROR( !idx, "could not create index\n" )
-        if( opt->index_file && FFMS_WriteIndex( opt->index_file, idx, &e ) )
+        if( opt->index_file && FFMS_WriteIndex( idx_filename, idx, &e ) )
             x264_cli_log( "ffms", X264_LOG_WARNING, "could not write index file\n" );
     }
 
     int trackno = FFMS_GetFirstTrackOfType( idx, FFMS_TYPE_VIDEO, &e );
     FAIL_IF_ERROR( trackno < 0, "could not find video track\n" )
 
-    h->video_source = FFMS_CreateVideoSource( psz_filename, trackno, idx, 1, seekmode, &e );
+    h->video_source = FFMS_CreateVideoSource( src_filename, trackno, idx, 1, seekmode, &e );
     FAIL_IF_ERROR( !h->video_source, "could not create video source\n" )
 
     h->track = FFMS_GetTrackFromVideo( h->video_source );

 
@@ -1,10 +1,11 @@
 /*****************************************************************************
  * ffms.c: ffmpegsource input
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: Mike Gurlitz <mike.gurlitz@gmail.com>
  *          Steven Walters <kemuri9@gmail.com>
+ *          Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -34,8 +35,6 @@
 
 #ifdef _WIN32
 #include <windows.h>
-#else
-#define SetConsoleTitle(t)
 #endif
 
 typedef struct
@@ -60,7 +59,7 @@
     char buf[200];
     sprintf( buf, "ffms [info]: indexing input file [%.1f%%]", 100.0 * current / total );
     fprintf( stderr, "%s  \r", buf+5 );
-    SetConsoleTitle( buf );
+    x264_cli_set_console_title( buf );
     fflush( stderr );
     return 0;
 }
@@ -70,9 +69,9 @@
 {
     switch( csp )
     {
-        case PIX_FMT_YUVJ420P: *fullrange = 1; return PIX_FMT_YUV420P;
-        case PIX_FMT_YUVJ422P: *fullrange = 1; return PIX_FMT_YUV422P;
-        case PIX_FMT_YUVJ444P: *fullrange = 1; return PIX_FMT_YUV444P;
+        case AV_PIX_FMT_YUVJ420P: *fullrange = 1; return AV_PIX_FMT_YUV420P;
+        case AV_PIX_FMT_YUVJ422P: *fullrange = 1; return AV_PIX_FMT_YUV422P;
+        case AV_PIX_FMT_YUVJ444P: *fullrange = 1; return AV_PIX_FMT_YUV444P;
         default:                               return csp;
     }
 }
@@ -82,7 +81,21 @@
     ffms_hnd_t *h = calloc( 1, sizeof(ffms_hnd_t) );
     if( !h )
         return -1;
+
+#ifdef __MINGW32__
+    /* FFMS supports UTF-8 filenames, but it uses std::fstream internally which is broken with Unicode in MinGW. */
     FFMS_Init( 0, 0 );
+    char src_filename[MAX_PATH];
+    char idx_filename[MAX_PATH];
+    FAIL_IF_ERROR( !x264_ansi_filename( psz_filename, src_filename, MAX_PATH, 0 ), "invalid ansi filename\n" );
+    if( opt->index_file )
+        FAIL_IF_ERROR( !x264_ansi_filename( opt->index_file, idx_filename, MAX_PATH, 1 ), "invalid ansi filename\n" );
+#else
+    FFMS_Init( 0, 1 );
+    char *src_filename = psz_filename;
+    char *idx_filename = opt->index_file;
+#endif
+
     FFMS_ErrorInfo e;
     e.BufferSize = 0;
     int seekmode = opt->seek ? FFMS_SEEK_NORMAL : FFMS_SEEK_LINEAR_NO_RW;
@@ -90,29 +103,29 @@
     FFMS_Index *idx = NULL;
     if( opt->index_file )
     {
-        struct stat index_s, input_s;
-        if( !stat( opt->index_file, &index_s ) && !stat( psz_filename, &input_s ) &&
-            input_s.st_mtime < index_s.st_mtime )
-            idx = FFMS_ReadIndex( opt->index_file, &e );
+        x264_struct_stat index_s, input_s;
+        if( !x264_stat( opt->index_file, &index_s ) && !x264_stat( psz_filename, &input_s ) &&
+            input_s.st_mtime < index_s.st_mtime && index_s.st_size )
+            idx = FFMS_ReadIndex( idx_filename, &e );
     }
     if( !idx )
     {
         if( opt->progress )
         {
-            idx = FFMS_MakeIndex( psz_filename, 0, 0, NULL, NULL, 0, update_progress, &h->time, &e );
+            idx = FFMS_MakeIndex( src_filename, 0, 0, NULL, NULL, 0, update_progress, &h->time, &e );
             fprintf( stderr, "                                            \r" );
         }
         else
-            idx = FFMS_MakeIndex( psz_filename, 0, 0, NULL, NULL, 0, NULL, NULL, &e );
+            idx = FFMS_MakeIndex( src_filename, 0, 0, NULL, NULL, 0, NULL, NULL, &e );
         FAIL_IF_ERROR( !idx, "could not create index\n" )
-        if( opt->index_file && FFMS_WriteIndex( opt->index_file, idx, &e ) )
+        if( opt->index_file && FFMS_WriteIndex( idx_filename, idx, &e ) )
             x264_cli_log( "ffms", X264_LOG_WARNING, "could not write index file\n" );
     }
 
     int trackno = FFMS_GetFirstTrackOfType( idx, FFMS_TYPE_VIDEO, &e );
     FAIL_IF_ERROR( trackno < 0, "could not find video track\n" )
 
-    h->video_source = FFMS_CreateVideoSource( psz_filename, trackno, idx, 1, seekmode, &e );
+    h->video_source = FFMS_CreateVideoSource( src_filename, trackno, idx, 1, seekmode, &e );
     FAIL_IF_ERROR( !h->video_source, "could not create video source\n" )
 
     h->track = FFMS_GetTrackFromVideo( h->video_source );
​

x264-snapshot-20130723-2245.tar.bz2/input/input.c -> x264-snapshot-20140321-2245.tar.bz2/input/input.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * input.c: common input functions
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
@@ -42,7 +42,8 @@
 int x264_cli_csp_is_invalid( int csp )
 {
     int csp_mask = csp & X264_CSP_MASK;
-    return csp_mask <= X264_CSP_NONE || csp_mask >= X264_CSP_CLI_MAX || csp & X264_CSP_OTHER;
+    return csp_mask <= X264_CSP_NONE || csp_mask >= X264_CSP_CLI_MAX ||
+           csp_mask == X264_CSP_V210 || csp & X264_CSP_OTHER;
 }
 
 int x264_cli_csp_depth_factor( int csp )
@@ -74,7 +75,7 @@
     return size;
 }
 
-int x264_cli_pic_alloc( cli_pic_t *pic, int csp, int width, int height )
+static int x264_cli_pic_alloc_internal( cli_pic_t *pic, int csp, int width, int height, int align )
 {
     memset( pic, 0, sizeof(cli_pic_t) );
     int csp_mask = csp & X264_CSP_MASK;
@@ -87,15 +88,29 @@
     pic->img.height = height;
     for( int i = 0; i < pic->img.planes; i++ )
     {
-         pic->img.plane[i] = x264_malloc( x264_cli_pic_plane_size( csp, width, height, i ) );
-         if( !pic->img.plane[i] )
-             return -1;
-         pic->img.stride[i] = width * x264_cli_csps[csp_mask].width[i] * x264_cli_csp_depth_factor( csp );
+        int stride = width * x264_cli_csps[csp_mask].width[i];
+        stride *= x264_cli_csp_depth_factor( csp );
+        stride = ALIGN( stride, align );
+        uint64_t size = (uint64_t)(height * x264_cli_csps[csp_mask].height[i]) * stride;
+        pic->img.plane[i] = x264_malloc( size );
+        if( !pic->img.plane[i] )
+            return -1;
+        pic->img.stride[i] = stride;
     }
 
     return 0;
 }
 
+int x264_cli_pic_alloc( cli_pic_t *pic, int csp, int width, int height )
+{
+    return x264_cli_pic_alloc_internal( pic, csp, width, height, 1 );
+}
+
+int x264_cli_pic_alloc_aligned( cli_pic_t *pic, int csp, int width, int height )
+{
+    return x264_cli_pic_alloc_internal( pic, csp, width, height, NATIVE_ALIGN );
+}
+
 void x264_cli_pic_clean( cli_pic_t *pic )
 {
     for( int i = 0; i < pic->img.planes; i++ )

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * input.c: common input functions
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
@@ -42,7 +42,8 @@
 int x264_cli_csp_is_invalid( int csp )
 {
     int csp_mask = csp & X264_CSP_MASK;
-    return csp_mask <= X264_CSP_NONE || csp_mask >= X264_CSP_CLI_MAX || csp & X264_CSP_OTHER;
+    return csp_mask <= X264_CSP_NONE || csp_mask >= X264_CSP_CLI_MAX ||
+           csp_mask == X264_CSP_V210 || csp & X264_CSP_OTHER;
 }
 
 int x264_cli_csp_depth_factor( int csp )
@@ -74,7 +75,7 @@
     return size;
 }
 
-int x264_cli_pic_alloc( cli_pic_t *pic, int csp, int width, int height )
+static int x264_cli_pic_alloc_internal( cli_pic_t *pic, int csp, int width, int height, int align )
 {
     memset( pic, 0, sizeof(cli_pic_t) );
     int csp_mask = csp & X264_CSP_MASK;
@@ -87,15 +88,29 @@
     pic->img.height = height;
     for( int i = 0; i < pic->img.planes; i++ )
     {
-         pic->img.plane[i] = x264_malloc( x264_cli_pic_plane_size( csp, width, height, i ) );
-         if( !pic->img.plane[i] )
-             return -1;
-         pic->img.stride[i] = width * x264_cli_csps[csp_mask].width[i] * x264_cli_csp_depth_factor( csp );
+        int stride = width * x264_cli_csps[csp_mask].width[i];
+        stride *= x264_cli_csp_depth_factor( csp );
+        stride = ALIGN( stride, align );
+        uint64_t size = (uint64_t)(height * x264_cli_csps[csp_mask].height[i]) * stride;
+        pic->img.plane[i] = x264_malloc( size );
+        if( !pic->img.plane[i] )
+            return -1;
+        pic->img.stride[i] = stride;
     }
 
     return 0;
 }
 
+int x264_cli_pic_alloc( cli_pic_t *pic, int csp, int width, int height )
+{
+    return x264_cli_pic_alloc_internal( pic, csp, width, height, 1 );
+}
+
+int x264_cli_pic_alloc_aligned( cli_pic_t *pic, int csp, int width, int height )
+{
+    return x264_cli_pic_alloc_internal( pic, csp, width, height, NATIVE_ALIGN );
+}
+
 void x264_cli_pic_clean( cli_pic_t *pic )
 {
     for( int i = 0; i < pic->img.planes; i++ )
​

x264-snapshot-20130723-2245.tar.bz2/input/input.h -> x264-snapshot-20140321-2245.tar.bz2/input/input.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * input.h: file input
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -124,6 +124,7 @@
 int      x264_cli_csp_is_invalid( int csp );
 int      x264_cli_csp_depth_factor( int csp );
 int      x264_cli_pic_alloc( cli_pic_t *pic, int csp, int width, int height );
+int      x264_cli_pic_alloc_aligned( cli_pic_t *pic, int csp, int width, int height );
 void     x264_cli_pic_clean( cli_pic_t *pic );
 uint64_t x264_cli_pic_plane_size( int csp, int width, int height, int plane );
 uint64_t x264_cli_pic_size( int csp, int width, int height );
​

x264-snapshot-20130723-2245.tar.bz2/input/lavf.c -> x264-snapshot-20140321-2245.tar.bz2/input/lavf.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * lavf.c: libavformat input
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: Mike Gurlitz <mike.gurlitz@gmail.com>
  *          Steven Walters <kemuri9@gmail.com>
@@ -53,9 +53,9 @@
 {
     switch( csp )
     {
-        case PIX_FMT_YUVJ420P: *fullrange = 1; return PIX_FMT_YUV420P;
-        case PIX_FMT_YUVJ422P: *fullrange = 1; return PIX_FMT_YUV422P;
-        case PIX_FMT_YUVJ444P: *fullrange = 1; return PIX_FMT_YUV444P;
+        case AV_PIX_FMT_YUVJ420P: *fullrange = 1; return AV_PIX_FMT_YUV420P;
+        case AV_PIX_FMT_YUVJ422P: *fullrange = 1; return AV_PIX_FMT_YUV422P;
+        case AV_PIX_FMT_YUVJ444P: *fullrange = 1; return AV_PIX_FMT_YUV444P;
         default:                               return csp;
     }
 }
@@ -162,7 +162,7 @@
     if( opt->resolution )
     {
         av_dict_set( &options, "video_size", opt->resolution, 0 );
-        const char *csp = opt->colorspace ? opt->colorspace : av_get_pix_fmt_name( PIX_FMT_YUV420P );
+        const char *csp = opt->colorspace ? opt->colorspace : av_get_pix_fmt_name( AV_PIX_FMT_YUV420P );
         av_dict_set( &options, "pixel_format", csp, 0 );
     }
 
@@ -210,7 +210,7 @@
 
     /* avisynth stores rgb data vertically flipped. */
     if( !strcasecmp( get_filename_extension( psz_filename ), "avs" ) &&
-        (c->pix_fmt == PIX_FMT_BGRA || c->pix_fmt == PIX_FMT_BGR24) )
+        (c->pix_fmt == AV_PIX_FMT_BGRA || c->pix_fmt == AV_PIX_FMT_BGR24) )
         info->csp |= X264_CSP_VFLIP;
 
     *p_handle = h;

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * lavf.c: libavformat input
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: Mike Gurlitz <mike.gurlitz@gmail.com>
  *          Steven Walters <kemuri9@gmail.com>
@@ -53,9 +53,9 @@
 {
     switch( csp )
     {
-        case PIX_FMT_YUVJ420P: *fullrange = 1; return PIX_FMT_YUV420P;
-        case PIX_FMT_YUVJ422P: *fullrange = 1; return PIX_FMT_YUV422P;
-        case PIX_FMT_YUVJ444P: *fullrange = 1; return PIX_FMT_YUV444P;
+        case AV_PIX_FMT_YUVJ420P: *fullrange = 1; return AV_PIX_FMT_YUV420P;
+        case AV_PIX_FMT_YUVJ422P: *fullrange = 1; return AV_PIX_FMT_YUV422P;
+        case AV_PIX_FMT_YUVJ444P: *fullrange = 1; return AV_PIX_FMT_YUV444P;
         default:                               return csp;
     }
 }
@@ -162,7 +162,7 @@
     if( opt->resolution )
     {
         av_dict_set( &options, "video_size", opt->resolution, 0 );
-        const char *csp = opt->colorspace ? opt->colorspace : av_get_pix_fmt_name( PIX_FMT_YUV420P );
+        const char *csp = opt->colorspace ? opt->colorspace : av_get_pix_fmt_name( AV_PIX_FMT_YUV420P );
         av_dict_set( &options, "pixel_format", csp, 0 );
     }
 
@@ -210,7 +210,7 @@
 
     /* avisynth stores rgb data vertically flipped. */
     if( !strcasecmp( get_filename_extension( psz_filename ), "avs" ) &&
-        (c->pix_fmt == PIX_FMT_BGRA || c->pix_fmt == PIX_FMT_BGR24) )
+        (c->pix_fmt == AV_PIX_FMT_BGRA || c->pix_fmt == AV_PIX_FMT_BGR24) )
         info->csp |= X264_CSP_VFLIP;
 
     *p_handle = h;
​

x264-snapshot-20130723-2245.tar.bz2/input/raw.c -> x264-snapshot-20140321-2245.tar.bz2/input/raw.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * raw.c: raw input
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -55,8 +55,11 @@
     FAIL_IF_ERROR( !info->width || !info->height, "raw input requires a resolution.\n" )
     if( opt->colorspace )
     {
-        for( info->csp = X264_CSP_CLI_MAX-1; x264_cli_csps[info->csp].name && strcasecmp( x264_cli_csps[info->csp].name, opt->colorspace ); )
-            info->csp--;
+        for( info->csp = X264_CSP_CLI_MAX-1; info->csp > X264_CSP_NONE; info->csp-- )
+        {
+            if( x264_cli_csps[info->csp].name && !strcasecmp( x264_cli_csps[info->csp].name, opt->colorspace ) )
+                break;
+        }
         FAIL_IF_ERROR( info->csp == X264_CSP_NONE, "unsupported colorspace `%s'\n", opt->colorspace );
     }
     else /* default */
@@ -70,7 +73,7 @@
     if( !strcmp( psz_filename, "-" ) )
         h->fh = stdin;
     else
-        h->fh = fopen( psz_filename, "rb" );
+        h->fh = x264_fopen( psz_filename, "rb" );
     if( h->fh == NULL )
         return -1;
 
@@ -99,14 +102,14 @@
     return 0;
 }
 
-static int read_frame_internal( cli_pic_t *pic, raw_hnd_t *h )
+static int read_frame_internal( cli_pic_t *pic, raw_hnd_t *h, int bit_depth_uc )
 {
     int error = 0;
     int pixel_depth = x264_cli_csp_depth_factor( pic->img.csp );
     for( int i = 0; i < pic->img.planes && !error; i++ )
     {
         error |= fread( pic->img.plane[i], pixel_depth, h->plane_size[i], h->fh ) != h->plane_size[i];
-        if( h->bit_depth & 7 )
+        if( bit_depth_uc )
         {
             /* upconvert non 16bit high depth planes to 16bit using the same
              * algorithm as used in the depth filter. */
@@ -131,13 +134,13 @@
         else
             while( i_frame > h->next_frame )
             {
-                if( read_frame_internal( pic, h ) )
+                if( read_frame_internal( pic, h, 0 ) )
                     return -1;
                 h->next_frame++;
             }
     }
 
-    if( read_frame_internal( pic, h ) )
+    if( read_frame_internal( pic, h, h->bit_depth & 7 ) )
         return -1;
 
     h->next_frame = i_frame+1;

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * raw.c: raw input
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -55,8 +55,11 @@
     FAIL_IF_ERROR( !info->width || !info->height, "raw input requires a resolution.\n" )
     if( opt->colorspace )
     {
-        for( info->csp = X264_CSP_CLI_MAX-1; x264_cli_csps[info->csp].name && strcasecmp( x264_cli_csps[info->csp].name, opt->colorspace ); )
-            info->csp--;
+        for( info->csp = X264_CSP_CLI_MAX-1; info->csp > X264_CSP_NONE; info->csp-- )
+        {
+            if( x264_cli_csps[info->csp].name && !strcasecmp( x264_cli_csps[info->csp].name, opt->colorspace ) )
+                break;
+        }
         FAIL_IF_ERROR( info->csp == X264_CSP_NONE, "unsupported colorspace `%s'\n", opt->colorspace );
     }
     else /* default */
@@ -70,7 +73,7 @@
     if( !strcmp( psz_filename, "-" ) )
         h->fh = stdin;
     else
-        h->fh = fopen( psz_filename, "rb" );
+        h->fh = x264_fopen( psz_filename, "rb" );
     if( h->fh == NULL )
         return -1;
 
@@ -99,14 +102,14 @@
     return 0;
 }
 
-static int read_frame_internal( cli_pic_t *pic, raw_hnd_t *h )
+static int read_frame_internal( cli_pic_t *pic, raw_hnd_t *h, int bit_depth_uc )
 {
     int error = 0;
     int pixel_depth = x264_cli_csp_depth_factor( pic->img.csp );
     for( int i = 0; i < pic->img.planes && !error; i++ )
     {
         error |= fread( pic->img.plane[i], pixel_depth, h->plane_size[i], h->fh ) != h->plane_size[i];
-        if( h->bit_depth & 7 )
+        if( bit_depth_uc )
         {
             /* upconvert non 16bit high depth planes to 16bit using the same
              * algorithm as used in the depth filter. */
@@ -131,13 +134,13 @@
         else
             while( i_frame > h->next_frame )
             {
-                if( read_frame_internal( pic, h ) )
+                if( read_frame_internal( pic, h, 0 ) )
                     return -1;
                 h->next_frame++;
             }
     }
 
-    if( read_frame_internal( pic, h ) )
+    if( read_frame_internal( pic, h, h->bit_depth & 7 ) )
         return -1;
 
     h->next_frame = i_frame+1;
​

x264-snapshot-20130723-2245.tar.bz2/input/thread.c -> x264-snapshot-20140321-2245.tar.bz2/input/thread.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * thread.c: threaded input
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
​

x264-snapshot-20130723-2245.tar.bz2/input/timecode.c -> x264-snapshot-20140321-2245.tar.bz2/input/timecode.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * timecode.c: timecode file input
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Yusuke Nakamura <muken.the.vfrmaniac@gmail.com>
  *
@@ -368,7 +368,7 @@
     timecode_input.picture_alloc = h->input.picture_alloc;
     timecode_input.picture_clean = h->input.picture_clean;
 
-    tcfile_in = fopen( psz_filename, "rb" );
+    tcfile_in = x264_fopen( psz_filename, "rb" );
     FAIL_IF_ERROR( !tcfile_in, "can't open `%s'\n", psz_filename )
     else if( !x264_is_regular_file( tcfile_in ) )
     {
​

x264-snapshot-20130723-2245.tar.bz2/input/y4m.c -> x264-snapshot-20140321-2245.tar.bz2/input/y4m.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * y4m.c: y4m input
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -81,7 +81,7 @@
     if( !strcmp( psz_filename, "-" ) )
         h->fh = stdin;
     else
-        h->fh = fopen(psz_filename, "rb");
+        h->fh = x264_fopen(psz_filename, "rb");
     if( h->fh == NULL )
         return -1;
 
@@ -223,7 +223,7 @@
     return 0;
 }
 
-static int read_frame_internal( cli_pic_t *pic, y4m_hnd_t *h )
+static int read_frame_internal( cli_pic_t *pic, y4m_hnd_t *h, int bit_depth_uc )
 {
     size_t slen = strlen( Y4M_FRAME_MAGIC );
     int pixel_depth = x264_cli_csp_depth_factor( pic->img.csp );
@@ -249,7 +249,7 @@
     for( i = 0; i < pic->img.planes && !error; i++ )
     {
         error |= fread( pic->img.plane[i], pixel_depth, h->plane_size[i], h->fh ) != h->plane_size[i];
-        if( h->bit_depth & 7 )
+        if( bit_depth_uc )
         {
             /* upconvert non 16bit high depth planes to 16bit using the same
              * algorithm as used in the depth filter. */
@@ -274,13 +274,13 @@
         else
             while( i_frame > h->next_frame )
             {
-                if( read_frame_internal( pic, h ) )
+                if( read_frame_internal( pic, h, 0 ) )
                     return -1;
                 h->next_frame++;
             }
     }
 
-    if( read_frame_internal( pic, h ) )
+    if( read_frame_internal( pic, h, h->bit_depth & 7 ) )
         return -1;
 
     h->next_frame = i_frame+1;

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * y4m.c: y4m input
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -81,7 +81,7 @@
     if( !strcmp( psz_filename, "-" ) )
         h->fh = stdin;
     else
-        h->fh = fopen(psz_filename, "rb");
+        h->fh = x264_fopen(psz_filename, "rb");
     if( h->fh == NULL )
         return -1;
 
@@ -223,7 +223,7 @@
     return 0;
 }
 
-static int read_frame_internal( cli_pic_t *pic, y4m_hnd_t *h )
+static int read_frame_internal( cli_pic_t *pic, y4m_hnd_t *h, int bit_depth_uc )
 {
     size_t slen = strlen( Y4M_FRAME_MAGIC );
     int pixel_depth = x264_cli_csp_depth_factor( pic->img.csp );
@@ -249,7 +249,7 @@
     for( i = 0; i < pic->img.planes && !error; i++ )
     {
         error |= fread( pic->img.plane[i], pixel_depth, h->plane_size[i], h->fh ) != h->plane_size[i];
-        if( h->bit_depth & 7 )
+        if( bit_depth_uc )
         {
             /* upconvert non 16bit high depth planes to 16bit using the same
              * algorithm as used in the depth filter. */
@@ -274,13 +274,13 @@
         else
             while( i_frame > h->next_frame )
             {
-                if( read_frame_internal( pic, h ) )
+                if( read_frame_internal( pic, h, 0 ) )
                     return -1;
                 h->next_frame++;
             }
     }
 
-    if( read_frame_internal( pic, h ) )
+    if( read_frame_internal( pic, h, h->bit_depth & 7 ) )
         return -1;
 
     h->next_frame = i_frame+1;
​

x264-snapshot-20130723-2245.tar.bz2/output/flv.c -> x264-snapshot-20140321-2245.tar.bz2/output/flv.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * flv.c: flv muxer
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: Kieran Kunhya <kieran@kunhya.com>
  *
@@ -75,11 +75,10 @@
 
 static int open_file( char *psz_filename, hnd_t *p_handle, cli_output_opt_t *opt )
 {
-    flv_hnd_t *p_flv = malloc( sizeof(*p_flv) );
     *p_handle = NULL;
+    flv_hnd_t *p_flv = calloc( 1, sizeof(flv_hnd_t) );
     if( !p_flv )
         return -1;
-    memset( p_flv, 0, sizeof(*p_flv) );
 
     p_flv->b_dts_compress = opt->use_dts_compress;
 
​

x264-snapshot-20130723-2245.tar.bz2/output/flv_bytestream.c -> x264-snapshot-20140321-2245.tar.bz2/output/flv_bytestream.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * flv_bytestream.c: flv muxer utilities
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: Kieran Kunhya <kieran@kunhya.com>
  *
@@ -87,16 +87,14 @@
 
 flv_buffer *flv_create_writer( const char *filename )
 {
-    flv_buffer *c = malloc( sizeof(*c) );
-
+    flv_buffer *c = calloc( 1, sizeof(flv_buffer) );
     if( !c )
         return NULL;
-    memset( c, 0, sizeof(*c) );
 
     if( !strcmp( filename, "-" ) )
         c->fp = stdout;
     else
-        c->fp = fopen( filename, "wb" );
+        c->fp = x264_fopen( filename, "wb" );
     if( !c->fp )
     {
         free( c );
​

x264-snapshot-20130723-2245.tar.bz2/output/flv_bytestream.h -> x264-snapshot-20140321-2245.tar.bz2/output/flv_bytestream.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * flv_bytestream.h: flv muxer utilities
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: Kieran Kunhya <kieran@kunhya.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/output/matroska.c -> x264-snapshot-20140321-2245.tar.bz2/output/matroska.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * matroska.c: matroska muxer
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: Mike Matsnev <mike@haali.su>
  *
@@ -33,6 +33,7 @@
     int width, height, d_width, d_height;
 
     int display_size_units;
+    int stereo_mode;
 
     int64_t frame_duration;
 
@@ -44,16 +45,11 @@
 
 static int open_file( char *psz_filename, hnd_t *p_handle, cli_output_opt_t *opt )
 {
-    mkv_hnd_t *p_mkv;
-
     *p_handle = NULL;
-
-    p_mkv  = malloc( sizeof(*p_mkv) );
+    mkv_hnd_t *p_mkv = calloc( 1, sizeof(mkv_hnd_t) );
     if( !p_mkv )
         return -1;
 
-    memset( p_mkv, 0, sizeof(*p_mkv) );
-
     p_mkv->w = mk_create_writer( psz_filename );
     if( !p_mkv->w )
     {
@@ -84,6 +80,7 @@
     p_mkv->width = p_mkv->d_width = p_param->i_width;
     p_mkv->height = p_mkv->d_height = p_param->i_height;
     p_mkv->display_size_units = DS_PIXELS;
+    p_mkv->stereo_mode = p_param->i_frame_packing;
 
     if( p_param->vui.i_sar_width && p_param->vui.i_sar_height
         && p_param->vui.i_sar_width != p_param->vui.i_sar_height )
@@ -152,7 +149,7 @@
     ret = mk_write_header( p_mkv->w, "x264" X264_VERSION, "V_MPEG4/ISO/AVC",
                            avcC, avcC_len, p_mkv->frame_duration, 50000,
                            p_mkv->width, p_mkv->height,
-                           p_mkv->d_width, p_mkv->d_height, p_mkv->display_size_units );
+                           p_mkv->d_width, p_mkv->d_height, p_mkv->display_size_units, p_mkv->stereo_mode );
     if( ret < 0 )
         return ret;

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * matroska.c: matroska muxer
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: Mike Matsnev <mike@haali.su>
  *
@@ -33,6 +33,7 @@
     int width, height, d_width, d_height;
 
     int display_size_units;
+    int stereo_mode;
 
     int64_t frame_duration;
 
@@ -44,16 +45,11 @@
 
 static int open_file( char *psz_filename, hnd_t *p_handle, cli_output_opt_t *opt )
 {
-    mkv_hnd_t *p_mkv;
-
     *p_handle = NULL;
-
-    p_mkv  = malloc( sizeof(*p_mkv) );
+    mkv_hnd_t *p_mkv = calloc( 1, sizeof(mkv_hnd_t) );
     if( !p_mkv )
         return -1;
 
-    memset( p_mkv, 0, sizeof(*p_mkv) );
-
     p_mkv->w = mk_create_writer( psz_filename );
     if( !p_mkv->w )
     {
@@ -84,6 +80,7 @@
     p_mkv->width = p_mkv->d_width = p_param->i_width;
     p_mkv->height = p_mkv->d_height = p_param->i_height;
     p_mkv->display_size_units = DS_PIXELS;
+    p_mkv->stereo_mode = p_param->i_frame_packing;
 
     if( p_param->vui.i_sar_width && p_param->vui.i_sar_height
         && p_param->vui.i_sar_width != p_param->vui.i_sar_height )
@@ -152,7 +149,7 @@
     ret = mk_write_header( p_mkv->w, "x264" X264_VERSION, "V_MPEG4/ISO/AVC",
                            avcC, avcC_len, p_mkv->frame_duration, 50000,
                            p_mkv->width, p_mkv->height,
-                           p_mkv->d_width, p_mkv->d_height, p_mkv->display_size_units );
+                           p_mkv->d_width, p_mkv->d_height, p_mkv->display_size_units, p_mkv->stereo_mode );
     if( ret < 0 )
         return ret;
 
​

x264-snapshot-20130723-2245.tar.bz2/output/matroska_ebml.c -> x264-snapshot-20140321-2245.tar.bz2/output/matroska_ebml.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * matroska_ebml.c: matroska muxer utilities
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: Mike Matsnev <mike@haali.su>
  *
@@ -74,10 +74,9 @@
     }
     else
     {
-        c = malloc( sizeof(*c) );
+        c = calloc( 1, sizeof(mk_context) );
         if( !c )
             return NULL;
-        memset( c, 0, sizeof(*c) );
     }
 
     c->parent = parent;
@@ -291,12 +290,10 @@
 
 mk_writer *mk_create_writer( const char *filename )
 {
-    mk_writer *w = malloc( sizeof(*w) );
+    mk_writer *w = calloc( 1, sizeof(mk_writer) );
     if( !w )
         return NULL;
 
-    memset( w, 0, sizeof(*w) );
-
     w->root = mk_create_context( w, NULL, 0 );
     if( !w->root )
     {
@@ -307,7 +304,7 @@
     if( !strcmp( filename, "-" ) )
         w->fp = stdout;
     else
-        w->fp = fopen( filename, "wb" );
+        w->fp = x264_fopen( filename, "wb" );
     if( !w->fp )
     {
         mk_destroy_contexts( w );
@@ -320,13 +317,15 @@
     return w;
 }
 
+static const uint8_t mk_stereo_modes[6] = {5,9,7,1,3,13};
+
 int mk_write_header( mk_writer *w, const char *writing_app,
                      const char *codec_id,
                      const void *codec_private, unsigned codec_private_size,
                      int64_t default_frame_duration,
                      int64_t timescale,
                      unsigned width, unsigned height,
-                     unsigned d_width, unsigned d_height, int display_size_units )
+                     unsigned d_width, unsigned d_height, int display_size_units, int stereo_mode )
 {
     mk_context  *c, *ti, *v;
 
@@ -382,6 +381,8 @@
     CHECK( mk_write_uint( v, 0x54b2, display_size_units ) );
     CHECK( mk_write_uint( v, 0x54b0, d_width ) );
     CHECK( mk_write_uint( v, 0x54ba, d_height ) );
+    if( stereo_mode >= 0 && stereo_mode <= 5 )
+        CHECK( mk_write_uint( v, 0x53b8, mk_stereo_modes[stereo_mode] ) );
     CHECK( mk_close_context( v, 0 ) );
 
     CHECK( mk_close_context( ti, 0 ) );

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * matroska_ebml.c: matroska muxer utilities
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: Mike Matsnev <mike@haali.su>
  *
@@ -74,10 +74,9 @@
     }
     else
     {
-        c = malloc( sizeof(*c) );
+        c = calloc( 1, sizeof(mk_context) );
         if( !c )
             return NULL;
-        memset( c, 0, sizeof(*c) );
     }
 
     c->parent = parent;
@@ -291,12 +290,10 @@
 
 mk_writer *mk_create_writer( const char *filename )
 {
-    mk_writer *w = malloc( sizeof(*w) );
+    mk_writer *w = calloc( 1, sizeof(mk_writer) );
     if( !w )
         return NULL;
 
-    memset( w, 0, sizeof(*w) );
-
     w->root = mk_create_context( w, NULL, 0 );
     if( !w->root )
     {
@@ -307,7 +304,7 @@
     if( !strcmp( filename, "-" ) )
         w->fp = stdout;
     else
-        w->fp = fopen( filename, "wb" );
+        w->fp = x264_fopen( filename, "wb" );
     if( !w->fp )
     {
         mk_destroy_contexts( w );
@@ -320,13 +317,15 @@
     return w;
 }
 
+static const uint8_t mk_stereo_modes[6] = {5,9,7,1,3,13};
+
 int mk_write_header( mk_writer *w, const char *writing_app,
                      const char *codec_id,
                      const void *codec_private, unsigned codec_private_size,
                      int64_t default_frame_duration,
                      int64_t timescale,
                      unsigned width, unsigned height,
-                     unsigned d_width, unsigned d_height, int display_size_units )
+                     unsigned d_width, unsigned d_height, int display_size_units, int stereo_mode )
 {
     mk_context  *c, *ti, *v;
 
@@ -382,6 +381,8 @@
     CHECK( mk_write_uint( v, 0x54b2, display_size_units ) );
     CHECK( mk_write_uint( v, 0x54b0, d_width ) );
     CHECK( mk_write_uint( v, 0x54ba, d_height ) );
+    if( stereo_mode >= 0 && stereo_mode <= 5 )
+        CHECK( mk_write_uint( v, 0x53b8, mk_stereo_modes[stereo_mode] ) );
     CHECK( mk_close_context( v, 0 ) );
 
     CHECK( mk_close_context( ti, 0 ) );
​

x264-snapshot-20130723-2245.tar.bz2/output/matroska_ebml.h -> x264-snapshot-20140321-2245.tar.bz2/output/matroska_ebml.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * matroska_ebml.h: matroska muxer utilities
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: Mike Matsnev <mike@haali.su>
  *
@@ -42,7 +42,7 @@
                      int64_t default_frame_duration,
                      int64_t timescale,
                      unsigned width, unsigned height,
-                     unsigned d_width, unsigned d_height, int display_size_units );
+                     unsigned d_width, unsigned d_height, int display_size_units, int stereo_mode );
 
 int mk_start_frame( mk_writer *w );
 int mk_add_frame_data( mk_writer *w, const void *data, unsigned size );
​

x264-snapshot-20130723-2245.tar.bz2/output/mp4.c -> x264-snapshot-20140321-2245.tar.bz2/output/mp4.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mp4.c: mp4 muxer
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -27,13 +27,8 @@
 #include "output.h"
 #include <gpac/isomedia.h>
 
-#if HAVE_GF_MALLOC
-#undef malloc
-#undef free
-#undef realloc
-#define malloc gf_malloc
-#define free gf_free
-#define realloc gf_realloc
+#ifdef _WIN32
+#include <windows.h>
 #endif
 
 typedef struct
@@ -170,20 +165,25 @@
 
 static int open_file( char *psz_filename, hnd_t *p_handle, cli_output_opt_t *opt )
 {
-    mp4_hnd_t *p_mp4;
-
     *p_handle = NULL;
-    FILE *fh = fopen( psz_filename, "w" );
+    FILE *fh = x264_fopen( psz_filename, "w" );
     if( !fh )
         return -1;
     FAIL_IF_ERR( !x264_is_regular_file( fh ), "mp4", "MP4 output is incompatible with non-regular file `%s'\n", psz_filename )
     fclose( fh );
 
-    if( !(p_mp4 = malloc( sizeof(mp4_hnd_t) )) )
+    mp4_hnd_t *p_mp4 = calloc( 1, sizeof(mp4_hnd_t) );
+    if( !p_mp4 )
         return -1;
 
-    memset( p_mp4, 0, sizeof(mp4_hnd_t) );
+#ifdef _WIN32
+    /* GPAC doesn't support Unicode filenames. */
+    char ansi_filename[MAX_PATH];
+    FAIL_IF_ERR( !x264_ansi_filename( psz_filename, ansi_filename, MAX_PATH, 1 ), "mp4", "invalid ansi filename\n" )
+    p_mp4->p_file = gf_isom_open( ansi_filename, GF_ISOM_OPEN_WRITE, NULL );
+#else
     p_mp4->p_file = gf_isom_open( psz_filename, GF_ISOM_OPEN_WRITE, NULL );
+#endif
 
     p_mp4->b_dts_compress = opt->use_dts_compress;

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mp4.c: mp4 muxer
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -27,13 +27,8 @@
 #include "output.h"
 #include <gpac/isomedia.h>
 
-#if HAVE_GF_MALLOC
-#undef malloc
-#undef free
-#undef realloc
-#define malloc gf_malloc
-#define free gf_free
-#define realloc gf_realloc
+#ifdef _WIN32
+#include <windows.h>
 #endif
 
 typedef struct
@@ -170,20 +165,25 @@
 
 static int open_file( char *psz_filename, hnd_t *p_handle, cli_output_opt_t *opt )
 {
-    mp4_hnd_t *p_mp4;
-
     *p_handle = NULL;
-    FILE *fh = fopen( psz_filename, "w" );
+    FILE *fh = x264_fopen( psz_filename, "w" );
     if( !fh )
         return -1;
     FAIL_IF_ERR( !x264_is_regular_file( fh ), "mp4", "MP4 output is incompatible with non-regular file `%s'\n", psz_filename )
     fclose( fh );
 
-    if( !(p_mp4 = malloc( sizeof(mp4_hnd_t) )) )
+    mp4_hnd_t *p_mp4 = calloc( 1, sizeof(mp4_hnd_t) );
+    if( !p_mp4 )
         return -1;
 
-    memset( p_mp4, 0, sizeof(mp4_hnd_t) );
+#ifdef _WIN32
+    /* GPAC doesn't support Unicode filenames. */
+    char ansi_filename[MAX_PATH];
+    FAIL_IF_ERR( !x264_ansi_filename( psz_filename, ansi_filename, MAX_PATH, 1 ), "mp4", "invalid ansi filename\n" )
+    p_mp4->p_file = gf_isom_open( ansi_filename, GF_ISOM_OPEN_WRITE, NULL );
+#else
     p_mp4->p_file = gf_isom_open( psz_filename, GF_ISOM_OPEN_WRITE, NULL );
+#endif
 
     p_mp4->b_dts_compress = opt->use_dts_compress;
 
​

x264-snapshot-20140321-2245.tar.bz2/output/mp4_lsmash.c Added

@@ -0,0 +1,419 @@
+/*****************************************************************************
+ * mp4_lsmash.c: mp4 muxer using L-SMASH
+ *****************************************************************************
+ * Copyright (C) 2003-2014 x264 project
+ *
+ * Authors: Laurent Aimar <fenrir@via.ecp.fr>
+ *          Loren Merritt <lorenm@u.washington.edu>
+ *          Yusuke Nakamura <muken.the.vfrmaniac@gmail.com>
+ *          Takashi Hirata <silverfilain@gmail.com>
+ *          golgol7777 <golgol7777@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
+ *****************************************************************************/
+
+#include "output.h"
+#include <lsmash.h>
+
+#define H264_NALU_LENGTH_SIZE 4
+
+/*******************
+ * Logging shorthands for this muxer.
+ * MP4_LOG_ERROR / MP4_LOG_WARNING / MP4_LOG_INFO forward their arguments to x264_cli_log()
+ * with "mp4" as the first argument and the matching X264_LOG_* level as the second.
+ * MP4_FAIL_IF_ERR forwards to FAIL_IF_ERR() with "mp4" inserted after the condition;
+ * FAIL_IF_ERR itself is defined outside this file (presumably log + return -1 - confirm).
+ *******************/
+
+#define MP4_LOG_ERROR( ... )                x264_cli_log( "mp4", X264_LOG_ERROR, __VA_ARGS__ )
+#define MP4_LOG_WARNING( ... )              x264_cli_log( "mp4", X264_LOG_WARNING, __VA_ARGS__ )
+#define MP4_LOG_INFO( ... )                 x264_cli_log( "mp4", X264_LOG_INFO, __VA_ARGS__ )
+#define MP4_FAIL_IF_ERR( cond, ... )        FAIL_IF_ERR( cond, "mp4", __VA_ARGS__ )
+
+/* For close_file(): when cond is true, log the message as an error and carry on
+ * (no return, no cleanup).
+ * NOTE(review): this expands to a bare if-block, not do { } while (0). When the macro is
+ * the body of an if that has an else, the call must be wrapped in braces. */
+#define MP4_LOG_IF_ERR( cond, ... )\
+if( cond )\
+{\
+    MP4_LOG_ERROR( __VA_ARGS__ );\
+}
+
+/* For open_file(): when cond is true, release the partially built handle with
+ * remove_mp4_hnd( p_mp4 ), log the message as an error, and return -1 from the
+ * enclosing function.
+ * Relies on a local variable named p_mp4 being in scope at the point of use.
+ * NOTE(review): expands to a bare if-block, not do { } while (0). */
+#define MP4_FAIL_IF_ERR_EX( cond, ... )\
+if( cond )\
+{\
+    remove_mp4_hnd( p_mp4 );\
+    MP4_LOG_ERROR( __VA_ARGS__ );\
+    return -1;\
+}
+
+/*******************
+ * Per-output state of the L-SMASH mp4 muxer. open_file() allocates it zeroed with calloc().
+ * Only the fields used by open_file(), close_file() and remove_mp4_hnd() are annotated.
+ *******************/
+
+typedef struct
+{
+    lsmash_root_t *p_root;              /* set from lsmash_open_movie(); destroyed in remove_mp4_hnd() */
+    lsmash_video_summary_t *summary;    /* set from lsmash_create_summary() in open_file() */
+    int b_stdout;                       /* non-zero when the output name is exactly "-" */
+    uint32_t i_movie_timescale;         /* close_file(): multiplier when converting the duration */
+    uint32_t i_video_timescale;         /* close_file(): divisor when converting the duration */
+    uint32_t i_track;                   /* track argument for the lsmash_* calls; close_file() skips them while it is 0 */
+    uint32_t i_sample_entry;
+    uint64_t i_time_inc;                /* close_file(): factor applied to pts values and pts deltas */
+    int64_t i_start_offset;
+    uint64_t i_first_cts;               /* close_file(): becomes edit.start_time */
+    uint64_t i_prev_dts;
+    uint32_t i_sei_size;
+    uint8_t *p_sei_buffer;              /* heap buffer; freed in remove_mp4_hnd() */
+    int i_numframe;
+    int64_t i_init_delta;
+    int i_delay_frames;
+    int b_dts_compress;                 /* copied from opt->use_dts_compress in open_file() */
+    int i_dts_compress_multiplier;
+    int b_use_recovery;                 /* open_file() always sets this to 0 */
+    int b_fragments;                    /* open_file(): !b_regular; selects LSMASH_FILE_MODE_WRITE_FRAGMENTED */
+} mp4_hnd_t;
+
+/*******************
+ * remove_mp4_hnd: release what a muxer handle owns, then the handle itself.
+ * Frees p_sei_buffer, destroys the L-SMASH root if one was opened, and frees the struct.
+ * A NULL handle is a no-op. The handle must not be used after this call.
+ * NOTE(review): the summary pointer is not released in this function - confirm who owns it.
+ *******************/
+
+static void remove_mp4_hnd( hnd_t handle )
+{
+    mp4_hnd_t *p_mp4 = handle;
+    if( !p_mp4 )
+        return;
+    if( p_mp4->p_sei_buffer )
+    {
+        free( p_mp4->p_sei_buffer );
+        p_mp4->p_sei_buffer = NULL;
+    }
+    if( p_mp4->p_root )
+    {
+        lsmash_destroy_root( p_mp4->p_root );
+        p_mp4->p_root = NULL;
+    }
+    free( p_mp4 ); /* the struct itself goes last, after its members */
+}
+
+/*******************
+ * close_file: flush pending samples, write the edit list, finish the movie and
+ * release the muxer handle.
+ *
+ * handle             - mp4_hnd_t created by open_file(); NULL is accepted and ignored.
+ * largest_pts,
+ * second_largest_pts - their difference is used as the delta of the last sample.
+ *
+ * Always returns 0: failures of the L-SMASH calls are only logged, never propagated.
+ *******************/
+
+static int close_file( hnd_t handle, int64_t largest_pts, int64_t second_largest_pts )
+{
+    mp4_hnd_t *p_mp4 = handle;
+
+    if( !p_mp4 )
+        return 0;
+
+    if( p_mp4->p_root )
+    {
+        double actual_duration = 0;
+        if( p_mp4->i_track ) /* presumably 0 means no track was ever created - confirm against set_param() */
+        {
+            /* Flush the rest of samples and add the last sample_delta. */
+            uint32_t last_delta = largest_pts - second_largest_pts; /* int64_t difference narrowed to 32 bits */
+            MP4_LOG_IF_ERR( lsmash_flush_pooled_samples( p_mp4->p_root, p_mp4->i_track, (last_delta ? last_delta : 1) * p_mp4->i_time_inc ),
+                            "failed to flush the rest of samples.\n" ); /* a zero delta is replaced by 1 before scaling */
+
+            if( p_mp4->i_movie_timescale != 0 && p_mp4->i_video_timescale != 0 )    /* avoid zero division */
+                actual_duration = ((double)((largest_pts + last_delta) * p_mp4->i_time_inc) / p_mp4->i_video_timescale) * p_mp4->i_movie_timescale;
+            else
+                MP4_LOG_ERROR( "timescale is broken.\n" ); /* actual_duration stays 0 in this case */
+
+            /*
+             * Declare the explicit time-line mapping.
+             * A segment_duration is expressed in the movie timescale, while a media_time (the start time of this segment)
+             * is expressed in the media timescale rather than the movie timescale.
+             * The reason is that ISO media have two time-lines, the presentation and the media time-line,
+             * and an edit maps the presentation time-line to the media time-line.
+             * According to the QuickTime file format specification and the actual playback in QuickTime Player,
+             * if the Edit Box doesn't exist in the track, the ratio of the sum of sample durations to the track's duration becomes
+             * the track's media_rate so that the entire media can be used by the track.
+             * So we add an Edit Box here to prevent this implicit media_rate from slightly distorting the track's presentation timestamps.
+             * Note: Any demuxers should follow the Edit List Box if it exists.
+             */
+            lsmash_edit_t edit;
+            edit.duration   = actual_duration;
+            edit.start_time = p_mp4->i_first_cts;
+            edit.rate       = ISOM_EDIT_MODE_NORMAL;
+            if( !p_mp4->b_fragments )
+            { /* braces required: MP4_LOG_IF_ERR expands to a bare if, and an else follows */
+                MP4_LOG_IF_ERR( lsmash_create_explicit_timeline_map( p_mp4->p_root, p_mp4->i_track, edit ),
+                                "failed to set timeline map for video.\n" );
+            }
+            else if( !p_mp4->b_stdout ) /* fragmented output that is not stdout: modify an existing edit (presumably edit number 1) */
+                MP4_LOG_IF_ERR( lsmash_modify_explicit_timeline_map( p_mp4->p_root, p_mp4->i_track, 1, edit ),
+                                "failed to update timeline map for video.\n" );
+        }
+
+        MP4_LOG_IF_ERR( lsmash_finish_movie( p_mp4->p_root, NULL ), "failed to finish movie.\n" );
+    }
+
+    remove_mp4_hnd( p_mp4 ); /* including lsmash_destroy_root( p_mp4->p_root ); */
+
+    return 0;
+}
+
+static int open_file( char *psz_filename, hnd_t *p_handle, cli_output_opt_t *opt )
+{   /* Create the L-SMASH muxer handle for psz_filename and hand it back through *p_handle.
+     * The name "-" and any path that x264_is_regular_file_path()/x264_is_regular_file()
+     * reject are written in fragmented mode; everything else in normal write mode.
+     * Returns 0 on success. The MP4_FAIL_IF_ERR_EX paths free the handle and return -1 with
+     * *p_handle left NULL. MP4_FAIL_IF_ERR wraps FAIL_IF_ERR, which is defined outside this
+     * file (presumably also log + return -1 - confirm). */
+    *p_handle = NULL;
+
+    int b_regular = strcmp( psz_filename, "-" ); /* non-zero unless the name is exactly "-" */
+    b_regular = b_regular && x264_is_regular_file_path( psz_filename );
+    if( b_regular )
+    {
+        /* Probe: open for writing, re-check on the open stream, close again.
+         * NOTE(review): mode "wb" - if x264_fopen behaves like fopen this creates/truncates the file; confirm. */
+        FILE *fh = x264_fopen( psz_filename, "wb" );
+        MP4_FAIL_IF_ERR( !fh, "cannot open output file `%s'.\n", psz_filename );
+        b_regular = x264_is_regular_file( fh );
+        fclose( fh );
+    }
+
+    mp4_hnd_t *p_mp4 = calloc( 1, sizeof(mp4_hnd_t) ); /* zeroed, so unset fields start at 0/NULL */
+    MP4_FAIL_IF_ERR( !p_mp4, "failed to allocate memory for muxer information.\n" );
+
+    p_mp4->b_dts_compress = opt->use_dts_compress;
+    p_mp4->b_use_recovery = 0; // we don't really support recovery
+    p_mp4->b_fragments    = !b_regular;
+    p_mp4->b_stdout       = !strcmp( psz_filename, "-" );
+
+    p_mp4->p_root = lsmash_open_movie( psz_filename, p_mp4->b_fragments ? LSMASH_FILE_MODE_WRITE_FRAGMENTED : LSMASH_FILE_MODE_WRITE );
+    MP4_FAIL_IF_ERR_EX( !p_mp4->p_root, "failed to create root.\n" );
+
+    p_mp4->summary = (lsmash_video_summary_t *)lsmash_create_summary( LSMASH_SUMMARY_TYPE_VIDEO );
+    MP4_FAIL_IF_ERR_EX( !p_mp4->summary,
+                        "failed to allocate memory for summary information of video.\n" );
+    p_mp4->summary->sample_type = ISOM_CODEC_TYPE_AVC1_VIDEO;
+
+    *p_handle = p_mp4; /* published only after every step above succeeded */
+
+    return 0;
+}
+
+static int set_param( hnd_t handle, x264_param_t *p_param )
+{
+    mp4_hnd_t *p_mp4 = handle;

 
@@ -0,0 +1,419 @@
+/*****************************************************************************
+ * mp4_lsmash.c: mp4 muxer using L-SMASH
+ *****************************************************************************
+ * Copyright (C) 2003-2014 x264 project
+ *
+ * Authors: Laurent Aimar <fenrir@via.ecp.fr>
+ *          Loren Merritt <lorenm@u.washington.edu>
+ *          Yusuke Nakamura <muken.the.vfrmaniac@gmail.com>
+ *          Takashi Hirata <silverfilain@gmail.com>
+ *          golgol7777 <golgol7777@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
+ *****************************************************************************/
+
+#include "output.h"
+#include <lsmash.h>
+
+#define H264_NALU_LENGTH_SIZE 4
+
+/*******************
+ * Logging shorthands for this muxer.
+ * MP4_LOG_ERROR / MP4_LOG_WARNING / MP4_LOG_INFO forward their arguments to x264_cli_log()
+ * with "mp4" as the first argument and the matching X264_LOG_* level as the second.
+ * MP4_FAIL_IF_ERR forwards to FAIL_IF_ERR() with "mp4" inserted after the condition;
+ * FAIL_IF_ERR itself is defined outside this file (presumably log + return -1 - confirm).
+ *******************/
+
+#define MP4_LOG_ERROR( ... )                x264_cli_log( "mp4", X264_LOG_ERROR, __VA_ARGS__ )
+#define MP4_LOG_WARNING( ... )              x264_cli_log( "mp4", X264_LOG_WARNING, __VA_ARGS__ )
+#define MP4_LOG_INFO( ... )                 x264_cli_log( "mp4", X264_LOG_INFO, __VA_ARGS__ )
+#define MP4_FAIL_IF_ERR( cond, ... )        FAIL_IF_ERR( cond, "mp4", __VA_ARGS__ )
+
+/* For close_file(): when cond is true, log the message as an error and carry on
+ * (no return, no cleanup).
+ * NOTE(review): this expands to a bare if-block, not do { } while (0). When the macro is
+ * the body of an if that has an else, the call must be wrapped in braces. */
+#define MP4_LOG_IF_ERR( cond, ... )\
+if( cond )\
+{\
+    MP4_LOG_ERROR( __VA_ARGS__ );\
+}
+
+/* For open_file(): when cond is true, release the partially built handle with
+ * remove_mp4_hnd( p_mp4 ), log the message as an error, and return -1 from the
+ * enclosing function.
+ * Relies on a local variable named p_mp4 being in scope at the point of use.
+ * NOTE(review): expands to a bare if-block, not do { } while (0). */
+#define MP4_FAIL_IF_ERR_EX( cond, ... )\
+if( cond )\
+{\
+    remove_mp4_hnd( p_mp4 );\
+    MP4_LOG_ERROR( __VA_ARGS__ );\
+    return -1;\
+}
+
+/*******************
+ * Per-output state of the L-SMASH mp4 muxer. open_file() allocates it zeroed with calloc().
+ * Only the fields used by open_file(), close_file() and remove_mp4_hnd() are annotated.
+ *******************/
+
+typedef struct
+{
+    lsmash_root_t *p_root;              /* set from lsmash_open_movie(); destroyed in remove_mp4_hnd() */
+    lsmash_video_summary_t *summary;    /* set from lsmash_create_summary() in open_file() */
+    int b_stdout;                       /* non-zero when the output name is exactly "-" */
+    uint32_t i_movie_timescale;         /* close_file(): multiplier when converting the duration */
+    uint32_t i_video_timescale;         /* close_file(): divisor when converting the duration */
+    uint32_t i_track;                   /* track argument for the lsmash_* calls; close_file() skips them while it is 0 */
+    uint32_t i_sample_entry;
+    uint64_t i_time_inc;                /* close_file(): factor applied to pts values and pts deltas */
+    int64_t i_start_offset;
+    uint64_t i_first_cts;               /* close_file(): becomes edit.start_time */
+    uint64_t i_prev_dts;
+    uint32_t i_sei_size;
+    uint8_t *p_sei_buffer;              /* heap buffer; freed in remove_mp4_hnd() */
+    int i_numframe;
+    int64_t i_init_delta;
+    int i_delay_frames;
+    int b_dts_compress;                 /* copied from opt->use_dts_compress in open_file() */
+    int i_dts_compress_multiplier;
+    int b_use_recovery;                 /* open_file() always sets this to 0 */
+    int b_fragments;                    /* open_file(): !b_regular; selects LSMASH_FILE_MODE_WRITE_FRAGMENTED */
+} mp4_hnd_t;
+
+/*******************
+ * remove_mp4_hnd: release what a muxer handle owns, then the handle itself.
+ * Frees p_sei_buffer, destroys the L-SMASH root if one was opened, and frees the struct.
+ * A NULL handle is a no-op. The handle must not be used after this call.
+ * NOTE(review): the summary pointer is not released in this function - confirm who owns it.
+ *******************/
+
+static void remove_mp4_hnd( hnd_t handle )
+{
+    mp4_hnd_t *p_mp4 = handle;
+    if( !p_mp4 )
+        return;
+    if( p_mp4->p_sei_buffer )
+    {
+        free( p_mp4->p_sei_buffer );
+        p_mp4->p_sei_buffer = NULL;
+    }
+    if( p_mp4->p_root )
+    {
+        lsmash_destroy_root( p_mp4->p_root );
+        p_mp4->p_root = NULL;
+    }
+    free( p_mp4 ); /* the struct itself goes last, after its members */
+}
+
+/*******************
+ * close_file: flush pending samples, write the edit list, finish the movie and
+ * release the muxer handle.
+ *
+ * handle             - mp4_hnd_t created by open_file(); NULL is accepted and ignored.
+ * largest_pts,
+ * second_largest_pts - their difference is used as the delta of the last sample.
+ *
+ * Always returns 0: failures of the L-SMASH calls are only logged, never propagated.
+ *******************/
+
+static int close_file( hnd_t handle, int64_t largest_pts, int64_t second_largest_pts )
+{
+    mp4_hnd_t *p_mp4 = handle;
+
+    if( !p_mp4 )
+        return 0;
+
+    if( p_mp4->p_root )
+    {
+        double actual_duration = 0;
+        if( p_mp4->i_track ) /* presumably 0 means no track was ever created - confirm against set_param() */
+        {
+            /* Flush the rest of samples and add the last sample_delta. */
+            uint32_t last_delta = largest_pts - second_largest_pts; /* int64_t difference narrowed to 32 bits */
+            MP4_LOG_IF_ERR( lsmash_flush_pooled_samples( p_mp4->p_root, p_mp4->i_track, (last_delta ? last_delta : 1) * p_mp4->i_time_inc ),
+                            "failed to flush the rest of samples.\n" ); /* a zero delta is replaced by 1 before scaling */
+
+            if( p_mp4->i_movie_timescale != 0 && p_mp4->i_video_timescale != 0 )    /* avoid zero division */
+                actual_duration = ((double)((largest_pts + last_delta) * p_mp4->i_time_inc) / p_mp4->i_video_timescale) * p_mp4->i_movie_timescale;
+            else
+                MP4_LOG_ERROR( "timescale is broken.\n" ); /* actual_duration stays 0 in this case */
+
+            /*
+             * Declare the explicit time-line mapping.
+             * A segment_duration is expressed in the movie timescale, while a media_time (the start time of this segment)
+             * is expressed in the media timescale rather than the movie timescale.
+             * The reason is that ISO media have two time-lines, the presentation and the media time-line,
+             * and an edit maps the presentation time-line to the media time-line.
+             * According to the QuickTime file format specification and the actual playback in QuickTime Player,
+             * if the Edit Box doesn't exist in the track, the ratio of the sum of sample durations to the track's duration becomes
+             * the track's media_rate so that the entire media can be used by the track.
+             * So we add an Edit Box here to prevent this implicit media_rate from slightly distorting the track's presentation timestamps.
+             * Note: Any demuxers should follow the Edit List Box if it exists.
+             */
+            lsmash_edit_t edit;
+            edit.duration   = actual_duration;
+            edit.start_time = p_mp4->i_first_cts;
+            edit.rate       = ISOM_EDIT_MODE_NORMAL;
+            if( !p_mp4->b_fragments )
+            { /* braces required: MP4_LOG_IF_ERR expands to a bare if, and an else follows */
+                MP4_LOG_IF_ERR( lsmash_create_explicit_timeline_map( p_mp4->p_root, p_mp4->i_track, edit ),
+                                "failed to set timeline map for video.\n" );
+            }
+            else if( !p_mp4->b_stdout ) /* fragmented output that is not stdout: modify an existing edit (presumably edit number 1) */
+                MP4_LOG_IF_ERR( lsmash_modify_explicit_timeline_map( p_mp4->p_root, p_mp4->i_track, 1, edit ),
+                                "failed to update timeline map for video.\n" );
+        }
+
+        MP4_LOG_IF_ERR( lsmash_finish_movie( p_mp4->p_root, NULL ), "failed to finish movie.\n" );
+    }
+
+    remove_mp4_hnd( p_mp4 ); /* including lsmash_destroy_root( p_mp4->p_root ); */
+
+    return 0;
+}
+
+static int open_file( char *psz_filename, hnd_t *p_handle, cli_output_opt_t *opt )
+{   /* Create the L-SMASH muxer handle for psz_filename and hand it back through *p_handle.
+     * The name "-" and any path that x264_is_regular_file_path()/x264_is_regular_file()
+     * reject are written in fragmented mode; everything else in normal write mode.
+     * Returns 0 on success. The MP4_FAIL_IF_ERR_EX paths free the handle and return -1 with
+     * *p_handle left NULL. MP4_FAIL_IF_ERR wraps FAIL_IF_ERR, which is defined outside this
+     * file (presumably also log + return -1 - confirm). */
+    *p_handle = NULL;
+
+    int b_regular = strcmp( psz_filename, "-" ); /* non-zero unless the name is exactly "-" */
+    b_regular = b_regular && x264_is_regular_file_path( psz_filename );
+    if( b_regular )
+    {
+        /* Probe: open for writing, re-check on the open stream, close again.
+         * NOTE(review): mode "wb" - if x264_fopen behaves like fopen this creates/truncates the file; confirm. */
+        FILE *fh = x264_fopen( psz_filename, "wb" );
+        MP4_FAIL_IF_ERR( !fh, "cannot open output file `%s'.\n", psz_filename );
+        b_regular = x264_is_regular_file( fh );
+        fclose( fh );
+    }
+
+    mp4_hnd_t *p_mp4 = calloc( 1, sizeof(mp4_hnd_t) ); /* zeroed, so unset fields start at 0/NULL */
+    MP4_FAIL_IF_ERR( !p_mp4, "failed to allocate memory for muxer information.\n" );
+
+    p_mp4->b_dts_compress = opt->use_dts_compress;
+    p_mp4->b_use_recovery = 0; // we don't really support recovery
+    p_mp4->b_fragments    = !b_regular;
+    p_mp4->b_stdout       = !strcmp( psz_filename, "-" );
+
+    p_mp4->p_root = lsmash_open_movie( psz_filename, p_mp4->b_fragments ? LSMASH_FILE_MODE_WRITE_FRAGMENTED : LSMASH_FILE_MODE_WRITE );
+    MP4_FAIL_IF_ERR_EX( !p_mp4->p_root, "failed to create root.\n" );
+
+    p_mp4->summary = (lsmash_video_summary_t *)lsmash_create_summary( LSMASH_SUMMARY_TYPE_VIDEO );
+    MP4_FAIL_IF_ERR_EX( !p_mp4->summary,
+                        "failed to allocate memory for summary information of video.\n" );
+    p_mp4->summary->sample_type = ISOM_CODEC_TYPE_AVC1_VIDEO;
+
+    *p_handle = p_mp4; /* published only after every step above succeeded */
+
+    return 0;
+}
+
+static int set_param( hnd_t handle, x264_param_t *p_param )
+{
+    mp4_hnd_t *p_mp4 = handle;
​

x264-snapshot-20130723-2245.tar.bz2/output/output.h -> x264-snapshot-20140321-2245.tar.bz2/output/output.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * output.h: x264 file output modules
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
​

x264-snapshot-20130723-2245.tar.bz2/output/raw.c -> x264-snapshot-20140321-2245.tar.bz2/output/raw.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * raw.c: raw muxer
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -30,7 +30,7 @@
 {
     if( !strcmp( psz_filename, "-" ) )
         *p_handle = stdout;
-    else if( !(*p_handle = fopen( psz_filename, "w+b" )) )
+    else if( !(*p_handle = x264_fopen( psz_filename, "w+b" )) )
         return -1;
 
     return 0;
​

x264-snapshot-20130723-2245.tar.bz2/tools/checkasm-a.asm -> x264-snapshot-20140321-2245.tar.bz2/tools/checkasm-a.asm Changed

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* checkasm-a.asm: assembly check tool
 ;*****************************************************************************
-;* Copyright (C) 2008-2013 x264 project
+;* Copyright (C) 2008-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Henrik Gramner <henrik@gramner.com>
​

x264-snapshot-20130723-2245.tar.bz2/tools/checkasm.c -> x264-snapshot-20140321-2245.tar.bz2/tools/checkasm.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * checkasm.c: assembly check tool
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -191,7 +191,6 @@
                     b->cpu&X264_CPU_SLOW_ATOM && b->cpu&X264_CPU_CACHELINE_64 ? "_c64_atom" :
                     b->cpu&X264_CPU_CACHELINE_64 ? "_c64" :
                     b->cpu&X264_CPU_SLOW_SHUFFLE ? "_slowshuffle" :
-                    b->cpu&X264_CPU_SSE_MISALIGN ? "_misalign" :
                     b->cpu&X264_CPU_LZCNT ? "_lzcnt" :
                     b->cpu&X264_CPU_BMI2 ? "_bmi2" :
                     b->cpu&X264_CPU_BMI1 ? "_bmi1" :
@@ -201,7 +200,7 @@
                     b->cpu&X264_CPU_FAST_NEON_MRC ? "_fast_mrc" :
 #endif
                     "",
-                    ((int64_t)10*b->cycles/b->den - nop_time)/4 );
+                    (int64_t)(10*b->cycles/b->den - nop_time)/4 );
         }
 }
 
@@ -407,7 +406,7 @@
                 } \
                 else \
                     call_a( pixel_asm.sad_x3[i], pbuf1, pix2, pix2+6, pix2+1, (intptr_t)64, res_asm ); \
-                if( memcmp(res_c, res_asm, sizeof(res_c)) ) \
+                if( memcmp(res_c, res_asm, N*sizeof(int)) ) \
                 { \
                     ok = 0; \
                     fprintf( stderr, "sad_x"#N"[%d]: %d,%d,%d,%d != %d,%d,%d,%d [FAILED]\n", \
@@ -1452,8 +1451,66 @@
                 }
         }
     }
+
+    if( mc_a.plane_copy_deinterleave_rgb != mc_ref.plane_copy_deinterleave_rgb )
+    {
+        set_func_name( "plane_copy_deinterleave_rgb" );
+        used_asm = 1;
+        for( int i = 0; i < sizeof(plane_specs)/sizeof(*plane_specs); i++ )
+        {
+            int w = (plane_specs[i].w + 2) >> 2;
+            int h = plane_specs[i].h;
+            intptr_t src_stride = plane_specs[i].src_stride;
+            intptr_t dst_stride = ALIGN( w, 16 );
+            intptr_t offv = dst_stride*h + 16;
+
+            for( int pw = 3; pw <= 4; pw++ )
+            {
+                memset( pbuf3, 0, 0x1000 );
+                memset( pbuf4, 0, 0x1000 );
+                call_c( mc_c.plane_copy_deinterleave_rgb, pbuf3, dst_stride, pbuf3+offv, dst_stride, pbuf3+2*offv, dst_stride, pbuf1, src_stride, pw, w, h );
+                call_a( mc_a.plane_copy_deinterleave_rgb, pbuf4, dst_stride, pbuf4+offv, dst_stride, pbuf4+2*offv, dst_stride, pbuf1, src_stride, pw, w, h );
+                for( int y = 0; y < h; y++ )
+                    if( memcmp( pbuf3+y*dst_stride+0*offv, pbuf4+y*dst_stride+0*offv, w ) ||
+                        memcmp( pbuf3+y*dst_stride+1*offv, pbuf4+y*dst_stride+1*offv, w ) ||
+                        memcmp( pbuf3+y*dst_stride+2*offv, pbuf4+y*dst_stride+2*offv, w ) )
+                    {
+                        ok = 0;
+                        fprintf( stderr, "plane_copy_deinterleave_rgb FAILED: w=%d h=%d stride=%d pw=%d\n", w, h, (int)src_stride, pw );
+                        break;
+                    }
+            }
+        }
+    }
     report( "plane_copy :" );
 
+    if( mc_a.plane_copy_deinterleave_v210 != mc_ref.plane_copy_deinterleave_v210 )
+    {
+        set_func_name( "plane_copy_deinterleave_v210" );
+        used_asm = 1;
+        for( int i = 0; i < sizeof(plane_specs)/sizeof(*plane_specs); i++ )
+        {
+            int w = (plane_specs[i].w + 1) >> 1;
+            int h = plane_specs[i].h;
+            intptr_t dst_stride = ALIGN( w, 16 );
+            intptr_t src_stride = (w + 47) / 48 * 128 / sizeof(uint32_t);
+            intptr_t offv = dst_stride*h + 32;
+            memset( pbuf3, 0, 0x1000 );
+            memset( pbuf4, 0, 0x1000 );
+            call_c( mc_c.plane_copy_deinterleave_v210, pbuf3, dst_stride, pbuf3+offv, dst_stride, (uint32_t *)buf1, src_stride, w, h );
+            call_a( mc_a.plane_copy_deinterleave_v210, pbuf4, dst_stride, pbuf4+offv, dst_stride, (uint32_t *)buf1, src_stride, w, h );
+            for( int y = 0; y < h; y++ )
+                if( memcmp( pbuf3+y*dst_stride,      pbuf4+y*dst_stride,      w*sizeof(uint16_t) ) ||
+                    memcmp( pbuf3+y*dst_stride+offv, pbuf4+y*dst_stride+offv, w*sizeof(uint16_t) ) )
+                {
+                    ok = 0;
+                    fprintf( stderr, "plane_copy_deinterleave_v210 FAILED: w=%d h=%d stride=%d\n", w, h, (int)src_stride );
+                    break;
+                }
+        }
+    }
+    report( "v210 :" );
+
     if( mc_a.hpel_filter != mc_ref.hpel_filter )
     {
         pixel *srchpel = pbuf1+8+2*64;
@@ -1541,16 +1598,17 @@
     INTEGRAL_INIT( integral_init8v, 9, sum, stride );
     report( "integral init :" );
 
+    ok = 1; used_asm = 0;
     if( mc_a.mbtree_propagate_cost != mc_ref.mbtree_propagate_cost )
     {
-        ok = 1; used_asm = 1;
+        used_asm = 1;
         x264_emms();
         for( int i = 0; i < 10; i++ )
         {
-            float fps_factor = (rand()&65535) / 256.;
-            set_func_name( "mbtree_propagate" );
-            int *dsta = (int*)buf3;
-            int *dstc = dsta+400;
+            float fps_factor = (rand()&65535) / 65535.0f;
+            set_func_name( "mbtree_propagate_cost" );
+            int16_t *dsta = (int16_t*)buf3;
+            int16_t *dstc = dsta+400;
             uint16_t *prop = (uint16_t*)buf1;
             uint16_t *intra = (uint16_t*)buf4;
             uint16_t *inter = intra+128;
@@ -1572,12 +1630,60 @@
             {
                 ok &= abs( dstc[j]-dsta[j] ) <= 1 || fabs( (double)dstc[j]/dsta[j]-1 ) < 1e-4;
                 if( !ok )
-                    fprintf( stderr, "mbtree_propagate FAILED: %f !~= %f\n", (double)dstc[j], (double)dsta[j] );
+                    fprintf( stderr, "mbtree_propagate_cost FAILED: %f !~= %f\n", (double)dstc[j], (double)dsta[j] );
             }
         }
-        report( "mbtree propagate :" );
     }
 
+    if( mc_a.mbtree_propagate_list != mc_ref.mbtree_propagate_list )
+    {
+        used_asm = 1;
+        for( int i = 0; i < 8; i++ )
+        {
+            set_func_name( "mbtree_propagate_list" );
+            x264_t h;
+            int height = 4;
+            int width = 128;
+            int size = width*height;
+            h.mb.i_mb_stride = width;
+            h.mb.i_mb_width = width;
+            h.mb.i_mb_height = height;
+
+            uint16_t *ref_costsc = (uint16_t*)buf3;
+            uint16_t *ref_costsa = (uint16_t*)buf4;
+            int16_t (*mvs)[2] = (int16_t(*)[2])(ref_costsc + size);
+            int16_t *propagate_amount = (int16_t*)(mvs + width);
+            uint16_t *lowres_costs = (uint16_t*)(propagate_amount + width);
+            h.scratch_buffer2 = (uint8_t*)(ref_costsa + size);
+            int bipred_weight = (rand()%63)+1;
+            int list = i&1;
+            for( int j = 0; j < size; j++ )
+                ref_costsc[j] = ref_costsa[j] = rand()&32767;
+            for( int j = 0; j < width; j++ )
+            {
+                static const uint8_t list_dist[2][8] = {{0,1,1,1,1,1,1,1},{1,1,3,3,3,3,3,2}};
+                for( int k = 0; k < 2; k++ )
+                    mvs[j][k] = (rand()&127) - 64;
+                propagate_amount[j] = rand()&32767;
+                lowres_costs[j] = list_dist[list][rand()&7] << LOWRES_COST_SHIFT;
+            }
+
+            call_c1( mc_c.mbtree_propagate_list, &h, ref_costsc, mvs, propagate_amount, lowres_costs, bipred_weight, 0, width, list );
+            call_a1( mc_a.mbtree_propagate_list, &h, ref_costsa, mvs, propagate_amount, lowres_costs, bipred_weight, 0, width, list );
+
+            for( int j = 0; j < size && ok; j++ )
+            {
+                ok &= abs(ref_costsa[j] - ref_costsc[j]) <= 1;
+                if( !ok )
+                    fprintf( stderr, "mbtree_propagate_list FAILED at %d: %d !~= %d\n", j, ref_costsc[j], ref_costsa[j] );
+            }
+
+            call_c2( mc_c.mbtree_propagate_list, &h, ref_costsc, mvs, propagate_amount, lowres_costs, bipred_weight, 0, width, list );
+            call_a2( mc_a.mbtree_propagate_list, &h, ref_costsa, mvs, propagate_amount, lowres_costs, bipred_weight, 0, width, list );
+        }
+    }
+    report( "mbtree :" );
+
     if( mc_a.memcpy_aligned != mc_ref.memcpy_aligned )
     {
         set_func_name( "memcpy_aligned" );
@@ -1779,7 +1885,7 @@
         }
 
         h->param.rc.i_qp_min = 0;
-        h->param.rc.i_qp_max = QP_MAX;
+        h->param.rc.i_qp_max = QP_MAX_SPEC;
         x264_cqm_init( h );
         x264_quant_init( h, 0, &qf_c );
         x264_quant_init( h, cpu_ref, &qf_ref );
@@ -2504,7 +2610,7 @@
 {
     *cpu_ref = *cpu_new;

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * checkasm.c: assembly check tool
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -191,7 +191,6 @@
                     b->cpu&X264_CPU_SLOW_ATOM && b->cpu&X264_CPU_CACHELINE_64 ? "_c64_atom" :
                     b->cpu&X264_CPU_CACHELINE_64 ? "_c64" :
                     b->cpu&X264_CPU_SLOW_SHUFFLE ? "_slowshuffle" :
-                    b->cpu&X264_CPU_SSE_MISALIGN ? "_misalign" :
                     b->cpu&X264_CPU_LZCNT ? "_lzcnt" :
                     b->cpu&X264_CPU_BMI2 ? "_bmi2" :
                     b->cpu&X264_CPU_BMI1 ? "_bmi1" :
@@ -201,7 +200,7 @@
                     b->cpu&X264_CPU_FAST_NEON_MRC ? "_fast_mrc" :
 #endif
                     "",
-                    ((int64_t)10*b->cycles/b->den - nop_time)/4 );
+                    (int64_t)(10*b->cycles/b->den - nop_time)/4 );
         }
 }
 
@@ -407,7 +406,7 @@
                 } \
                 else \
                     call_a( pixel_asm.sad_x3[i], pbuf1, pix2, pix2+6, pix2+1, (intptr_t)64, res_asm ); \
-                if( memcmp(res_c, res_asm, sizeof(res_c)) ) \
+                if( memcmp(res_c, res_asm, N*sizeof(int)) ) \
                 { \
                     ok = 0; \
                     fprintf( stderr, "sad_x"#N"[%d]: %d,%d,%d,%d != %d,%d,%d,%d [FAILED]\n", \
@@ -1452,8 +1451,66 @@
                 }
         }
     }
+
+    if( mc_a.plane_copy_deinterleave_rgb != mc_ref.plane_copy_deinterleave_rgb )
+    {
+        set_func_name( "plane_copy_deinterleave_rgb" );
+        used_asm = 1;
+        for( int i = 0; i < sizeof(plane_specs)/sizeof(*plane_specs); i++ )
+        {
+            int w = (plane_specs[i].w + 2) >> 2;
+            int h = plane_specs[i].h;
+            intptr_t src_stride = plane_specs[i].src_stride;
+            intptr_t dst_stride = ALIGN( w, 16 );
+            intptr_t offv = dst_stride*h + 16;
+
+            for( int pw = 3; pw <= 4; pw++ )
+            {
+                memset( pbuf3, 0, 0x1000 );
+                memset( pbuf4, 0, 0x1000 );
+                call_c( mc_c.plane_copy_deinterleave_rgb, pbuf3, dst_stride, pbuf3+offv, dst_stride, pbuf3+2*offv, dst_stride, pbuf1, src_stride, pw, w, h );
+                call_a( mc_a.plane_copy_deinterleave_rgb, pbuf4, dst_stride, pbuf4+offv, dst_stride, pbuf4+2*offv, dst_stride, pbuf1, src_stride, pw, w, h );
+                for( int y = 0; y < h; y++ )
+                    if( memcmp( pbuf3+y*dst_stride+0*offv, pbuf4+y*dst_stride+0*offv, w ) ||
+                        memcmp( pbuf3+y*dst_stride+1*offv, pbuf4+y*dst_stride+1*offv, w ) ||
+                        memcmp( pbuf3+y*dst_stride+2*offv, pbuf4+y*dst_stride+2*offv, w ) )
+                    {
+                        ok = 0;
+                        fprintf( stderr, "plane_copy_deinterleave_rgb FAILED: w=%d h=%d stride=%d pw=%d\n", w, h, (int)src_stride, pw );
+                        break;
+                    }
+            }
+        }
+    }
     report( "plane_copy :" );
 
+    if( mc_a.plane_copy_deinterleave_v210 != mc_ref.plane_copy_deinterleave_v210 )
+    {
+        set_func_name( "plane_copy_deinterleave_v210" );
+        used_asm = 1;
+        for( int i = 0; i < sizeof(plane_specs)/sizeof(*plane_specs); i++ )
+        {
+            int w = (plane_specs[i].w + 1) >> 1;
+            int h = plane_specs[i].h;
+            intptr_t dst_stride = ALIGN( w, 16 );
+            intptr_t src_stride = (w + 47) / 48 * 128 / sizeof(uint32_t);
+            intptr_t offv = dst_stride*h + 32;
+            memset( pbuf3, 0, 0x1000 );
+            memset( pbuf4, 0, 0x1000 );
+            call_c( mc_c.plane_copy_deinterleave_v210, pbuf3, dst_stride, pbuf3+offv, dst_stride, (uint32_t *)buf1, src_stride, w, h );
+            call_a( mc_a.plane_copy_deinterleave_v210, pbuf4, dst_stride, pbuf4+offv, dst_stride, (uint32_t *)buf1, src_stride, w, h );
+            for( int y = 0; y < h; y++ )
+                if( memcmp( pbuf3+y*dst_stride,      pbuf4+y*dst_stride,      w*sizeof(uint16_t) ) ||
+                    memcmp( pbuf3+y*dst_stride+offv, pbuf4+y*dst_stride+offv, w*sizeof(uint16_t) ) )
+                {
+                    ok = 0;
+                    fprintf( stderr, "plane_copy_deinterleave_v210 FAILED: w=%d h=%d stride=%d\n", w, h, (int)src_stride );
+                    break;
+                }
+        }
+    }
+    report( "v210 :" );
+
     if( mc_a.hpel_filter != mc_ref.hpel_filter )
     {
         pixel *srchpel = pbuf1+8+2*64;
@@ -1541,16 +1598,17 @@
     INTEGRAL_INIT( integral_init8v, 9, sum, stride );
     report( "integral init :" );
 
+    ok = 1; used_asm = 0;
     if( mc_a.mbtree_propagate_cost != mc_ref.mbtree_propagate_cost )
     {
-        ok = 1; used_asm = 1;
+        used_asm = 1;
         x264_emms();
         for( int i = 0; i < 10; i++ )
         {
-            float fps_factor = (rand()&65535) / 256.;
-            set_func_name( "mbtree_propagate" );
-            int *dsta = (int*)buf3;
-            int *dstc = dsta+400;
+            float fps_factor = (rand()&65535) / 65535.0f;
+            set_func_name( "mbtree_propagate_cost" );
+            int16_t *dsta = (int16_t*)buf3;
+            int16_t *dstc = dsta+400;
             uint16_t *prop = (uint16_t*)buf1;
             uint16_t *intra = (uint16_t*)buf4;
             uint16_t *inter = intra+128;
@@ -1572,12 +1630,60 @@
             {
                 ok &= abs( dstc[j]-dsta[j] ) <= 1 || fabs( (double)dstc[j]/dsta[j]-1 ) < 1e-4;
                 if( !ok )
-                    fprintf( stderr, "mbtree_propagate FAILED: %f !~= %f\n", (double)dstc[j], (double)dsta[j] );
+                    fprintf( stderr, "mbtree_propagate_cost FAILED: %f !~= %f\n", (double)dstc[j], (double)dsta[j] );
             }
         }
-        report( "mbtree propagate :" );
     }
 
+    if( mc_a.mbtree_propagate_list != mc_ref.mbtree_propagate_list )
+    {
+        used_asm = 1;
+        for( int i = 0; i < 8; i++ )
+        {
+            set_func_name( "mbtree_propagate_list" );
+            x264_t h;
+            int height = 4;
+            int width = 128;
+            int size = width*height;
+            h.mb.i_mb_stride = width;
+            h.mb.i_mb_width = width;
+            h.mb.i_mb_height = height;
+
+            uint16_t *ref_costsc = (uint16_t*)buf3;
+            uint16_t *ref_costsa = (uint16_t*)buf4;
+            int16_t (*mvs)[2] = (int16_t(*)[2])(ref_costsc + size);
+            int16_t *propagate_amount = (int16_t*)(mvs + width);
+            uint16_t *lowres_costs = (uint16_t*)(propagate_amount + width);
+            h.scratch_buffer2 = (uint8_t*)(ref_costsa + size);
+            int bipred_weight = (rand()%63)+1;
+            int list = i&1;
+            for( int j = 0; j < size; j++ )
+                ref_costsc[j] = ref_costsa[j] = rand()&32767;
+            for( int j = 0; j < width; j++ )
+            {
+                static const uint8_t list_dist[2][8] = {{0,1,1,1,1,1,1,1},{1,1,3,3,3,3,3,2}};
+                for( int k = 0; k < 2; k++ )
+                    mvs[j][k] = (rand()&127) - 64;
+                propagate_amount[j] = rand()&32767;
+                lowres_costs[j] = list_dist[list][rand()&7] << LOWRES_COST_SHIFT;
+            }
+
+            call_c1( mc_c.mbtree_propagate_list, &h, ref_costsc, mvs, propagate_amount, lowres_costs, bipred_weight, 0, width, list );
+            call_a1( mc_a.mbtree_propagate_list, &h, ref_costsa, mvs, propagate_amount, lowres_costs, bipred_weight, 0, width, list );
+
+            for( int j = 0; j < size && ok; j++ )
+            {
+                ok &= abs(ref_costsa[j] - ref_costsc[j]) <= 1;
+                if( !ok )
+                    fprintf( stderr, "mbtree_propagate_list FAILED at %d: %d !~= %d\n", j, ref_costsc[j], ref_costsa[j] );
+            }
+
+            call_c2( mc_c.mbtree_propagate_list, &h, ref_costsc, mvs, propagate_amount, lowres_costs, bipred_weight, 0, width, list );
+            call_a2( mc_a.mbtree_propagate_list, &h, ref_costsa, mvs, propagate_amount, lowres_costs, bipred_weight, 0, width, list );
+        }
+    }
+    report( "mbtree :" );
+
     if( mc_a.memcpy_aligned != mc_ref.memcpy_aligned )
     {
         set_func_name( "memcpy_aligned" );
@@ -1779,7 +1885,7 @@
         }
 
         h->param.rc.i_qp_min = 0;
-        h->param.rc.i_qp_max = QP_MAX;
+        h->param.rc.i_qp_max = QP_MAX_SPEC;
         x264_cqm_init( h );
         x264_quant_init( h, 0, &qf_c );
         x264_quant_init( h, cpu_ref, &qf_ref );
@@ -2504,7 +2610,7 @@
 {
     *cpu_ref = *cpu_new;
​

x264-snapshot-20130723-2245.tar.bz2/tools/cltostr.pl -> x264-snapshot-20140321-2245.tar.bz2/tools/cltostr.pl Changed

 
@@ -1,6 +1,6 @@
 # Perl script used for compiling OpenCL src into x264 binary
 #
-# Copyright (C) 2013 x264 project
+# Copyright (C) 2013-2014 x264 project
 # Authors: Steve Borho <sborho@multicorewareinc.com>
 
 use Digest::MD5 qw(md5_hex);
​

x264-snapshot-20130723-2245.tar.bz2/version.sh -> x264-snapshot-20140321-2245.tar.bz2/version.sh Changed

 
@@ -1,5 +1,8 @@
-#!/bin/bash
+#!/bin/sh
 [ -n "$1" ] && cd $1
+
+git_version() {
+trap 'rm -f config.git-hash' EXIT
 git rev-list HEAD | sort > config.git-hash
 LOCALVER=`wc -l config.git-hash | awk '{print $1}'`
 if [ $LOCALVER \> 1 ] ; then
@@ -14,11 +17,13 @@
         VER="${VER}M"
     fi
     VER="$VER $(git rev-list HEAD -n 1 | cut -c 1-7)"
-    echo "#define X264_VERSION \" r$VER\""
-else
-    echo "#define X264_VERSION \"\""
-    VER="x"
+    VERSION=" r$VER"
 fi
-rm -f config.git-hash
+}
+
+VER="x"
+VERSION=""
+[ -d .git ] && (type git >/dev/null 2>&1) && git_version
+echo "#define X264_VERSION \"$VERSION\""
 API=`grep '#define X264_BUILD' < x264.h | sed -e 's/.* \([1-9][0-9]*\).*/\1/'`
 echo "#define X264_POINTVER \"0.$API.$VER\""
​

x264-snapshot-20130723-2245.tar.bz2/x264.c -> x264-snapshot-20140321-2245.tar.bz2/x264.c Changed

@@ -1,13 +1,14 @@
 /*****************************************************************************
  * x264: top-level x264cli functions
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
  *          Steven Walters <kemuri9@gmail.com>
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
  *          Kieran Kunhya <kieran@kunhya.com>
+ *          Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -27,6 +28,15 @@
  * For more information, contact us at licensing@x264.com.
  *****************************************************************************/
 
+#ifdef _WIN32
+/* The following two defines must be located before the inclusion of any system header files. */
+#define WINVER       0x0500
+#define _WIN32_WINNT 0x0500
+#include <windows.h>
+#include <io.h>       /* _setmode() */
+#include <fcntl.h>    /* _O_BINARY */
+#endif
+
 #include <signal.h>
 #define _GNU_SOURCE
 #include <getopt.h>
@@ -38,13 +48,6 @@
 
 #define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "x264", __VA_ARGS__ )
 
-#ifdef _WIN32
-#include <windows.h>
-#else
-#define GetConsoleTitle(t,n)
-#define SetConsoleTitle(t)
-#endif
-
 #if HAVE_LAVF
 #undef DECLARE_ALIGNED
 #include <libavformat/avformat.h>
@@ -61,18 +64,89 @@
 #include <ffms.h>
 #endif
 
+#ifdef _WIN32
+#define CONSOLE_TITLE_SIZE 200
+static wchar_t org_console_title[CONSOLE_TITLE_SIZE] = L"";
+
+void x264_cli_set_console_title( const char *title )
+{
+    wchar_t title_utf16[CONSOLE_TITLE_SIZE];
+    if( utf8_to_utf16( title, title_utf16 ) )
+        SetConsoleTitleW( title_utf16 );
+}
+
+static int utf16_to_ansi( const wchar_t *utf16, char *ansi, int size )
+{
+    int invalid;
+    return WideCharToMultiByte( CP_ACP, WC_NO_BEST_FIT_CHARS, utf16, -1, ansi, size, NULL, &invalid ) && !invalid;
+}
+
+/* Some external libraries don't support Unicode in filenames,
+ * as a workaround we can try to get an ANSI filename instead. */
+int x264_ansi_filename( const char *filename, char *ansi_filename, int size, int create_file )
+{
+    wchar_t filename_utf16[MAX_PATH];
+    if( utf8_to_utf16( filename, filename_utf16 ) )
+    {
+        if( create_file )
+        {
+            /* Create the file using the Unicode filename if it doesn't already exist. */
+            FILE *fh = _wfopen( filename_utf16, L"ab" );
+            if( fh )
+                fclose( fh );
+        }
+
+        /* Check if the filename already is valid ANSI. */
+        if( utf16_to_ansi( filename_utf16, ansi_filename, size ) )
+            return 1;
+
+        /* Check for a legacy 8.3 short filename. */
+        int short_length = GetShortPathNameW( filename_utf16, filename_utf16, MAX_PATH );
+        if( short_length > 0 && short_length < MAX_PATH )
+            if( utf16_to_ansi( filename_utf16, ansi_filename, size ) )
+                return 1;
+    }
+    return 0;
+}
+
+/* Retrieve command line arguments as UTF-8. */
+static int get_argv_utf8( int *argc_ptr, char ***argv_ptr )
+{
+    int ret = 0;
+    wchar_t **argv_utf16 = CommandLineToArgvW( GetCommandLineW(), argc_ptr );
+    if( argv_utf16 )
+    {
+        int argc = *argc_ptr;
+        int offset = (argc+1) * sizeof(char*);
+        int size = offset;
+
+        for( int i = 0; i < argc; i++ )
+            size += WideCharToMultiByte( CP_UTF8, 0, argv_utf16[i], -1, NULL, 0, NULL, NULL );
+
+        char **argv = *argv_ptr = malloc( size );
+        if( argv )
+        {
+            for( int i = 0; i < argc; i++ )
+            {
+                argv[i] = (char*)argv + offset;
+                offset += WideCharToMultiByte( CP_UTF8, 0, argv_utf16[i], -1, argv[i], size-offset, NULL, NULL );
+            }
+            argv[argc] = NULL;
+            ret = 1;
+        }
+        LocalFree( argv_utf16 );
+    }
+    return ret;
+}
+#endif
+
 /* Ctrl-C handler */
 static volatile int b_ctrl_c = 0;
-static int          b_exit_on_ctrl_c = 0;
 static void sigint_handler( int a )
 {
-    if( b_exit_on_ctrl_c )
-        exit(0);
     b_ctrl_c = 1;
 }
 
-static char UNUSED originalCTitle[200] = "";
-
 typedef struct {
     int b_progress;
     int i_seek;
@@ -114,7 +188,7 @@
     "raw",
     "mkv",
     "flv",
-#if HAVE_GPAC
+#if HAVE_GPAC || HAVE_LSMASH
     "mp4",
 #endif
     0
@@ -211,7 +285,7 @@
     fprintf( stderr, "%s [%s]: ", name, s_level );
     va_list arg;
     va_start( arg, fmt );
-    vfprintf( stderr, fmt, arg );
+    x264_vfprintf( stderr, fmt, arg );
     va_end( arg );
 }
 
@@ -221,7 +295,7 @@
         return;
     va_list arg;
     va_start( arg, fmt );
-    vfprintf( stderr, fmt, arg );
+    x264_vfprintf( stderr, fmt, arg );
     va_end( arg );
 }
 
@@ -275,18 +349,22 @@
     FAIL_IF_ERROR( x264_threading_init(), "unable to initialize threading\n" )
 
 #ifdef _WIN32
-    _setmode(_fileno(stdin), _O_BINARY);
-    _setmode(_fileno(stdout), _O_BINARY);
-#endif
+    FAIL_IF_ERROR( !get_argv_utf8( &argc, &argv ), "unable to convert command line to UTF-8\n" )
 
-    GetConsoleTitle( originalCTitle, sizeof(originalCTitle) );
+    GetConsoleTitleW( org_console_title, CONSOLE_TITLE_SIZE );
+    _setmode( _fileno( stdin ),  _O_BINARY );
+    _setmode( _fileno( stdout ), _O_BINARY );
+    _setmode( _fileno( stderr ), _O_BINARY );
+#endif
 
     /* Parse command line */
     if( parse( argc, argv, &param, &opt ) < 0 )
         ret = -1;
 
+#ifdef _WIN32
     /* Restore title; it can be changed by input modules */
-    SetConsoleTitle( originalCTitle );
+    SetConsoleTitleW( org_console_title );
+#endif
 
     /* Control-C handler */
     signal( SIGINT, sigint_handler );
@@ -306,7 +384,10 @@
     if( opt.qpfile )
         fclose( opt.qpfile );

 
@@ -1,13 +1,14 @@
 /*****************************************************************************
  * x264: top-level x264cli functions
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
  *          Steven Walters <kemuri9@gmail.com>
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
  *          Kieran Kunhya <kieran@kunhya.com>
+ *          Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -27,6 +28,15 @@
  * For more information, contact us at licensing@x264.com.
  *****************************************************************************/
 
+#ifdef _WIN32
+/* The following two defines must be located before the inclusion of any system header files. */
+#define WINVER       0x0500
+#define _WIN32_WINNT 0x0500
+#include <windows.h>
+#include <io.h>       /* _setmode() */
+#include <fcntl.h>    /* _O_BINARY */
+#endif
+
 #include <signal.h>
 #define _GNU_SOURCE
 #include <getopt.h>
@@ -38,13 +48,6 @@
 
 #define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "x264", __VA_ARGS__ )
 
-#ifdef _WIN32
-#include <windows.h>
-#else
-#define GetConsoleTitle(t,n)
-#define SetConsoleTitle(t)
-#endif
-
 #if HAVE_LAVF
 #undef DECLARE_ALIGNED
 #include <libavformat/avformat.h>
@@ -61,18 +64,89 @@
 #include <ffms.h>
 #endif
 
+#ifdef _WIN32
+#define CONSOLE_TITLE_SIZE 200
+static wchar_t org_console_title[CONSOLE_TITLE_SIZE] = L"";
+
+void x264_cli_set_console_title( const char *title )
+{
+    wchar_t title_utf16[CONSOLE_TITLE_SIZE];
+    if( utf8_to_utf16( title, title_utf16 ) )
+        SetConsoleTitleW( title_utf16 );
+}
+
+static int utf16_to_ansi( const wchar_t *utf16, char *ansi, int size )
+{
+    int invalid;
+    return WideCharToMultiByte( CP_ACP, WC_NO_BEST_FIT_CHARS, utf16, -1, ansi, size, NULL, &invalid ) && !invalid;
+}
+
+/* Some external libraries don't support Unicode in filenames,
+ * as a workaround we can try to get an ANSI filename instead. */
+int x264_ansi_filename( const char *filename, char *ansi_filename, int size, int create_file )
+{
+    wchar_t filename_utf16[MAX_PATH];
+    if( utf8_to_utf16( filename, filename_utf16 ) )
+    {
+        if( create_file )
+        {
+            /* Create the file using the Unicode filename if it doesn't already exist. */
+            FILE *fh = _wfopen( filename_utf16, L"ab" );
+            if( fh )
+                fclose( fh );
+        }
+
+        /* Check if the filename already is valid ANSI. */
+        if( utf16_to_ansi( filename_utf16, ansi_filename, size ) )
+            return 1;
+
+        /* Check for a legacy 8.3 short filename. */
+        int short_length = GetShortPathNameW( filename_utf16, filename_utf16, MAX_PATH );
+        if( short_length > 0 && short_length < MAX_PATH )
+            if( utf16_to_ansi( filename_utf16, ansi_filename, size ) )
+                return 1;
+    }
+    return 0;
+}
+
+/* Retrieve command line arguments as UTF-8. */
+static int get_argv_utf8( int *argc_ptr, char ***argv_ptr )
+{
+    int ret = 0;
+    wchar_t **argv_utf16 = CommandLineToArgvW( GetCommandLineW(), argc_ptr );
+    if( argv_utf16 )
+    {
+        int argc = *argc_ptr;
+        int offset = (argc+1) * sizeof(char*);
+        int size = offset;
+
+        for( int i = 0; i < argc; i++ )
+            size += WideCharToMultiByte( CP_UTF8, 0, argv_utf16[i], -1, NULL, 0, NULL, NULL );
+
+        char **argv = *argv_ptr = malloc( size );
+        if( argv )
+        {
+            for( int i = 0; i < argc; i++ )
+            {
+                argv[i] = (char*)argv + offset;
+                offset += WideCharToMultiByte( CP_UTF8, 0, argv_utf16[i], -1, argv[i], size-offset, NULL, NULL );
+            }
+            argv[argc] = NULL;
+            ret = 1;
+        }
+        LocalFree( argv_utf16 );
+    }
+    return ret;
+}
+#endif
+
 /* Ctrl-C handler */
 static volatile int b_ctrl_c = 0;
-static int          b_exit_on_ctrl_c = 0;
 static void sigint_handler( int a )
 {
-    if( b_exit_on_ctrl_c )
-        exit(0);
     b_ctrl_c = 1;
 }
 
-static char UNUSED originalCTitle[200] = "";
-
 typedef struct {
     int b_progress;
     int i_seek;
@@ -114,7 +188,7 @@
     "raw",
     "mkv",
     "flv",
-#if HAVE_GPAC
+#if HAVE_GPAC || HAVE_LSMASH
     "mp4",
 #endif
     0
@@ -211,7 +285,7 @@
     fprintf( stderr, "%s [%s]: ", name, s_level );
     va_list arg;
     va_start( arg, fmt );
-    vfprintf( stderr, fmt, arg );
+    x264_vfprintf( stderr, fmt, arg );
     va_end( arg );
 }
 
@@ -221,7 +295,7 @@
         return;
     va_list arg;
     va_start( arg, fmt );
-    vfprintf( stderr, fmt, arg );
+    x264_vfprintf( stderr, fmt, arg );
     va_end( arg );
 }
 
@@ -275,18 +349,22 @@
     FAIL_IF_ERROR( x264_threading_init(), "unable to initialize threading\n" )
 
 #ifdef _WIN32
-    _setmode(_fileno(stdin), _O_BINARY);
-    _setmode(_fileno(stdout), _O_BINARY);
-#endif
+    FAIL_IF_ERROR( !get_argv_utf8( &argc, &argv ), "unable to convert command line to UTF-8\n" )
 
-    GetConsoleTitle( originalCTitle, sizeof(originalCTitle) );
+    GetConsoleTitleW( org_console_title, CONSOLE_TITLE_SIZE );
+    _setmode( _fileno( stdin ),  _O_BINARY );
+    _setmode( _fileno( stdout ), _O_BINARY );
+    _setmode( _fileno( stderr ), _O_BINARY );
+#endif
 
     /* Parse command line */
     if( parse( argc, argv, &param, &opt ) < 0 )
         ret = -1;
 
+#ifdef _WIN32
     /* Restore title; it can be changed by input modules */
-    SetConsoleTitle( originalCTitle );
+    SetConsoleTitleW( org_console_title );
+#endif
 
     /* Control-C handler */
     signal( SIGINT, sigint_handler );
@@ -306,7 +384,10 @@
     if( opt.qpfile )
         fclose( opt.qpfile );
 
​

x264-snapshot-20130723-2245.tar.bz2/x264.h -> x264-snapshot-20140321-2245.tar.bz2/x264.h Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * x264.h: x264 public header
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -41,7 +41,7 @@
 
 #include "x264_config.h"
 
-#define X264_BUILD 135
+#define X264_BUILD 142
 
 /* Application developers planning to link against a shared library version of
  * libx264 from a Microsoft Visual Studio or similar development environment
@@ -98,12 +98,15 @@
     int i_first_mb; /* If this NAL is a slice, the index of the first MB in the slice. */
     int i_last_mb;  /* If this NAL is a slice, the index of the last MB in the slice. */
 
-    /* Size of payload in bytes. */
+    /* Size of payload (including any padding) in bytes. */
     int     i_payload;
     /* If param->b_annexb is set, Annex-B bytestream with startcode.
      * Otherwise, startcode is replaced with a 4-byte size.
      * This size is the size used in mp4/similar muxing; it is equal to i_payload-4 */
     uint8_t *p_payload;
+
+    /* Size of padding in bytes. */
+    int i_padding;
 } x264_nal_t;
 
 /****************************************************************************
@@ -122,30 +125,29 @@
 #define X264_CPU_SSSE3           0x0000040
 #define X264_CPU_SSE4            0x0000080  /* SSE4.1 */
 #define X264_CPU_SSE42           0x0000100  /* SSE4.2 */
-#define X264_CPU_SSE_MISALIGN    0x0000200  /* Phenom support for misaligned SSE instruction arguments */
-#define X264_CPU_LZCNT           0x0000400  /* Phenom support for "leading zero count" instruction. */
-#define X264_CPU_AVX             0x0000800  /* AVX support: requires OS support even if YMM registers aren't used. */
-#define X264_CPU_XOP             0x0001000  /* AMD XOP */
-#define X264_CPU_FMA4            0x0002000  /* AMD FMA4 */
-#define X264_CPU_AVX2            0x0004000  /* AVX2 */
-#define X264_CPU_FMA3            0x0008000  /* Intel FMA3 */
-#define X264_CPU_BMI1            0x0010000  /* BMI1 */
-#define X264_CPU_BMI2            0x0020000  /* BMI2 */
+#define X264_CPU_LZCNT           0x0000200  /* Phenom support for "leading zero count" instruction. */
+#define X264_CPU_AVX             0x0000400  /* AVX support: requires OS support even if YMM registers aren't used. */
+#define X264_CPU_XOP             0x0000800  /* AMD XOP */
+#define X264_CPU_FMA4            0x0001000  /* AMD FMA4 */
+#define X264_CPU_AVX2            0x0002000  /* AVX2 */
+#define X264_CPU_FMA3            0x0004000  /* Intel FMA3 */
+#define X264_CPU_BMI1            0x0008000  /* BMI1 */
+#define X264_CPU_BMI2            0x0010000  /* BMI2 */
 /* x86 modifiers */
-#define X264_CPU_CACHELINE_32    0x0040000  /* avoid memory loads that span the border between two cachelines */
-#define X264_CPU_CACHELINE_64    0x0080000  /* 32/64 is the size of a cacheline in bytes */
-#define X264_CPU_SSE2_IS_SLOW    0x0100000  /* avoid most SSE2 functions on Athlon64 */
-#define X264_CPU_SSE2_IS_FAST    0x0200000  /* a few functions are only faster on Core2 and Phenom */
-#define X264_CPU_SLOW_SHUFFLE    0x0400000  /* The Conroe has a slow shuffle unit (relative to overall SSE performance) */
-#define X264_CPU_STACK_MOD4      0x0800000  /* if stack is only mod4 and not mod16 */
-#define X264_CPU_SLOW_CTZ        0x1000000  /* BSR/BSF x86 instructions are really slow on some CPUs */
-#define X264_CPU_SLOW_ATOM       0x2000000  /* The Atom is terrible: slow SSE unaligned loads, slow
+#define X264_CPU_CACHELINE_32    0x0020000  /* avoid memory loads that span the border between two cachelines */
+#define X264_CPU_CACHELINE_64    0x0040000  /* 32/64 is the size of a cacheline in bytes */
+#define X264_CPU_SSE2_IS_SLOW    0x0080000  /* avoid most SSE2 functions on Athlon64 */
+#define X264_CPU_SSE2_IS_FAST    0x0100000  /* a few functions are only faster on Core2 and Phenom */
+#define X264_CPU_SLOW_SHUFFLE    0x0200000  /* The Conroe has a slow shuffle unit (relative to overall SSE performance) */
+#define X264_CPU_STACK_MOD4      0x0400000  /* if stack is only mod4 and not mod16 */
+#define X264_CPU_SLOW_CTZ        0x0800000  /* BSR/BSF x86 instructions are really slow on some CPUs */
+#define X264_CPU_SLOW_ATOM       0x1000000  /* The Atom is terrible: slow SSE unaligned loads, slow
                                              * SIMD multiplies, slow SIMD variable shifts, slow pshufb,
                                              * cacheline split penalties -- gather everything here that
                                              * isn't shared by other CPUs to avoid making half a dozen
                                              * new SLOW flags. */
-#define X264_CPU_SLOW_PSHUFB     0x4000000  /* such as on the Intel Atom */
-#define X264_CPU_SLOW_PALIGNR    0x8000000  /* such as on the AMD Bobcat */
+#define X264_CPU_SLOW_PSHUFB     0x2000000  /* such as on the Intel Atom */
+#define X264_CPU_SLOW_PALIGNR    0x4000000  /* such as on the AMD Bobcat */
 
 /* PowerPC */
 #define X264_CPU_ALTIVEC         0x0000001
@@ -213,12 +215,13 @@
 #define X264_CSP_I422           0x0004  /* yuv 4:2:2 planar */
 #define X264_CSP_YV16           0x0005  /* yvu 4:2:2 planar */
 #define X264_CSP_NV16           0x0006  /* yuv 4:2:2, with one y plane and one packed u+v */
-#define X264_CSP_I444           0x0007  /* yuv 4:4:4 planar */
-#define X264_CSP_YV24           0x0008  /* yvu 4:4:4 planar */
-#define X264_CSP_BGR            0x0009  /* packed bgr 24bits   */
-#define X264_CSP_BGRA           0x000a  /* packed bgr 32bits   */
-#define X264_CSP_RGB            0x000b  /* packed rgb 24bits   */
-#define X264_CSP_MAX            0x000c  /* end of list */
+#define X264_CSP_V210           0x0007  /* 10-bit yuv 4:2:2 packed in 32-bit words */
+#define X264_CSP_I444           0x0008  /* yuv 4:4:4 planar */
+#define X264_CSP_YV24           0x0009  /* yvu 4:4:4 planar */
+#define X264_CSP_BGR            0x000a  /* packed bgr 24bits   */
+#define X264_CSP_BGRA           0x000b  /* packed bgr 32bits   */
+#define X264_CSP_RGB            0x000c  /* packed rgb 24bits   */
+#define X264_CSP_MAX            0x000d  /* end of list */
 #define X264_CSP_VFLIP          0x1000  /* the csp is vertically flipped */
 #define X264_CSP_HIGH_DEPTH     0x2000  /* the csp has a depth of 16 bits per pixel component */
 
@@ -319,6 +322,7 @@
     int         i_bframe_pyramid;   /* Keep some B-frames as references: 0=off, 1=strict hierarchical, 2=normal */
     int         b_open_gop;
     int         b_bluray_compat;
+    int         i_avcintra_class;
 
     int         b_deblocking_filter;
     int         i_deblocking_filter_alphac0;    /* [-6, 6] -6 light filter, 6 strong */
@@ -331,7 +335,7 @@
     int         b_constrained_intra;
 
     int         i_cqm_preset;
-    char        *psz_cqm_file;      /* JM format */
+    char        *psz_cqm_file;      /* filename (in UTF-8) of CQM file, JM format */
     uint8_t     cqm_4iy[16];        /* used only if i_cqm_preset == X264_CQM_CUSTOM */
     uint8_t     cqm_4py[16];
     uint8_t     cqm_4ic[16];
@@ -345,9 +349,8 @@
     void        (*pf_log)( void *, int i_level, const char *psz, va_list );
     void        *p_log_private;
     int         i_log_level;
-    int         b_visualize;
     int         b_full_recon;   /* fully reconstruct frames, even when not necessary for encoding.  Implied by psz_dump_yuv */
-    char        *psz_dump_yuv;  /* filename for reconstructed frames */
+    char        *psz_dump_yuv;  /* filename (in UTF-8) for reconstructed frames */
 
     /* Encoder analyser parameters */
     struct
@@ -406,6 +409,10 @@
         float       f_ip_factor;
         float       f_pb_factor;
 
+        /* VBV filler: force CBR VBV and use filler bytes to ensure hard-CBR.
+         * Implied by NAL-HRD CBR. */
+        int         b_filler;
+
         int         i_aq_mode;      /* psy adaptive QP. (X264_AQ_*) */
         float       f_aq_strength;
         int         b_mb_tree;      /* Macroblock-tree ratecontrol. */
@@ -413,9 +420,9 @@
 
         /* 2pass */
         int         b_stat_write;   /* Enable stat writing in psz_stat_out */
-        char        *psz_stat_out;
+        char        *psz_stat_out;  /* output filename (in UTF-8) of the 2pass stats file */
         int         b_stat_read;    /* Read stat from psz_stat_in and use it */
-        char        *psz_stat_in;
+        char        *psz_stat_in;   /* input filename (in UTF-8) of the 2pass stats file */
 
         /* 2pass params (same as ffmpeg ones) */
         float       f_qcompress;    /* 0.0 => cbr, 1.0 => constant qp */
@@ -483,7 +490,7 @@
     int b_opencl;            /* use OpenCL when available */
     int i_opencl_device;     /* specify count of GPU devices to skip, for CLI users */
     void *opencl_device_id;  /* pass explicit cl_device_id as void*, for API users */
-    char *psz_clbin_file;    /* compiled OpenCL kernel cache file */
+    char *psz_clbin_file;    /* filename (in UTF-8) of the compiled OpenCL kernel cache file */
 
     /* Slicing parameters */
     int i_slice_max_size;    /* Max size per slice in bytes; includes estimated NAL overhead. */
@@ -877,13 +884,15 @@
 /* x264_encoder_headers:
  *      return the SPS and PPS that will be used for the whole stream.
  *      *pi_nal is the number of NAL units outputted in pp_nal.
+ *      returns the number of bytes in the returned NALs.
  *      returns negative on error.
  *      the payloads of all output NALs are guaranteed to be sequential in memory. */
 int     x264_encoder_headers( x264_t *, x264_nal_t **pp_nal, int *pi_nal );
 /* x264_encoder_encode:
  *      encode one picture.
  *      *pi_nal is the number of NAL units outputted in pp_nal.
- *      returns negative on error, zero if no NAL units returned.
+ *      returns the number of bytes in the returned NALs.
+ *      returns negative on error and zero if no NAL units returned.
  *      the payloads of all output NALs are guaranteed to be sequential in memory. */
 int     x264_encoder_encode( x264_t *, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t *pic_out );
 /* x264_encoder_close:

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * x264.h: x264 public header
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -41,7 +41,7 @@
 
 #include "x264_config.h"
 
-#define X264_BUILD 135
+#define X264_BUILD 142
 
 /* Application developers planning to link against a shared library version of
  * libx264 from a Microsoft Visual Studio or similar development environment
@@ -98,12 +98,15 @@
     int i_first_mb; /* If this NAL is a slice, the index of the first MB in the slice. */
     int i_last_mb;  /* If this NAL is a slice, the index of the last MB in the slice. */
 
-    /* Size of payload in bytes. */
+    /* Size of payload (including any padding) in bytes. */
     int     i_payload;
     /* If param->b_annexb is set, Annex-B bytestream with startcode.
      * Otherwise, startcode is replaced with a 4-byte size.
      * This size is the size used in mp4/similar muxing; it is equal to i_payload-4 */
     uint8_t *p_payload;
+
+    /* Size of padding in bytes. */
+    int i_padding;
 } x264_nal_t;
 
 /****************************************************************************
@@ -122,30 +125,29 @@
 #define X264_CPU_SSSE3           0x0000040
 #define X264_CPU_SSE4            0x0000080  /* SSE4.1 */
 #define X264_CPU_SSE42           0x0000100  /* SSE4.2 */
-#define X264_CPU_SSE_MISALIGN    0x0000200  /* Phenom support for misaligned SSE instruction arguments */
-#define X264_CPU_LZCNT           0x0000400  /* Phenom support for "leading zero count" instruction. */
-#define X264_CPU_AVX             0x0000800  /* AVX support: requires OS support even if YMM registers aren't used. */
-#define X264_CPU_XOP             0x0001000  /* AMD XOP */
-#define X264_CPU_FMA4            0x0002000  /* AMD FMA4 */
-#define X264_CPU_AVX2            0x0004000  /* AVX2 */
-#define X264_CPU_FMA3            0x0008000  /* Intel FMA3 */
-#define X264_CPU_BMI1            0x0010000  /* BMI1 */
-#define X264_CPU_BMI2            0x0020000  /* BMI2 */
+#define X264_CPU_LZCNT           0x0000200  /* Phenom support for "leading zero count" instruction. */
+#define X264_CPU_AVX             0x0000400  /* AVX support: requires OS support even if YMM registers aren't used. */
+#define X264_CPU_XOP             0x0000800  /* AMD XOP */
+#define X264_CPU_FMA4            0x0001000  /* AMD FMA4 */
+#define X264_CPU_AVX2            0x0002000  /* AVX2 */
+#define X264_CPU_FMA3            0x0004000  /* Intel FMA3 */
+#define X264_CPU_BMI1            0x0008000  /* BMI1 */
+#define X264_CPU_BMI2            0x0010000  /* BMI2 */
 /* x86 modifiers */
-#define X264_CPU_CACHELINE_32    0x0040000  /* avoid memory loads that span the border between two cachelines */
-#define X264_CPU_CACHELINE_64    0x0080000  /* 32/64 is the size of a cacheline in bytes */
-#define X264_CPU_SSE2_IS_SLOW    0x0100000  /* avoid most SSE2 functions on Athlon64 */
-#define X264_CPU_SSE2_IS_FAST    0x0200000  /* a few functions are only faster on Core2 and Phenom */
-#define X264_CPU_SLOW_SHUFFLE    0x0400000  /* The Conroe has a slow shuffle unit (relative to overall SSE performance) */
-#define X264_CPU_STACK_MOD4      0x0800000  /* if stack is only mod4 and not mod16 */
-#define X264_CPU_SLOW_CTZ        0x1000000  /* BSR/BSF x86 instructions are really slow on some CPUs */
-#define X264_CPU_SLOW_ATOM       0x2000000  /* The Atom is terrible: slow SSE unaligned loads, slow
+#define X264_CPU_CACHELINE_32    0x0020000  /* avoid memory loads that span the border between two cachelines */
+#define X264_CPU_CACHELINE_64    0x0040000  /* 32/64 is the size of a cacheline in bytes */
+#define X264_CPU_SSE2_IS_SLOW    0x0080000  /* avoid most SSE2 functions on Athlon64 */
+#define X264_CPU_SSE2_IS_FAST    0x0100000  /* a few functions are only faster on Core2 and Phenom */
+#define X264_CPU_SLOW_SHUFFLE    0x0200000  /* The Conroe has a slow shuffle unit (relative to overall SSE performance) */
+#define X264_CPU_STACK_MOD4      0x0400000  /* if stack is only mod4 and not mod16 */
+#define X264_CPU_SLOW_CTZ        0x0800000  /* BSR/BSF x86 instructions are really slow on some CPUs */
+#define X264_CPU_SLOW_ATOM       0x1000000  /* The Atom is terrible: slow SSE unaligned loads, slow
                                              * SIMD multiplies, slow SIMD variable shifts, slow pshufb,
                                              * cacheline split penalties -- gather everything here that
                                              * isn't shared by other CPUs to avoid making half a dozen
                                              * new SLOW flags. */
-#define X264_CPU_SLOW_PSHUFB     0x4000000  /* such as on the Intel Atom */
-#define X264_CPU_SLOW_PALIGNR    0x8000000  /* such as on the AMD Bobcat */
+#define X264_CPU_SLOW_PSHUFB     0x2000000  /* such as on the Intel Atom */
+#define X264_CPU_SLOW_PALIGNR    0x4000000  /* such as on the AMD Bobcat */
 
 /* PowerPC */
 #define X264_CPU_ALTIVEC         0x0000001
@@ -213,12 +215,13 @@
 #define X264_CSP_I422           0x0004  /* yuv 4:2:2 planar */
 #define X264_CSP_YV16           0x0005  /* yvu 4:2:2 planar */
 #define X264_CSP_NV16           0x0006  /* yuv 4:2:2, with one y plane and one packed u+v */
-#define X264_CSP_I444           0x0007  /* yuv 4:4:4 planar */
-#define X264_CSP_YV24           0x0008  /* yvu 4:4:4 planar */
-#define X264_CSP_BGR            0x0009  /* packed bgr 24bits   */
-#define X264_CSP_BGRA           0x000a  /* packed bgr 32bits   */
-#define X264_CSP_RGB            0x000b  /* packed rgb 24bits   */
-#define X264_CSP_MAX            0x000c  /* end of list */
+#define X264_CSP_V210           0x0007  /* 10-bit yuv 4:2:2 packed in 32 */
+#define X264_CSP_I444           0x0008  /* yuv 4:4:4 planar */
+#define X264_CSP_YV24           0x0009  /* yvu 4:4:4 planar */
+#define X264_CSP_BGR            0x000a  /* packed bgr 24bits   */
+#define X264_CSP_BGRA           0x000b  /* packed bgr 32bits   */
+#define X264_CSP_RGB            0x000c  /* packed rgb 24bits   */
+#define X264_CSP_MAX            0x000d  /* end of list */
 #define X264_CSP_VFLIP          0x1000  /* the csp is vertically flipped */
 #define X264_CSP_HIGH_DEPTH     0x2000  /* the csp has a depth of 16 bits per pixel component */
 
@@ -319,6 +322,7 @@
     int         i_bframe_pyramid;   /* Keep some B-frames as references: 0=off, 1=strict hierarchical, 2=normal */
     int         b_open_gop;
     int         b_bluray_compat;
+    int         i_avcintra_class;
 
     int         b_deblocking_filter;
     int         i_deblocking_filter_alphac0;    /* [-6, 6] -6 light filter, 6 strong */
@@ -331,7 +335,7 @@
     int         b_constrained_intra;
 
     int         i_cqm_preset;
-    char        *psz_cqm_file;      /* JM format */
+    char        *psz_cqm_file;      /* filename (in UTF-8) of CQM file, JM format */
     uint8_t     cqm_4iy[16];        /* used only if i_cqm_preset == X264_CQM_CUSTOM */
     uint8_t     cqm_4py[16];
     uint8_t     cqm_4ic[16];
@@ -345,9 +349,8 @@
     void        (*pf_log)( void *, int i_level, const char *psz, va_list );
     void        *p_log_private;
     int         i_log_level;
-    int         b_visualize;
     int         b_full_recon;   /* fully reconstruct frames, even when not necessary for encoding.  Implied by psz_dump_yuv */
-    char        *psz_dump_yuv;  /* filename for reconstructed frames */
+    char        *psz_dump_yuv;  /* filename (in UTF-8) for reconstructed frames */
 
     /* Encoder analyser parameters */
     struct
@@ -406,6 +409,10 @@
         float       f_ip_factor;
         float       f_pb_factor;
 
+        /* VBV filler: force CBR VBV and use filler bytes to ensure hard-CBR.
+         * Implied by NAL-HRD CBR. */
+        int         b_filler;
+
         int         i_aq_mode;      /* psy adaptive QP. (X264_AQ_*) */
         float       f_aq_strength;
         int         b_mb_tree;      /* Macroblock-tree ratecontrol. */
@@ -413,9 +420,9 @@
 
         /* 2pass */
         int         b_stat_write;   /* Enable stat writing in psz_stat_out */
-        char        *psz_stat_out;
+        char        *psz_stat_out;  /* output filename (in UTF-8) of the 2pass stats file */
         int         b_stat_read;    /* Read stat from psz_stat_in and use it */
-        char        *psz_stat_in;
+        char        *psz_stat_in;   /* input filename (in UTF-8) of the 2pass stats file */
 
         /* 2pass params (same as ffmpeg ones) */
         float       f_qcompress;    /* 0.0 => cbr, 1.0 => constant qp */
@@ -483,7 +490,7 @@
     int b_opencl;            /* use OpenCL when available */
     int i_opencl_device;     /* specify count of GPU devices to skip, for CLI users */
     void *opencl_device_id;  /* pass explicit cl_device_id as void*, for API users */
-    char *psz_clbin_file;    /* compiled OpenCL kernel cache file */
+    char *psz_clbin_file;    /* filename (in UTF-8) of the compiled OpenCL kernel cache file */
 
     /* Slicing parameters */
     int i_slice_max_size;    /* Max size per slice in bytes; includes estimated NAL overhead. */
@@ -877,13 +884,15 @@
 /* x264_encoder_headers:
  *      return the SPS and PPS that will be used for the whole stream.
  *      *pi_nal is the number of NAL units outputted in pp_nal.
+ *      returns the number of bytes in the returned NALs.
  *      returns negative on error.
  *      the payloads of all output NALs are guaranteed to be sequential in memory. */
 int     x264_encoder_headers( x264_t *, x264_nal_t **pp_nal, int *pi_nal );
 /* x264_encoder_encode:
  *      encode one picture.
  *      *pi_nal is the number of NAL units outputted in pp_nal.
- *      returns negative on error, zero if no NAL units returned.
+ *      returns the number of bytes in the returned NALs.
+ *      returns negative on error and zero if no NAL units returned.
  *      the payloads of all output NALs are guaranteed to be sequential in memory. */
 int     x264_encoder_encode( x264_t *, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t *pic_out );
 /* x264_encoder_close:
​

x264-snapshot-20130723-2245.tar.bz2/x264cli.h -> x264-snapshot-20140321-2245.tar.bz2/x264cli.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * x264cli.h: x264cli common
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -63,6 +63,13 @@
 void x264_cli_log( const char *name, int i_level, const char *fmt, ... );
 void x264_cli_printf( int i_level, const char *fmt, ... );
 
+#ifdef _WIN32
+void x264_cli_set_console_title( const char *title );
+int x264_ansi_filename( const char *filename, char *ansi_filename, int size, int create_file );
+#else
+#define x264_cli_set_console_title( title )
+#endif
+
 #define RETURN_IF_ERR( cond, name, ret, ... )\
 if( cond )\
 {\
​

x264-snapshot-20130723-2245.tar.bz2/x264dll.c -> x264-snapshot-20140321-2245.tar.bz2/x264dll.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * x264dll: x264 DLLMain for win32
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: Anton Mitrofanov <BugMaster@narod.ru>
  *
@@ -27,7 +27,7 @@
 #include <windows.h>
 
 /* Callback for our DLL so we can initialize pthread */
-BOOL WINAPI DllMain( HANDLE hinstDLL, DWORD fdwReason, LPVOID lpvReserved )
+BOOL WINAPI DllMain( HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved )
 {
 #if PTW32_STATIC_LIB
     switch( fdwReason )
​

x264-snapshot-20130723-2245.tar.bz2/x264res.rc -> x264-snapshot-20140321-2245.tar.bz2/x264res.rc Changed

@@ -1,9 +1,9 @@
 /*****************************************************************************
  * x264res.rc: windows resource file
  *****************************************************************************
- * Copyright (C) 2012-2013 x264 project
+ * Copyright (C) 2012-2014 x264 project
  *
- * Authors: Henrik Gramner <hengar-6@student.ltu.se>
+ * Authors: Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -50,7 +50,7 @@
 BEGIN
     BLOCK "StringFileInfo"
     BEGIN
-        BLOCK "040904E4"
+        BLOCK "040904B0"
         BEGIN
             VALUE "CompanyName",      "x264 project"
 #ifdef DLL
@@ -60,7 +60,7 @@
 #endif
             VALUE "FileVersion",      X264_POINTVER
             VALUE "InternalName",     "x264"
-            VALUE "LegalCopyright",   "Copyright (C) 2003-2013 x264 project"
+            VALUE "LegalCopyright",   "Copyright (C) 2003-2014 x264 project"
 #ifdef DLL
             VALUE "OriginalFilename", "libx264-" xstr(X264_BUILD) ".dll"
 #else
@@ -73,6 +73,6 @@
 
     BLOCK "VarFileInfo"
     BEGIN
-        VALUE "Translation", 0x0409, 0x04E4
+        VALUE "Translation", 0x0409, 0x04B0 /* U.S. English (Unicode) */
     END
 END

 
@@ -1,9 +1,9 @@
 /*****************************************************************************
  * x264res.rc: windows resource file
  *****************************************************************************
- * Copyright (C) 2012-2013 x264 project
+ * Copyright (C) 2012-2014 x264 project
  *
- * Authors: Henrik Gramner <hengar-6@student.ltu.se>
+ * Authors: Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -50,7 +50,7 @@
 BEGIN
     BLOCK "StringFileInfo"
     BEGIN
-        BLOCK "040904E4"
+        BLOCK "040904B0"
         BEGIN
             VALUE "CompanyName",      "x264 project"
 #ifdef DLL
@@ -60,7 +60,7 @@
 #endif
             VALUE "FileVersion",      X264_POINTVER
             VALUE "InternalName",     "x264"
-            VALUE "LegalCopyright",   "Copyright (C) 2003-2013 x264 project"
+            VALUE "LegalCopyright",   "Copyright (C) 2003-2014 x264 project"
 #ifdef DLL
             VALUE "OriginalFilename", "libx264-" xstr(X264_BUILD) ".dll"
 #else
@@ -73,6 +73,6 @@
 
     BLOCK "VarFileInfo"
     BEGIN
-        VALUE "Translation", 0x0409, 0x04E4
+        VALUE "Translation", 0x0409, 0x04B0 /* U.S. English (Unicode) */
     END
 END
​

Changes of Revision 6