Packman Build Service PMBS

Changes of Revision 6

libx264.changes Changed

​x
 
@@ -1,4 +1,9 @@
 -------------------------------------------------------------------
+Sat Mar 22 17:10:14 UTC 2014 - i@margueirte.su
+
+- Update to version 20140321.
+
+-------------------------------------------------------------------
 Tue Nov 19 07:53:08 UTC 2013 - obs@botter.cc
 
 - add -fno-aggressive-loop-optimizations to extra-cflags in
​

libx264.spec Changed

 
@@ -14,8 +14,8 @@
 # Please submit bugfixes or comments via http://bugs.links2linux.org/
 
 Name:           libx264
-%define soname  135
-%define svn     20130723
+%define soname  142
+%define svn     20140321
 Version:        0.%{soname}svn%{svn}
 Release:        1
 License:        GPL-2.0+
​

x264-snapshot-20130723-2245.tar.bz2/common/display-x11.c Deleted

@@ -1,218 +0,0 @@
-/*****************************************************************************
- * display-x11.c: x11 interface
- *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
- *
- * Authors: Tuukka Toivonen <tuukkat@ee.oulu.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
- *
- * This program is also available under a commercial proprietary license.
- * For more information, contact us at licensing@x264.com.
- *****************************************************************************/
-
-#include <X11/Xlib.h>
-#include <X11/Xutil.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "common.h"
-#include "display.h"
-
-static long event_mask = ConfigureNotify|ExposureMask|KeyPressMask|ButtonPressMask|StructureNotifyMask|ResizeRedirectMask;
-
-static Display *disp_display = NULL;
-static struct disp_window
-{
-    int init;
-    Window window;
-} disp_window[10];
-
-static inline void disp_chkerror( int cond, char *e )
-{
-    if( !cond )
-        return;
-    fprintf( stderr, "error: %s\n", e ? e : "?" );
-    abort();
-}
-
-static void disp_init_display( void )
-{
-    Visual *visual;
-    int dpy_class;
-    int screen;
-    int dpy_depth;
-
-    if( disp_display )
-        return;
-    memset( &disp_window, 0, sizeof(disp_window) );
-    disp_display = XOpenDisplay( "" );
-    disp_chkerror( !disp_display, "no display" );
-    screen = DefaultScreen( disp_display );
-    visual = DefaultVisual( disp_display, screen );
-    dpy_class = visual->class;
-    dpy_depth = DefaultDepth( disp_display, screen );
-    disp_chkerror( !((dpy_class == TrueColor && dpy_depth == 32)
-        || (dpy_class == TrueColor && dpy_depth == 24)
-        || (dpy_class == TrueColor && dpy_depth == 16)
-        || (dpy_class == PseudoColor && dpy_depth == 8)),
-        "requires 8 bit PseudoColor or 16/24/32 bit TrueColor display" );
-}
-
-static void disp_init_window( int num, int width, int height, const unsigned char *title )
-{
-    XSetWindowAttributes xswa;
-    XEvent xev;
-    int screen = DefaultScreen(disp_display);
-    Visual *visual = DefaultVisual (disp_display, screen);
-    char buf[200];
-    Window window;
-
-    if( title )
-        snprintf( buf, 200, "%s: %i/disp", title, num );
-    else
-        snprintf( buf, 200, "%i/disp", num );
-
-    XSizeHints *shint = XAllocSizeHints();
-    disp_chkerror( !shint, "memerror" );
-    shint->min_width = shint->max_width = shint->width = width;
-    shint->min_height = shint->max_height = shint->height = height;
-    shint->flags = PSize | PMinSize | PMaxSize;
-    disp_chkerror( num < 0 || num >= 10, "bad win num" );
-    if( !disp_window[num].init )
-    {
-        unsigned int mask = 0;
-        disp_window[num].init = 1;
-        unsigned int bg = WhitePixel( disp_display, screen );
-        unsigned int fg = BlackPixel( disp_display, screen );
-        int dpy_depth = DefaultDepth( disp_display, screen );
-        if( dpy_depth==32 || dpy_depth==24 || dpy_depth==16 )
-        {
-            mask |= CWColormap;
-            xswa.colormap = XCreateColormap( disp_display, DefaultRootWindow( disp_display ), visual, AllocNone );
-        }
-        xswa.background_pixel = bg;
-        xswa.border_pixel = fg;
-        xswa.backing_store = Always;
-        xswa.backing_planes = -1;
-        xswa.bit_gravity = NorthWestGravity;
-        mask = CWBackPixel | CWBorderPixel | CWBackingStore | CWBackingPlanes | CWBitGravity;
-        window = XCreateWindow( disp_display, DefaultRootWindow( disp_display ),
-                                shint->x, shint->y, shint->width, shint->height,
-                                1, dpy_depth, InputOutput, visual, mask, &xswa );
-        disp_window[num].window = window;
-
-        XSelectInput( disp_display, window, event_mask );
-        XSetStandardProperties( disp_display, window, buf, buf, None, NULL, 0, shint );
-        XMapWindow( disp_display, window );
-
-        do {
-            XNextEvent( disp_display, &xev );
-        } while( xev.type != MapNotify || xev.xmap.event != window );
-    }
-    window = disp_window[num].window;
-    XSetStandardProperties( disp_display, window, buf, buf, None, NULL, 0, shint );
-    XResizeWindow( disp_display, window, width, height );
-    XSync( disp_display, 1 );
-    XFree( shint );
-}
-
-void disp_sync( void )
-{
-    XSync( disp_display, 1 );
-}
-
-void disp_setcolor( unsigned char *name )
-{
-    XColor c_exact, c_nearest;
-
-    int screen = DefaultScreen( disp_display );
-    GC gc = DefaultGC( disp_display, screen );
-    Colormap cm = DefaultColormap( disp_display, screen );
-    Status st = XAllocNamedColor( disp_display, cm, name, &c_nearest, &c_exact );
-    disp_chkerror( st != 1, "XAllocNamedColor error" );
-    XSetForeground( disp_display, gc, c_nearest.pixel );
-}
-
-void disp_gray( int num, char *data, int width, int height, int stride, const unsigned char *title )
-{
-    char dummy;
-
-    disp_init_display();
-    disp_init_window( num, width, height, title );
-    int screen = DefaultScreen( disp_display );
-    Visual *visual = DefaultVisual( disp_display, screen );
-    int dpy_depth = DefaultDepth( disp_display, screen );
-    XImage *ximage = XCreateImage( disp_display, visual, dpy_depth, ZPixmap, 0, &dummy, width, height, 8, 0 );
-    disp_chkerror( !ximage, "no ximage" );
-#if WORDS_BIGENDIAN
-    ximage->byte_order = MSBFirst;
-    ximage->bitmap_bit_order = MSBFirst;
-#else
-    ximage->byte_order = LSBFirst;
-    ximage->bitmap_bit_order = LSBFirst;
-#endif
-
-    int pixelsize = dpy_depth>8 ? sizeof(int) : sizeof(unsigned char);
-    uint8_t *image = malloc( width * height * pixelsize );
-    disp_chkerror( !image, "malloc failed" );
-    for( int y = 0; y < height; y++ )
-        for( int x = 0; x < width; x++ )
-            memset( &image[(width*y + x)*pixelsize], data[y*stride+x], pixelsize );
-    ximage->data = image;
-    GC gc = DefaultGC( disp_display, screen );
-
-    XPutImage( disp_display, disp_window[num].window, gc, ximage, 0, 0, 0, 0, width, height );
-    XPutImage( disp_display, disp_window[num].window, gc, ximage, 0, 0, 0, 0, width, height );
-
-    XDestroyImage( ximage );
-    XSync( disp_display, 1 );
-
-}
-
-void disp_gray_zoom(int num, char *data, int width, int height, int stride, const unsigned char *title, int zoom)
-{
-    unsigned char *dataz = malloc( width*zoom * height*zoom );
-    disp_chkerror( !dataz, "malloc" );
-    for( int y = 0; y < height; y++ )
-        for( int x = 0; x < width; x++ )
-            for( int y0 = 0; y0 < zoom; y0++ )
-                for( int x0 = 0; x0 < zoom; x0++ )
-                    dataz[(y*zoom + y0)*width*zoom + x*zoom + x0] = data[y*stride+x];
-    disp_gray( num, dataz, width*zoom, height*zoom, width*zoom, title );
-    free( dataz );
-}
-
-void disp_point( int num, int x1, int y1 )
-{
-    int screen = DefaultScreen( disp_display );
-    GC gc = DefaultGC( disp_display, screen );
-    XDrawPoint( disp_display, disp_window[num].window, gc, x1, y1 );
-}
-
-void disp_line( int num, int x1, int y1, int x2, int y2 )
-{
-    int screen = DefaultScreen( disp_display );
-    GC gc = DefaultGC( disp_display, screen );
-    XDrawLine( disp_display, disp_window[num].window, gc, x1, y1, x2, y2 );
-}
-
-void disp_rect( int num, int x1, int y1, int x2, int y2 )
-{
-    int screen = DefaultScreen( disp_display );
-    GC gc = DefaultGC( disp_display, screen );
-    XDrawRectangle( disp_display, disp_window[num].window, gc, x1, y1, x2-x1, y2-y1 );
-}

 
@@ -1,218 +0,0 @@
-/*****************************************************************************
- * display-x11.c: x11 interface
- *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
- *
- * Authors: Tuukka Toivonen <tuukkat@ee.oulu.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
- *
- * This program is also available under a commercial proprietary license.
- * For more information, contact us at licensing@x264.com.
- *****************************************************************************/
-
-#include <X11/Xlib.h>
-#include <X11/Xutil.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "common.h"
-#include "display.h"
-
-static long event_mask = ConfigureNotify|ExposureMask|KeyPressMask|ButtonPressMask|StructureNotifyMask|ResizeRedirectMask;
-
-static Display *disp_display = NULL;
-static struct disp_window
-{
-    int init;
-    Window window;
-} disp_window[10];
-
-static inline void disp_chkerror( int cond, char *e )
-{
-    if( !cond )
-        return;
-    fprintf( stderr, "error: %s\n", e ? e : "?" );
-    abort();
-}
-
-static void disp_init_display( void )
-{
-    Visual *visual;
-    int dpy_class;
-    int screen;
-    int dpy_depth;
-
-    if( disp_display )
-        return;
-    memset( &disp_window, 0, sizeof(disp_window) );
-    disp_display = XOpenDisplay( "" );
-    disp_chkerror( !disp_display, "no display" );
-    screen = DefaultScreen( disp_display );
-    visual = DefaultVisual( disp_display, screen );
-    dpy_class = visual->class;
-    dpy_depth = DefaultDepth( disp_display, screen );
-    disp_chkerror( !((dpy_class == TrueColor && dpy_depth == 32)
-        || (dpy_class == TrueColor && dpy_depth == 24)
-        || (dpy_class == TrueColor && dpy_depth == 16)
-        || (dpy_class == PseudoColor && dpy_depth == 8)),
-        "requires 8 bit PseudoColor or 16/24/32 bit TrueColor display" );
-}
-
-static void disp_init_window( int num, int width, int height, const unsigned char *title )
-{
-    XSetWindowAttributes xswa;
-    XEvent xev;
-    int screen = DefaultScreen(disp_display);
-    Visual *visual = DefaultVisual (disp_display, screen);
-    char buf[200];
-    Window window;
-
-    if( title )
-        snprintf( buf, 200, "%s: %i/disp", title, num );
-    else
-        snprintf( buf, 200, "%i/disp", num );
-
-    XSizeHints *shint = XAllocSizeHints();
-    disp_chkerror( !shint, "memerror" );
-    shint->min_width = shint->max_width = shint->width = width;
-    shint->min_height = shint->max_height = shint->height = height;
-    shint->flags = PSize | PMinSize | PMaxSize;
-    disp_chkerror( num < 0 || num >= 10, "bad win num" );
-    if( !disp_window[num].init )
-    {
-        unsigned int mask = 0;
-        disp_window[num].init = 1;
-        unsigned int bg = WhitePixel( disp_display, screen );
-        unsigned int fg = BlackPixel( disp_display, screen );
-        int dpy_depth = DefaultDepth( disp_display, screen );
-        if( dpy_depth==32 || dpy_depth==24 || dpy_depth==16 )
-        {
-            mask |= CWColormap;
-            xswa.colormap = XCreateColormap( disp_display, DefaultRootWindow( disp_display ), visual, AllocNone );
-        }
-        xswa.background_pixel = bg;
-        xswa.border_pixel = fg;
-        xswa.backing_store = Always;
-        xswa.backing_planes = -1;
-        xswa.bit_gravity = NorthWestGravity;
-        mask = CWBackPixel | CWBorderPixel | CWBackingStore | CWBackingPlanes | CWBitGravity;
-        window = XCreateWindow( disp_display, DefaultRootWindow( disp_display ),
-                                shint->x, shint->y, shint->width, shint->height,
-                                1, dpy_depth, InputOutput, visual, mask, &xswa );
-        disp_window[num].window = window;
-
-        XSelectInput( disp_display, window, event_mask );
-        XSetStandardProperties( disp_display, window, buf, buf, None, NULL, 0, shint );
-        XMapWindow( disp_display, window );
-
-        do {
-            XNextEvent( disp_display, &xev );
-        } while( xev.type != MapNotify || xev.xmap.event != window );
-    }
-    window = disp_window[num].window;
-    XSetStandardProperties( disp_display, window, buf, buf, None, NULL, 0, shint );
-    XResizeWindow( disp_display, window, width, height );
-    XSync( disp_display, 1 );
-    XFree( shint );
-}
-
-void disp_sync( void )
-{
-    XSync( disp_display, 1 );
-}
-
-void disp_setcolor( unsigned char *name )
-{
-    XColor c_exact, c_nearest;
-
-    int screen = DefaultScreen( disp_display );
-    GC gc = DefaultGC( disp_display, screen );
-    Colormap cm = DefaultColormap( disp_display, screen );
-    Status st = XAllocNamedColor( disp_display, cm, name, &c_nearest, &c_exact );
-    disp_chkerror( st != 1, "XAllocNamedColor error" );
-    XSetForeground( disp_display, gc, c_nearest.pixel );
-}
-
-void disp_gray( int num, char *data, int width, int height, int stride, const unsigned char *title )
-{
-    char dummy;
-
-    disp_init_display();
-    disp_init_window( num, width, height, title );
-    int screen = DefaultScreen( disp_display );
-    Visual *visual = DefaultVisual( disp_display, screen );
-    int dpy_depth = DefaultDepth( disp_display, screen );
-    XImage *ximage = XCreateImage( disp_display, visual, dpy_depth, ZPixmap, 0, &dummy, width, height, 8, 0 );
-    disp_chkerror( !ximage, "no ximage" );
-#if WORDS_BIGENDIAN
-    ximage->byte_order = MSBFirst;
-    ximage->bitmap_bit_order = MSBFirst;
-#else
-    ximage->byte_order = LSBFirst;
-    ximage->bitmap_bit_order = LSBFirst;
-#endif
-
-    int pixelsize = dpy_depth>8 ? sizeof(int) : sizeof(unsigned char);
-    uint8_t *image = malloc( width * height * pixelsize );
-    disp_chkerror( !image, "malloc failed" );
-    for( int y = 0; y < height; y++ )
-        for( int x = 0; x < width; x++ )
-            memset( &image[(width*y + x)*pixelsize], data[y*stride+x], pixelsize );
-    ximage->data = image;
-    GC gc = DefaultGC( disp_display, screen );
-
-    XPutImage( disp_display, disp_window[num].window, gc, ximage, 0, 0, 0, 0, width, height );
-    XPutImage( disp_display, disp_window[num].window, gc, ximage, 0, 0, 0, 0, width, height );
-
-    XDestroyImage( ximage );
-    XSync( disp_display, 1 );
-
-}
-
-void disp_gray_zoom(int num, char *data, int width, int height, int stride, const unsigned char *title, int zoom)
-{
-    unsigned char *dataz = malloc( width*zoom * height*zoom );
-    disp_chkerror( !dataz, "malloc" );
-    for( int y = 0; y < height; y++ )
-        for( int x = 0; x < width; x++ )
-            for( int y0 = 0; y0 < zoom; y0++ )
-                for( int x0 = 0; x0 < zoom; x0++ )
-                    dataz[(y*zoom + y0)*width*zoom + x*zoom + x0] = data[y*stride+x];
-    disp_gray( num, dataz, width*zoom, height*zoom, width*zoom, title );
-    free( dataz );
-}
-
-void disp_point( int num, int x1, int y1 )
-{
-    int screen = DefaultScreen( disp_display );
-    GC gc = DefaultGC( disp_display, screen );
-    XDrawPoint( disp_display, disp_window[num].window, gc, x1, y1 );
-}
-
-void disp_line( int num, int x1, int y1, int x2, int y2 )
-{
-    int screen = DefaultScreen( disp_display );
-    GC gc = DefaultGC( disp_display, screen );
-    XDrawLine( disp_display, disp_window[num].window, gc, x1, y1, x2, y2 );
-}
-
-void disp_rect( int num, int x1, int y1, int x2, int y2 )
-{
-    int screen = DefaultScreen( disp_display );
-    GC gc = DefaultGC( disp_display, screen );
-    XDrawRectangle( disp_display, disp_window[num].window, gc, x1, y1, x2-x1, y2-y1 );
-}
​

x264-snapshot-20130723-2245.tar.bz2/common/display.h Deleted

@@ -1,41 +0,0 @@
-/*****************************************************************************
- * display.h: x11 visualization interface
- *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
- *
- * Authors: Tuukka Toivonen <tuukkat@ee.oulu.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
- *
- * This program is also available under a commercial proprietary license.
- * For more information, contact us at licensing@x264.com.
- *****************************************************************************/
-
-#ifndef X264_DISPLAY_H
-#define X264_DISPLAY_H
-
-void disp_sync(void);
-void disp_setcolor(unsigned char *name);
-/* Display a region of byte wide memory as a grayscale image.
- * num is the window to use for displaying. */
-void disp_gray(int num, char *data, int width, int height,
-               int stride, const unsigned char *title);
-void disp_gray_zoom(int num, char *data, int width, int height,
-               int stride, const unsigned char *title, int zoom);
-void disp_point(int num, int x1, int y1);
-void disp_line(int num, int x1, int y1, int x2, int y2);
-void disp_rect(int num, int x1, int y1, int x2, int y2);
-
-#endif

 
@@ -1,41 +0,0 @@
-/*****************************************************************************
- * display.h: x11 visualization interface
- *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
- *
- * Authors: Tuukka Toivonen <tuukkat@ee.oulu.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
- *
- * This program is also available under a commercial proprietary license.
- * For more information, contact us at licensing@x264.com.
- *****************************************************************************/
-
-#ifndef X264_DISPLAY_H
-#define X264_DISPLAY_H
-
-void disp_sync(void);
-void disp_setcolor(unsigned char *name);
-/* Display a region of byte wide memory as a grayscale image.
- * num is the window to use for displaying. */
-void disp_gray(int num, char *data, int width, int height,
-               int stride, const unsigned char *title);
-void disp_gray_zoom(int num, char *data, int width, int height,
-               int stride, const unsigned char *title, int zoom);
-void disp_point(int num, int x1, int y1);
-void disp_line(int num, int x1, int y1, int x2, int y2);
-void disp_rect(int num, int x1, int y1, int x2, int y2);
-
-#endif
​

x264-snapshot-20130723-2245.tar.bz2/common/visualize.c Deleted

@@ -1,341 +0,0 @@
-/*****************************************************************************
- * visualize.c: visualization
- *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
- *
- * Authors: Tuukka Toivonen <tuukkat@ee.oulu.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
- *
- * This program is also available under a commercial proprietary license.
- * For more information, contact us at licensing@x264.com.
- *****************************************************************************/
-
-/*
- * Some explanation of the symbols used:
- * Red/pink: intra block
- * Blue: inter block
- * Green: skip block
- * Yellow: B-block (not visualized properly yet)
- *
- * Motion vectors have black dot at their target (ie. at the MB center),
- * instead of arrowhead. The black dot is enclosed in filled diamond with radius
- * depending on reference frame number (one frame back = zero width, normal case).
- *
- * The intra blocks have generally lines drawn perpendicular
- * to the prediction direction, so for example, if there is a pink block
- * with horizontal line at the top of it, it is interpolated by assuming
- * luma to be vertically constant.
- * DC predicted blocks have both horizontal and vertical lines,
- * pink blocks with a diagonal line are predicted using the planar function.
- */
-
-#include "common.h"
-#include "visualize.h"
-#include "display.h"
-
-typedef struct
-{
-    int     i_type;
-    int     i_partition;
-    int     i_sub_partition[4];
-    int     i_intra16x16_pred_mode;
-    int     intra4x4_pred_mode[4][4];
-    int8_t  ref[2][4][4];                  /* [list][y][x] */
-    int16_t mv[2][4][4][2];                /* [list][y][x][mvxy] */
-} visualize_t;
-
-/* Return string from stringlist corresponding to the given code */
-#define GET_STRING(sl, code) get_string((sl), sizeof(sl)/sizeof(*(sl)), code)
-
-typedef struct
-{
-    int code;
-    char *string;
-} stringlist_t;
-
-static char *get_string( const stringlist_t *sl, int entries, int code )
-{
-    for( int i = 0; i < entries; i++ )
-        if( sl[i].code == code )
-            return sl[i].string;
-    return "?";
-}
-
-/* Plot motion vector */
-static void mv( int x0, int y0, int16_t dmv[2], int ref, int zoom, char *col )
-{
-    int dx = dmv[0];
-    int dy = dmv[1];
-
-    dx = (dx * zoom + 2) >> 2;
-    dy = (dy * zoom + 2) >> 2;
-    disp_line( 0, x0, y0, x0+dx, y0+dy );
-    for( int i = 1; i < ref; i++ )
-    {
-        disp_line( 0, x0  , y0-i, x0+i, y0   );
-        disp_line( 0, x0+i, y0  , x0  , y0+i );
-        disp_line( 0, x0  , y0+i, x0-i, y0   );
-        disp_line( 0, x0-i, y0  , x0  , y0-i );
-    }
-    disp_setcolor( "black" );
-    disp_point( 0, x0, y0 );
-    disp_setcolor( col );
-}
-
-int x264_visualize_init( x264_t *h )
-{
-    CHECKED_MALLOC( h->visualize, h->mb.i_mb_width * h->mb.i_mb_height * sizeof(visualize_t) );
-    return 0;
-fail:
-    return -1;
-}
-
-void x264_visualize_mb( x264_t *h )
-{
-    visualize_t *v = (visualize_t*)h->visualize + h->mb.i_mb_xy;
-
-    /* Save all data for the MB that we need for drawing the visualization */
-    v->i_type = h->mb.i_type;
-    v->i_partition = h->mb.i_partition;
-    for( int i = 0; i < 4; i++ )
-        v->i_sub_partition[i] = h->mb.i_sub_partition[i];
-    for( int y = 0; y < 4; y++ )
-        for( int x = 0; x < 4; x++ )
-            v->intra4x4_pred_mode[y][x] = h->mb.cache.intra4x4_pred_mode[X264_SCAN8_0+y*8+x];
-    for( int l = 0; l < 2; l++ )
-        for( int y = 0; y < 4; y++ )
-            for( int x = 0; x < 4; x++ )
-            {
-                for( int i = 0; i < 2; i++ )
-                    v->mv[l][y][x][i] = h->mb.cache.mv[l][X264_SCAN8_0+y*8+x][i];
-                v->ref[l][y][x] = h->mb.cache.ref[l][X264_SCAN8_0+y*8+x];
-            }
-    v->i_intra16x16_pred_mode = h->mb.i_intra16x16_pred_mode;
-}
-
-void x264_visualize_close( x264_t *h )
-{
-    x264_free(h->visualize);
-}
-
-/* Display visualization (block types, MVs) of the encoded frame */
-/* FIXME: B-type MBs not handled yet properly */
-void x264_visualize_show( x264_t *h )
-{
-    static const stringlist_t mb_types[] =
-    {
-        /* Block types marked as NULL will not be drawn */
-        { I_4x4   , "red" },
-        { I_8x8   , "#ff5640" },
-        { I_16x16 , "#ff8060" },
-        { I_PCM   , "violet" },
-        { P_L0    , "SlateBlue" },
-        { P_8x8   , "blue" },
-        { P_SKIP  , "green" },
-        { B_DIRECT, "yellow" },
-        { B_L0_L0 , "yellow" },
-        { B_L0_L1 , "yellow" },
-        { B_L0_BI , "yellow" },
-        { B_L1_L0 , "yellow" },
-        { B_L1_L1 , "yellow" },
-        { B_L1_BI , "yellow" },
-        { B_BI_L0 , "yellow" },
-        { B_BI_L1 , "yellow" },
-        { B_BI_BI , "yellow" },
-        { B_8x8   , "yellow" },
-        { B_SKIP  , "yellow" },
-    };
-
-    static const int waitkey = 1;     /* Wait for enter after each frame */
-    static const int drawbox = 1;     /* Draw box around each block */
-    static const int borders = 0;     /* Display extrapolated borders outside frame */
-    static const int zoom = 2;        /* Zoom factor */
-
-    static const int pad = 32;
-    pixel *const frame = h->fdec->plane[0];
-    const int width = h->param.i_width;
-    const int height = h->param.i_height;
-    const int stride = h->fdec->i_stride[0];
-
-    if( borders )
-        disp_gray_zoom( 0, frame - pad*stride - pad, width+2*pad, height+2*pad, stride, "fdec", zoom );
-    else
-        disp_gray_zoom( 0, frame, width, height, stride, "fdec", zoom );
-
-    for( int mb_xy = 0; mb_xy < h->mb.i_mb_width * h->mb.i_mb_height; mb_xy++ )
-    {
-        visualize_t *const v = (visualize_t*)h->visualize + mb_xy;
-        const int mb_y = mb_xy / h->mb.i_mb_width;
-        const int mb_x = mb_xy % h->mb.i_mb_width;
-        char *const col = GET_STRING( mb_types, v->i_type );
-        int x = mb_x*16*zoom;
-        int y = mb_y*16*zoom;
-        int l = 0;
-
-        if( !col )
-            continue;
-
-        if( borders )
-        {
-            x += pad*zoom;
-            y += pad*zoom;
-        }
-
-        disp_setcolor( col );
-        if( drawbox ) disp_rect( 0, x, y, x+16*zoom-1, y+16*zoom-1 );
-
-        if( v->i_type==P_L0 || v->i_type==P_8x8 || v->i_type==P_SKIP )
-        {
-            /* Predicted (inter) mode, with motion vector */
-            if( v->i_partition == D_16x16 || v->i_type == P_SKIP )
-                mv( x+8*zoom, y+8*zoom, v->mv[l][0][0], v->ref[l][0][0], zoom, col );
-            else if (v->i_partition == D_16x8)
-            {
-                if( drawbox ) disp_rect( 0, x, y, x+16*zoom, y+8*zoom );
-                mv( x+8*zoom, y+4*zoom, v->mv[l][0][0], v->ref[l][0][0], zoom, col );
-                if( drawbox ) disp_rect( 0, x, y+8*zoom, x+16*zoom, y+16*zoom );
-                mv( x+8*zoom, y+12*zoom, v->mv[l][2][0], v->ref[l][2][0], zoom, col );
-            }
-            else if( v->i_partition==D_8x16 )
-            {
-                if( drawbox ) disp_rect( 0, x,          y, x+8*zoom,  y+16*zoom );
-                mv( x+4*zoom, y+8*zoom, v->mv[l][0][0], v->ref[l][0][0], zoom, col );
-                if( drawbox ) disp_rect( 0, x+8*zoom,   y, x+16*zoom, y+16*zoom );
-                mv( x+12*zoom, y+8*zoom, v->mv[l][0][2], v->ref[l][0][2], zoom, col );
-            }
-            else if( v->i_partition==D_8x8 )
-            {
-                for( int i = 0; i < 2; i++ )
-                    for( int j = 0; j < 2; j++ )
-                    {
-                        int sp = v->i_sub_partition[i*2+j];
-                        const int x0 = x + j*8*zoom;
-                        const int y0 = y + i*8*zoom;
-                        l = x264_mb_partition_listX_table[0][sp] ? 0 : 1; /* FIXME: not tested if this works */
-                        if( IS_SUB8x8(sp) )
-                        {
-                            if( drawbox ) disp_rect( 0, x0, y0, x0+8*zoom, y0+8*zoom );
-                            mv( x0+4*zoom, y0+4*zoom, v->mv[l][2*i][2*j], v->ref[l][2*i][2*j], zoom, col );
-                        }
-                        else if( IS_SUB8x4(sp) )
-                        {
-                            if( drawbox ) disp_rect( 0, x0, y0, x0+8*zoom, y0+4*zoom );
-                            if( drawbox ) disp_rect( 0, x0, y0+4*zoom, x0+8*zoom, y0+8*zoom );
-                            mv( x0+4*zoom, y0+2*zoom, v->mv[l][2*i][2*j], v->ref[l][2*i][2*j], zoom, col );
-                            mv( x0+4*zoom, y0+6*zoom, v->mv[l][2*i+1][2*j], v->ref[l][2*i+1][2*j], zoom, col );
-                        }
-                        else if( IS_SUB4x8(sp) )
-                        {
-                            if( drawbox ) disp_rect( 0, x0, y0, x0+4*zoom, y0+8*zoom );
-                            if( drawbox ) disp_rect( 0, x0+4*zoom, y0, x0+8*zoom, y0+8*zoom );
-                            mv( x0+2*zoom, y0+4*zoom, v->mv[l][2*i][2*j], v->ref[l][2*i][2*j], zoom, col );
-                            mv( x0+6*zoom, y0+4*zoom, v->mv[l][2*i][2*j+1], v->ref[l][2*i][2*j+1], zoom, col );
-                        }
-                        else if( IS_SUB4x4(sp) )
-                        {
-                            if( drawbox ) disp_rect( 0, x0, y0, x0+4*zoom, y0+4*zoom );
-                            if( drawbox ) disp_rect( 0, x0+4*zoom, y0, x0+8*zoom, y0+4*zoom );
-                            if( drawbox ) disp_rect( 0, x0, y0+4*zoom, x0+4*zoom, y0+8*zoom );
-                            if( drawbox ) disp_rect( 0, x0+4*zoom, y0+4*zoom, x0+8*zoom, y0+8*zoom );
-                            mv( x0+2*zoom, y0+2*zoom, v->mv[l][2*i][2*j], v->ref[l][2*i][2*j], zoom, col );
-                            mv( x0+6*zoom, y0+2*zoom, v->mv[l][2*i][2*j+1], v->ref[l][2*i][2*j+1], zoom, col );
-                            mv( x0+2*zoom, y0+6*zoom, v->mv[l][2*i+1][2*j], v->ref[l][2*i+1][2*j], zoom, col );
-                            mv( x0+6*zoom, y0+6*zoom, v->mv[l][2*i+1][2*j+1], v->ref[l][2*i+1][2*j+1], zoom, col );
-                        }
-                    }
-            }
-        }
-
-        if( IS_INTRA(v->i_type) || v->i_type == I_PCM )
-        {
-            /* Intra coded */
-            if( v->i_type == I_16x16 )
-            {
-                switch (v->i_intra16x16_pred_mode) {
-                case I_PRED_16x16_V:
-                    disp_line( 0, x+2*zoom, y+2*zoom, x+14*zoom, y+2*zoom );
-                    break;
-                case I_PRED_16x16_H:
-                    disp_line( 0, x+2*zoom, y+2*zoom, x+2*zoom, y+14*zoom );
-                    break;
-                case I_PRED_16x16_DC:
-                case I_PRED_16x16_DC_LEFT:
-                case I_PRED_16x16_DC_TOP:
-                case I_PRED_16x16_DC_128:
-                    disp_line( 0, x+2*zoom, y+2*zoom, x+14*zoom, y+2*zoom );
-                    disp_line( 0, x+2*zoom, y+2*zoom, x+2*zoom, y+14*zoom );
-                    break;
-                case I_PRED_16x16_P:
-                    disp_line( 0, x+2*zoom, y+2*zoom, x+8*zoom, y+8*zoom );
-                    break;
-                }
-            }
-            if( v->i_type==I_4x4 || v->i_type==I_8x8 )
-            {
-                const int di = v->i_type == I_8x8 ? 2 : 1;
-                const int zoom2 = zoom * di;
-                for( int i = 0; i < 4; i += di )
-                    for( int j = 0; j < 4; j += di )
-                    {
-                        const int x0 = x + j*4*zoom;
-                        const int y0 = y + i*4*zoom;
-                        if( drawbox ) disp_rect( 0, x0, y0, x0+4*zoom2, y0+4*zoom2 );
-                        switch( v->intra4x4_pred_mode[i][j] )
-                        {
-                            case I_PRED_4x4_V:        /* Vertical */
-                                disp_line( 0, x0+0*zoom2, y0+1*zoom2, x0+4*zoom2, y0+1*zoom2 );
-                                break;
-                            case I_PRED_4x4_H:        /* Horizontal */
-                                disp_line( 0, x0+1*zoom2, y0+0*zoom2, x0+1*zoom2, y0+4*zoom2 );
-                                break;
-                            case I_PRED_4x4_DC:        /* DC, average from top and left sides */
-                            case I_PRED_4x4_DC_LEFT:
-                            case I_PRED_4x4_DC_TOP:
-                            case I_PRED_4x4_DC_128:
-                                disp_line( 0, x0+1*zoom2, y0+1*zoom2, x0+4*zoom2, y0+1*zoom2 );
-                                disp_line( 0, x0+1*zoom2, y0+1*zoom2, x0+1*zoom2, y0+4*zoom2 );
-                                break;
-                            case I_PRED_4x4_DDL:    /* Topright-bottomleft */
-                                disp_line( 0, x0+0*zoom2, y0+0*zoom2, x0+4*zoom2, y0+4*zoom2 );
-                                break;
-                            case I_PRED_4x4_DDR:    /* Topleft-bottomright */
-                                disp_line( 0, x0+0*zoom2, y0+4*zoom2, x0+4*zoom2, y0+0*zoom2 );
-                                break;
-                            case I_PRED_4x4_VR:        /* Mix of topleft-bottomright and vertical */
-                                disp_line( 0, x0+0*zoom2, y0+2*zoom2, x0+4*zoom2, y0+1*zoom2 );
-                                break;
-                            case I_PRED_4x4_HD:        /* Mix of topleft-bottomright and horizontal */
-                                disp_line( 0, x0+2*zoom2, y0+0*zoom2, x0+1*zoom2, y0+4*zoom2 );
-                                break;
-                            case I_PRED_4x4_VL:        /* Mix of topright-bottomleft and vertical */
-                                disp_line( 0, x0+0*zoom2, y0+1*zoom2, x0+4*zoom2, y0+2*zoom2 );
-                                break;
-                            case I_PRED_4x4_HU:        /* Mix of topright-bottomleft and horizontal */
-                                disp_line( 0, x0+1*zoom2, y0+0*zoom2, x0+2*zoom2, y0+4*zoom2 );
-                                break;
-                        }
-                    }
-            }
-        }
-    }
-
-    disp_sync();
-    if( waitkey )
-        getchar();
-}
-/* }}} */
-
-//EOF

 
@@ -1,341 +0,0 @@
-/*****************************************************************************
- * visualize.c: visualization
- *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
- *
- * Authors: Tuukka Toivonen <tuukkat@ee.oulu.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
- *
- * This program is also available under a commercial proprietary license.
- * For more information, contact us at licensing@x264.com.
- *****************************************************************************/
-
-/*
- * Some explanation of the symbols used:
- * Red/pink: intra block
- * Blue: inter block
- * Green: skip block
- * Yellow: B-block (not visualized properly yet)
- *
- * Motion vectors have black dot at their target (ie. at the MB center),
- * instead of arrowhead. The black dot is enclosed in filled diamond with radius
- * depending on reference frame number (one frame back = zero width, normal case).
- *
- * The intra blocks have generally lines drawn perpendicular
- * to the prediction direction, so for example, if there is a pink block
- * with horizontal line at the top of it, it is interpolated by assuming
- * luma to be vertically constant.
- * DC predicted blocks have both horizontal and vertical lines,
- * pink blocks with a diagonal line are predicted using the planar function.
- */
-
-#include "common.h"
-#include "visualize.h"
-#include "display.h"
-
-typedef struct /* Per-macroblock snapshot filled by x264_visualize_mb() and drawn by x264_visualize_show(). */
-{
-    int     i_type;                        /* copy of h->mb.i_type */
-    int     i_partition;                   /* copy of h->mb.i_partition */
-    int     i_sub_partition[4];            /* copy of h->mb.i_sub_partition[0..3] */
-    int     i_intra16x16_pred_mode;        /* copy of h->mb.i_intra16x16_pred_mode */
-    int     intra4x4_pred_mode[4][4];      /* [y][x], copied from h->mb.cache.intra4x4_pred_mode */
-    int8_t  ref[2][4][4];                  /* [list][y][x] */
-    int16_t mv[2][4][4][2];                /* [list][y][x][mvxy] */
-} visualize_t;
-
-/* Return string from stringlist corresponding to the given code.
- * The entry count is computed with sizeof, so `sl` must be an actual array
- * (not a pointer). The lookup itself is done by get_string(), which yields
- * "?" when no entry matches. */
-#define GET_STRING(sl, code) get_string((sl), sizeof(sl)/sizeof(*(sl)), code)
-
-typedef struct /* One code -> string table entry, searched by get_string(). */
-{
-    int code;       /* key compared against the requested code */
-    char *string;   /* value returned when the key matches */
-} stringlist_t;
-
-/* Scan the first `entries` elements of `sl` in order and return the string of
- * the first element whose code equals `code`; return "?" if none matches. */
-static char *get_string( const stringlist_t *sl, int entries, int code )
-{
-    int idx = 0;
-    while( idx < entries )
-    {
-        const stringlist_t *entry = &sl[idx];
-        if( entry->code == code )
-            return entry->string;
-        idx++;
-    }
-    return "?";
-}
-
-/* Plot one motion vector starting at (x0, y0).
- * dmv holds the x/y components, ref selects how many diamond outlines are
- * drawn around the start point, zoom scales the vector, and col is the color
- * restored once the black start point has been drawn. */
-static void mv( int x0, int y0, int16_t dmv[2], int ref, int zoom, char *col )
-{
-    /* Scale each component by zoom, add 2 and shift right by 2. */
-    const int dx = (dmv[0] * zoom + 2) >> 2;
-    const int dy = (dmv[1] * zoom + 2) >> 2;
-
-    disp_line( 0, x0, y0, x0+dx, y0+dy );
-
-    /* One diamond outline for every radius in 1..ref-1. */
-    int radius = 1;
-    while( radius < ref )
-    {
-        disp_line( 0, x0       , y0-radius, x0+radius, y0        );
-        disp_line( 0, x0+radius, y0       , x0       , y0+radius );
-        disp_line( 0, x0       , y0+radius, x0-radius, y0        );
-        disp_line( 0, x0-radius, y0       , x0       , y0-radius );
-        radius++;
-    }
-
-    /* Black dot at the start point, then switch back to the caller's color. */
-    disp_setcolor( "black" );
-    disp_point( 0, x0, y0 );
-    disp_setcolor( col );
-}
-/* Allocate h->visualize with room for one visualize_t per macroblock
- * (h->mb.i_mb_width * h->mb.i_mb_height entries).
- * Returns 0 on success and -1 through the fail label.
- * NOTE(review): CHECKED_MALLOC is defined elsewhere; presumably it jumps to
- * `fail` when the allocation fails -- confirm against its definition. */
-int x264_visualize_init( x264_t *h )
-{
-    CHECKED_MALLOC( h->visualize, h->mb.i_mb_width * h->mb.i_mb_height * sizeof(visualize_t) );
-    return 0;
-fail:
-    return -1;
-}
-
-/* Record the current macroblock's type, partitioning, intra prediction modes,
- * motion vectors and reference indices into the h->visualize entry selected
- * by h->mb.i_mb_xy, so x264_visualize_show() can draw them later. */
-void x264_visualize_mb( x264_t *h )
-{
-    visualize_t *const dst = &((visualize_t*)h->visualize)[h->mb.i_mb_xy];
-
-    /* Scalar macroblock properties. */
-    dst->i_type = h->mb.i_type;
-    dst->i_partition = h->mb.i_partition;
-    dst->i_intra16x16_pred_mode = h->mb.i_intra16x16_pred_mode;
-
-    for( int part = 0; part < 4; part++ )
-        dst->i_sub_partition[part] = h->mb.i_sub_partition[part];
-
-    /* Per-4x4-block data, read from the cache arrays at X264_SCAN8_0+row*8+col. */
-    for( int row = 0; row < 4; row++ )
-        for( int col = 0; col < 4; col++ )
-        {
-            dst->intra4x4_pred_mode[row][col] = h->mb.cache.intra4x4_pred_mode[X264_SCAN8_0+row*8+col];
-            for( int list = 0; list < 2; list++ )
-            {
-                dst->mv[list][row][col][0] = h->mb.cache.mv[list][X264_SCAN8_0+row*8+col][0];
-                dst->mv[list][row][col][1] = h->mb.cache.mv[list][X264_SCAN8_0+row*8+col][1];
-                dst->ref[list][row][col] = h->mb.cache.ref[list][X264_SCAN8_0+row*8+col];
-            }
-        }
-}
-/* Release the buffer stored in h->visualize by passing it to x264_free(). */
-void x264_visualize_close( x264_t *h )
-{
-    x264_free(h->visualize);
-}
-
-/* Display visualization (block types, MVs) of the encoded frame.
- * Shows h->fdec->plane[0] zoomed via disp_gray_zoom(), then walks every
- * visualize_t entry in h->visualize and overlays, per macroblock, a colored
- * box, motion vectors for P_L0/P_8x8/P_SKIP blocks and prediction-direction
- * lines for intra blocks. Ends with disp_sync() and, when waitkey is set,
- * blocks in getchar() until input arrives on stdin. */
-/* FIXME: B-type MBs not handled yet properly */
-void x264_visualize_show( x264_t *h )
-{
-    static const stringlist_t mb_types[] =
-    {
-        /* Block types marked as NULL will not be drawn.
-         * Types missing from this table are still drawn: get_string()
-         * returns "?" for them, not NULL. */
-        { I_4x4   , "red" },
-        { I_8x8   , "#ff5640" },
-        { I_16x16 , "#ff8060" },
-        { I_PCM   , "violet" },
-        { P_L0    , "SlateBlue" },
-        { P_8x8   , "blue" },
-        { P_SKIP  , "green" },
-        { B_DIRECT, "yellow" },
-        { B_L0_L0 , "yellow" },
-        { B_L0_L1 , "yellow" },
-        { B_L0_BI , "yellow" },
-        { B_L1_L0 , "yellow" },
-        { B_L1_L1 , "yellow" },
-        { B_L1_BI , "yellow" },
-        { B_BI_L0 , "yellow" },
-        { B_BI_L1 , "yellow" },
-        { B_BI_BI , "yellow" },
-        { B_8x8   , "yellow" },
-        { B_SKIP  , "yellow" },
-    };
-
-    static const int waitkey = 1;     /* Wait for enter after each frame */
-    static const int drawbox = 1;     /* Draw box around each block */
-    static const int borders = 0;     /* Display extrapolated borders outside frame */
-    static const int zoom = 2;        /* Zoom factor */
-
-    static const int pad = 32;        /* Border width shown on each side when borders is set */
-    pixel *const frame = h->fdec->plane[0];
-    const int width = h->param.i_width;
-    const int height = h->param.i_height;
-    const int stride = h->fdec->i_stride[0];
-
-    /* With borders, start pad rows above and pad columns left of the frame origin. */
-    if( borders )
-        disp_gray_zoom( 0, frame - pad*stride - pad, width+2*pad, height+2*pad, stride, "fdec", zoom );
-    else
-        disp_gray_zoom( 0, frame, width, height, stride, "fdec", zoom );
-
-    for( int mb_xy = 0; mb_xy < h->mb.i_mb_width * h->mb.i_mb_height; mb_xy++ )
-    {
-        visualize_t *const v = (visualize_t*)h->visualize + mb_xy;
-        const int mb_y = mb_xy / h->mb.i_mb_width;
-        const int mb_x = mb_xy % h->mb.i_mb_width;
-        char *const col = GET_STRING( mb_types, v->i_type );
-        int x = mb_x*16*zoom;   /* top-left corner of this 16x16 MB in zoomed coordinates */
-        int y = mb_y*16*zoom;
-        int l = 0;              /* list index used for the mv/ref lookups below */
-
-        if( !col )
-            continue;
-
-        if( borders )
-        {
-            x += pad*zoom;
-            y += pad*zoom;
-        }
-
-        disp_setcolor( col );
-        if( drawbox ) disp_rect( 0, x, y, x+16*zoom-1, y+16*zoom-1 );
-
-        if( v->i_type==P_L0 || v->i_type==P_8x8 || v->i_type==P_SKIP )
-        {
-            /* Predicted (inter) mode, with motion vector */
-            if( v->i_partition == D_16x16 || v->i_type == P_SKIP )
-                mv( x+8*zoom, y+8*zoom, v->mv[l][0][0], v->ref[l][0][0], zoom, col );
-            else if (v->i_partition == D_16x8)
-            {
-                /* Top and bottom halves: one vector at the center of each */
-                if( drawbox ) disp_rect( 0, x, y, x+16*zoom, y+8*zoom );
-                mv( x+8*zoom, y+4*zoom, v->mv[l][0][0], v->ref[l][0][0], zoom, col );
-                if( drawbox ) disp_rect( 0, x, y+8*zoom, x+16*zoom, y+16*zoom );
-                mv( x+8*zoom, y+12*zoom, v->mv[l][2][0], v->ref[l][2][0], zoom, col );
-            }
-            else if( v->i_partition==D_8x16 )
-            {
-                /* Left and right halves: one vector at the center of each */
-                if( drawbox ) disp_rect( 0, x,          y, x+8*zoom,  y+16*zoom );
-                mv( x+4*zoom, y+8*zoom, v->mv[l][0][0], v->ref[l][0][0], zoom, col );
-                if( drawbox ) disp_rect( 0, x+8*zoom,   y, x+16*zoom, y+16*zoom );
-                mv( x+12*zoom, y+8*zoom, v->mv[l][0][2], v->ref[l][0][2], zoom, col );
-            }
-            else if( v->i_partition==D_8x8 )
-            {
-                /* Four 8x8 quadrants (i = row, j = column), each with its own sub-partition */
-                for( int i = 0; i < 2; i++ )
-                    for( int j = 0; j < 2; j++ )
-                    {
-                        int sp = v->i_sub_partition[i*2+j];
-                        const int x0 = x + j*8*zoom;
-                        const int y0 = y + i*8*zoom;
-                        l = x264_mb_partition_listX_table[0][sp] ? 0 : 1; /* FIXME: not tested if this works */
-                        if( IS_SUB8x8(sp) )
-                        {
-                            if( drawbox ) disp_rect( 0, x0, y0, x0+8*zoom, y0+8*zoom );
-                            mv( x0+4*zoom, y0+4*zoom, v->mv[l][2*i][2*j], v->ref[l][2*i][2*j], zoom, col );
-                        }
-                        else if( IS_SUB8x4(sp) )
-                        {
-                            if( drawbox ) disp_rect( 0, x0, y0, x0+8*zoom, y0+4*zoom );
-                            if( drawbox ) disp_rect( 0, x0, y0+4*zoom, x0+8*zoom, y0+8*zoom );
-                            mv( x0+4*zoom, y0+2*zoom, v->mv[l][2*i][2*j], v->ref[l][2*i][2*j], zoom, col );
-                            mv( x0+4*zoom, y0+6*zoom, v->mv[l][2*i+1][2*j], v->ref[l][2*i+1][2*j], zoom, col );
-                        }
-                        else if( IS_SUB4x8(sp) )
-                        {
-                            if( drawbox ) disp_rect( 0, x0, y0, x0+4*zoom, y0+8*zoom );
-                            if( drawbox ) disp_rect( 0, x0+4*zoom, y0, x0+8*zoom, y0+8*zoom );
-                            mv( x0+2*zoom, y0+4*zoom, v->mv[l][2*i][2*j], v->ref[l][2*i][2*j], zoom, col );
-                            mv( x0+6*zoom, y0+4*zoom, v->mv[l][2*i][2*j+1], v->ref[l][2*i][2*j+1], zoom, col );
-                        }
-                        else if( IS_SUB4x4(sp) )
-                        {
-                            if( drawbox ) disp_rect( 0, x0, y0, x0+4*zoom, y0+4*zoom );
-                            if( drawbox ) disp_rect( 0, x0+4*zoom, y0, x0+8*zoom, y0+4*zoom );
-                            if( drawbox ) disp_rect( 0, x0, y0+4*zoom, x0+4*zoom, y0+8*zoom );
-                            if( drawbox ) disp_rect( 0, x0+4*zoom, y0+4*zoom, x0+8*zoom, y0+8*zoom );
-                            mv( x0+2*zoom, y0+2*zoom, v->mv[l][2*i][2*j], v->ref[l][2*i][2*j], zoom, col );
-                            mv( x0+6*zoom, y0+2*zoom, v->mv[l][2*i][2*j+1], v->ref[l][2*i][2*j+1], zoom, col );
-                            mv( x0+2*zoom, y0+6*zoom, v->mv[l][2*i+1][2*j], v->ref[l][2*i+1][2*j], zoom, col );
-                            mv( x0+6*zoom, y0+6*zoom, v->mv[l][2*i+1][2*j+1], v->ref[l][2*i+1][2*j+1], zoom, col );
-                        }
-                    }
-            }
-        }
-
-        if( IS_INTRA(v->i_type) || v->i_type == I_PCM )
-        {
-            /* Intra coded */
-            if( v->i_type == I_16x16 )
-            {
-                switch (v->i_intra16x16_pred_mode) {
-                case I_PRED_16x16_V:
-                    disp_line( 0, x+2*zoom, y+2*zoom, x+14*zoom, y+2*zoom );
-                    break;
-                case I_PRED_16x16_H:
-                    disp_line( 0, x+2*zoom, y+2*zoom, x+2*zoom, y+14*zoom );
-                    break;
-                case I_PRED_16x16_DC:
-                case I_PRED_16x16_DC_LEFT:
-                case I_PRED_16x16_DC_TOP:
-                case I_PRED_16x16_DC_128:
-                    disp_line( 0, x+2*zoom, y+2*zoom, x+14*zoom, y+2*zoom );
-                    disp_line( 0, x+2*zoom, y+2*zoom, x+2*zoom, y+14*zoom );
-                    break;
-                case I_PRED_16x16_P:
-                    disp_line( 0, x+2*zoom, y+2*zoom, x+8*zoom, y+8*zoom );
-                    break;
-                }
-            }
-            if( v->i_type==I_4x4 || v->i_type==I_8x8 )
-            {
-                /* I_8x8 steps two 4x4 cells at a time and doubles the drawing scale */
-                const int di = v->i_type == I_8x8 ? 2 : 1;
-                const int zoom2 = zoom * di;
-                for( int i = 0; i < 4; i += di )
-                    for( int j = 0; j < 4; j += di )
-                    {
-                        const int x0 = x + j*4*zoom;
-                        const int y0 = y + i*4*zoom;
-                        if( drawbox ) disp_rect( 0, x0, y0, x0+4*zoom2, y0+4*zoom2 );
-                        switch( v->intra4x4_pred_mode[i][j] )
-                        {
-                            case I_PRED_4x4_V:        /* Vertical */
-                                disp_line( 0, x0+0*zoom2, y0+1*zoom2, x0+4*zoom2, y0+1*zoom2 );
-                                break;
-                            case I_PRED_4x4_H:        /* Horizontal */
-                                disp_line( 0, x0+1*zoom2, y0+0*zoom2, x0+1*zoom2, y0+4*zoom2 );
-                                break;
-                            case I_PRED_4x4_DC:        /* DC, average from top and left sides */
-                            case I_PRED_4x4_DC_LEFT:
-                            case I_PRED_4x4_DC_TOP:
-                            case I_PRED_4x4_DC_128:
-                                disp_line( 0, x0+1*zoom2, y0+1*zoom2, x0+4*zoom2, y0+1*zoom2 );
-                                disp_line( 0, x0+1*zoom2, y0+1*zoom2, x0+1*zoom2, y0+4*zoom2 );
-                                break;
-                            case I_PRED_4x4_DDL:    /* Topright-bottomleft */
-                                disp_line( 0, x0+0*zoom2, y0+0*zoom2, x0+4*zoom2, y0+4*zoom2 );
-                                break;
-                            case I_PRED_4x4_DDR:    /* Topleft-bottomright */
-                                disp_line( 0, x0+0*zoom2, y0+4*zoom2, x0+4*zoom2, y0+0*zoom2 );
-                                break;
-                            case I_PRED_4x4_VR:        /* Mix of topleft-bottomright and vertical */
-                                disp_line( 0, x0+0*zoom2, y0+2*zoom2, x0+4*zoom2, y0+1*zoom2 );
-                                break;
-                            case I_PRED_4x4_HD:        /* Mix of topleft-bottomright and horizontal */
-                                disp_line( 0, x0+2*zoom2, y0+0*zoom2, x0+1*zoom2, y0+4*zoom2 );
-                                break;
-                            case I_PRED_4x4_VL:        /* Mix of topright-bottomleft and vertical */
-                                disp_line( 0, x0+0*zoom2, y0+1*zoom2, x0+4*zoom2, y0+2*zoom2 );
-                                break;
-                            case I_PRED_4x4_HU:        /* Mix of topright-bottomleft and horizontal */
-                                disp_line( 0, x0+1*zoom2, y0+0*zoom2, x0+2*zoom2, y0+4*zoom2 );
-                                break;
-                        }
-                    }
-            }
-        }
-    }
-
-    disp_sync();
-    if( waitkey )
-        getchar();
-}
-/* }}} */
-
-//EOF
​

x264-snapshot-20130723-2245.tar.bz2/common/visualize.h Deleted

@@ -1,36 +0,0 @@
-/*****************************************************************************
- * visualize.h: visualization
- *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
- *
- * Authors: Tuukka Toivonen <tuukkat@ee.oulu.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
- *
- * This program is also available under a commercial proprietary license.
- * For more information, contact us at licensing@x264.com.
- *****************************************************************************/
-
-#ifndef X264_VISUALIZE_H
-#define X264_VISUALIZE_H
-
-#include "common/common.h"
-
-int  x264_visualize_init( x264_t *h );   /* allocate h->visualize; 0 on success, -1 on failure */
-void x264_visualize_mb( x264_t *h );     /* record the current macroblock's data for later drawing */
-void x264_visualize_show( x264_t *h );   /* draw the recorded data over the decoded frame */
-void x264_visualize_close( x264_t *h );  /* free h->visualize */
-
-#endif

 
@@ -1,36 +0,0 @@
-/*****************************************************************************
- * visualize.h: visualization
- *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
- *
- * Authors: Tuukka Toivonen <tuukkat@ee.oulu.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
- *
- * This program is also available under a commercial proprietary license.
- * For more information, contact us at licensing@x264.com.
- *****************************************************************************/
-
-#ifndef X264_VISUALIZE_H
-#define X264_VISUALIZE_H
-
-#include "common/common.h"
-
-int  x264_visualize_init( x264_t *h );   /* allocate h->visualize; 0 on success, -1 on failure */
-void x264_visualize_mb( x264_t *h );     /* record the current macroblock's data for later drawing */
-void x264_visualize_show( x264_t *h );   /* draw the recorded data over the decoded frame */
-void x264_visualize_close( x264_t *h );  /* free h->visualize */
-
-#endif
​

x264-snapshot-20130723-2245.tar.bz2/tools/xyuv.c Deleted

@@ -1,792 +0,0 @@
-/*****************************************************************************
- * xyuv.c: an SDL YUV 4:2:0 planar viewer.
- *****************************************************************************
- * Copyright (C) 2004 Laurent Aimar <fenrir@via.ecp.fr>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
- *****************************************************************************/
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <stdint.h>
-
-#include <SDL/SDL.h>
-
-#define YUV_MAX 20 /* capacity of the xyuv_t.yuv[] file table */
-#define SDL_TITLE "xyuv: %s - %d/%d - %.2ffps" /* printf-style format: one string, two ints, one float */
-typedef struct /* Complete viewer state: input geometry, options, opened files and SDL objects. */
-{
-    /* globals */
-    int     i_width;        /* frame width: -w/-s, else taken from the file name or xyuv_detect() */
-    int     i_height;       /* frame height: -h/-s, else taken from the file name or xyuv_detect() */
-    int     i_frame_size;   /* 3 * i_width * i_height / 2, computed in main() */
-    int     i_frame;        /* frame index handed to xyuv_display() */
-    int     i_frames;       /* 0 makes main() report "no frames to display" */
-    float   f_fps;          /* -f/--fps */
-
-    float   f_y;            /* -y ("Y factor" in the help text) */
-
-    int     b_pause;        /* when set, the main loop skips xyuv_display() */
-    int     b_grid;         /* -g/--grid */
-    int     b_split;        /* -S/--split */
-    int     b_diff;         /* -d/--diff; main() requires exactly 2 files */
-    int     i_join;         /* -j/--join */
-
-    /* Constructed picture */
-    int     i_wall_width;   /* in picture count */
-
-    /* YUV files */
-    int     i_yuv;          /* number of files successfully opened into yuv[] */
-    struct
-    {
-        char    *name;      /* strdup() of the command-line argument */
-        FILE    *f;         /* handles */
-        int     i_frames;   /* frames count */
-
-        /* Position in the whole picture */
-        int     x, y;
-    } yuv[YUV_MAX];
-
-    /* SDL */
-    int i_sdl_width;        /* size passed to SDL_SetVideoMode() and SDL_CreateYUVOverlay() */
-    int i_sdl_height;
-
-    int i_display_width;    /* initialized to i_sdl_width / i_sdl_height in main() */
-    int i_display_height;
-    char *title;
-
-    SDL_Surface *screen;    /* result of SDL_SetVideoMode() */
-    SDL_Overlay *overlay;   /* result of SDL_CreateYUVOverlay() */
-
-    /* Buffer of i_frame_size bytes allocated in main() */
-    uint8_t *pic;
-
-} xyuv_t;
-
-xyuv_t xyuv = { /* Global viewer state with its defaults; members not listed here start out zero. */
-    .i_width = 0,           /* 0 = not given; main() then tries the file name and xyuv_detect() */
-    .i_height = 0,
-    .i_frame  = 1,
-    .i_frames = 0,
-    .f_fps = 25.0,
-    .f_y = 0.0,
-    .i_wall_width = 0,      /* 0 = main() picks the smallest n with n*n >= i_yuv */
-
-    .i_yuv = 0,
-
-    .b_pause = 0,
-    .b_split = 0,
-    .b_diff = 0,
-    .i_join = -1,           /* NOTE(review): presumably "join not requested" -- confirm against main() */
-
-    .title = NULL,
-    .pic = NULL,
-};
-
-/* Print the command-line syntax and option summary to stderr.
- * The long option names listed here must match the ones main() parses;
- * in particular the join option is "--join". */
-static void help( void )
-{
-    fprintf( stderr,
-             "Syntax: xyuv [options] file [file2 ...]\n"
-             "\n"
-             "      --help                  Print this help\n"
-             "\n"
-             "  -s, --size <WIDTHxHEIGHT>   Set input size\n"
-             "  -w, --width <integer>       Set width\n"
-             "  -h, --height <integer>      Set height\n"
-             "\n"
-             "  -S, --split                 Show split Y/U/V planes\n"
-             "  -d, --diff                  Show difference (only 2 files) in split mode\n"
-             "  -j, --join <integer>        Join mode (only 2 files), range [1, width-1]\n"
-             "\n"
-             "  -y <float>                  Set Y factor\n"
-             "\n"
-             "  -g, --grid                  Show a grid (macroblock 16x16)\n"
-             "  -W <integer>                Set wall width (in picture count)\n"
-             "  -f, --fps <float>           Set fps\n"
-             "\n" );
-}
-
-static void xyuv_count_frames( xyuv_t *xyuv );              /* called by main() once i_frame_size is known; presumably fills the i_frames counters -- TODO confirm */
-static void xyuv_detect( int *pi_width, int *pi_height );   /* called by main() when width/height are still 0; writes through both pointers */
-static void xyuv_display( xyuv_t *xyuv, int i_frame );      /* called from the main loop with the current frame index while not paused */
-
-int main( int argc, char **argv )
-{
-    int i;
-
-    /* Parse commande line */
-    for( i = 1; i < argc; i++ ) {
-        if( !strcasecmp( argv[i], "--help" ) ) {
-            help();
-            return 0;
-        }
-        if( !strcmp( argv[i], "-d" ) || !strcasecmp( argv[i], "--diff" ) ) {
-            xyuv.b_diff = 1;
-        } else if( !strcmp( argv[i], "-S" ) || !strcasecmp( argv[i], "--split" ) ) {
-            xyuv.b_split = 1;
-        } else if( !strcmp( argv[i], "-f" ) || !strcasecmp( argv[i], "--fps" ) ) {
-            if( i >= argc -1 ) goto err_missing_arg;
-            xyuv.f_fps = atof( argv[++i] );
-        } else if( !strcmp( argv[i], "-h" ) || !strcasecmp( argv[i], "--height" ) ) {
-            if( i >= argc -1 ) goto err_missing_arg;
-            xyuv.i_height = atoi( argv[++i] );
-        } else if( !strcmp( argv[i], "-w" ) || !strcasecmp( argv[i], "--width" ) ) {
-            if( i >= argc -1 ) goto err_missing_arg;
-            xyuv.i_width = atoi( argv[++i] );
-        } else if( !strcmp( argv[i], "-s" ) || !strcasecmp( argv[i], "--size" ) ) {
-            char *p;
-
-            if( i >= argc -1 ) goto err_missing_arg;
-
-            xyuv.i_width = strtol( argv[++i], &p, 0 );
-            p++;
-            xyuv.i_height = atoi( p );
-        } else if( !strcmp( argv[i], "-W" ) ) {
-            if( i >= argc -1 ) goto err_missing_arg;
-            xyuv.i_wall_width = atoi( argv[++i] );
-        } else if( !strcmp( argv[i], "-y" ) ) {
-            if( i >= argc -1 ) goto err_missing_arg;
-            xyuv.f_y = atof( argv[++i] );
-        } else if( !strcmp( argv[i], "-j" ) || !strcasecmp( argv[i], "--join" ) ) {
-            if( i >= argc -1 ) goto err_missing_arg;
-            xyuv.i_join = atoi( argv[++i] );
-        } else if( !strcmp( argv[i], "-g" ) || !strcasecmp( argv[i], "--grid" ) ) {
-            xyuv.b_grid = 1;
-        } else {
-            FILE *f = fopen( argv[i], "rb" );
-            if( !f ) {
-                fprintf( stderr, "cannot open YUV %s\n", argv[i] );
-            } else {
-                xyuv.yuv[xyuv.i_yuv].name = strdup( argv[i] );
-                xyuv.yuv[xyuv.i_yuv].f = f;
-                xyuv.yuv[xyuv.i_yuv].i_frames = 0;
-
-                xyuv.i_yuv++;
-            }
-        }
-    }
-
-    if( xyuv.i_yuv == 0 ) {
-        fprintf( stderr, "no file to display\n" );
-        return -1;
-    }
-    if( xyuv.i_width == 0 || xyuv.i_height == 0 ) {
-        char *psz = xyuv.yuv[0].name;
-        char *num;
-        char *x;
-        /* See if we find widthxheight in the file name */
-        for( ;; ) {
-            if( !( x = strchr( psz+1, 'x' ) ) ) {
-                break;
-            }
-            num = x;
-            while( num > psz && num[-1] >= '0' && num[-1] <= '9' )
-                num--;
-
-            if( num != x && x[1] >= '0' && x[1] <= '9' ) {
-                xyuv.i_width = atoi( num );
-                xyuv.i_height = atoi( x+1 );
-                break;
-            }
-            psz = x;
-        }
-        fprintf( stderr, "file name gives %dx%d\n", xyuv.i_width, xyuv.i_height );
-    }
-
-    if( xyuv.i_width == 0 || xyuv.i_height == 0 ) {
-        xyuv_detect( &xyuv.i_width, &xyuv.i_height );
-    }
-
-    if( xyuv.i_width == 0 || xyuv.i_height == 0 ) {
-        fprintf( stderr, "invalid or missing frames size\n" );
-        return -1;
-    }
-    if( xyuv.b_diff && xyuv.i_yuv != 2 ) {
-        fprintf( stderr, "--diff works only with 2 files\n" );
-        return -1;
-    }
-    if( (xyuv.i_join == 0 || xyuv.i_join >= xyuv.i_width) && xyuv.i_yuv != 2 ) {
-        fprintf( stderr, "--join woeks only with two files and range is [1, width-1]\n" );
-        return -1;
-    }
-    if( xyuv.i_join % 2 != 0 ) {
-        if( xyuv.i_join + 1 < xyuv.i_width )
-            xyuv.i_join++;
-        else
-            xyuv.i_join--;
-    }
-
-    /* Now check frames */
-    fprintf( stderr, "displaying :\n" );
-    xyuv.i_frame_size = 3 * xyuv.i_width * xyuv.i_height / 2;
-    xyuv_count_frames( &xyuv );
-    for( i = 0; i < xyuv.i_yuv; i++ ) {
-        fprintf( stderr, " - '%s' : %d frames\n", xyuv.yuv[i].name, xyuv.yuv[i].i_frames );
-    }
-
-    if( xyuv.i_frames == 0 ) {
-        fprintf( stderr, "no frames to display\n" );
-    }
-
-    xyuv.pic = malloc( xyuv.i_frame_size );
-
-    /* calculate SDL view */
-    if( xyuv.i_wall_width > xyuv.i_yuv ) {
-        xyuv.i_wall_width = xyuv.i_yuv;
-    }
-    if( xyuv.i_wall_width == 0 ) {
-        while( xyuv.i_wall_width < xyuv.i_yuv && xyuv.i_wall_width * xyuv.i_wall_width < xyuv.i_yuv ) {
-            xyuv.i_wall_width++;
-        }
-    }
-
-    for( i = 0; i < xyuv.i_yuv; i++ ) {
-        if( xyuv.b_diff || xyuv.i_join > 0 ) {
-            xyuv.yuv[i].x = 0;
-            xyuv.yuv[i].y = 0;
-        } else if( xyuv.b_split ) {
-            xyuv.yuv[i].x = (i%xyuv.i_wall_width) * 3 * xyuv.i_width / 2;
-            xyuv.yuv[i].y = (i/xyuv.i_wall_width) * xyuv.i_height;
-        } else {
-            xyuv.yuv[i].x = (i%xyuv.i_wall_width) * xyuv.i_width;
-            xyuv.yuv[i].y = (i/xyuv.i_wall_width) * xyuv.i_height;
-        }
-    }
-    if( xyuv.b_diff ) {
-        xyuv.i_sdl_width = 3 * xyuv.i_width / 2;
-        xyuv.i_sdl_height= xyuv.i_height;
-    } else if( xyuv.i_join > 0 ) {
-        xyuv.i_sdl_width = xyuv.i_width;
-        xyuv.i_sdl_height= xyuv.i_height;
-    } else if( xyuv.b_split ) {
-        xyuv.i_sdl_width = xyuv.i_wall_width * 3 * xyuv.i_width / 2;
-        xyuv.i_sdl_height= xyuv.i_height * ( ( xyuv.i_yuv  + xyuv.i_wall_width - 1 ) / xyuv.i_wall_width );
-    } else {
-        xyuv.i_sdl_width = xyuv.i_wall_width * xyuv.i_width;
-        xyuv.i_sdl_height= xyuv.i_height * ( ( xyuv.i_yuv  + xyuv.i_wall_width - 1 ) / xyuv.i_wall_width );
-    }
-    xyuv.i_display_width = xyuv.i_sdl_width;
-    xyuv.i_display_height = xyuv.i_sdl_height;
-
-    /* Open SDL */
-    if( SDL_Init( SDL_INIT_EVENTTHREAD|SDL_INIT_NOPARACHUTE|SDL_INIT_VIDEO) ) {
-        fprintf( stderr, "cannot init SDL\n" );
-        return -1;
-    }
-
-    SDL_EnableKeyRepeat(SDL_DEFAULT_REPEAT_DELAY, 100 );
-    SDL_EventState( SDL_KEYUP, SDL_IGNORE );
-
-    xyuv.screen = SDL_SetVideoMode( xyuv.i_sdl_width, xyuv.i_sdl_height, 0,
-                                    SDL_HWSURFACE|SDL_RESIZABLE|
-                                    SDL_ASYNCBLIT|SDL_HWACCEL );
-    if( xyuv.screen == NULL ) {
-        fprintf( stderr, "SDL_SetVideoMode failed\n" );
-        return -1;
-    }
-
-    SDL_LockSurface( xyuv.screen );
-    xyuv.overlay = SDL_CreateYUVOverlay( xyuv.i_sdl_width, xyuv.i_sdl_height,
-                                         SDL_YV12_OVERLAY,
-                                         xyuv.screen );
-    /* reset with black */
-    memset( xyuv.overlay->pixels[0],   0, xyuv.overlay->pitches[0] * xyuv.i_sdl_height );
-    memset( xyuv.overlay->pixels[1], 128, xyuv.overlay->pitches[1] * xyuv.i_sdl_height / 2);
-    memset( xyuv.overlay->pixels[2], 128, xyuv.overlay->pitches[2] * xyuv.i_sdl_height / 2);
-    SDL_UnlockSurface( xyuv.screen );
-
-    if( xyuv.overlay == NULL ) {
-        fprintf( stderr, "recon: SDL_CreateYUVOverlay failed\n" );
-        return -1;
-    }
-
-    for( ;; ) {
-        SDL_Event event;
-        static int b_fullscreen = 0;
-        int64_t i_start = SDL_GetTicks();
-        int i_wait;
-
-        if( !xyuv.b_pause ) {
-            xyuv_display( &xyuv, xyuv.i_frame );
-        }
-
-        for( ;; ) {
-            int b_refresh = 0;
-            while( SDL_PollEvent( &event ) )  {
-                switch( event.type )
-                {
-                    case SDL_QUIT:
-                        if( b_fullscreen )
-                            SDL_WM_ToggleFullScreen( xyuv.screen );
-                        exit( 1 );
-
-                    case SDL_KEYDOWN:
-                        switch( event.key.keysym.sym )
-                        {
-                            case SDLK_q:
-                            case SDLK_ESCAPE:
-                                if( b_fullscreen )
-                                    SDL_WM_ToggleFullScreen( xyuv.screen );
-                                exit(1);
-
-                            case SDLK_f:
-                                if( SDL_WM_ToggleFullScreen( xyuv.screen ) )
-                                    b_fullscreen = 1 - b_fullscreen;
-                                break;
-
-                            case SDLK_g:
-                                if( xyuv.b_grid )
-                                    xyuv.b_grid = 0;
-                                else
-                                    xyuv.b_grid = 1;
-                                if( xyuv.b_pause )
-                                    b_refresh = 1;
-                                break;
-
-                            case SDLK_SPACE:
-                                if( xyuv.b_pause )
-                                    xyuv.b_pause = 0;
-                                else
-                                    xyuv.b_pause = 1;
-                                break;
-                            case SDLK_LEFT:
-                                if( xyuv.i_frame > 1 ) xyuv.i_frame--;
-                                b_refresh = 1;
-                                break;
-
-                            case SDLK_RIGHT:
-                                if( xyuv.i_frame >= xyuv.i_frames )
-                                    xyuv_count_frames( &xyuv );
-                                if( xyuv.i_frame < xyuv.i_frames ) xyuv.i_frame++;
-                                b_refresh = 1;
-                                break;
-
-                            case SDLK_HOME:
-                                xyuv.i_frame = 1;
-                                if( xyuv.b_pause )
-                                    b_refresh = 1;
-                                break;
-
-                            case SDLK_END:
-                                xyuv_count_frames( &xyuv );
-                                xyuv.i_frame = xyuv.i_frames;
-                                b_refresh = 1;
-                                break;
-
-                            case SDLK_UP:
-                                xyuv.i_frame += xyuv.i_frames / 20;
-
-                                if( xyuv.i_frame >= xyuv.i_frames )
-                                    xyuv_count_frames( &xyuv );
-
-                                if( xyuv.i_frame > xyuv.i_frames )
-                                    xyuv.i_frame = xyuv.i_frames;
-                                b_refresh = 1;
-                                break;
-
-                            case SDLK_DOWN:
-                                xyuv.i_frame -= xyuv.i_frames / 20;
-                                if( xyuv.i_frame < 1 )
-                                    xyuv.i_frame = 1;
-                                b_refresh = 1;
-                                break;
-
-                            case SDLK_PAGEUP:
-                                xyuv.i_frame += xyuv.i_frames / 10;
-
-                                if( xyuv.i_frame >= xyuv.i_frames )
-                                    xyuv_count_frames( &xyuv );
-
-                                if( xyuv.i_frame > xyuv.i_frames )
-                                    xyuv.i_frame = xyuv.i_frames;
-                                b_refresh = 1;
-                                break;
-
-                            case SDLK_PAGEDOWN:
-                                xyuv.i_frame -= xyuv.i_frames / 10;
-                                if( xyuv.i_frame < 1 )
-                                    xyuv.i_frame = 1;
-                                b_refresh = 1;
-                                break;
-
-                            default:
-                                break;
-                        }
-                        break;
-                    case SDL_VIDEORESIZE:
-                        xyuv.i_display_width = event.resize.w;
-                        xyuv.i_display_height = event.resize.h;
-                        xyuv.screen = SDL_SetVideoMode( xyuv.i_display_width, xyuv.i_display_height, 0,
-                                                        SDL_HWSURFACE|SDL_RESIZABLE|
-                                                        SDL_ASYNCBLIT|SDL_HWACCEL );
-                        xyuv_display( &xyuv, xyuv.i_frame );
-                        break;
-
-                    default:
-                        break;
-                }
-            }
-            if( b_refresh ) {
-                xyuv.b_pause = 1;
-                xyuv_display( &xyuv, xyuv.i_frame );
-            }
-            /* wait */
-            i_wait = 1000 / xyuv.f_fps - ( SDL_GetTicks() - i_start);
-            if( i_wait < 0 )
-                break;
-            else if( i_wait > 200 )
-                SDL_Delay( 200 );
-            else {
-                SDL_Delay( i_wait );
-                break;
-            }
-        }
-        if( !xyuv.b_pause ) {
-            /* next frame */
-            if( xyuv.i_frame == xyuv.i_frames )
-                    xyuv.b_pause = 1;
-            else if( xyuv.i_frame < xyuv.i_frames )
-                xyuv.i_frame++;
-        }
-    }
-
-
-    return 0;
-
-err_missing_arg:
-    fprintf( stderr, "missing arg for option=%s\n", argv[i] );
-    return -1;
-}
-
-
-static void xyuv_display( xyuv_t *xyuv, int i_frame )
-{
-    SDL_Rect rect;
-    int i_picture = 0;
-    int i;
-
-    if( i_frame > xyuv->i_frames )
-        return;
-
-    xyuv->i_frame = i_frame;
-
-    /* Load and copy pictue data */
-    for( i = 0; i < xyuv->i_yuv; i++ ) {
-        int i_plane;
-
-        fprintf( stderr, "yuv[%d] %d/%d\n", i, i_frame, xyuv->yuv[i].i_frames );
-        if( i_frame - 1 >= xyuv->yuv[i].i_frames ) {
-            xyuv_count_frames( xyuv );
-            if( i_frame - 1 >= xyuv->yuv[i].i_frames )
-                continue;
-        }
-        i_picture++;
-
-        fseek( xyuv->yuv[i].f, (xyuv->i_frame-1) * xyuv->i_frame_size, SEEK_SET );
-        fread( xyuv->pic, xyuv->i_frame_size, 1, xyuv->yuv[i].f );
-
-        SDL_LockYUVOverlay( xyuv->overlay );
-
-        if( xyuv->b_diff || xyuv->b_split ) {
-            /* Reset UV */
-            for( i_plane = 1; i_plane < 3; i_plane++ ) {
-                memset( xyuv->overlay->pixels[i_plane], 128, xyuv->overlay->pitches[i_plane] * xyuv->overlay->h / 2 );
-            }
-            /* Show diff in Y plane of overlay */
-
-            for( i_plane = 0; i_plane < 3; i_plane++ ) {
-                int div = i_plane == 0 ? 1 : 2;
-                uint8_t *src = xyuv->pic;
-                uint8_t *dst = xyuv->overlay->pixels[0] +
-                                (xyuv->yuv[i].x + xyuv->yuv[i].y * xyuv->overlay->pitches[0] );
-                int j;
-                if( i_plane == 1 ) {
-                    src +=  5*xyuv->i_width * xyuv->i_height/4;
-                    dst += xyuv->i_width;
-                } else if( i_plane == 2 ) {
-                    src += xyuv->i_width * xyuv->i_height;
-                    dst += xyuv->i_width + xyuv->i_height / 2 * xyuv->overlay->pitches[0];
-                }
-
-                for( j = 0; j < xyuv->i_height / div; j++ ) {
-                    if( i_picture == 1 || xyuv->b_split ) {
-                        memcpy( dst, src, xyuv->i_width / div );
-                    } else {
-                        int k;
-                        for( k = 0; k < xyuv->i_width / div; k++ ) {
-                            dst[k] = abs( dst[k] - src[k]);
-                        }
-                    }
-                    src += xyuv->i_width / div;
-                    dst += xyuv->overlay->pitches[0];
-                }
-            }
-        } else {
-            for( i_plane = 0; i_plane < 3; i_plane++ ) {
-                int div = i_plane == 0 ? 1 : 2;
-                uint8_t *src = xyuv->pic;
-                uint8_t *dst = xyuv->overlay->pixels[i_plane] +
-                                ((xyuv->yuv[i].x + xyuv->yuv[i].y * xyuv->overlay->pitches[i_plane] ) / div );
-                int w = xyuv->i_width / div;
-                int j;
-
-                if( i_plane == 1 ) {
-                    src +=  5*xyuv->i_width * xyuv->i_height/4;
-                } else if( i_plane == 2 ) {
-                    src += xyuv->i_width * xyuv->i_height;
-                }
-                if( xyuv->i_join > 0 ) {
-                    if( i_picture > 1 ) {
-                        src += xyuv->i_join / div;
-                        dst += xyuv->i_join / div;
-                        w = (xyuv->i_width - xyuv->i_join) /div;
-                    } else {
-                        w = xyuv->i_join / div;
-                    }
-                }
-
-                for( j = 0; j < xyuv->i_height / div; j++ ) {
-                    memcpy( dst, src, w );
-                    src += xyuv->i_width / div;
-                    dst += xyuv->overlay->pitches[i_plane];
-                }
-            }
-        }
-
-        SDL_UnlockYUVOverlay( xyuv->overlay );
-    }
-
-    if( xyuv->f_y != 0.0 ) {
-        uint8_t *pix = xyuv->overlay->pixels[0];
-        int j;
-
-        for( j = 0; j < xyuv->i_sdl_height; j++ ) {
-            int k;
-            for( k = 0; k < xyuv->i_sdl_width; k++ ) {
-                int v= pix[k] * xyuv->f_y;
-                if( v > 255 )
-                    pix[k] = 255;
-                else if( v < 0 )
-                    pix[k] = 0;
-                else
-                    pix[k] = v;
-            }
-            pix += xyuv->overlay->pitches[0];
-        }
-    }
-    if( xyuv->b_grid ) {
-        int x, y;
-
-        for( y = 0; y < xyuv->i_sdl_height; y += 4 ) {
-            uint8_t *p = xyuv->overlay->pixels[0] + y * xyuv->overlay->pitches[0];
-            for( x = 0; x < xyuv->i_sdl_width; x += 4 ) {
-                if( x%16== 0 || y%16 == 0 )
-                    p[x] = 0;
-            }
-        }
-    }
-
-    /* Update display */
-    rect.x = 0;
-    rect.y = 0;
-    rect.w = xyuv->i_display_width;
-    rect.h = xyuv->i_display_height;
-    SDL_DisplayYUVOverlay( xyuv->overlay, &rect );
-
-    /* Display title */
-    if( xyuv->title )
-        free( xyuv->title );
-    asprintf( &xyuv->title, SDL_TITLE, xyuv->yuv[0].name, xyuv->i_frame, xyuv->i_frames, xyuv->f_fps );
-    SDL_WM_SetCaption( xyuv->title, "" );
-}
-
-static void xyuv_count_frames( xyuv_t *xyuv )
-{
-    int i;
-
-    xyuv->i_frames = 0;
-    if( xyuv->i_frame_size <= 0 )
-        return;
-
-    for( i = 0; i < xyuv->i_yuv; i++ ) {
-        /* Beurk but avoid using fstat */
-        fseek( xyuv->yuv[i].f, 0, SEEK_END );
-
-        xyuv->yuv[i].i_frames = ftell( xyuv->yuv[i].f ) / xyuv->i_frame_size;
-        fprintf( stderr, "count (%d) -> %d\n", i, xyuv->yuv[i].i_frames );
-
-        fseek( xyuv->yuv[i].f, 0, SEEK_SET );
-
-        if( xyuv->i_frames < xyuv->yuv[i].i_frames )
-            xyuv->i_frames = xyuv->yuv[i].i_frames;
-    }
-}
-
-static inline int ssd( int a ) { return a*a; }
-
-static void xyuv_detect( int *pi_width, int *pi_height )
-{
-    static const int pi_size[][2] = {
-        {128, 96},
-        {160,120},
-        {320,244},
-        {320,288},
-
-        /* PAL */
-        {176,144},  // QCIF
-        {352,288},  // CIF
-        {352,576},  // 1/2 D1
-        {480,576},  // 2/3 D1
-        {544,576},
-        {640,576},  // VGA
-        {704,576},  // D1
-        {720,576},  // D1
-
-        /* NTSC */
-        {176,112},  // QCIF
-        {320,240},  // MPEG I
-        {352,240},  // CIF
-        {352,480},  // 1/2 D1
-        {480,480},  // 2/3 D1
-        {544,480},
-        {640,480},  // VGA
-        {704,480},  // D1
-        {720,480},  // D1
-
-        /* */
-        {0,0},
-    };
-    int i_max;
-    int i_size_max;
-    uint8_t *pic;
-    int i;
-
-    *pi_width = 0;
-    *pi_height = 0;
-
-    /* Compute size max */
-    for( i_max = 0, i_size_max = 0;
-            pi_size[i_max][0] != 0 && pi_size[i_max][1] != 0; i_max++ ) {
-        int s = pi_size[i_max][0] * pi_size[i_max][1] * 3 / 2;
-
-        if( i_size_max < s )
-            i_size_max = s;
-    }
-
-    /* Temporary buffer */
-    i_size_max *= 3;
-    pic = malloc( i_size_max );
-
-    fprintf( stderr, "guessing size for:\n" );
-    for( i = 0; i < xyuv.i_yuv; i++ ) {
-        int j;
-        int i_read;
-        double dbest = 255*255;
-        int    i_best = i_max;
-        int64_t t;
-
-        fprintf( stderr, " - %s\n", xyuv.yuv[i].name );
-
-        i_read = fread( pic, 1, i_size_max, xyuv.yuv[i].f );
-        if( i_read < 0 )
-            continue;
-
-        /* Check if file size is at least compatible with one format
-         * (if not, ignore file size)*/
-        fseek( xyuv.yuv[i].f, 0, SEEK_END );
-        t = ftell( xyuv.yuv[i].f );
-        fseek( xyuv.yuv[i].f, 0, SEEK_SET );
-        for( j = 0; j < i_max; j++ ) {
-            const int w = pi_size[j][0];
-            const int h = pi_size[j][1];
-            const int s = w * h * 3 / 2;
-
-            if( t % s == 0 )
-                break;
-        }
-        if( j == i_max )
-            t = 0;
-
-
-        /* Try all size */
-        for( j = 0; j < i_max; j++ ) {
-            const int w = pi_size[j][0];
-            const int h = pi_size[j][1];
-            const int s = w * h * 3 / 2;
-            double dd;
-
-            int x, y, n;
-            int64_t d;
-
-            /* To small */
-            if( i_read < 3*s )
-                continue;
-            /* Check file size */
-            if( ( t > 0 && (t % s) != 0  ) ) {
-                fprintf( stderr, "  * %dx%d ignored (incompatible file size)\n", w, h );
-                continue;
-            }
-
-
-            /* We do a simple ssd between 2 consecutives lines */
-            d = 0;
-            for( n = 0; n < 3; n++ ) {
-                uint8_t *p;
-
-                /* Y */
-                p = &pic[n*s];
-                for( y = 0; y < h-1; y++ ) {
-                    for( x = 0; x < w; x++ )
-                        d += ssd( p[x] - p[w+x] );
-                    p += w;
-                }
-
-                /* U */
-                p = &pic[n*s+w*h];
-                for( y = 0; y < h/2-1; y++ ) {
-                    for( x = 0; x < w/2; x++ )
-                        d += ssd( p[x] - p[(w/2)+x] );
-                    p += w/2;
-                }
-
-                /* V */
-                p = &pic[n*s+5*w*h/4];
-                for( y = 0; y < h/2-1; y++ ) {
-                    for( x = 0; x < w/2; x++ )
-                        d += ssd( p[x] - p[(w/2)+x] );
-                    p += w/2;
-                }
-            }
-            dd = (double)d / (3*w*h*3/2);
-            fprintf( stderr, "  * %dx%d d=%f\n", w, h, dd );
-
-            if( dd < dbest ) {
-                i_best = j;
-                dbest = dd;
-            }
-        }
-
-        fseek( xyuv.yuv[i].f, 0, SEEK_SET );
-
-        if( i_best < i_max ) {
-            fprintf( stderr, "  -> %dx%d\n", pi_size[i_best][0], pi_size[i_best][1] );
-            *pi_width = pi_size[i_best][0];
-            *pi_height = pi_size[i_best][1];
-        }
-    }
-
-    free( pic );
-}

 
@@ -1,792 +0,0 @@
-/*****************************************************************************
- * xyuv.c: a SDL yuv 420 planer viewer.
- *****************************************************************************
- * Copyright (C) 2004 Laurent Aimar <fenrir@via.ecp.fr>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
- *****************************************************************************/
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <stdint.h>
-
-#include <SDL/SDL.h>
-
-#define YUV_MAX 20
-#define SDL_TITLE "xyuv: %s - %d/%d - %.2ffps"
-typedef struct
-{
-    /* globals */
-    int     i_width;
-    int     i_height;
-    int     i_frame_size;
-    int     i_frame;
-    int     i_frames;
-    float   f_fps;
-
-    float   f_y;
-
-    int     b_pause;
-    int     b_grid;
-    int     b_split;
-    int     b_diff;
-    int     i_join;
-
-    /* Constructed picture */
-    int     i_wall_width;   /* in picture count */
-
-    /* YUV files */
-    int     i_yuv;
-    struct
-    {
-        char    *name;
-        FILE    *f;         /* handles */
-        int     i_frames;   /* frames count */
-
-        /* Position in the whole picture */
-        int     x, y;
-    } yuv[YUV_MAX];
-
-    /* SDL */
-    int i_sdl_width;
-    int i_sdl_height;
-
-    int i_display_width;
-    int i_display_height;
-    char *title;
-
-    SDL_Surface *screen;
-    SDL_Overlay *overlay;
-
-    /* */
-    uint8_t *pic;
-
-} xyuv_t;
-
-xyuv_t xyuv = {
-    .i_width = 0,
-    .i_height = 0,
-    .i_frame  = 1,
-    .i_frames = 0,
-    .f_fps = 25.0,
-    .f_y = 0.0,
-    .i_wall_width = 0,
-
-    .i_yuv = 0,
-
-    .b_pause = 0,
-    .b_split = 0,
-    .b_diff = 0,
-    .i_join = -1,
-
-    .title = NULL,
-    .pic = NULL,
-};
-
-static void help( void )
-{
-    fprintf( stderr,
-             "Syntax: xyuv [options] file [file2 ...]\n"
-             "\n"
-             "      --help                  Print this help\n"
-             "\n"
-             "  -s, --size <WIDTHxHEIGHT>   Set input size\n"
-             "  -w, --width <integer>       Set width\n"
-             "  -h, --height <integer>      Set height\n"
-             "\n"
-             "  -S, --split                 Show splited Y/U/V planes\n"
-             "  -d, --diff                  Show difference (only 2 files) in split mode\n"
-             "  -j, --joint <integer>\n"
-             "\n"
-             "  -y <float>                  Set Y factor\n"
-             "\n"
-             "  -g, --grid                  Show a grid (macroblock 16x16)\n"
-             "  -W <integer>                Set wall width (in picture count)\n"
-             "  -f, --fps <float>           Set fps\n"
-             "\n" );
-}
-
-static void xyuv_count_frames( xyuv_t *xyuv );
-static void xyuv_detect( int *pi_width, int *pi_height );
-static void xyuv_display( xyuv_t *xyuv, int i_frame );
-
-int main( int argc, char **argv )
-{
-    int i;
-
-    /* Parse commande line */
-    for( i = 1; i < argc; i++ ) {
-        if( !strcasecmp( argv[i], "--help" ) ) {
-            help();
-            return 0;
-        }
-        if( !strcmp( argv[i], "-d" ) || !strcasecmp( argv[i], "--diff" ) ) {
-            xyuv.b_diff = 1;
-        } else if( !strcmp( argv[i], "-S" ) || !strcasecmp( argv[i], "--split" ) ) {
-            xyuv.b_split = 1;
-        } else if( !strcmp( argv[i], "-f" ) || !strcasecmp( argv[i], "--fps" ) ) {
-            if( i >= argc -1 ) goto err_missing_arg;
-            xyuv.f_fps = atof( argv[++i] );
-        } else if( !strcmp( argv[i], "-h" ) || !strcasecmp( argv[i], "--height" ) ) {
-            if( i >= argc -1 ) goto err_missing_arg;
-            xyuv.i_height = atoi( argv[++i] );
-        } else if( !strcmp( argv[i], "-w" ) || !strcasecmp( argv[i], "--width" ) ) {
-            if( i >= argc -1 ) goto err_missing_arg;
-            xyuv.i_width = atoi( argv[++i] );
-        } else if( !strcmp( argv[i], "-s" ) || !strcasecmp( argv[i], "--size" ) ) {
-            char *p;
-
-            if( i >= argc -1 ) goto err_missing_arg;
-
-            xyuv.i_width = strtol( argv[++i], &p, 0 );
-            p++;
-            xyuv.i_height = atoi( p );
-        } else if( !strcmp( argv[i], "-W" ) ) {
-            if( i >= argc -1 ) goto err_missing_arg;
-            xyuv.i_wall_width = atoi( argv[++i] );
-        } else if( !strcmp( argv[i], "-y" ) ) {
-            if( i >= argc -1 ) goto err_missing_arg;
-            xyuv.f_y = atof( argv[++i] );
-        } else if( !strcmp( argv[i], "-j" ) || !strcasecmp( argv[i], "--join" ) ) {
-            if( i >= argc -1 ) goto err_missing_arg;
-            xyuv.i_join = atoi( argv[++i] );
-        } else if( !strcmp( argv[i], "-g" ) || !strcasecmp( argv[i], "--grid" ) ) {
-            xyuv.b_grid = 1;
-        } else {
-            FILE *f = fopen( argv[i], "rb" );
-            if( !f ) {
-                fprintf( stderr, "cannot open YUV %s\n", argv[i] );
-            } else {
-                xyuv.yuv[xyuv.i_yuv].name = strdup( argv[i] );
-                xyuv.yuv[xyuv.i_yuv].f = f;
-                xyuv.yuv[xyuv.i_yuv].i_frames = 0;
-
-                xyuv.i_yuv++;
-            }
-        }
-    }
-
-    if( xyuv.i_yuv == 0 ) {
-        fprintf( stderr, "no file to display\n" );
-        return -1;
-    }
-    if( xyuv.i_width == 0 || xyuv.i_height == 0 ) {
-        char *psz = xyuv.yuv[0].name;
-        char *num;
-        char *x;
-        /* See if we find widthxheight in the file name */
-        for( ;; ) {
-            if( !( x = strchr( psz+1, 'x' ) ) ) {
-                break;
-            }
-            num = x;
-            while( num > psz && num[-1] >= '0' && num[-1] <= '9' )
-                num--;
-
-            if( num != x && x[1] >= '0' && x[1] <= '9' ) {
-                xyuv.i_width = atoi( num );
-                xyuv.i_height = atoi( x+1 );
-                break;
-            }
-            psz = x;
-        }
-        fprintf( stderr, "file name gives %dx%d\n", xyuv.i_width, xyuv.i_height );
-    }
-
-    if( xyuv.i_width == 0 || xyuv.i_height == 0 ) {
-        xyuv_detect( &xyuv.i_width, &xyuv.i_height );
-    }
-
-    if( xyuv.i_width == 0 || xyuv.i_height == 0 ) {
-        fprintf( stderr, "invalid or missing frames size\n" );
-        return -1;
-    }
-    if( xyuv.b_diff && xyuv.i_yuv != 2 ) {
-        fprintf( stderr, "--diff works only with 2 files\n" );
-        return -1;
-    }
-    if( (xyuv.i_join == 0 || xyuv.i_join >= xyuv.i_width) && xyuv.i_yuv != 2 ) {
-        fprintf( stderr, "--join woeks only with two files and range is [1, width-1]\n" );
-        return -1;
-    }
-    if( xyuv.i_join % 2 != 0 ) {
-        if( xyuv.i_join + 1 < xyuv.i_width )
-            xyuv.i_join++;
-        else
-            xyuv.i_join--;
-    }
-
-    /* Now check frames */
-    fprintf( stderr, "displaying :\n" );
-    xyuv.i_frame_size = 3 * xyuv.i_width * xyuv.i_height / 2;
-    xyuv_count_frames( &xyuv );
-    for( i = 0; i < xyuv.i_yuv; i++ ) {
-        fprintf( stderr, " - '%s' : %d frames\n", xyuv.yuv[i].name, xyuv.yuv[i].i_frames );
-    }
-
-    if( xyuv.i_frames == 0 ) {
-        fprintf( stderr, "no frames to display\n" );
-    }
-
-    xyuv.pic = malloc( xyuv.i_frame_size );
-
-    /* calculate SDL view */
-    if( xyuv.i_wall_width > xyuv.i_yuv ) {
-        xyuv.i_wall_width = xyuv.i_yuv;
-    }
-    if( xyuv.i_wall_width == 0 ) {
-        while( xyuv.i_wall_width < xyuv.i_yuv && xyuv.i_wall_width * xyuv.i_wall_width < xyuv.i_yuv ) {
-            xyuv.i_wall_width++;
-        }
-    }
-
-    for( i = 0; i < xyuv.i_yuv; i++ ) {
-        if( xyuv.b_diff || xyuv.i_join > 0 ) {
-            xyuv.yuv[i].x = 0;
-            xyuv.yuv[i].y = 0;
-        } else if( xyuv.b_split ) {
-            xyuv.yuv[i].x = (i%xyuv.i_wall_width) * 3 * xyuv.i_width / 2;
-            xyuv.yuv[i].y = (i/xyuv.i_wall_width) * xyuv.i_height;
-        } else {
-            xyuv.yuv[i].x = (i%xyuv.i_wall_width) * xyuv.i_width;
-            xyuv.yuv[i].y = (i/xyuv.i_wall_width) * xyuv.i_height;
-        }
-    }
-    if( xyuv.b_diff ) {
-        xyuv.i_sdl_width = 3 * xyuv.i_width / 2;
-        xyuv.i_sdl_height= xyuv.i_height;
-    } else if( xyuv.i_join > 0 ) {
-        xyuv.i_sdl_width = xyuv.i_width;
-        xyuv.i_sdl_height= xyuv.i_height;
-    } else if( xyuv.b_split ) {
-        xyuv.i_sdl_width = xyuv.i_wall_width * 3 * xyuv.i_width / 2;
-        xyuv.i_sdl_height= xyuv.i_height * ( ( xyuv.i_yuv  + xyuv.i_wall_width - 1 ) / xyuv.i_wall_width );
-    } else {
-        xyuv.i_sdl_width = xyuv.i_wall_width * xyuv.i_width;
-        xyuv.i_sdl_height= xyuv.i_height * ( ( xyuv.i_yuv  + xyuv.i_wall_width - 1 ) / xyuv.i_wall_width );
-    }
-    xyuv.i_display_width = xyuv.i_sdl_width;
-    xyuv.i_display_height = xyuv.i_sdl_height;
-
-    /* Open SDL */
-    if( SDL_Init( SDL_INIT_EVENTTHREAD|SDL_INIT_NOPARACHUTE|SDL_INIT_VIDEO) ) {
-        fprintf( stderr, "cannot init SDL\n" );
-        return -1;
-    }
-
-    SDL_EnableKeyRepeat(SDL_DEFAULT_REPEAT_DELAY, 100 );
-    SDL_EventState( SDL_KEYUP, SDL_IGNORE );
-
-    xyuv.screen = SDL_SetVideoMode( xyuv.i_sdl_width, xyuv.i_sdl_height, 0,
-                                    SDL_HWSURFACE|SDL_RESIZABLE|
-                                    SDL_ASYNCBLIT|SDL_HWACCEL );
-    if( xyuv.screen == NULL ) {
-        fprintf( stderr, "SDL_SetVideoMode failed\n" );
-        return -1;
-    }
-
-    SDL_LockSurface( xyuv.screen );
-    xyuv.overlay = SDL_CreateYUVOverlay( xyuv.i_sdl_width, xyuv.i_sdl_height,
-                                         SDL_YV12_OVERLAY,
-                                         xyuv.screen );
-    /* reset with black */
-    memset( xyuv.overlay->pixels[0],   0, xyuv.overlay->pitches[0] * xyuv.i_sdl_height );
-    memset( xyuv.overlay->pixels[1], 128, xyuv.overlay->pitches[1] * xyuv.i_sdl_height / 2);
-    memset( xyuv.overlay->pixels[2], 128, xyuv.overlay->pitches[2] * xyuv.i_sdl_height / 2);
-    SDL_UnlockSurface( xyuv.screen );
-
-    if( xyuv.overlay == NULL ) {
-        fprintf( stderr, "recon: SDL_CreateYUVOverlay failed\n" );
-        return -1;
-    }
-
-    for( ;; ) {
-        SDL_Event event;
-        static int b_fullscreen = 0;
-        int64_t i_start = SDL_GetTicks();
-        int i_wait;
-
-        if( !xyuv.b_pause ) {
-            xyuv_display( &xyuv, xyuv.i_frame );
-        }
-
-        for( ;; ) {
-            int b_refresh = 0;
-            while( SDL_PollEvent( &event ) )  {
-                switch( event.type )
-                {
-                    case SDL_QUIT:
-                        if( b_fullscreen )
-                            SDL_WM_ToggleFullScreen( xyuv.screen );
-                        exit( 1 );
-
-                    case SDL_KEYDOWN:
-                        switch( event.key.keysym.sym )
-                        {
-                            case SDLK_q:
-                            case SDLK_ESCAPE:
-                                if( b_fullscreen )
-                                    SDL_WM_ToggleFullScreen( xyuv.screen );
-                                exit(1);
-
-                            case SDLK_f:
-                                if( SDL_WM_ToggleFullScreen( xyuv.screen ) )
-                                    b_fullscreen = 1 - b_fullscreen;
-                                break;
-
-                            case SDLK_g:
-                                if( xyuv.b_grid )
-                                    xyuv.b_grid = 0;
-                                else
-                                    xyuv.b_grid = 1;
-                                if( xyuv.b_pause )
-                                    b_refresh = 1;
-                                break;
-
-                            case SDLK_SPACE:
-                                if( xyuv.b_pause )
-                                    xyuv.b_pause = 0;
-                                else
-                                    xyuv.b_pause = 1;
-                                break;
-                            case SDLK_LEFT:
-                                if( xyuv.i_frame > 1 ) xyuv.i_frame--;
-                                b_refresh = 1;
-                                break;
-
-                            case SDLK_RIGHT:
-                                if( xyuv.i_frame >= xyuv.i_frames )
-                                    xyuv_count_frames( &xyuv );
-                                if( xyuv.i_frame < xyuv.i_frames ) xyuv.i_frame++;
-                                b_refresh = 1;
-                                break;
-
-                            case SDLK_HOME:
-                                xyuv.i_frame = 1;
-                                if( xyuv.b_pause )
-                                    b_refresh = 1;
-                                break;
-
-                            case SDLK_END:
-                                xyuv_count_frames( &xyuv );
-                                xyuv.i_frame = xyuv.i_frames;
-                                b_refresh = 1;
-                                break;
-
-                            case SDLK_UP:
-                                xyuv.i_frame += xyuv.i_frames / 20;
-
-                                if( xyuv.i_frame >= xyuv.i_frames )
-                                    xyuv_count_frames( &xyuv );
-
-                                if( xyuv.i_frame > xyuv.i_frames )
-                                    xyuv.i_frame = xyuv.i_frames;
-                                b_refresh = 1;
-                                break;
-
-                            case SDLK_DOWN:
-                                xyuv.i_frame -= xyuv.i_frames / 20;
-                                if( xyuv.i_frame < 1 )
-                                    xyuv.i_frame = 1;
-                                b_refresh = 1;
-                                break;
-
-                            case SDLK_PAGEUP:
-                                xyuv.i_frame += xyuv.i_frames / 10;
-
-                                if( xyuv.i_frame >= xyuv.i_frames )
-                                    xyuv_count_frames( &xyuv );
-
-                                if( xyuv.i_frame > xyuv.i_frames )
-                                    xyuv.i_frame = xyuv.i_frames;
-                                b_refresh = 1;
-                                break;
-
-                            case SDLK_PAGEDOWN:
-                                xyuv.i_frame -= xyuv.i_frames / 10;
-                                if( xyuv.i_frame < 1 )
-                                    xyuv.i_frame = 1;
-                                b_refresh = 1;
-                                break;
-
-                            default:
-                                break;
-                        }
-                        break;
-                    case SDL_VIDEORESIZE:
-                        xyuv.i_display_width = event.resize.w;
-                        xyuv.i_display_height = event.resize.h;
-                        xyuv.screen = SDL_SetVideoMode( xyuv.i_display_width, xyuv.i_display_height, 0,
-                                                        SDL_HWSURFACE|SDL_RESIZABLE|
-                                                        SDL_ASYNCBLIT|SDL_HWACCEL );
-                        xyuv_display( &xyuv, xyuv.i_frame );
-                        break;
-
-                    default:
-                        break;
-                }
-            }
-            if( b_refresh ) {
-                xyuv.b_pause = 1;
-                xyuv_display( &xyuv, xyuv.i_frame );
-            }
-            /* wait */
-            i_wait = 1000 / xyuv.f_fps - ( SDL_GetTicks() - i_start);
-            if( i_wait < 0 )
-                break;
-            else if( i_wait > 200 )
-                SDL_Delay( 200 );
-            else {
-                SDL_Delay( i_wait );
-                break;
-            }
-        }
-        if( !xyuv.b_pause ) {
-            /* next frame */
-            if( xyuv.i_frame == xyuv.i_frames )
-                    xyuv.b_pause = 1;
-            else if( xyuv.i_frame < xyuv.i_frames )
-                xyuv.i_frame++;
-        }
-    }
-
-
-    return 0;
-
-err_missing_arg:
-    fprintf( stderr, "missing arg for option=%s\n", argv[i] );
-    return -1;
-}
-
-
-static void xyuv_display( xyuv_t *xyuv, int i_frame )
-{
-    SDL_Rect rect;
-    int i_picture = 0;
-    int i;
-
-    if( i_frame > xyuv->i_frames )
-        return;
-
-    xyuv->i_frame = i_frame;
-
-    /* Load and copy pictue data */
-    for( i = 0; i < xyuv->i_yuv; i++ ) {
-        int i_plane;
-
-        fprintf( stderr, "yuv[%d] %d/%d\n", i, i_frame, xyuv->yuv[i].i_frames );
-        if( i_frame - 1 >= xyuv->yuv[i].i_frames ) {
-            xyuv_count_frames( xyuv );
-            if( i_frame - 1 >= xyuv->yuv[i].i_frames )
-                continue;
-        }
-        i_picture++;
-
-        fseek( xyuv->yuv[i].f, (xyuv->i_frame-1) * xyuv->i_frame_size, SEEK_SET );
-        fread( xyuv->pic, xyuv->i_frame_size, 1, xyuv->yuv[i].f );
-
-        SDL_LockYUVOverlay( xyuv->overlay );
-
-        if( xyuv->b_diff || xyuv->b_split ) {
-            /* Reset UV */
-            for( i_plane = 1; i_plane < 3; i_plane++ ) {
-                memset( xyuv->overlay->pixels[i_plane], 128, xyuv->overlay->pitches[i_plane] * xyuv->overlay->h / 2 );
-            }
-            /* Show diff in Y plane of overlay */
-
-            for( i_plane = 0; i_plane < 3; i_plane++ ) {
-                int div = i_plane == 0 ? 1 : 2;
-                uint8_t *src = xyuv->pic;
-                uint8_t *dst = xyuv->overlay->pixels[0] +
-                                (xyuv->yuv[i].x + xyuv->yuv[i].y * xyuv->overlay->pitches[0] );
-                int j;
-                if( i_plane == 1 ) {
-                    src +=  5*xyuv->i_width * xyuv->i_height/4;
-                    dst += xyuv->i_width;
-                } else if( i_plane == 2 ) {
-                    src += xyuv->i_width * xyuv->i_height;
-                    dst += xyuv->i_width + xyuv->i_height / 2 * xyuv->overlay->pitches[0];
-                }
-
-                for( j = 0; j < xyuv->i_height / div; j++ ) {
-                    if( i_picture == 1 || xyuv->b_split ) {
-                        memcpy( dst, src, xyuv->i_width / div );
-                    } else {
-                        int k;
-                        for( k = 0; k < xyuv->i_width / div; k++ ) {
-                            dst[k] = abs( dst[k] - src[k]);
-                        }
-                    }
-                    src += xyuv->i_width / div;
-                    dst += xyuv->overlay->pitches[0];
-                }
-            }
-        } else {
-            for( i_plane = 0; i_plane < 3; i_plane++ ) {
-                int div = i_plane == 0 ? 1 : 2;
-                uint8_t *src = xyuv->pic;
-                uint8_t *dst = xyuv->overlay->pixels[i_plane] +
-                                ((xyuv->yuv[i].x + xyuv->yuv[i].y * xyuv->overlay->pitches[i_plane] ) / div );
-                int w = xyuv->i_width / div;
-                int j;
-
-                if( i_plane == 1 ) {
-                    src +=  5*xyuv->i_width * xyuv->i_height/4;
-                } else if( i_plane == 2 ) {
-                    src += xyuv->i_width * xyuv->i_height;
-                }
-                if( xyuv->i_join > 0 ) {
-                    if( i_picture > 1 ) {
-                        src += xyuv->i_join / div;
-                        dst += xyuv->i_join / div;
-                        w = (xyuv->i_width - xyuv->i_join) /div;
-                    } else {
-                        w = xyuv->i_join / div;
-                    }
-                }
-
-                for( j = 0; j < xyuv->i_height / div; j++ ) {
-                    memcpy( dst, src, w );
-                    src += xyuv->i_width / div;
-                    dst += xyuv->overlay->pitches[i_plane];
-                }
-            }
-        }
-
-        SDL_UnlockYUVOverlay( xyuv->overlay );
-    }
-
-    if( xyuv->f_y != 0.0 ) {
-        uint8_t *pix = xyuv->overlay->pixels[0];
-        int j;
-
-        for( j = 0; j < xyuv->i_sdl_height; j++ ) {
-            int k;
-            for( k = 0; k < xyuv->i_sdl_width; k++ ) {
-                int v= pix[k] * xyuv->f_y;
-                if( v > 255 )
-                    pix[k] = 255;
-                else if( v < 0 )
-                    pix[k] = 0;
-                else
-                    pix[k] = v;
-            }
-            pix += xyuv->overlay->pitches[0];
-        }
-    }
-    if( xyuv->b_grid ) {
-        int x, y;
-
-        for( y = 0; y < xyuv->i_sdl_height; y += 4 ) {
-            uint8_t *p = xyuv->overlay->pixels[0] + y * xyuv->overlay->pitches[0];
-            for( x = 0; x < xyuv->i_sdl_width; x += 4 ) {
-                if( x%16== 0 || y%16 == 0 )
-                    p[x] = 0;
-            }
-        }
-    }
-
-    /* Update display */
-    rect.x = 0;
-    rect.y = 0;
-    rect.w = xyuv->i_display_width;
-    rect.h = xyuv->i_display_height;
-    SDL_DisplayYUVOverlay( xyuv->overlay, &rect );
-
-    /* Display title */
-    if( xyuv->title )
-        free( xyuv->title );
-    asprintf( &xyuv->title, SDL_TITLE, xyuv->yuv[0].name, xyuv->i_frame, xyuv->i_frames, xyuv->f_fps );
-    SDL_WM_SetCaption( xyuv->title, "" );
-}
-
-static void xyuv_count_frames( xyuv_t *xyuv )
-{
-    int i;
-
-    xyuv->i_frames = 0;
-    if( xyuv->i_frame_size <= 0 )
-        return;
-
-    for( i = 0; i < xyuv->i_yuv; i++ ) {
-        /* Beurk but avoid using fstat */
-        fseek( xyuv->yuv[i].f, 0, SEEK_END );
-
-        xyuv->yuv[i].i_frames = ftell( xyuv->yuv[i].f ) / xyuv->i_frame_size;
-        fprintf( stderr, "count (%d) -> %d\n", i, xyuv->yuv[i].i_frames );
-
-        fseek( xyuv->yuv[i].f, 0, SEEK_SET );
-
-        if( xyuv->i_frames < xyuv->yuv[i].i_frames )
-            xyuv->i_frames = xyuv->yuv[i].i_frames;
-    }
-}
-
-static inline int ssd( int a ) { return a*a; }
-
-static void xyuv_detect( int *pi_width, int *pi_height )
-{
-    static const int pi_size[][2] = {
-        {128, 96},
-        {160,120},
-        {320,244},
-        {320,288},
-
-        /* PAL */
-        {176,144},  // QCIF
-        {352,288},  // CIF
-        {352,576},  // 1/2 D1
-        {480,576},  // 2/3 D1
-        {544,576},
-        {640,576},  // VGA
-        {704,576},  // D1
-        {720,576},  // D1
-
-        /* NTSC */
-        {176,112},  // QCIF
-        {320,240},  // MPEG I
-        {352,240},  // CIF
-        {352,480},  // 1/2 D1
-        {480,480},  // 2/3 D1
-        {544,480},
-        {640,480},  // VGA
-        {704,480},  // D1
-        {720,480},  // D1
-
-        /* */
-        {0,0},
-    };
-    int i_max;
-    int i_size_max;
-    uint8_t *pic;
-    int i;
-
-    *pi_width = 0;
-    *pi_height = 0;
-
-    /* Compute size max */
-    for( i_max = 0, i_size_max = 0;
-            pi_size[i_max][0] != 0 && pi_size[i_max][1] != 0; i_max++ ) {
-        int s = pi_size[i_max][0] * pi_size[i_max][1] * 3 / 2;
-
-        if( i_size_max < s )
-            i_size_max = s;
-    }
-
-    /* Temporary buffer */
-    i_size_max *= 3;
-    pic = malloc( i_size_max );
-
-    fprintf( stderr, "guessing size for:\n" );
-    for( i = 0; i < xyuv.i_yuv; i++ ) {
-        int j;
-        int i_read;
-        double dbest = 255*255;
-        int    i_best = i_max;
-        int64_t t;
-
-        fprintf( stderr, " - %s\n", xyuv.yuv[i].name );
-
-        i_read = fread( pic, 1, i_size_max, xyuv.yuv[i].f );
-        if( i_read < 0 )
-            continue;
-
-        /* Check if file size is at least compatible with one format
-         * (if not, ignore file size)*/
-        fseek( xyuv.yuv[i].f, 0, SEEK_END );
-        t = ftell( xyuv.yuv[i].f );
-        fseek( xyuv.yuv[i].f, 0, SEEK_SET );
-        for( j = 0; j < i_max; j++ ) {
-            const int w = pi_size[j][0];
-            const int h = pi_size[j][1];
-            const int s = w * h * 3 / 2;
-
-            if( t % s == 0 )
-                break;
-        }
-        if( j == i_max )
-            t = 0;
-
-
-        /* Try all size */
-        for( j = 0; j < i_max; j++ ) {
-            const int w = pi_size[j][0];
-            const int h = pi_size[j][1];
-            const int s = w * h * 3 / 2;
-            double dd;
-
-            int x, y, n;
-            int64_t d;
-
-            /* To small */
-            if( i_read < 3*s )
-                continue;
-            /* Check file size */
-            if( ( t > 0 && (t % s) != 0  ) ) {
-                fprintf( stderr, "  * %dx%d ignored (incompatible file size)\n", w, h );
-                continue;
-            }
-
-
-            /* We do a simple ssd between 2 consecutives lines */
-            d = 0;
-            for( n = 0; n < 3; n++ ) {
-                uint8_t *p;
-
-                /* Y */
-                p = &pic[n*s];
-                for( y = 0; y < h-1; y++ ) {
-                    for( x = 0; x < w; x++ )
-                        d += ssd( p[x] - p[w+x] );
-                    p += w;
-                }
-
-                /* U */
-                p = &pic[n*s+w*h];
-                for( y = 0; y < h/2-1; y++ ) {
-                    for( x = 0; x < w/2; x++ )
-                        d += ssd( p[x] - p[(w/2)+x] );
-                    p += w/2;
-                }
-
-                /* V */
-                p = &pic[n*s+5*w*h/4];
-                for( y = 0; y < h/2-1; y++ ) {
-                    for( x = 0; x < w/2; x++ )
-                        d += ssd( p[x] - p[(w/2)+x] );
-                    p += w/2;
-                }
-            }
-            dd = (double)d / (3*w*h*3/2);
-            fprintf( stderr, "  * %dx%d d=%f\n", w, h, dd );
-
-            if( dd < dbest ) {
-                i_best = j;
-                dbest = dd;
-            }
-        }
-
-        fseek( xyuv.yuv[i].f, 0, SEEK_SET );
-
-        if( i_best < i_max ) {
-            fprintf( stderr, "  -> %dx%d\n", pi_size[i_best][0], pi_size[i_best][1] );
-            *pi_width = pi_size[i_best][0];
-            *pi_height = pi_size[i_best][1];
-        }
-    }
-
-    free( pic );
-}
​

x264-snapshot-20130723-2245.tar.bz2/AUTHORS -> x264-snapshot-20140321-2245.tar.bz2/AUTHORS Changed

 
@@ -43,8 +43,8 @@
 S: Brittany, France
 
 N: Henrik Gramner
-E: hengar-6 AT student DOT ltu DOT se
-D: 4:2:2 chroma subsampling, x86 asm
+E: henrik AT gramner DOT com
+D: 4:2:2 chroma subsampling, x86 asm, Windows improvements, bugfixes
 S: Sweden
 
 N: Jason Garrett-Glaser
@@ -99,7 +99,3 @@
 E: radoslaw AT syskin DOT cjb DOT net
 D: Cached motion compensation
 
-N: Tuukka Toivonen
-E: tuukkat AT ee DOT oulu DOT fi
-D: Visualization
-
​

x264-snapshot-20130723-2245.tar.bz2/Makefile -> x264-snapshot-20140321-2245.tar.bz2/Makefile Changed

@@ -69,9 +69,8 @@
 SRCCLI += output/mp4.c
 endif
 
-# Visualization sources
-ifneq ($(findstring HAVE_VISUALIZE 1, $(CONFIG)),)
-SRCS   += common/visualize.c common/display-x11.c
+ifneq ($(findstring HAVE_LSMASH 1, $(CONFIG)),)
+SRCCLI += output/mp4_lsmash.c
 endif
 
 # MMX/SSE optims
@@ -247,29 +246,29 @@
 	rm -f config.mak x264_config.h config.h config.log x264.pc x264.def
 
 install-cli: cli
-	install -d $(DESTDIR)$(bindir)
-	install x264$(EXE) $(DESTDIR)$(bindir)
+	$(INSTALL) -d $(DESTDIR)$(bindir)
+	$(INSTALL) x264$(EXE) $(DESTDIR)$(bindir)
 
 install-lib-dev:
-	install -d $(DESTDIR)$(includedir)
-	install -d $(DESTDIR)$(libdir)
-	install -d $(DESTDIR)$(libdir)/pkgconfig
-	install -m 644 $(SRCPATH)/x264.h $(DESTDIR)$(includedir)
-	install -m 644 x264_config.h $(DESTDIR)$(includedir)
-	install -m 644 x264.pc $(DESTDIR)$(libdir)/pkgconfig
+	$(INSTALL) -d $(DESTDIR)$(includedir)
+	$(INSTALL) -d $(DESTDIR)$(libdir)
+	$(INSTALL) -d $(DESTDIR)$(libdir)/pkgconfig
+	$(INSTALL) -m 644 $(SRCPATH)/x264.h $(DESTDIR)$(includedir)
+	$(INSTALL) -m 644 x264_config.h $(DESTDIR)$(includedir)
+	$(INSTALL) -m 644 x264.pc $(DESTDIR)$(libdir)/pkgconfig
 
 install-lib-static: lib-static install-lib-dev
-	install -m 644 $(LIBX264) $(DESTDIR)$(libdir)
+	$(INSTALL) -m 644 $(LIBX264) $(DESTDIR)$(libdir)
 	$(if $(RANLIB), $(RANLIB) $(DESTDIR)$(libdir)/$(LIBX264))
 
 install-lib-shared: lib-shared install-lib-dev
 ifneq ($(IMPLIBNAME),)
-	install -d $(DESTDIR)$(bindir)
-	install -m 755 $(SONAME) $(DESTDIR)$(bindir)
-	install -m 644 $(IMPLIBNAME) $(DESTDIR)$(libdir)
+	$(INSTALL) -d $(DESTDIR)$(bindir)
+	$(INSTALL) -m 755 $(SONAME) $(DESTDIR)$(bindir)
+	$(INSTALL) -m 644 $(IMPLIBNAME) $(DESTDIR)$(libdir)
 else ifneq ($(SONAME),)
 	ln -f -s $(SONAME) $(DESTDIR)$(libdir)/libx264.$(SOSUFFIX)
-	install -m 755 $(SONAME) $(DESTDIR)$(libdir)
+	$(INSTALL) -m 755 $(SONAME) $(DESTDIR)$(libdir)
 endif
 
 uninstall:

 
@@ -69,9 +69,8 @@
 SRCCLI += output/mp4.c
 endif
 
-# Visualization sources
-ifneq ($(findstring HAVE_VISUALIZE 1, $(CONFIG)),)
-SRCS   += common/visualize.c common/display-x11.c
+ifneq ($(findstring HAVE_LSMASH 1, $(CONFIG)),)
+SRCCLI += output/mp4_lsmash.c
 endif
 
 # MMX/SSE optims
@@ -247,29 +246,29 @@
    rm -f config.mak x264_config.h config.h config.log x264.pc x264.def
 
 install-cli: cli
-   install -d $(DESTDIR)$(bindir)
-   install x264$(EXE) $(DESTDIR)$(bindir)
+   $(INSTALL) -d $(DESTDIR)$(bindir)
+   $(INSTALL) x264$(EXE) $(DESTDIR)$(bindir)
 
 install-lib-dev:
-   install -d $(DESTDIR)$(includedir)
-   install -d $(DESTDIR)$(libdir)
-   install -d $(DESTDIR)$(libdir)/pkgconfig
-   install -m 644 $(SRCPATH)/x264.h $(DESTDIR)$(includedir)
-   install -m 644 x264_config.h $(DESTDIR)$(includedir)
-   install -m 644 x264.pc $(DESTDIR)$(libdir)/pkgconfig
+   $(INSTALL) -d $(DESTDIR)$(includedir)
+   $(INSTALL) -d $(DESTDIR)$(libdir)
+   $(INSTALL) -d $(DESTDIR)$(libdir)/pkgconfig
+   $(INSTALL) -m 644 $(SRCPATH)/x264.h $(DESTDIR)$(includedir)
+   $(INSTALL) -m 644 x264_config.h $(DESTDIR)$(includedir)
+   $(INSTALL) -m 644 x264.pc $(DESTDIR)$(libdir)/pkgconfig
 
 install-lib-static: lib-static install-lib-dev
-   install -m 644 $(LIBX264) $(DESTDIR)$(libdir)
+   $(INSTALL) -m 644 $(LIBX264) $(DESTDIR)$(libdir)
    $(if $(RANLIB), $(RANLIB) $(DESTDIR)$(libdir)/$(LIBX264))
 
 install-lib-shared: lib-shared install-lib-dev
 ifneq ($(IMPLIBNAME),)
-   install -d $(DESTDIR)$(bindir)
-   install -m 755 $(SONAME) $(DESTDIR)$(bindir)
-   install -m 644 $(IMPLIBNAME) $(DESTDIR)$(libdir)
+   $(INSTALL) -d $(DESTDIR)$(bindir)
+   $(INSTALL) -m 755 $(SONAME) $(DESTDIR)$(bindir)
+   $(INSTALL) -m 644 $(IMPLIBNAME) $(DESTDIR)$(libdir)
 else ifneq ($(SONAME),)
    ln -f -s $(SONAME) $(DESTDIR)$(libdir)/libx264.$(SOSUFFIX)
-   install -m 755 $(SONAME) $(DESTDIR)$(libdir)
+   $(INSTALL) -m 755 $(SONAME) $(DESTDIR)$(libdir)
 endif
 
 uninstall:
​

x264-snapshot-20130723-2245.tar.bz2/common/arm/asm.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/asm.S Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * asm.S: arm utility macros
  *****************************************************************************
- * Copyright (C) 2008-2013 x264 project
+ * Copyright (C) 2008-2014 x264 project
  *
  * Authors: Mans Rullgard <mans@mansr.com>
  *          David Conrad <lessen42@gmail.com>
@@ -26,6 +26,8 @@
 
 #include "config.h"
 
+.syntax unified
+
 #ifdef PREFIX
 #   define EXTERN_ASM _
 #else
​

x264-snapshot-20130723-2245.tar.bz2/common/arm/cpu-a.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/cpu-a.S Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * cpu-a.S: arm cpu detection
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *
@@ -26,7 +26,7 @@
 #include "asm.S"
 
 .fpu neon
-.align
+.align 2
 
 // done in gas because .fpu neon overrides the refusal to assemble
 // instructions the selected -march/-mcpu doesn't support
@@ -95,7 +95,7 @@
     sub         r2, r2, r1
     cmpgt       r2, #30 << 3    // assume context switch if it took over 30 cycles
     addle       r3, r3, r2
-    subles      ip, ip, #1
+    subsle      ip, ip, #1
     bgt         average_loop
 
     // disable counters if we enabled them
​

x264-snapshot-20130723-2245.tar.bz2/common/arm/dct-a.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/dct-a.S Changed

 
@@ -1,7 +1,7 @@
 /****************************************************************************
  * dct-a.S: arm transform and zigzag
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/arm/dct.h -> x264-snapshot-20140321-2245.tar.bz2/common/arm/dct.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * dct.h: arm transform and zigzag
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/arm/deblock-a.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/deblock-a.S Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * deblock.S: arm deblocking
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: Mans Rullgard <mans@mansr.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/arm/mc-a.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/mc-a.S Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc.S: arm motion compensation
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *          Mans Rullgard <mans@mansr.com>
@@ -167,7 +167,7 @@
     ldr         ip, [sp, #8]
     push        {r4-r6,lr}
     cmp         ip, #32
-    ldrd        r4, [sp, #16]
+    ldrd        r4, r5, [sp, #16]
     mov         lr, #\h
     beq         x264_pixel_avg_w\w\()_neon
     rsbs        r6,  ip,  #64
@@ -447,7 +447,7 @@
 .ifc \type, full
     ldr         lr,  [r4, #32]      // denom
 .endif
-    ldrd        r4,  [r4, #32+4]    // scale, offset
+    ldrd        r4,  r5,  [r4, #32+4]    // scale, offset
     vdup.16     q0,  r4
     vdup.16     q1,  r5
 .ifc \type, full
@@ -818,8 +818,8 @@
 function x264_mc_chroma_neon
     push            {r4-r8, lr}
     vpush           {d8-d11}
-    ldrd            r4, [sp, #56]
-    ldrd            r6, [sp, #64]
+    ldrd            r4, r5, [sp, #56]
+    ldrd            r6, r7, [sp, #64]
 
     asr             lr, r6, #3
     mul             lr, r4, lr
@@ -1380,8 +1380,8 @@
 function x264_frame_init_lowres_core_neon
     push            {r4-r10,lr}
     vpush           {d8-d15}
-    ldrd            r4,  [sp, #96]
-    ldrd            r6,  [sp, #104]
+    ldrd            r4,  r5,  [sp, #96]
+    ldrd            r6,  r7,  [sp, #104]
     ldr             lr,  [sp, #112]
     sub             r10, r6,  r7            // dst_stride - width
     and             r10, r10, #~15

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc.S: arm motion compensation
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *          Mans Rullgard <mans@mansr.com>
@@ -167,7 +167,7 @@
     ldr         ip, [sp, #8]
     push        {r4-r6,lr}
     cmp         ip, #32
-    ldrd        r4, [sp, #16]
+    ldrd        r4, r5, [sp, #16]
     mov         lr, #\h
     beq         x264_pixel_avg_w\w\()_neon
     rsbs        r6,  ip,  #64
@@ -447,7 +447,7 @@
 .ifc \type, full
     ldr         lr,  [r4, #32]      // denom
 .endif
-    ldrd        r4,  [r4, #32+4]    // scale, offset
+    ldrd        r4,  r5,  [r4, #32+4]    // scale, offset
     vdup.16     q0,  r4
     vdup.16     q1,  r5
 .ifc \type, full
@@ -818,8 +818,8 @@
 function x264_mc_chroma_neon
     push            {r4-r8, lr}
     vpush           {d8-d11}
-    ldrd            r4, [sp, #56]
-    ldrd            r6, [sp, #64]
+    ldrd            r4, r5, [sp, #56]
+    ldrd            r6, r7, [sp, #64]
 
     asr             lr, r6, #3
     mul             lr, r4, lr
@@ -1380,8 +1380,8 @@
 function x264_frame_init_lowres_core_neon
     push            {r4-r10,lr}
     vpush           {d8-d15}
-    ldrd            r4,  [sp, #96]
-    ldrd            r6,  [sp, #104]
+    ldrd            r4,  r5,  [sp, #96]
+    ldrd            r6,  r7,  [sp, #104]
     ldr             lr,  [sp, #112]
     sub             r10, r6,  r7            // dst_stride - width
     and             r10, r10, #~15
​

x264-snapshot-20130723-2245.tar.bz2/common/arm/mc-c.c -> x264-snapshot-20140321-2245.tar.bz2/common/arm/mc-c.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc-c.c: arm motion compensation
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/arm/mc.h -> x264-snapshot-20140321-2245.tar.bz2/common/arm/mc.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc.h: arm motion compensation
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/arm/pixel-a.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/pixel-a.S Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * pixel.S: arm pixel metrics
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *
@@ -328,9 +328,9 @@
 function x264_pixel_sad_x\x\()_\w\()x\h\()_neon
     push        {r6-r7,lr}
 .if \x == 3
-    ldrd        r6,  [sp, #12]
+    ldrd        r6,  r7,  [sp, #12]
 .else
-    ldrd        r6,  [sp, #16]
+    ldrd        r6,  r7,  [sp, #16]
     ldr         r12, [sp, #12]
 .endif
     mov         lr,  #FENC_STRIDE
@@ -519,6 +519,38 @@
     b               x264_var_end
 .endfunc
 
+function x264_pixel_var_8x16_neon
+    vld1.64         {d16}, [r0,:64], r1
+    vld1.64         {d18}, [r0,:64], r1
+    vmull.u8        q1,  d16, d16
+    vmovl.u8        q0,  d16
+    vld1.64         {d20}, [r0,:64], r1
+    vmull.u8        q2,  d18, d18
+    vaddw.u8        q0,  q0,  d18
+
+    mov             ip,  #12
+
+    vld1.64         {d22}, [r0,:64], r1
+    VAR_SQR_SUM     q1,  q1,   q14,  d20, vpaddl.u16
+    vld1.64         {d16}, [r0,:64], r1
+    VAR_SQR_SUM     q2,  q2,   q15,  d22, vpaddl.u16
+
+1:  subs            ip,  ip,  #4
+    vld1.64         {d18}, [r0,:64], r1
+    VAR_SQR_SUM     q1,  q14,  q12, d16
+    vld1.64         {d20}, [r0,:64], r1
+    VAR_SQR_SUM     q2,  q15,  q13, d18
+    vld1.64         {d22}, [r0,:64], r1
+    VAR_SQR_SUM     q1,  q12,  q14, d20
+    beq             2f
+    vld1.64         {d16}, [r0,:64], r1
+    VAR_SQR_SUM     q2,  q13,  q15, d22
+    b               1b
+2:
+    VAR_SQR_SUM     q2,  q13,  q15, d22
+    b               x264_var_end
+.endfunc
+
 function x264_pixel_var_16x16_neon
     vld1.64         {d16-d17}, [r0,:128], r1
     vmull.u8        q12, d16, d16
@@ -596,13 +628,56 @@
     vadd.s32        d1,  d2,  d3
     vpadd.s32       d0,  d0,  d1
 
-    vmov.32         r0,  r1,  d0
+    vmov            r0,  r1,  d0
     vst1.32         {d0[1]}, [ip,:32]
     mul             r0,  r0,  r0
     sub             r0,  r1,  r0,  lsr #6
     bx              lr
 .endfunc
 
+function x264_pixel_var2_8x16_neon
+    vld1.64         {d16}, [r0,:64], r1
+    vld1.64         {d17}, [r2,:64], r3
+    vld1.64         {d18}, [r0,:64], r1
+    vld1.64         {d19}, [r2,:64], r3
+    vsubl.u8        q10, d16, d17
+    vsubl.u8        q11, d18, d19
+    SQR_ACC         q1,  d20, d21,  vmull.s16
+    vld1.64         {d16}, [r0,:64], r1
+    vadd.s16        q0,  q10, q11
+    vld1.64         {d17}, [r2,:64], r3
+    SQR_ACC         q2,  d22, d23,  vmull.s16
+    mov             ip,  #14
+1:  subs            ip,  ip,  #2
+    vld1.64         {d18}, [r0,:64], r1
+    vsubl.u8        q10, d16, d17
+    vld1.64         {d19}, [r2,:64], r3
+    vadd.s16        q0,  q0,  q10
+    SQR_ACC         q1,  d20, d21
+    vsubl.u8        q11, d18, d19
+    beq             2f
+    vld1.64         {d16}, [r0,:64], r1
+    vadd.s16        q0,  q0,  q11
+    vld1.64         {d17}, [r2,:64], r3
+    SQR_ACC         q2,  d22, d23
+    b               1b
+2:
+    vadd.s16        q0,  q0,  q11
+    SQR_ACC         q2,  d22, d23
+
+    ldr             ip,  [sp]
+    vadd.s16        d0,  d0,  d1
+    vadd.s32        q1,  q1,  q2
+    vpaddl.s16      d0,  d0
+    vadd.s32        d1,  d2,  d3
+    vpadd.s32       d0,  d0,  d1
+
+    vmov            r0,  r1,  d0
+    vst1.32         {d0[1]}, [ip,:32]
+    mul             r0,  r0,  r0
+    sub             r0,  r1,  r0,  lsr #7
+    bx              lr
+.endfunc
 
 .macro LOAD_DIFF_8x4 q0 q1 q2 q3
     vld1.32     {d1}, [r2], r3

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * pixel.S: arm pixel metrics
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *
@@ -328,9 +328,9 @@
 function x264_pixel_sad_x\x\()_\w\()x\h\()_neon
     push        {r6-r7,lr}
 .if \x == 3
-    ldrd        r6,  [sp, #12]
+    ldrd        r6,  r7,  [sp, #12]
 .else
-    ldrd        r6,  [sp, #16]
+    ldrd        r6,  r7,  [sp, #16]
     ldr         r12, [sp, #12]
 .endif
     mov         lr,  #FENC_STRIDE
@@ -519,6 +519,38 @@
     b               x264_var_end
 .endfunc
 
+function x264_pixel_var_8x16_neon
+    vld1.64         {d16}, [r0,:64], r1
+    vld1.64         {d18}, [r0,:64], r1
+    vmull.u8        q1,  d16, d16
+    vmovl.u8        q0,  d16
+    vld1.64         {d20}, [r0,:64], r1
+    vmull.u8        q2,  d18, d18
+    vaddw.u8        q0,  q0,  d18
+
+    mov             ip,  #12
+
+    vld1.64         {d22}, [r0,:64], r1
+    VAR_SQR_SUM     q1,  q1,   q14,  d20, vpaddl.u16
+    vld1.64         {d16}, [r0,:64], r1
+    VAR_SQR_SUM     q2,  q2,   q15,  d22, vpaddl.u16
+
+1:  subs            ip,  ip,  #4
+    vld1.64         {d18}, [r0,:64], r1
+    VAR_SQR_SUM     q1,  q14,  q12, d16
+    vld1.64         {d20}, [r0,:64], r1
+    VAR_SQR_SUM     q2,  q15,  q13, d18
+    vld1.64         {d22}, [r0,:64], r1
+    VAR_SQR_SUM     q1,  q12,  q14, d20
+    beq             2f
+    vld1.64         {d16}, [r0,:64], r1
+    VAR_SQR_SUM     q2,  q13,  q15, d22
+    b               1b
+2:
+    VAR_SQR_SUM     q2,  q13,  q15, d22
+    b               x264_var_end
+.endfunc
+
 function x264_pixel_var_16x16_neon
     vld1.64         {d16-d17}, [r0,:128], r1
     vmull.u8        q12, d16, d16
@@ -596,13 +628,56 @@
     vadd.s32        d1,  d2,  d3
     vpadd.s32       d0,  d0,  d1
 
-    vmov.32         r0,  r1,  d0
+    vmov            r0,  r1,  d0
     vst1.32         {d0[1]}, [ip,:32]
     mul             r0,  r0,  r0
     sub             r0,  r1,  r0,  lsr #6
     bx              lr
 .endfunc
 
+function x264_pixel_var2_8x16_neon
+    vld1.64         {d16}, [r0,:64], r1
+    vld1.64         {d17}, [r2,:64], r3
+    vld1.64         {d18}, [r0,:64], r1
+    vld1.64         {d19}, [r2,:64], r3
+    vsubl.u8        q10, d16, d17
+    vsubl.u8        q11, d18, d19
+    SQR_ACC         q1,  d20, d21,  vmull.s16
+    vld1.64         {d16}, [r0,:64], r1
+    vadd.s16        q0,  q10, q11
+    vld1.64         {d17}, [r2,:64], r3
+    SQR_ACC         q2,  d22, d23,  vmull.s16
+    mov             ip,  #14
+1:  subs            ip,  ip,  #2
+    vld1.64         {d18}, [r0,:64], r1
+    vsubl.u8        q10, d16, d17
+    vld1.64         {d19}, [r2,:64], r3
+    vadd.s16        q0,  q0,  q10
+    SQR_ACC         q1,  d20, d21
+    vsubl.u8        q11, d18, d19
+    beq             2f
+    vld1.64         {d16}, [r0,:64], r1
+    vadd.s16        q0,  q0,  q11
+    vld1.64         {d17}, [r2,:64], r3
+    SQR_ACC         q2,  d22, d23
+    b               1b
+2:
+    vadd.s16        q0,  q0,  q11
+    SQR_ACC         q2,  d22, d23
+
+    ldr             ip,  [sp]
+    vadd.s16        d0,  d0,  d1
+    vadd.s32        q1,  q1,  q2
+    vpaddl.s16      d0,  d0
+    vadd.s32        d1,  d2,  d3
+    vpadd.s32       d0,  d0,  d1
+
+    vmov            r0,  r1,  d0
+    vst1.32         {d0[1]}, [ip,:32]
+    mul             r0,  r0,  r0
+    sub             r0,  r1,  r0,  lsr #7
+    bx              lr
+.endfunc
 
 .macro LOAD_DIFF_8x4 q0 q1 q2 q3
     vld1.32     {d1}, [r2], r3
​

x264-snapshot-20130723-2245.tar.bz2/common/arm/pixel.h -> x264-snapshot-20140321-2245.tar.bz2/common/arm/pixel.h Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * pixel.h: arm pixel metrics
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *
@@ -56,8 +56,10 @@
 int x264_pixel_sa8d_16x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t );
 
 uint64_t x264_pixel_var_8x8_neon  ( uint8_t *, intptr_t );
+uint64_t x264_pixel_var_8x16_neon ( uint8_t *, intptr_t );
 uint64_t x264_pixel_var_16x16_neon( uint8_t *, intptr_t );
-int x264_pixel_var2_8x8_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int * );
+int x264_pixel_var2_8x8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, int * );
+int x264_pixel_var2_8x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int * );
 
 uint64_t x264_pixel_hadamard_ac_8x8_neon  ( uint8_t *, intptr_t );
 uint64_t x264_pixel_hadamard_ac_8x16_neon ( uint8_t *, intptr_t );

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * pixel.h: arm pixel metrics
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *
@@ -56,8 +56,10 @@
 int x264_pixel_sa8d_16x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t );
 
 uint64_t x264_pixel_var_8x8_neon  ( uint8_t *, intptr_t );
+uint64_t x264_pixel_var_8x16_neon ( uint8_t *, intptr_t );
 uint64_t x264_pixel_var_16x16_neon( uint8_t *, intptr_t );
-int x264_pixel_var2_8x8_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int * );
+int x264_pixel_var2_8x8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, int * );
+int x264_pixel_var2_8x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int * );
 
 uint64_t x264_pixel_hadamard_ac_8x8_neon  ( uint8_t *, intptr_t );
 uint64_t x264_pixel_hadamard_ac_8x16_neon ( uint8_t *, intptr_t );
​

x264-snapshot-20130723-2245.tar.bz2/common/arm/predict-a.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/predict-a.S Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * predict.S: arm intra prediction
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *          Mans Rullgard <mans@mansr.com>
@@ -181,9 +181,9 @@
 
 function x264_predict_8x8_dc_neon
     mov     ip, #0
-    ldrd    r2, [r1, #8]
+    ldrd    r2, r3, [r1, #8]
     push    {r4-r5,lr}
-    ldrd    r4, [r1, #16]
+    ldrd    r4, r5, [r1, #16]
     lsl     r3, r3, #8
     ldrb    lr, [r1, #7]
     usad8   r2, r2, ip
​

x264-snapshot-20130723-2245.tar.bz2/common/arm/predict-c.c -> x264-snapshot-20140321-2245.tar.bz2/common/arm/predict-c.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * predict.c: arm intra prediction
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/arm/predict.h -> x264-snapshot-20140321-2245.tar.bz2/common/arm/predict.h Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * predict.h: arm intra prediction
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *
@@ -26,6 +26,16 @@
 #ifndef X264_ARM_PREDICT_H
 #define X264_ARM_PREDICT_H
 
+void x264_predict_8x8_v_neon( pixel *src, pixel edge[36] );
+void x264_predict_8x8_h_neon( pixel *src, pixel edge[36] );
+void x264_predict_8x8_dc_neon( pixel *src, pixel edge[36] );
+void x264_predict_8x8c_dc_neon( pixel *src );
+void x264_predict_8x8c_h_neon( pixel *src );
+void x264_predict_8x8c_v_neon( pixel *src );
+void x264_predict_16x16_v_neon( pixel *src );
+void x264_predict_16x16_h_neon( pixel *src );
+void x264_predict_16x16_dc_neon( pixel *src );
+
 void x264_predict_4x4_init_arm( int cpu, x264_predict_t pf[12] );
 void x264_predict_8x8_init_arm( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter );
 void x264_predict_8x8c_init_arm( int cpu, x264_predict_t pf[7] );

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * predict.h: arm intra prediction
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *
@@ -26,6 +26,16 @@
 #ifndef X264_ARM_PREDICT_H
 #define X264_ARM_PREDICT_H
 
+void x264_predict_8x8_v_neon( pixel *src, pixel edge[36] );
+void x264_predict_8x8_h_neon( pixel *src, pixel edge[36] );
+void x264_predict_8x8_dc_neon( pixel *src, pixel edge[36] );
+void x264_predict_8x8c_dc_neon( pixel *src );
+void x264_predict_8x8c_h_neon( pixel *src );
+void x264_predict_8x8c_v_neon( pixel *src );
+void x264_predict_16x16_v_neon( pixel *src );
+void x264_predict_16x16_h_neon( pixel *src );
+void x264_predict_16x16_dc_neon( pixel *src );
+
 void x264_predict_4x4_init_arm( int cpu, x264_predict_t pf[12] );
 void x264_predict_8x8_init_arm( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter );
 void x264_predict_8x8c_init_arm( int cpu, x264_predict_t pf[7] );
​

x264-snapshot-20130723-2245.tar.bz2/common/arm/quant-a.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/quant-a.S Changed

@@ -1,7 +1,7 @@
 /****************************************************************************
  * quant.S: arm quantization and level-run
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *
@@ -312,7 +312,7 @@
 
 // int coeff_last( int16_t *l )
 function x264_coeff_last4_arm
-    ldrd        r2,  [r0]
+    ldrd        r2,  r3,  [r0]
     subs        r0,  r3,  #0
     movne       r0,  #2
     movne       r2,  r3
@@ -341,7 +341,7 @@
 
     subs        r1,  ip,  r1,  lsr #2
     addge       r0,  r1,  #\size - 8
-    sublts      r0,  r3,  r0,  lsr #2
+    subslt      r0,  r3,  r0,  lsr #2
     movlt       r0,  #0
     bx          lr
 .endfunc
@@ -390,7 +390,7 @@
 
     subs        r1,  ip,  r1
     addge       r0,  r1,  #32
-    sublts      r0,  ip,  r0
+    subslt      r0,  ip,  r0
     movlt       r0,  #0
     bx          lr
 .endfunc

 
@@ -1,7 +1,7 @@
 /****************************************************************************
  * quant.S: arm quantization and level-run
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *
@@ -312,7 +312,7 @@
 
 // int coeff_last( int16_t *l )
 function x264_coeff_last4_arm
-    ldrd        r2,  [r0]
+    ldrd        r2,  r3,  [r0]
     subs        r0,  r3,  #0
     movne       r0,  #2
     movne       r2,  r3
@@ -341,7 +341,7 @@
 
     subs        r1,  ip,  r1,  lsr #2
     addge       r0,  r1,  #\size - 8
-    sublts      r0,  r3,  r0,  lsr #2
+    subslt      r0,  r3,  r0,  lsr #2
     movlt       r0,  #0
     bx          lr
 .endfunc
@@ -390,7 +390,7 @@
 
     subs        r1,  ip,  r1
     addge       r0,  r1,  #32
-    sublts      r0,  ip,  r0
+    subslt      r0,  ip,  r0
     movlt       r0,  #0
     bx          lr
 .endfunc
​

x264-snapshot-20130723-2245.tar.bz2/common/arm/quant.h -> x264-snapshot-20140321-2245.tar.bz2/common/arm/quant.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * quant.h: arm quantization and level-run
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: David Conrad <lessen42@gmail.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/bitstream.c -> x264-snapshot-20140321-2245.tar.bz2/common/bitstream.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * bitstream.c: bitstream writing
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
​

x264-snapshot-20130723-2245.tar.bz2/common/bitstream.h -> x264-snapshot-20140321-2245.tar.bz2/common/bitstream.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * bitstream.h: bitstream writing
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
​

x264-snapshot-20130723-2245.tar.bz2/common/cabac.c -> x264-snapshot-20140321-2245.tar.bz2/common/cabac.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * cabac.c: arithmetic coder
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
​

x264-snapshot-20130723-2245.tar.bz2/common/cabac.h -> x264-snapshot-20140321-2245.tar.bz2/common/cabac.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * cabac.h: arithmetic coder
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
​

x264-snapshot-20130723-2245.tar.bz2/common/common.c -> x264-snapshot-20140321-2245.tar.bz2/common/common.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * common.c: misc common functions
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -32,6 +32,9 @@
 #if HAVE_MALLOC_H
 #include <malloc.h>
 #endif
+#if HAVE_THP
+#include <sys/mman.h>
+#endif
 
 const int x264_bit_depth = BIT_DEPTH;
 
@@ -342,7 +345,7 @@
             param->analyse.i_luma_deadzone[1] = 6;
             param->rc.f_qcompress = 0.8;
         }
-        else if( !strncasecmp( s, "stillimage", 5 ) )
+        else if( !strncasecmp( s, "stillimage", 10 ) )
         {
             if( psy_tuning_used++ ) goto psy_failure;
             param->i_deblocking_filter_alphac0 = -3;
@@ -668,6 +671,8 @@
     }
     OPT("bluray-compat")
         p->b_bluray_compat = atobool(value);
+    OPT("avcintra-class")
+        p->i_avcintra_class = atoi(value);
     OPT("sar")
     {
         b_error = ( 2 != sscanf( value, "%d:%d", &p->vui.i_sar_width, &p->vui.i_sar_height ) &&
@@ -876,10 +881,6 @@
     }
     OPT("log")
         p->i_log_level = atoi(value);
-#if HAVE_VISUALIZE
-    OPT("visualize")
-        p->b_visualize = atobool(value);
-#endif
     OPT("dump-yuv")
         p->psz_dump_yuv = strdup(value);
     OPT2("analyse", "partitions")
@@ -1031,6 +1032,8 @@
         p->b_vfr_input = !atobool(value);
     OPT("nal-hrd")
         b_error |= parse_enum( value, x264_nal_hrd_names, &p->i_nal_hrd );
+    OPT("filler")
+        p->rc.b_filler = atobool(value);
     OPT("pic-struct")
         p->b_pic_struct = atobool(value);
     OPT("fake-interlaced")
@@ -1099,7 +1102,7 @@
             break;
     }
     fprintf( stderr, "x264 [%s]: ", psz_prefix );
-    vfprintf( stderr, psz_fmt, arg );
+    x264_vfprintf( stderr, psz_fmt, arg );
 }
 
 /****************************************************************************
@@ -1141,7 +1144,7 @@
     };
 
     int csp = i_csp & X264_CSP_MASK;
-    if( csp <= X264_CSP_NONE || csp >= X264_CSP_MAX )
+    if( csp <= X264_CSP_NONE || csp >= X264_CSP_MAX || csp == X264_CSP_V210 )
         return -1;
     x264_picture_init( pic );
     pic->img.i_csp = i_csp;
@@ -1183,7 +1186,25 @@
 {
     uint8_t *align_buf = NULL;
 #if HAVE_MALLOC_H
-    align_buf = memalign( NATIVE_ALIGN, i_size );
+#if HAVE_THP
+#define HUGE_PAGE_SIZE 2*1024*1024
+#define HUGE_PAGE_THRESHOLD HUGE_PAGE_SIZE*7/8 /* FIXME: Is this optimal? */
+    /* Attempt to allocate huge pages to reduce TLB misses. */
+    if( i_size >= HUGE_PAGE_THRESHOLD )
+    {
+        align_buf = memalign( HUGE_PAGE_SIZE, i_size );
+        if( align_buf )
+        {
+            /* Round up to the next huge page boundary if we are close enough. */
+            size_t madv_size = (i_size + HUGE_PAGE_SIZE - HUGE_PAGE_THRESHOLD) & ~(HUGE_PAGE_SIZE-1);
+            madvise( align_buf, madv_size, MADV_HUGEPAGE );
+        }
+    }
+    else
+#undef HUGE_PAGE_SIZE
+#undef HUGE_PAGE_THRESHOLD
+#endif
+        align_buf = memalign( NATIVE_ALIGN, i_size );
 #else
     uint8_t *buf = malloc( i_size + (NATIVE_ALIGN-1) + sizeof(void **) );
     if( buf )
@@ -1246,7 +1267,7 @@
     int b_error = 0;
     size_t i_size;
     char *buf;
-    FILE *fh = fopen( filename, "rb" );
+    FILE *fh = x264_fopen( filename, "rb" );
     if( !fh )
         return NULL;
     b_error |= fseek( fh, 0, SEEK_END ) < 0;
@@ -1383,7 +1404,7 @@
         s += sprintf( s, " qp=%d", p->rc.i_qp_constant );
 
     if( p->rc.i_vbv_buffer_size )
-        s += sprintf( s, " nal_hrd=%s", x264_nal_hrd_names[p->i_nal_hrd] );
+        s += sprintf( s, " nal_hrd=%s filler=%d", x264_nal_hrd_names[p->i_nal_hrd], p->rc.b_filler );
     if( p->crop_rect.i_left | p->crop_rect.i_top | p->crop_rect.i_right | p->crop_rect.i_bottom )
         s += sprintf( s, " crop_rect=%u,%u,%u,%u", p->crop_rect.i_left, p->crop_rect.i_top,
                                                    p->crop_rect.i_right, p->crop_rect.i_bottom );

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * common.c: misc common functions
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -32,6 +32,9 @@
 #if HAVE_MALLOC_H
 #include <malloc.h>
 #endif
+#if HAVE_THP
+#include <sys/mman.h>
+#endif
 
 const int x264_bit_depth = BIT_DEPTH;
 
@@ -342,7 +345,7 @@
             param->analyse.i_luma_deadzone[1] = 6;
             param->rc.f_qcompress = 0.8;
         }
-        else if( !strncasecmp( s, "stillimage", 5 ) )
+        else if( !strncasecmp( s, "stillimage", 10 ) )
         {
             if( psy_tuning_used++ ) goto psy_failure;
             param->i_deblocking_filter_alphac0 = -3;
@@ -668,6 +671,8 @@
     }
     OPT("bluray-compat")
         p->b_bluray_compat = atobool(value);
+    OPT("avcintra-class")
+        p->i_avcintra_class = atoi(value);
     OPT("sar")
     {
         b_error = ( 2 != sscanf( value, "%d:%d", &p->vui.i_sar_width, &p->vui.i_sar_height ) &&
@@ -876,10 +881,6 @@
     }
     OPT("log")
         p->i_log_level = atoi(value);
-#if HAVE_VISUALIZE
-    OPT("visualize")
-        p->b_visualize = atobool(value);
-#endif
     OPT("dump-yuv")
         p->psz_dump_yuv = strdup(value);
     OPT2("analyse", "partitions")
@@ -1031,6 +1032,8 @@
         p->b_vfr_input = !atobool(value);
     OPT("nal-hrd")
         b_error |= parse_enum( value, x264_nal_hrd_names, &p->i_nal_hrd );
+    OPT("filler")
+        p->rc.b_filler = atobool(value);
     OPT("pic-struct")
         p->b_pic_struct = atobool(value);
     OPT("fake-interlaced")
@@ -1099,7 +1102,7 @@
             break;
     }
     fprintf( stderr, "x264 [%s]: ", psz_prefix );
-    vfprintf( stderr, psz_fmt, arg );
+    x264_vfprintf( stderr, psz_fmt, arg );
 }
 
 /****************************************************************************
@@ -1141,7 +1144,7 @@
     };
 
     int csp = i_csp & X264_CSP_MASK;
-    if( csp <= X264_CSP_NONE || csp >= X264_CSP_MAX )
+    if( csp <= X264_CSP_NONE || csp >= X264_CSP_MAX || csp == X264_CSP_V210 )
         return -1;
     x264_picture_init( pic );
     pic->img.i_csp = i_csp;
@@ -1183,7 +1186,25 @@
 {
     uint8_t *align_buf = NULL;
 #if HAVE_MALLOC_H
-    align_buf = memalign( NATIVE_ALIGN, i_size );
+#if HAVE_THP
+#define HUGE_PAGE_SIZE 2*1024*1024
+#define HUGE_PAGE_THRESHOLD HUGE_PAGE_SIZE*7/8 /* FIXME: Is this optimal? */
+    /* Attempt to allocate huge pages to reduce TLB misses. */
+    if( i_size >= HUGE_PAGE_THRESHOLD )
+    {
+        align_buf = memalign( HUGE_PAGE_SIZE, i_size );
+        if( align_buf )
+        {
+            /* Round up to the next huge page boundary if we are close enough. */
+            size_t madv_size = (i_size + HUGE_PAGE_SIZE - HUGE_PAGE_THRESHOLD) & ~(HUGE_PAGE_SIZE-1);
+            madvise( align_buf, madv_size, MADV_HUGEPAGE );
+        }
+    }
+    else
+#undef HUGE_PAGE_SIZE
+#undef HUGE_PAGE_THRESHOLD
+#endif
+        align_buf = memalign( NATIVE_ALIGN, i_size );
 #else
     uint8_t *buf = malloc( i_size + (NATIVE_ALIGN-1) + sizeof(void **) );
     if( buf )
@@ -1246,7 +1267,7 @@
     int b_error = 0;
     size_t i_size;
     char *buf;
-    FILE *fh = fopen( filename, "rb" );
+    FILE *fh = x264_fopen( filename, "rb" );
     if( !fh )
         return NULL;
     b_error |= fseek( fh, 0, SEEK_END ) < 0;
@@ -1383,7 +1404,7 @@
         s += sprintf( s, " qp=%d", p->rc.i_qp_constant );
 
     if( p->rc.i_vbv_buffer_size )
-        s += sprintf( s, " nal_hrd=%s", x264_nal_hrd_names[p->i_nal_hrd] );
+        s += sprintf( s, " nal_hrd=%s filler=%d", x264_nal_hrd_names[p->i_nal_hrd], p->rc.b_filler );
     if( p->crop_rect.i_left | p->crop_rect.i_top | p->crop_rect.i_right | p->crop_rect.i_bottom )
         s += sprintf( s, " crop_rect=%u,%u,%u,%u", p->crop_rect.i_left, p->crop_rect.i_top,
                                                    p->crop_rect.i_right, p->crop_rect.i_bottom );
​

x264-snapshot-20130723-2245.tar.bz2/common/common.h -> x264-snapshot-20140321-2245.tar.bz2/common/common.h Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * common.h: misc common functions
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -54,6 +54,31 @@
     memset( var, 0, size );\
 } while( 0 )
 
+/* Macros for merging multiple allocations into a single large malloc, for improved
+ * use with huge pages. */
+
+/* Needs to be enough to contain any set of buffers that use combined allocations */
+#define PREALLOC_BUF_SIZE 1024
+
+#define PREALLOC_INIT\
+    int    prealloc_idx = 0;\
+    size_t prealloc_size = 0;\
+    uint8_t **preallocs[PREALLOC_BUF_SIZE];
+
+#define PREALLOC( var, size )\
+do {\
+    var = (void*)prealloc_size;\
+    preallocs[prealloc_idx++] = (uint8_t**)&var;\
+    prealloc_size += ALIGN(size, NATIVE_ALIGN);\
+} while(0)
+
+#define PREALLOC_END( ptr )\
+do {\
+    CHECKED_MALLOC( ptr, prealloc_size );\
+    while( prealloc_idx-- )\
+        *preallocs[prealloc_idx] += (intptr_t)ptr;\
+} while(0)
+
 #define ARRAY_SIZE(array)  (sizeof(array)/sizeof(array[0]))
 
 #define X264_BFRAME_MAX 16
@@ -84,6 +109,7 @@
 
 #define NALU_OVERHEAD 5 // startcode + NAL type costs 5 bytes per frame
 #define FILLER_OVERHEAD (NALU_OVERHEAD+1)
+#define SEI_OVERHEAD (NALU_OVERHEAD - (h->param.b_annexb && !h->param.i_avcintra_class && (h->out.i_nal-1)))
 
 /****************************************************************************
  * Includes
@@ -491,6 +517,9 @@
     uint8_t *nal_buffer;
     int      nal_buffer_size;
 
+    x264_t          *reconfig_h;
+    int             reconfig;
+
     /**** thread synchronization starts here ****/
 
     /* frame number/poc */
@@ -523,15 +552,15 @@
     int             (*dequant4_mf[4])[16];   /* [4][6][16] */
     int             (*dequant8_mf[4])[64];   /* [4][6][64] */
     /* quantization matrix for trellis, [cqm][qp][coef] */
-    int             (*unquant4_mf[4])[16];   /* [4][52][16] */
-    int             (*unquant8_mf[4])[64];   /* [4][52][64] */
+    int             (*unquant4_mf[4])[16];   /* [4][QP_MAX_SPEC+1][16] */
+    int             (*unquant8_mf[4])[64];   /* [4][QP_MAX_SPEC+1][64] */
     /* quantization matrix for deadzone */
-    udctcoef        (*quant4_mf[4])[16];     /* [4][52][16] */
-    udctcoef        (*quant8_mf[4])[64];     /* [4][52][64] */
-    udctcoef        (*quant4_bias[4])[16];   /* [4][52][16] */
-    udctcoef        (*quant8_bias[4])[64];   /* [4][52][64] */
-    udctcoef        (*quant4_bias0[4])[16];  /* [4][52][16] */
-    udctcoef        (*quant8_bias0[4])[64];  /* [4][52][64] */
+    udctcoef        (*quant4_mf[4])[16];     /* [4][QP_MAX_SPEC+1][16] */
+    udctcoef        (*quant8_mf[4])[64];     /* [4][QP_MAX_SPEC+1][64] */
+    udctcoef        (*quant4_bias[4])[16];   /* [4][QP_MAX_SPEC+1][16] */
+    udctcoef        (*quant8_bias[4])[64];   /* [4][QP_MAX_SPEC+1][64] */
+    udctcoef        (*quant4_bias0[4])[16];  /* [4][QP_MAX_SPEC+1][16] */
+    udctcoef        (*quant8_bias0[4])[64];  /* [4][QP_MAX_SPEC+1][64] */
     udctcoef        (*nr_offset_emergency)[4][64];
 
     /* mv/ref cost arrays. */
@@ -699,6 +728,7 @@
          * and won't be copied from one thread to another */
 
         /* mb table */
+        uint8_t *base;                      /* base pointer for all malloced data in this mb */
         int8_t  *type;                      /* mb type */
         uint8_t *partition;                 /* mb partition */
         int8_t  *qp;                        /* mb qp */
@@ -937,9 +967,6 @@
     x264_deblock_function_t loopf;
     x264_bitstream_function_t bsf;
 
-#if HAVE_VISUALIZE
-    struct visualize_t *visualize;
-#endif
     x264_lookahead_t *lookahead;
 
 #if HAVE_OPENCL

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * common.h: misc common functions
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -54,6 +54,31 @@
     memset( var, 0, size );\
 } while( 0 )
 
+/* Macros for merging multiple allocations into a single large malloc, for improved
+ * use with huge pages. */
+
+/* Needs to be enough to contain any set of buffers that use combined allocations */
+#define PREALLOC_BUF_SIZE 1024
+
+#define PREALLOC_INIT\
+    int    prealloc_idx = 0;\
+    size_t prealloc_size = 0;\
+    uint8_t **preallocs[PREALLOC_BUF_SIZE];
+
+#define PREALLOC( var, size )\
+do {\
+    var = (void*)prealloc_size;\
+    preallocs[prealloc_idx++] = (uint8_t**)&var;\
+    prealloc_size += ALIGN(size, NATIVE_ALIGN);\
+} while(0)
+
+#define PREALLOC_END( ptr )\
+do {\
+    CHECKED_MALLOC( ptr, prealloc_size );\
+    while( prealloc_idx-- )\
+        *preallocs[prealloc_idx] += (intptr_t)ptr;\
+} while(0)
+
 #define ARRAY_SIZE(array)  (sizeof(array)/sizeof(array[0]))
 
 #define X264_BFRAME_MAX 16
@@ -84,6 +109,7 @@
 
 #define NALU_OVERHEAD 5 // startcode + NAL type costs 5 bytes per frame
 #define FILLER_OVERHEAD (NALU_OVERHEAD+1)
+#define SEI_OVERHEAD (NALU_OVERHEAD - (h->param.b_annexb && !h->param.i_avcintra_class && (h->out.i_nal-1)))
 
 /****************************************************************************
  * Includes
@@ -491,6 +517,9 @@
     uint8_t *nal_buffer;
     int      nal_buffer_size;
 
+    x264_t          *reconfig_h;
+    int             reconfig;
+
     /**** thread synchronization starts here ****/
 
     /* frame number/poc */
@@ -523,15 +552,15 @@
     int             (*dequant4_mf[4])[16];   /* [4][6][16] */
     int             (*dequant8_mf[4])[64];   /* [4][6][64] */
     /* quantization matrix for trellis, [cqm][qp][coef] */
-    int             (*unquant4_mf[4])[16];   /* [4][52][16] */
-    int             (*unquant8_mf[4])[64];   /* [4][52][64] */
+    int             (*unquant4_mf[4])[16];   /* [4][QP_MAX_SPEC+1][16] */
+    int             (*unquant8_mf[4])[64];   /* [4][QP_MAX_SPEC+1][64] */
     /* quantization matrix for deadzone */
-    udctcoef        (*quant4_mf[4])[16];     /* [4][52][16] */
-    udctcoef        (*quant8_mf[4])[64];     /* [4][52][64] */
-    udctcoef        (*quant4_bias[4])[16];   /* [4][52][16] */
-    udctcoef        (*quant8_bias[4])[64];   /* [4][52][64] */
-    udctcoef        (*quant4_bias0[4])[16];  /* [4][52][16] */
-    udctcoef        (*quant8_bias0[4])[64];  /* [4][52][64] */
+    udctcoef        (*quant4_mf[4])[16];     /* [4][QP_MAX_SPEC+1][16] */
+    udctcoef        (*quant8_mf[4])[64];     /* [4][QP_MAX_SPEC+1][64] */
+    udctcoef        (*quant4_bias[4])[16];   /* [4][QP_MAX_SPEC+1][16] */
+    udctcoef        (*quant8_bias[4])[64];   /* [4][QP_MAX_SPEC+1][64] */
+    udctcoef        (*quant4_bias0[4])[16];  /* [4][QP_MAX_SPEC+1][16] */
+    udctcoef        (*quant8_bias0[4])[64];  /* [4][QP_MAX_SPEC+1][64] */
     udctcoef        (*nr_offset_emergency)[4][64];
 
     /* mv/ref cost arrays. */
@@ -699,6 +728,7 @@
          * and won't be copied from one thread to another */
 
         /* mb table */
+        uint8_t *base;                      /* base pointer for all malloced data in this mb */
         int8_t  *type;                      /* mb type */
         uint8_t *partition;                 /* mb partition */
         int8_t  *qp;                        /* mb qp */
@@ -937,9 +967,6 @@
     x264_deblock_function_t loopf;
     x264_bitstream_function_t bsf;
 
-#if HAVE_VISUALIZE
-    struct visualize_t *visualize;
-#endif
     x264_lookahead_t *lookahead;
 
 #if HAVE_OPENCL
​

x264-snapshot-20130723-2245.tar.bz2/common/cpu.c -> x264-snapshot-20140321-2245.tar.bz2/common/cpu.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * cpu.c: cpu detection
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -74,7 +74,6 @@
 #undef MMX2
     {"Cache32",         X264_CPU_CACHELINE_32},
     {"Cache64",         X264_CPU_CACHELINE_64},
-    {"SSEMisalign",     X264_CPU_SSE_MISALIGN},
     {"LZCNT",           X264_CPU_LZCNT},
     {"BMI1",            X264_CPU_BMI1},
     {"BMI2",            X264_CPU_BMI1|X264_CPU_BMI2},
@@ -123,7 +122,7 @@
     uint32_t cpu = 0;
     uint32_t eax, ebx, ecx, edx;
     uint32_t vendor[4] = {0};
-    uint32_t max_extended_cap;
+    uint32_t max_extended_cap, max_basic_cap;
     int cache;
 
 #if !ARCH_X86_64
@@ -132,7 +131,8 @@
 #endif
 
     x264_cpu_cpuid( 0, &eax, vendor+0, vendor+2, vendor+1 );
-    if( eax == 0 )
+    max_basic_cap = eax;
+    if( max_basic_cap == 0 )
         return 0;
 
     x264_cpu_cpuid( 1, &eax, &ebx, &ecx, &edx );
@@ -169,15 +169,18 @@
         }
     }
 
-    x264_cpu_cpuid( 7, &eax, &ebx, &ecx, &edx );
-    /* AVX2 requires OS support, but BMI1/2 don't. */
-    if( (cpu&X264_CPU_AVX) && (ebx&0x00000020) )
-        cpu |= X264_CPU_AVX2;
-    if( ebx&0x00000008 )
+    if( max_basic_cap >= 7 )
     {
-        cpu |= X264_CPU_BMI1;
-        if( ebx&0x00000100 )
-            cpu |= X264_CPU_BMI2;
+        x264_cpu_cpuid( 7, &eax, &ebx, &ecx, &edx );
+        /* AVX2 requires OS support, but BMI1/2 don't. */
+        if( (cpu&X264_CPU_AVX) && (ebx&0x00000020) )
+            cpu |= X264_CPU_AVX2;
+        if( ebx&0x00000008 )
+        {
+            cpu |= X264_CPU_BMI1;
+            if( ebx&0x00000100 )
+                cpu |= X264_CPU_BMI2;
+        }
     }
 
     if( cpu & X264_CPU_SSSE3 )
@@ -210,12 +213,6 @@
             }
         }
 
-        if( ecx&0x00000080 ) /* Misalign SSE */
-        {
-            cpu |= X264_CPU_SSE_MISALIGN;
-            x264_cpu_mask_misalign_sse();
-        }
-
         if( cpu & X264_CPU_AVX )
         {
             if( ecx&0x00000800 ) /* XOP */
@@ -274,7 +271,7 @@
             x264_cpu_cpuid( 0x80000006, &eax, &ebx, &ecx, &edx );
             cache = ecx&0xff; // cacheline size
         }
-        if( !cache )
+        if( !cache && max_basic_cap >= 2 )
         {
             // Cache and TLB Information
             static const char cache32_ids[] = { 0x0a, 0x0c, 0x41, 0x42, 0x43, 0x44, 0x45, 0x82, 0x83, 0x84, 0x85, 0 };
@@ -307,7 +304,7 @@
             x264_log( NULL, X264_LOG_WARNING, "unable to determine cacheline size\n" );
     }
 
-#if BROKEN_STACK_ALIGNMENT
+#if STACK_ALIGNMENT < 16
     cpu |= X264_CPU_STACK_MOD4;
 #endif
 
@@ -429,6 +426,10 @@
     return sysconf( _SC_NPROCESSORS_ONLN );
 
 #elif SYS_LINUX
+#ifdef __ANDROID__
+    // Android NDK does not expose sched_getaffinity
+    return sysconf( _SC_NPROCESSORS_CONF );
+#else
     cpu_set_t p_aff;
     memset( &p_aff, 0, sizeof(p_aff) );
     if( sched_getaffinity( 0, sizeof(p_aff), &p_aff ) )
@@ -441,6 +442,7 @@
         np += (((uint8_t *)&p_aff)[bit / 8] >> (bit % 8)) & 1;
     return np;
 #endif
+#endif
 
 #elif SYS_BEOS
     system_info info;

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * cpu.c: cpu detection
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -74,7 +74,6 @@
 #undef MMX2
     {"Cache32",         X264_CPU_CACHELINE_32},
     {"Cache64",         X264_CPU_CACHELINE_64},
-    {"SSEMisalign",     X264_CPU_SSE_MISALIGN},
     {"LZCNT",           X264_CPU_LZCNT},
     {"BMI1",            X264_CPU_BMI1},
     {"BMI2",            X264_CPU_BMI1|X264_CPU_BMI2},
@@ -123,7 +122,7 @@
     uint32_t cpu = 0;
     uint32_t eax, ebx, ecx, edx;
     uint32_t vendor[4] = {0};
-    uint32_t max_extended_cap;
+    uint32_t max_extended_cap, max_basic_cap;
     int cache;
 
 #if !ARCH_X86_64
@@ -132,7 +131,8 @@
 #endif
 
     x264_cpu_cpuid( 0, &eax, vendor+0, vendor+2, vendor+1 );
-    if( eax == 0 )
+    max_basic_cap = eax;
+    if( max_basic_cap == 0 )
         return 0;
 
     x264_cpu_cpuid( 1, &eax, &ebx, &ecx, &edx );
@@ -169,15 +169,18 @@
         }
     }
 
-    x264_cpu_cpuid( 7, &eax, &ebx, &ecx, &edx );
-    /* AVX2 requires OS support, but BMI1/2 don't. */
-    if( (cpu&X264_CPU_AVX) && (ebx&0x00000020) )
-        cpu |= X264_CPU_AVX2;
-    if( ebx&0x00000008 )
+    if( max_basic_cap >= 7 )
     {
-        cpu |= X264_CPU_BMI1;
-        if( ebx&0x00000100 )
-            cpu |= X264_CPU_BMI2;
+        x264_cpu_cpuid( 7, &eax, &ebx, &ecx, &edx );
+        /* AVX2 requires OS support, but BMI1/2 don't. */
+        if( (cpu&X264_CPU_AVX) && (ebx&0x00000020) )
+            cpu |= X264_CPU_AVX2;
+        if( ebx&0x00000008 )
+        {
+            cpu |= X264_CPU_BMI1;
+            if( ebx&0x00000100 )
+                cpu |= X264_CPU_BMI2;
+        }
     }
 
     if( cpu & X264_CPU_SSSE3 )
@@ -210,12 +213,6 @@
             }
         }
 
-        if( ecx&0x00000080 ) /* Misalign SSE */
-        {
-            cpu |= X264_CPU_SSE_MISALIGN;
-            x264_cpu_mask_misalign_sse();
-        }
-
         if( cpu & X264_CPU_AVX )
         {
             if( ecx&0x00000800 ) /* XOP */
@@ -274,7 +271,7 @@
             x264_cpu_cpuid( 0x80000006, &eax, &ebx, &ecx, &edx );
             cache = ecx&0xff; // cacheline size
         }
-        if( !cache )
+        if( !cache && max_basic_cap >= 2 )
         {
             // Cache and TLB Information
             static const char cache32_ids[] = { 0x0a, 0x0c, 0x41, 0x42, 0x43, 0x44, 0x45, 0x82, 0x83, 0x84, 0x85, 0 };
@@ -307,7 +304,7 @@
             x264_log( NULL, X264_LOG_WARNING, "unable to determine cacheline size\n" );
     }
 
-#if BROKEN_STACK_ALIGNMENT
+#if STACK_ALIGNMENT < 16
     cpu |= X264_CPU_STACK_MOD4;
 #endif
 
@@ -429,6 +426,10 @@
     return sysconf( _SC_NPROCESSORS_ONLN );
 
 #elif SYS_LINUX
+#ifdef __ANDROID__
+    // Android NDK does not expose sched_getaffinity
+    return sysconf( _SC_NPROCESSORS_CONF );
+#else
     cpu_set_t p_aff;
     memset( &p_aff, 0, sizeof(p_aff) );
     if( sched_getaffinity( 0, sizeof(p_aff), &p_aff ) )
@@ -441,6 +442,7 @@
         np += (((uint8_t *)&p_aff)[bit / 8] >> (bit % 8)) & 1;
     return np;
 #endif
+#endif
 
 #elif SYS_BEOS
     system_info info;
​

x264-snapshot-20130723-2245.tar.bz2/common/cpu.h -> x264-snapshot-20140321-2245.tar.bz2/common/cpu.h Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * cpu.h: cpu detection
  *****************************************************************************
- * Copyright (C) 2004-2013 x264 project
+ * Copyright (C) 2004-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *
@@ -45,7 +45,6 @@
 #define x264_emms()
 #endif
 #define x264_sfence x264_cpu_sfence
-void     x264_cpu_mask_misalign_sse( void );
 void     x264_safe_intel_cpu_indicator_init( void );
 
 /* kludge:
@@ -58,8 +57,8 @@
  * alignment between functions (osdep.h handles manual alignment of arrays
  * if it doesn't).
  */
-#if (ARCH_X86 || HAVE_32B_STACK_ALIGNMENT) && HAVE_MMX
-int x264_stack_align( void (*func)(), ... );
+#if (ARCH_X86 || STACK_ALIGNMENT > 16) && HAVE_MMX
+intptr_t x264_stack_align( void (*func)(), ... );
 #define x264_stack_align(func,...) x264_stack_align((void (*)())func, __VA_ARGS__)
 #else
 #define x264_stack_align(func,...) func(__VA_ARGS__)

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * cpu.h: cpu detection
  *****************************************************************************
- * Copyright (C) 2004-2013 x264 project
+ * Copyright (C) 2004-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *
@@ -45,7 +45,6 @@
 #define x264_emms()
 #endif
 #define x264_sfence x264_cpu_sfence
-void     x264_cpu_mask_misalign_sse( void );
 void     x264_safe_intel_cpu_indicator_init( void );
 
 /* kludge:
@@ -58,8 +57,8 @@
  * alignment between functions (osdep.h handles manual alignment of arrays
  * if it doesn't).
  */
-#if (ARCH_X86 || HAVE_32B_STACK_ALIGNMENT) && HAVE_MMX
-int x264_stack_align( void (*func)(), ... );
+#if (ARCH_X86 || STACK_ALIGNMENT > 16) && HAVE_MMX
+intptr_t x264_stack_align( void (*func)(), ... );
 #define x264_stack_align(func,...) x264_stack_align((void (*)())func, __VA_ARGS__)
 #else
 #define x264_stack_align(func,...) func(__VA_ARGS__)
​

x264-snapshot-20130723-2245.tar.bz2/common/dct.c -> x264-snapshot-20140321-2245.tar.bz2/common/dct.c Changed

 
@@ -1,11 +1,11 @@
 /*****************************************************************************
  * dct.c: transform and zigzag
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
- *          Henrik Gramner <hengar-6@student.ltu.se>
+ *          Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
​

x264-snapshot-20130723-2245.tar.bz2/common/dct.h -> x264-snapshot-20140321-2245.tar.bz2/common/dct.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * dct.h: transform and zigzag
  *****************************************************************************
- * Copyright (C) 2004-2013 x264 project
+ * Copyright (C) 2004-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/deblock.c -> x264-snapshot-20140321-2245.tar.bz2/common/deblock.c Changed

 
@@ -1,12 +1,12 @@
 /*****************************************************************************
  * deblock.c: deblocking
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
- *          Henrik Gramner <hengar-6@student.ltu.se>
+ *          Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
​

x264-snapshot-20130723-2245.tar.bz2/common/frame.c -> x264-snapshot-20140321-2245.tar.bz2/common/frame.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * frame.c: frame handling
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -53,6 +53,7 @@
         case X264_CSP_NV16:
         case X264_CSP_I422:
         case X264_CSP_YV16:
+        case X264_CSP_V210:
             return X264_CSP_NV16;
         case X264_CSP_I444:
         case X264_CSP_YV24:
@@ -86,6 +87,7 @@
 #endif
 
     CHECKED_MALLOCZERO( frame, sizeof(x264_frame_t) );
+    PREALLOC_INIT
 
     /* allocate frame data (+64 for extra data for me) */
     i_width  = h->mb.i_mb_width*16;
@@ -124,7 +126,7 @@
 
     for( int i = 0; i < h->param.i_bframe + 2; i++ )
         for( int j = 0; j < h->param.i_bframe + 2; j++ )
-            CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
+            PREALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
 
     frame->i_poc = -1;
     frame->i_type = X264_TYPE_AUTO;
@@ -149,13 +151,9 @@
     {
         int chroma_padv = i_padv >> (i_csp == X264_CSP_NV12);
         int chroma_plane_size = (frame->i_stride[1] * (frame->i_lines[1] + 2*chroma_padv));
-        CHECKED_MALLOC( frame->buffer[1], chroma_plane_size * sizeof(pixel) );
-        frame->plane[1] = frame->buffer[1] + frame->i_stride[1] * chroma_padv + PADH;
+        PREALLOC( frame->buffer[1], chroma_plane_size * sizeof(pixel) );
         if( PARAM_INTERLACED )
-        {
-            CHECKED_MALLOC( frame->buffer_fld[1], chroma_plane_size * sizeof(pixel) );
-            frame->plane_fld[1] = frame->buffer_fld[1] + frame->i_stride[1] * chroma_padv + PADH;
-        }
+            PREALLOC( frame->buffer_fld[1], chroma_plane_size * sizeof(pixel) );
     }
 
     /* all 4 luma planes allocated together, since the cacheline split code
@@ -167,24 +165,15 @@
         if( h->param.analyse.i_subpel_refine && b_fdec )
         {
             /* FIXME: Don't allocate both buffers in non-adaptive MBAFF. */
-            CHECKED_MALLOC( frame->buffer[p], 4*luma_plane_size * sizeof(pixel) );
+            PREALLOC( frame->buffer[p], 4*luma_plane_size * sizeof(pixel) );
             if( PARAM_INTERLACED )
-                CHECKED_MALLOC( frame->buffer_fld[p], 4*luma_plane_size * sizeof(pixel) );
-            for( int i = 0; i < 4; i++ )
-            {
-                frame->filtered[p][i] = frame->buffer[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH;
-                frame->filtered_fld[p][i] = frame->buffer_fld[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH;
-            }
-            frame->plane[p] = frame->filtered[p][0];
-            frame->plane_fld[p] = frame->filtered_fld[p][0];
+                PREALLOC( frame->buffer_fld[p], 4*luma_plane_size * sizeof(pixel) );
         }
         else
         {
-            CHECKED_MALLOC( frame->buffer[p], luma_plane_size * sizeof(pixel) );
+            PREALLOC( frame->buffer[p], luma_plane_size * sizeof(pixel) );
             if( PARAM_INTERLACED )
-                CHECKED_MALLOC( frame->buffer_fld[p], luma_plane_size * sizeof(pixel) );
-            frame->filtered[p][0] = frame->plane[p] = frame->buffer[p] + frame->i_stride[p] * i_padv + PADH;
-            frame->filtered_fld[p][0] = frame->plane_fld[p] = frame->buffer_fld[p] + frame->i_stride[p] * i_padv + PADH;
+                PREALLOC( frame->buffer_fld[p], luma_plane_size * sizeof(pixel) );
         }
     }
 
@@ -192,36 +181,30 @@
 
     if( b_fdec ) /* fdec frame */
     {
-        CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
-        CHECKED_MALLOC( frame->mb_partition, i_mb_count * sizeof(uint8_t));
-        CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
-        CHECKED_MALLOC( frame->mv16x16, 2*(i_mb_count+1) * sizeof(int16_t) );
-        M32( frame->mv16x16[0] ) = 0;
-        frame->mv16x16++;
-        CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
+        PREALLOC( frame->mb_type, i_mb_count * sizeof(int8_t) );
+        PREALLOC( frame->mb_partition, i_mb_count * sizeof(uint8_t) );
+        PREALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
+        PREALLOC( frame->mv16x16, 2*(i_mb_count+1) * sizeof(int16_t) );
+        PREALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
         if( h->param.i_bframe )
         {
-            CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
-            CHECKED_MALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) );
+            PREALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
+            PREALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) );
         }
         else
         {
             frame->mv[1]  = NULL;
             frame->ref[1] = NULL;
         }
-        CHECKED_MALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) );
-        CHECKED_MALLOC( frame->f_row_qp, i_lines/16 * sizeof(float) );
-        CHECKED_MALLOC( frame->f_row_qscale, i_lines/16 * sizeof(float) );
+        PREALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) );
+        PREALLOC( frame->f_row_qp, i_lines/16 * sizeof(float) );
+        PREALLOC( frame->f_row_qscale, i_lines/16 * sizeof(float) );
         if( h->param.analyse.i_me_method >= X264_ME_ESA )
-        {
-            CHECKED_MALLOC( frame->buffer[3],
-                            frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) << h->frames.b_have_sub8x8_esa );
-            frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
-        }
+            PREALLOC( frame->buffer[3], frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) << h->frames.b_have_sub8x8_esa );
         if( PARAM_INTERLACED )
-            CHECKED_MALLOC( frame->field, i_mb_count * sizeof(uint8_t) );
+            PREALLOC( frame->field, i_mb_count * sizeof(uint8_t) );
         if( h->param.analyse.b_mb_info )
-            CHECKED_MALLOC( frame->effective_qp, i_mb_count * sizeof(uint8_t) );
+            PREALLOC( frame->effective_qp, i_mb_count * sizeof(uint8_t) );
     }
     else /* fenc frame */
     {
@@ -229,30 +212,85 @@
         {
             int luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
 
-            CHECKED_MALLOC( frame->buffer_lowres[0], 4 * luma_plane_size * sizeof(pixel) );
-            for( int i = 0; i < 4; i++ )
-                frame->lowres[i] = frame->buffer_lowres[0] + (frame->i_stride_lowres * PADV + PADH) + i * luma_plane_size;
+            PREALLOC( frame->buffer_lowres[0], 4 * luma_plane_size * sizeof(pixel) );
 
             for( int j = 0; j <= !!h->param.i_bframe; j++ )
                 for( int i = 0; i <= h->param.i_bframe; i++ )
                 {
-                    CHECKED_MALLOCZERO( frame->lowres_mvs[j][i], 2*h->mb.i_mb_count*sizeof(int16_t) );
-                    CHECKED_MALLOC( frame->lowres_mv_costs[j][i], h->mb.i_mb_count*sizeof(int) );
+                    PREALLOC( frame->lowres_mvs[j][i], 2*h->mb.i_mb_count*sizeof(int16_t) );
+                    PREALLOC( frame->lowres_mv_costs[j][i], h->mb.i_mb_count*sizeof(int) );
                 }
-            CHECKED_MALLOC( frame->i_propagate_cost, (i_mb_count+7) * sizeof(uint16_t) );
+            PREALLOC( frame->i_propagate_cost, (i_mb_count+7) * sizeof(uint16_t) );
             for( int j = 0; j <= h->param.i_bframe+1; j++ )
                 for( int i = 0; i <= h->param.i_bframe+1; i++ )
-                    CHECKED_MALLOC( frame->lowres_costs[j][i], (i_mb_count+3) * sizeof(uint16_t) );
-            frame->i_intra_cost = frame->lowres_costs[0][0];
-            memset( frame->i_intra_cost, -1, (i_mb_count+3) * sizeof(uint16_t) );
+                    PREALLOC( frame->lowres_costs[j][i], (i_mb_count+3) * sizeof(uint16_t) );
+
         }
         if( h->param.rc.i_aq_mode )
         {
-            CHECKED_MALLOC( frame->f_qp_offset, h->mb.i_mb_count * sizeof(float) );
-            CHECKED_MALLOC( frame->f_qp_offset_aq, h->mb.i_mb_count * sizeof(float) );
+            PREALLOC( frame->f_qp_offset, h->mb.i_mb_count * sizeof(float) );
+            PREALLOC( frame->f_qp_offset_aq, h->mb.i_mb_count * sizeof(float) );
             if( h->frames.b_have_lowres )
+                PREALLOC( frame->i_inv_qscale_factor, (h->mb.i_mb_count+3) * sizeof(uint16_t) );
+        }
+    }
+
+    PREALLOC_END( frame->base );
+
+    if( i_csp == X264_CSP_NV12 || i_csp == X264_CSP_NV16 )
+    {
+        int chroma_padv = i_padv >> (i_csp == X264_CSP_NV12);
+        frame->plane[1] = frame->buffer[1] + frame->i_stride[1] * chroma_padv + PADH;
+        if( PARAM_INTERLACED )
+            frame->plane_fld[1] = frame->buffer_fld[1] + frame->i_stride[1] * chroma_padv + PADH;
+    }
+
+    for( int p = 0; p < luma_plane_count; p++ )
+    {
+        int luma_plane_size = align_plane_size( frame->i_stride[p] * (frame->i_lines[p] + 2*i_padv), disalign );
+        if( h->param.analyse.i_subpel_refine && b_fdec )
+        {
+            for( int i = 0; i < 4; i++ )
+            {
+                frame->filtered[p][i] = frame->buffer[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH;
+                frame->filtered_fld[p][i] = frame->buffer_fld[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH;
+            }
+            frame->plane[p] = frame->filtered[p][0];
+            frame->plane_fld[p] = frame->filtered_fld[p][0];
+        }
+        else
+        {
+            frame->filtered[p][0] = frame->plane[p] = frame->buffer[p] + frame->i_stride[p] * i_padv + PADH;
+            frame->filtered_fld[p][0] = frame->plane_fld[p] = frame->buffer_fld[p] + frame->i_stride[p] * i_padv + PADH;
+        }
+    }
+
+    if( b_fdec )
+    {
+        M32( frame->mv16x16[0] ) = 0;
+        frame->mv16x16++;
+
+        if( h->param.analyse.i_me_method >= X264_ME_ESA )
+            frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
+    }
+    else
+    {
+        if( h->frames.b_have_lowres )
+        {
+            int luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
+            for( int i = 0; i < 4; i++ )
+                frame->lowres[i] = frame->buffer_lowres[0] + (frame->i_stride_lowres * PADV + PADH) + i * luma_plane_size;
+
+            for( int j = 0; j <= !!h->param.i_bframe; j++ )
+                for( int i = 0; i <= h->param.i_bframe; i++ )
+                    memset( frame->lowres_mvs[j][i], 0, 2*h->mb.i_mb_count*sizeof(int16_t) );
+
+            frame->i_intra_cost = frame->lowres_costs[0][0];
+            memset( frame->i_intra_cost, -1, (i_mb_count+3) * sizeof(uint16_t) );
+
+            if( h->param.rc.i_aq_mode )
                 /* shouldn't really be initialized, just silences a valgrind false-positive in x264_mbtree_propagate_cost_sse2 */
-                CHECKED_MALLOCZERO( frame->i_inv_qscale_factor, (h->mb.i_mb_count+3) * sizeof(uint16_t) );
+                memset( frame->i_inv_qscale_factor, 0, (h->mb.i_mb_count+3) * sizeof(uint16_t) );
         }
     }
 
@@ -278,42 +316,8 @@
      * so freeing those pointers would cause a double free later. */
     if( !frame->b_duplicate )
     {
-        for( int i = 0; i < 4; i++ )
-        {
-            x264_free( frame->buffer[i] );
-            x264_free( frame->buffer_fld[i] );
-        }
-        for( int i = 0; i < 4; i++ )
-            x264_free( frame->buffer_lowres[i] );
-        for( int i = 0; i < X264_BFRAME_MAX+2; i++ )
-            for( int j = 0; j < X264_BFRAME_MAX+2; j++ )
-                x264_free( frame->i_row_satds[i][j] );
-        for( int j = 0; j < 2; j++ )
-            for( int i = 0; i <= X264_BFRAME_MAX; i++ )
-            {
-                x264_free( frame->lowres_mvs[j][i] );
-                x264_free( frame->lowres_mv_costs[j][i] );
-            }
-        x264_free( frame->i_propagate_cost );
-        for( int j = 0; j <= X264_BFRAME_MAX+1; j++ )
-            for( int i = 0; i <= X264_BFRAME_MAX+1; i++ )
-                x264_free( frame->lowres_costs[j][i] );
-        x264_free( frame->f_qp_offset );
-        x264_free( frame->f_qp_offset_aq );
-        x264_free( frame->i_inv_qscale_factor );
-        x264_free( frame->i_row_bits );
-        x264_free( frame->f_row_qp );
-        x264_free( frame->f_row_qscale );
-        x264_free( frame->field );
-        x264_free( frame->effective_qp );
-        x264_free( frame->mb_type );
-        x264_free( frame->mb_partition );
-        x264_free( frame->mv[0] );
-        x264_free( frame->mv[1] );
-        if( frame->mv16x16 )
-            x264_free( frame->mv16x16-1 );
-        x264_free( frame->ref[0] );
-        x264_free( frame->ref[1] );
+        x264_free( frame->base );
+
         if( frame->param && frame->param->param_free )
             frame->param->param_free( frame->param );
         if( frame->mb_info_free )
@@ -377,6 +381,12 @@
     }
 #endif
 
+    if( BIT_DEPTH != 10 && i_csp == X264_CSP_V210 )
+    {
+        x264_log( h, X264_LOG_ERROR, "v210 input is only compatible with bit-depth of 10 bits\n" );
+        return -1;
+    }
+
     dst->i_type     = src->i_type;
     dst->i_qpplus1  = src->i_qpplus1;
     dst->i_pts      = dst->i_reordered_pts = src->i_pts;
@@ -389,7 +399,16 @@
 
     uint8_t *pix[3];
     int stride[3];
-    if ( i_csp >= X264_CSP_BGR )
+    if( i_csp == X264_CSP_V210 )
+    {
+         stride[0] = src->img.i_stride[0];
+         pix[0] = src->img.plane[0];
+
+         h->mc.plane_copy_deinterleave_v210( dst->plane[0], dst->i_stride[0],
+                                             dst->plane[1], dst->i_stride[1],
+                                             (uint32_t *)pix[0], stride[0]/sizeof(uint32_t), h->param.i_width, h->param.i_height );
+    }
+    else if( i_csp >= X264_CSP_BGR )
     {
          stride[0] = src->img.i_stride[0];
          pix[0] = src->img.plane[0];

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * frame.c: frame handling
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -53,6 +53,7 @@
         case X264_CSP_NV16:
         case X264_CSP_I422:
         case X264_CSP_YV16:
+        case X264_CSP_V210:
             return X264_CSP_NV16;
         case X264_CSP_I444:
         case X264_CSP_YV24:
@@ -86,6 +87,7 @@
 #endif
 
     CHECKED_MALLOCZERO( frame, sizeof(x264_frame_t) );
+    PREALLOC_INIT
 
     /* allocate frame data (+64 for extra data for me) */
     i_width  = h->mb.i_mb_width*16;
@@ -124,7 +126,7 @@
 
     for( int i = 0; i < h->param.i_bframe + 2; i++ )
         for( int j = 0; j < h->param.i_bframe + 2; j++ )
-            CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
+            PREALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
 
     frame->i_poc = -1;
     frame->i_type = X264_TYPE_AUTO;
@@ -149,13 +151,9 @@
     {
         int chroma_padv = i_padv >> (i_csp == X264_CSP_NV12);
         int chroma_plane_size = (frame->i_stride[1] * (frame->i_lines[1] + 2*chroma_padv));
-        CHECKED_MALLOC( frame->buffer[1], chroma_plane_size * sizeof(pixel) );
-        frame->plane[1] = frame->buffer[1] + frame->i_stride[1] * chroma_padv + PADH;
+        PREALLOC( frame->buffer[1], chroma_plane_size * sizeof(pixel) );
         if( PARAM_INTERLACED )
-        {
-            CHECKED_MALLOC( frame->buffer_fld[1], chroma_plane_size * sizeof(pixel) );
-            frame->plane_fld[1] = frame->buffer_fld[1] + frame->i_stride[1] * chroma_padv + PADH;
-        }
+            PREALLOC( frame->buffer_fld[1], chroma_plane_size * sizeof(pixel) );
     }
 
     /* all 4 luma planes allocated together, since the cacheline split code
@@ -167,24 +165,15 @@
         if( h->param.analyse.i_subpel_refine && b_fdec )
         {
             /* FIXME: Don't allocate both buffers in non-adaptive MBAFF. */
-            CHECKED_MALLOC( frame->buffer[p], 4*luma_plane_size * sizeof(pixel) );
+            PREALLOC( frame->buffer[p], 4*luma_plane_size * sizeof(pixel) );
             if( PARAM_INTERLACED )
-                CHECKED_MALLOC( frame->buffer_fld[p], 4*luma_plane_size * sizeof(pixel) );
-            for( int i = 0; i < 4; i++ )
-            {
-                frame->filtered[p][i] = frame->buffer[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH;
-                frame->filtered_fld[p][i] = frame->buffer_fld[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH;
-            }
-            frame->plane[p] = frame->filtered[p][0];
-            frame->plane_fld[p] = frame->filtered_fld[p][0];
+                PREALLOC( frame->buffer_fld[p], 4*luma_plane_size * sizeof(pixel) );
         }
         else
         {
-            CHECKED_MALLOC( frame->buffer[p], luma_plane_size * sizeof(pixel) );
+            PREALLOC( frame->buffer[p], luma_plane_size * sizeof(pixel) );
             if( PARAM_INTERLACED )
-                CHECKED_MALLOC( frame->buffer_fld[p], luma_plane_size * sizeof(pixel) );
-            frame->filtered[p][0] = frame->plane[p] = frame->buffer[p] + frame->i_stride[p] * i_padv + PADH;
-            frame->filtered_fld[p][0] = frame->plane_fld[p] = frame->buffer_fld[p] + frame->i_stride[p] * i_padv + PADH;
+                PREALLOC( frame->buffer_fld[p], luma_plane_size * sizeof(pixel) );
         }
     }
 
@@ -192,36 +181,30 @@
 
     if( b_fdec ) /* fdec frame */
     {
-        CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
-        CHECKED_MALLOC( frame->mb_partition, i_mb_count * sizeof(uint8_t));
-        CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
-        CHECKED_MALLOC( frame->mv16x16, 2*(i_mb_count+1) * sizeof(int16_t) );
-        M32( frame->mv16x16[0] ) = 0;
-        frame->mv16x16++;
-        CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
+        PREALLOC( frame->mb_type, i_mb_count * sizeof(int8_t) );
+        PREALLOC( frame->mb_partition, i_mb_count * sizeof(uint8_t) );
+        PREALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
+        PREALLOC( frame->mv16x16, 2*(i_mb_count+1) * sizeof(int16_t) );
+        PREALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
         if( h->param.i_bframe )
         {
-            CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
-            CHECKED_MALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) );
+            PREALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
+            PREALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) );
         }
         else
         {
             frame->mv[1]  = NULL;
             frame->ref[1] = NULL;
         }
-        CHECKED_MALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) );
-        CHECKED_MALLOC( frame->f_row_qp, i_lines/16 * sizeof(float) );
-        CHECKED_MALLOC( frame->f_row_qscale, i_lines/16 * sizeof(float) );
+        PREALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) );
+        PREALLOC( frame->f_row_qp, i_lines/16 * sizeof(float) );
+        PREALLOC( frame->f_row_qscale, i_lines/16 * sizeof(float) );
         if( h->param.analyse.i_me_method >= X264_ME_ESA )
-        {
-            CHECKED_MALLOC( frame->buffer[3],
-                            frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) << h->frames.b_have_sub8x8_esa );
-            frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
-        }
+            PREALLOC( frame->buffer[3], frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) << h->frames.b_have_sub8x8_esa );
         if( PARAM_INTERLACED )
-            CHECKED_MALLOC( frame->field, i_mb_count * sizeof(uint8_t) );
+            PREALLOC( frame->field, i_mb_count * sizeof(uint8_t) );
         if( h->param.analyse.b_mb_info )
-            CHECKED_MALLOC( frame->effective_qp, i_mb_count * sizeof(uint8_t) );
+            PREALLOC( frame->effective_qp, i_mb_count * sizeof(uint8_t) );
     }
     else /* fenc frame */
     {
@@ -229,30 +212,85 @@
         {
             int luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
 
-            CHECKED_MALLOC( frame->buffer_lowres[0], 4 * luma_plane_size * sizeof(pixel) );
-            for( int i = 0; i < 4; i++ )
-                frame->lowres[i] = frame->buffer_lowres[0] + (frame->i_stride_lowres * PADV + PADH) + i * luma_plane_size;
+            PREALLOC( frame->buffer_lowres[0], 4 * luma_plane_size * sizeof(pixel) );
 
             for( int j = 0; j <= !!h->param.i_bframe; j++ )
                 for( int i = 0; i <= h->param.i_bframe; i++ )
                 {
-                    CHECKED_MALLOCZERO( frame->lowres_mvs[j][i], 2*h->mb.i_mb_count*sizeof(int16_t) );
-                    CHECKED_MALLOC( frame->lowres_mv_costs[j][i], h->mb.i_mb_count*sizeof(int) );
+                    PREALLOC( frame->lowres_mvs[j][i], 2*h->mb.i_mb_count*sizeof(int16_t) );
+                    PREALLOC( frame->lowres_mv_costs[j][i], h->mb.i_mb_count*sizeof(int) );
                 }
-            CHECKED_MALLOC( frame->i_propagate_cost, (i_mb_count+7) * sizeof(uint16_t) );
+            PREALLOC( frame->i_propagate_cost, (i_mb_count+7) * sizeof(uint16_t) );
             for( int j = 0; j <= h->param.i_bframe+1; j++ )
                 for( int i = 0; i <= h->param.i_bframe+1; i++ )
-                    CHECKED_MALLOC( frame->lowres_costs[j][i], (i_mb_count+3) * sizeof(uint16_t) );
-            frame->i_intra_cost = frame->lowres_costs[0][0];
-            memset( frame->i_intra_cost, -1, (i_mb_count+3) * sizeof(uint16_t) );
+                    PREALLOC( frame->lowres_costs[j][i], (i_mb_count+3) * sizeof(uint16_t) );
+
         }
         if( h->param.rc.i_aq_mode )
         {
-            CHECKED_MALLOC( frame->f_qp_offset, h->mb.i_mb_count * sizeof(float) );
-            CHECKED_MALLOC( frame->f_qp_offset_aq, h->mb.i_mb_count * sizeof(float) );
+            PREALLOC( frame->f_qp_offset, h->mb.i_mb_count * sizeof(float) );
+            PREALLOC( frame->f_qp_offset_aq, h->mb.i_mb_count * sizeof(float) );
             if( h->frames.b_have_lowres )
+                PREALLOC( frame->i_inv_qscale_factor, (h->mb.i_mb_count+3) * sizeof(uint16_t) );
+        }
+    }
+
+    PREALLOC_END( frame->base );
+
+    if( i_csp == X264_CSP_NV12 || i_csp == X264_CSP_NV16 )
+    {
+        int chroma_padv = i_padv >> (i_csp == X264_CSP_NV12);
+        frame->plane[1] = frame->buffer[1] + frame->i_stride[1] * chroma_padv + PADH;
+        if( PARAM_INTERLACED )
+            frame->plane_fld[1] = frame->buffer_fld[1] + frame->i_stride[1] * chroma_padv + PADH;
+    }
+
+    for( int p = 0; p < luma_plane_count; p++ )
+    {
+        int luma_plane_size = align_plane_size( frame->i_stride[p] * (frame->i_lines[p] + 2*i_padv), disalign );
+        if( h->param.analyse.i_subpel_refine && b_fdec )
+        {
+            for( int i = 0; i < 4; i++ )
+            {
+                frame->filtered[p][i] = frame->buffer[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH;
+                frame->filtered_fld[p][i] = frame->buffer_fld[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH;
+            }
+            frame->plane[p] = frame->filtered[p][0];
+            frame->plane_fld[p] = frame->filtered_fld[p][0];
+        }
+        else
+        {
+            frame->filtered[p][0] = frame->plane[p] = frame->buffer[p] + frame->i_stride[p] * i_padv + PADH;
+            frame->filtered_fld[p][0] = frame->plane_fld[p] = frame->buffer_fld[p] + frame->i_stride[p] * i_padv + PADH;
+        }
+    }
+
+    if( b_fdec )
+    {
+        M32( frame->mv16x16[0] ) = 0;
+        frame->mv16x16++;
+
+        if( h->param.analyse.i_me_method >= X264_ME_ESA )
+            frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
+    }
+    else
+    {
+        if( h->frames.b_have_lowres )
+        {
+            int luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
+            for( int i = 0; i < 4; i++ )
+                frame->lowres[i] = frame->buffer_lowres[0] + (frame->i_stride_lowres * PADV + PADH) + i * luma_plane_size;
+
+            for( int j = 0; j <= !!h->param.i_bframe; j++ )
+                for( int i = 0; i <= h->param.i_bframe; i++ )
+                    memset( frame->lowres_mvs[j][i], 0, 2*h->mb.i_mb_count*sizeof(int16_t) );
+
+            frame->i_intra_cost = frame->lowres_costs[0][0];
+            memset( frame->i_intra_cost, -1, (i_mb_count+3) * sizeof(uint16_t) );
+
+            if( h->param.rc.i_aq_mode )
                 /* shouldn't really be initialized, just silences a valgrind false-positive in x264_mbtree_propagate_cost_sse2 */
-                CHECKED_MALLOCZERO( frame->i_inv_qscale_factor, (h->mb.i_mb_count+3) * sizeof(uint16_t) );
+                memset( frame->i_inv_qscale_factor, 0, (h->mb.i_mb_count+3) * sizeof(uint16_t) );
         }
     }
 
@@ -278,42 +316,8 @@
      * so freeing those pointers would cause a double free later. */
     if( !frame->b_duplicate )
     {
-        for( int i = 0; i < 4; i++ )
-        {
-            x264_free( frame->buffer[i] );
-            x264_free( frame->buffer_fld[i] );
-        }
-        for( int i = 0; i < 4; i++ )
-            x264_free( frame->buffer_lowres[i] );
-        for( int i = 0; i < X264_BFRAME_MAX+2; i++ )
-            for( int j = 0; j < X264_BFRAME_MAX+2; j++ )
-                x264_free( frame->i_row_satds[i][j] );
-        for( int j = 0; j < 2; j++ )
-            for( int i = 0; i <= X264_BFRAME_MAX; i++ )
-            {
-                x264_free( frame->lowres_mvs[j][i] );
-                x264_free( frame->lowres_mv_costs[j][i] );
-            }
-        x264_free( frame->i_propagate_cost );
-        for( int j = 0; j <= X264_BFRAME_MAX+1; j++ )
-            for( int i = 0; i <= X264_BFRAME_MAX+1; i++ )
-                x264_free( frame->lowres_costs[j][i] );
-        x264_free( frame->f_qp_offset );
-        x264_free( frame->f_qp_offset_aq );
-        x264_free( frame->i_inv_qscale_factor );
-        x264_free( frame->i_row_bits );
-        x264_free( frame->f_row_qp );
-        x264_free( frame->f_row_qscale );
-        x264_free( frame->field );
-        x264_free( frame->effective_qp );
-        x264_free( frame->mb_type );
-        x264_free( frame->mb_partition );
-        x264_free( frame->mv[0] );
-        x264_free( frame->mv[1] );
-        if( frame->mv16x16 )
-            x264_free( frame->mv16x16-1 );
-        x264_free( frame->ref[0] );
-        x264_free( frame->ref[1] );
+        x264_free( frame->base );
+
         if( frame->param && frame->param->param_free )
             frame->param->param_free( frame->param );
         if( frame->mb_info_free )
@@ -377,6 +381,12 @@
     }
 #endif
 
+    if( BIT_DEPTH != 10 && i_csp == X264_CSP_V210 )
+    {
+        x264_log( h, X264_LOG_ERROR, "v210 input is only compatible with bit-depth of 10 bits\n" );
+        return -1;
+    }
+
     dst->i_type     = src->i_type;
     dst->i_qpplus1  = src->i_qpplus1;
     dst->i_pts      = dst->i_reordered_pts = src->i_pts;
@@ -389,7 +399,16 @@
 
     uint8_t *pix[3];
     int stride[3];
-    if ( i_csp >= X264_CSP_BGR )
+    if( i_csp == X264_CSP_V210 )
+    {
+         stride[0] = src->img.i_stride[0];
+         pix[0] = src->img.plane[0];
+
+         h->mc.plane_copy_deinterleave_v210( dst->plane[0], dst->i_stride[0],
+                                             dst->plane[1], dst->i_stride[1],
+                                             (uint32_t *)pix[0], stride[0]/sizeof(uint32_t), h->param.i_width, h->param.i_height );
+    }
+    else if( i_csp >= X264_CSP_BGR )
     {
          stride[0] = src->img.i_stride[0];
          pix[0] = src->img.plane[0];
​

x264-snapshot-20130723-2245.tar.bz2/common/frame.h -> x264-snapshot-20140321-2245.tar.bz2/common/frame.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * frame.h: frame handling
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -35,6 +35,7 @@
 typedef struct x264_frame
 {
     /* */
+    uint8_t *base;       /* Base pointer for all malloced data in this frame. */
     int     i_poc;
     int     i_delta_poc[2];
     int     i_type;
​

x264-snapshot-20130723-2245.tar.bz2/common/macroblock.c -> x264-snapshot-20140321-2245.tar.bz2/common/macroblock.c Changed

@@ -1,12 +1,12 @@
 /*****************************************************************************
  * macroblock.c: macroblock common functions
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Jason Garrett-Glaser <darkshikari@gmail.com>
  *          Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
- *          Henrik Gramner <hengar-6@student.ltu.se>
+ *          Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -256,25 +256,26 @@
 
     h->mb.b_interlaced = PARAM_INTERLACED;
 
-    CHECKED_MALLOC( h->mb.qp, i_mb_count * sizeof(int8_t) );
-    CHECKED_MALLOC( h->mb.cbp, i_mb_count * sizeof(int16_t) );
-    CHECKED_MALLOC( h->mb.mb_transform_size, i_mb_count * sizeof(int8_t) );
-    CHECKED_MALLOC( h->mb.slice_table, i_mb_count * sizeof(uint16_t) );
-    memset( h->mb.slice_table, -1, i_mb_count * sizeof(uint16_t) );
+    PREALLOC_INIT
+
+    PREALLOC( h->mb.qp, i_mb_count * sizeof(int8_t) );
+    PREALLOC( h->mb.cbp, i_mb_count * sizeof(int16_t) );
+    PREALLOC( h->mb.mb_transform_size, i_mb_count * sizeof(int8_t) );
+    PREALLOC( h->mb.slice_table, i_mb_count * sizeof(uint16_t) );
 
     /* 0 -> 3 top(4), 4 -> 6 : left(3) */
-    CHECKED_MALLOC( h->mb.intra4x4_pred_mode, i_mb_count * 8 * sizeof(int8_t) );
+    PREALLOC( h->mb.intra4x4_pred_mode, i_mb_count * 8 * sizeof(int8_t) );
 
     /* all coeffs */
-    CHECKED_MALLOC( h->mb.non_zero_count, i_mb_count * 48 * sizeof(uint8_t) );
+    PREALLOC( h->mb.non_zero_count, i_mb_count * 48 * sizeof(uint8_t) );
 
     if( h->param.b_cabac )
     {
-        CHECKED_MALLOC( h->mb.skipbp, i_mb_count * sizeof(int8_t) );
-        CHECKED_MALLOC( h->mb.chroma_pred_mode, i_mb_count * sizeof(int8_t) );
-        CHECKED_MALLOC( h->mb.mvd[0], i_mb_count * sizeof( **h->mb.mvd ) );
+        PREALLOC( h->mb.skipbp, i_mb_count * sizeof(int8_t) );
+        PREALLOC( h->mb.chroma_pred_mode, i_mb_count * sizeof(int8_t) );
+        PREALLOC( h->mb.mvd[0], i_mb_count * sizeof( **h->mb.mvd ) );
         if( h->param.i_bframe )
-            CHECKED_MALLOC( h->mb.mvd[1], i_mb_count * sizeof( **h->mb.mvd ) );
+            PREALLOC( h->mb.mvd[1], i_mb_count * sizeof( **h->mb.mvd ) );
     }
 
     for( int i = 0; i < 2; i++ )
@@ -284,11 +285,7 @@
             i_refs = X264_MIN(X264_REF_MAX, i_refs + 1 + (BIT_DEPTH == 8)); //smart weights add two duplicate frames, one in >8-bit
 
         for( int j = !i; j < i_refs; j++ )
-        {
-            CHECKED_MALLOC( h->mb.mvr[i][j], 2 * (i_mb_count + 1) * sizeof(int16_t) );
-            M32( h->mb.mvr[i][j][0] ) = 0;
-            h->mb.mvr[i][j]++;
-        }
+            PREALLOC( h->mb.mvr[i][j], 2 * (i_mb_count + 1) * sizeof(int16_t) );
     }
 
     if( h->param.analyse.i_weighted_pred )
@@ -325,7 +322,24 @@
         }
 
         for( int i = 0; i < numweightbuf; i++ )
-            CHECKED_MALLOC( h->mb.p_weight_buf[i], luma_plane_size * sizeof(pixel) );
+            PREALLOC( h->mb.p_weight_buf[i], luma_plane_size * sizeof(pixel) );
+    }
+
+    PREALLOC_END( h->mb.base );
+
+    memset( h->mb.slice_table, -1, i_mb_count * sizeof(uint16_t) );
+
+    for( int i = 0; i < 2; i++ )
+    {
+        int i_refs = X264_MIN(X264_REF_MAX, (i ? 1 + !!h->param.i_bframe_pyramid : h->param.i_frame_reference) ) << PARAM_INTERLACED;
+        if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
+            i_refs = X264_MIN(X264_REF_MAX, i_refs + 1 + (BIT_DEPTH == 8)); //smart weights add two duplicate frames, one in >8-bit
+
+        for( int j = !i; j < i_refs; j++ )
+        {
+            M32( h->mb.mvr[i][j][0] ) = 0;
+            h->mb.mvr[i][j]++;
+        }
     }
 
     return 0;
@@ -334,26 +348,7 @@
 }
 void x264_macroblock_cache_free( x264_t *h )
 {
-    for( int i = 0; i < 2; i++ )
-        for( int j = !i; j < X264_REF_MAX*2; j++ )
-            if( h->mb.mvr[i][j] )
-                x264_free( h->mb.mvr[i][j]-1 );
-    for( int i = 0; i < X264_REF_MAX; i++ )
-        x264_free( h->mb.p_weight_buf[i] );
-
-    if( h->param.b_cabac )
-    {
-        x264_free( h->mb.skipbp );
-        x264_free( h->mb.chroma_pred_mode );
-        x264_free( h->mb.mvd[0] );
-        x264_free( h->mb.mvd[1] );
-    }
-    x264_free( h->mb.slice_table );
-    x264_free( h->mb.intra4x4_pred_mode );
-    x264_free( h->mb.non_zero_count );
-    x264_free( h->mb.mb_transform_size );
-    x264_free( h->mb.cbp );
-    x264_free( h->mb.qp );
+    x264_free( h->mb.base );
 }
 
 int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
@@ -394,7 +389,7 @@
             ((me_range*2+24) * sizeof(int16_t) + (me_range+4) * (me_range+1) * 4 * sizeof(mvsad_t));
         scratch_size = X264_MAX3( buf_hpel, buf_ssim, buf_tesa );
     }
-    int buf_mbtree = h->param.rc.b_mb_tree * ((h->mb.i_mb_width+7)&~7) * sizeof(int);
+    int buf_mbtree = h->param.rc.b_mb_tree * ((h->mb.i_mb_width+7)&~7) * sizeof(int16_t);
     scratch_size = X264_MAX( scratch_size, buf_mbtree );
     if( scratch_size )
         CHECKED_MALLOC( h->scratch_buffer, scratch_size );
@@ -402,7 +397,9 @@
         h->scratch_buffer = NULL;
 
     int buf_lookahead_threads = (h->mb.i_mb_height + (4 + 32) * h->param.i_lookahead_threads) * sizeof(int) * 2;
-    CHECKED_MALLOC( h->scratch_buffer2, buf_lookahead_threads );
+    int buf_mbtree2 = buf_mbtree * 12; /* size of the internal propagate_list asm buffer */
+    scratch_size = X264_MAX( buf_lookahead_threads, buf_mbtree2 );
+    CHECKED_MALLOC( h->scratch_buffer2, scratch_size );
 
     return 0;
 fail:
@@ -1258,8 +1255,13 @@
         }
     }
 
-    if( b_mbaff && mb_x == 0 && !(mb_y&1) && mb_y > 0 )
-        h->mb.field_decoding_flag = h->mb.field[h->mb.i_mb_xy - h->mb.i_mb_stride];
+    if( b_mbaff && mb_x == 0 && !(mb_y&1) )
+    {
+        if( h->mb.i_mb_top_xy >= h->sh.i_first_mb )
+            h->mb.field_decoding_flag = h->mb.field[h->mb.i_mb_top_xy];
+        else
+            h->mb.field_decoding_flag = 0;
+    }
 
     /* Check whether skip here would cause decoder to predict interlace mode incorrectly.
      * FIXME: It might be better to change the interlace type rather than forcing a skip to be non-skip. */
@@ -1267,26 +1269,8 @@
     if( b_mbaff )
     {
         if( MB_INTERLACED != h->mb.field_decoding_flag &&
-            h->mb.i_mb_prev_xy >= 0 && IS_SKIP(h->mb.type[h->mb.i_mb_prev_xy]) )
+            (mb_y&1) && IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride]) )
             h->mb.b_allow_skip = 0;
-        if( (mb_y&1) && IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride]) )
-        {
-            if( h->mb.i_neighbour & MB_LEFT )
-            {
-                if( h->mb.field[h->mb.i_mb_xy - 1] != MB_INTERLACED )
-                    h->mb.b_allow_skip = 0;
-            }
-            else if( h->mb.i_neighbour & MB_TOP )
-            {
-                if( h->mb.field[h->mb.i_mb_top_xy] != MB_INTERLACED )
-                    h->mb.b_allow_skip = 0;
-            }
-            else // Frame mb pair is predicted
-            {
-                if( MB_INTERLACED )
-                    h->mb.b_allow_skip = 0;
-            }
-        }
     }
 
     if( h->param.b_cabac )

 
@@ -1,12 +1,12 @@
 /*****************************************************************************
  * macroblock.c: macroblock common functions
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Jason Garrett-Glaser <darkshikari@gmail.com>
  *          Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
- *          Henrik Gramner <hengar-6@student.ltu.se>
+ *          Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -256,25 +256,26 @@
 
     h->mb.b_interlaced = PARAM_INTERLACED;
 
-    CHECKED_MALLOC( h->mb.qp, i_mb_count * sizeof(int8_t) );
-    CHECKED_MALLOC( h->mb.cbp, i_mb_count * sizeof(int16_t) );
-    CHECKED_MALLOC( h->mb.mb_transform_size, i_mb_count * sizeof(int8_t) );
-    CHECKED_MALLOC( h->mb.slice_table, i_mb_count * sizeof(uint16_t) );
-    memset( h->mb.slice_table, -1, i_mb_count * sizeof(uint16_t) );
+    PREALLOC_INIT
+
+    PREALLOC( h->mb.qp, i_mb_count * sizeof(int8_t) );
+    PREALLOC( h->mb.cbp, i_mb_count * sizeof(int16_t) );
+    PREALLOC( h->mb.mb_transform_size, i_mb_count * sizeof(int8_t) );
+    PREALLOC( h->mb.slice_table, i_mb_count * sizeof(uint16_t) );
 
     /* 0 -> 3 top(4), 4 -> 6 : left(3) */
-    CHECKED_MALLOC( h->mb.intra4x4_pred_mode, i_mb_count * 8 * sizeof(int8_t) );
+    PREALLOC( h->mb.intra4x4_pred_mode, i_mb_count * 8 * sizeof(int8_t) );
 
     /* all coeffs */
-    CHECKED_MALLOC( h->mb.non_zero_count, i_mb_count * 48 * sizeof(uint8_t) );
+    PREALLOC( h->mb.non_zero_count, i_mb_count * 48 * sizeof(uint8_t) );
 
     if( h->param.b_cabac )
     {
-        CHECKED_MALLOC( h->mb.skipbp, i_mb_count * sizeof(int8_t) );
-        CHECKED_MALLOC( h->mb.chroma_pred_mode, i_mb_count * sizeof(int8_t) );
-        CHECKED_MALLOC( h->mb.mvd[0], i_mb_count * sizeof( **h->mb.mvd ) );
+        PREALLOC( h->mb.skipbp, i_mb_count * sizeof(int8_t) );
+        PREALLOC( h->mb.chroma_pred_mode, i_mb_count * sizeof(int8_t) );
+        PREALLOC( h->mb.mvd[0], i_mb_count * sizeof( **h->mb.mvd ) );
         if( h->param.i_bframe )
-            CHECKED_MALLOC( h->mb.mvd[1], i_mb_count * sizeof( **h->mb.mvd ) );
+            PREALLOC( h->mb.mvd[1], i_mb_count * sizeof( **h->mb.mvd ) );
     }
 
     for( int i = 0; i < 2; i++ )
@@ -284,11 +285,7 @@
             i_refs = X264_MIN(X264_REF_MAX, i_refs + 1 + (BIT_DEPTH == 8)); //smart weights add two duplicate frames, one in >8-bit
 
         for( int j = !i; j < i_refs; j++ )
-        {
-            CHECKED_MALLOC( h->mb.mvr[i][j], 2 * (i_mb_count + 1) * sizeof(int16_t) );
-            M32( h->mb.mvr[i][j][0] ) = 0;
-            h->mb.mvr[i][j]++;
-        }
+            PREALLOC( h->mb.mvr[i][j], 2 * (i_mb_count + 1) * sizeof(int16_t) );
     }
 
     if( h->param.analyse.i_weighted_pred )
@@ -325,7 +322,24 @@
         }
 
         for( int i = 0; i < numweightbuf; i++ )
-            CHECKED_MALLOC( h->mb.p_weight_buf[i], luma_plane_size * sizeof(pixel) );
+            PREALLOC( h->mb.p_weight_buf[i], luma_plane_size * sizeof(pixel) );
+    }
+
+    PREALLOC_END( h->mb.base );
+
+    memset( h->mb.slice_table, -1, i_mb_count * sizeof(uint16_t) );
+
+    for( int i = 0; i < 2; i++ )
+    {
+        int i_refs = X264_MIN(X264_REF_MAX, (i ? 1 + !!h->param.i_bframe_pyramid : h->param.i_frame_reference) ) << PARAM_INTERLACED;
+        if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
+            i_refs = X264_MIN(X264_REF_MAX, i_refs + 1 + (BIT_DEPTH == 8)); //smart weights add two duplicate frames, one in >8-bit
+
+        for( int j = !i; j < i_refs; j++ )
+        {
+            M32( h->mb.mvr[i][j][0] ) = 0;
+            h->mb.mvr[i][j]++;
+        }
     }
 
     return 0;
@@ -334,26 +348,7 @@
 }
 void x264_macroblock_cache_free( x264_t *h )
 {
-    for( int i = 0; i < 2; i++ )
-        for( int j = !i; j < X264_REF_MAX*2; j++ )
-            if( h->mb.mvr[i][j] )
-                x264_free( h->mb.mvr[i][j]-1 );
-    for( int i = 0; i < X264_REF_MAX; i++ )
-        x264_free( h->mb.p_weight_buf[i] );
-
-    if( h->param.b_cabac )
-    {
-        x264_free( h->mb.skipbp );
-        x264_free( h->mb.chroma_pred_mode );
-        x264_free( h->mb.mvd[0] );
-        x264_free( h->mb.mvd[1] );
-    }
-    x264_free( h->mb.slice_table );
-    x264_free( h->mb.intra4x4_pred_mode );
-    x264_free( h->mb.non_zero_count );
-    x264_free( h->mb.mb_transform_size );
-    x264_free( h->mb.cbp );
-    x264_free( h->mb.qp );
+    x264_free( h->mb.base );
 }
 
 int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
@@ -394,7 +389,7 @@
             ((me_range*2+24) * sizeof(int16_t) + (me_range+4) * (me_range+1) * 4 * sizeof(mvsad_t));
         scratch_size = X264_MAX3( buf_hpel, buf_ssim, buf_tesa );
     }
-    int buf_mbtree = h->param.rc.b_mb_tree * ((h->mb.i_mb_width+7)&~7) * sizeof(int);
+    int buf_mbtree = h->param.rc.b_mb_tree * ((h->mb.i_mb_width+7)&~7) * sizeof(int16_t);
     scratch_size = X264_MAX( scratch_size, buf_mbtree );
     if( scratch_size )
         CHECKED_MALLOC( h->scratch_buffer, scratch_size );
@@ -402,7 +397,9 @@
         h->scratch_buffer = NULL;
 
     int buf_lookahead_threads = (h->mb.i_mb_height + (4 + 32) * h->param.i_lookahead_threads) * sizeof(int) * 2;
-    CHECKED_MALLOC( h->scratch_buffer2, buf_lookahead_threads );
+    int buf_mbtree2 = buf_mbtree * 12; /* size of the internal propagate_list asm buffer */
+    scratch_size = X264_MAX( buf_lookahead_threads, buf_mbtree2 );
+    CHECKED_MALLOC( h->scratch_buffer2, scratch_size );
 
     return 0;
 fail:
@@ -1258,8 +1255,13 @@
         }
     }
 
-    if( b_mbaff && mb_x == 0 && !(mb_y&1) && mb_y > 0 )
-        h->mb.field_decoding_flag = h->mb.field[h->mb.i_mb_xy - h->mb.i_mb_stride];
+    if( b_mbaff && mb_x == 0 && !(mb_y&1) )
+    {
+        if( h->mb.i_mb_top_xy >= h->sh.i_first_mb )
+            h->mb.field_decoding_flag = h->mb.field[h->mb.i_mb_top_xy];
+        else
+            h->mb.field_decoding_flag = 0;
+    }
 
     /* Check whether skip here would cause decoder to predict interlace mode incorrectly.
      * FIXME: It might be better to change the interlace type rather than forcing a skip to be non-skip. */
@@ -1267,26 +1269,8 @@
     if( b_mbaff )
     {
         if( MB_INTERLACED != h->mb.field_decoding_flag &&
-            h->mb.i_mb_prev_xy >= 0 && IS_SKIP(h->mb.type[h->mb.i_mb_prev_xy]) )
+            (mb_y&1) && IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride]) )
             h->mb.b_allow_skip = 0;
-        if( (mb_y&1) && IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride]) )
-        {
-            if( h->mb.i_neighbour & MB_LEFT )
-            {
-                if( h->mb.field[h->mb.i_mb_xy - 1] != MB_INTERLACED )
-                    h->mb.b_allow_skip = 0;
-            }
-            else if( h->mb.i_neighbour & MB_TOP )
-            {
-                if( h->mb.field[h->mb.i_mb_top_xy] != MB_INTERLACED )
-                    h->mb.b_allow_skip = 0;
-            }
-            else // Frame mb pair is predicted
-            {
-                if( MB_INTERLACED )
-                    h->mb.b_allow_skip = 0;
-            }
-        }
     }
 
     if( h->param.b_cabac )
​

x264-snapshot-20130723-2245.tar.bz2/common/macroblock.h -> x264-snapshot-20140321-2245.tar.bz2/common/macroblock.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * macroblock.h: macroblock common functions
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
​

x264-snapshot-20130723-2245.tar.bz2/common/mc.c -> x264-snapshot-20140321-2245.tar.bz2/common/mc.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc.c: motion compensation
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -336,6 +336,34 @@
     }
 }
 
+void x264_plane_copy_deinterleave_v210_c( pixel *dsty, intptr_t i_dsty,
+                                          pixel *dstc, intptr_t i_dstc,
+                                          uint32_t *src, intptr_t i_src, int w, int h )
+{
+    for( int l = 0; l < h; l++ )
+    {
+        pixel *dsty0 = dsty;
+        pixel *dstc0 = dstc;
+        uint32_t *src0 = src;
+
+        for( int n = 0; n < w; n += 3 )
+        {
+            *(dstc0++) = *src0 & 0x03FF;
+            *(dsty0++) = ( *src0 >> 10 ) & 0x03FF;
+            *(dstc0++) = ( *src0 >> 20 ) & 0x03FF;
+            src0++;
+            *(dsty0++) = *src0 & 0x03FF;
+            *(dstc0++) = ( *src0 >> 10 ) & 0x03FF;
+            *(dsty0++) = ( *src0 >> 20 ) & 0x03FF;
+            src0++;
+        }
+
+        dsty += i_dsty;
+        dstc += i_dstc;
+        src  += i_src;
+    }
+}
+
 static void store_interleave_chroma( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height )
 {
     for( int y=0; y<height; y++, dst+=i_dst, srcu+=FDEC_STRIDE, srcv+=FDEC_STRIDE )
@@ -455,20 +483,97 @@
 
 /* Estimate the total amount of influence on future quality that could be had if we
  * were to improve the reference samples used to inter predict any given macroblock. */
-static void mbtree_propagate_cost( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
+static void mbtree_propagate_cost( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
                                    uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len )
 {
-    float fps = *fps_factor / 256.f;
+    float fps = *fps_factor;
     for( int i = 0; i < len; i++ )
     {
-        float intra_cost       = intra_costs[i] * inv_qscales[i];
-        float propagate_amount = propagate_in[i] + intra_cost*fps;
-        float propagate_num    = intra_costs[i] - (inter_costs[i] & LOWRES_COST_MASK);
-        float propagate_denom  = intra_costs[i];
-        dst[i] = (int)(propagate_amount * propagate_num / propagate_denom + 0.5f);
+        int intra_cost = intra_costs[i];
+        int inter_cost = X264_MIN(intra_costs[i], inter_costs[i] & LOWRES_COST_MASK);
+        float propagate_intra  = intra_cost * inv_qscales[i];
+        float propagate_amount = propagate_in[i] + propagate_intra*fps;
+        float propagate_num    = intra_cost - inter_cost;
+        float propagate_denom  = intra_cost;
+        dst[i] = X264_MIN((int)(propagate_amount * propagate_num / propagate_denom + 0.5f), 32767);
     }
 }
 
+static void mbtree_propagate_list( x264_t *h, uint16_t *ref_costs, int16_t (*mvs)[2],
+                                   int16_t *propagate_amount, uint16_t *lowres_costs,
+                                   int bipred_weight, int mb_y, int len, int list )
+{
+    unsigned stride = h->mb.i_mb_stride;
+    unsigned width = h->mb.i_mb_width;
+    unsigned height = h->mb.i_mb_height;
+
+    for( unsigned i = 0; i < len; i++ )
+    {
+#define CLIP_ADD(s,x) (s) = X264_MIN((s)+(x),(1<<15)-1)
+        int lists_used = lowres_costs[i]>>LOWRES_COST_SHIFT;
+
+        if( !(lists_used & (1 << list)) )
+            continue;
+
+        int listamount = propagate_amount[i];
+        /* Apply bipred weighting. */
+        if( lists_used == 3 )
+            listamount = (listamount * bipred_weight + 32) >> 6;
+
+        /* Early termination for simple case of mv0. */
+        if( !M32( mvs[i] ) )
+        {
+            CLIP_ADD( ref_costs[mb_y*stride + i], listamount );
+            continue;
+        }
+
+        int x = mvs[i][0];
+        int y = mvs[i][1];
+        unsigned mbx = (x>>5)+i;
+        unsigned mby = (y>>5)+mb_y;
+        unsigned idx0 = mbx + mby * stride;
+        unsigned idx2 = idx0 + stride;
+        x &= 31;
+        y &= 31;
+        int idx0weight = (32-y)*(32-x);
+        int idx1weight = (32-y)*x;
+        int idx2weight = y*(32-x);
+        int idx3weight = y*x;
+        idx0weight = (idx0weight * listamount + 512) >> 10;
+        idx1weight = (idx1weight * listamount + 512) >> 10;
+        idx2weight = (idx2weight * listamount + 512) >> 10;
+        idx3weight = (idx3weight * listamount + 512) >> 10;
+
+        if( mbx < width-1 && mby < height-1 )
+        {
+            CLIP_ADD( ref_costs[idx0+0], idx0weight );
+            CLIP_ADD( ref_costs[idx0+1], idx1weight );
+            CLIP_ADD( ref_costs[idx2+0], idx2weight );
+            CLIP_ADD( ref_costs[idx2+1], idx3weight );
+        }
+        else
+        {
+            /* Note: this takes advantage of unsigned representation to
+             * catch negative mbx/mby. */
+            if( mby < height )
+            {
+                if( mbx < width )
+                    CLIP_ADD( ref_costs[idx0+0], idx0weight );
+                if( mbx+1 < width )
+                    CLIP_ADD( ref_costs[idx0+1], idx1weight );
+            }
+            if( mby+1 < height )
+            {
+                if( mbx < width )
+                    CLIP_ADD( ref_costs[idx2+0], idx2weight );
+                if( mbx+1 < width )
+                    CLIP_ADD( ref_costs[idx2+1], idx3weight );
+            }
+        }
+    }
+#undef CLIP_ADD
+}
+
 void x264_mc_init( int cpu, x264_mc_functions_t *pf, int cpu_independent )
 {
     pf->mc_luma   = mc_luma;
@@ -507,6 +612,7 @@
     pf->plane_copy_interleave = x264_plane_copy_interleave_c;
     pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_c;
     pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_c;
+    pf->plane_copy_deinterleave_v210 = x264_plane_copy_deinterleave_v210_c;
 
     pf->hpel_filter = hpel_filter;
 
@@ -523,6 +629,7 @@
     pf->integral_init8v = integral_init8v;
 
     pf->mbtree_propagate_cost = mbtree_propagate_cost;
+    pf->mbtree_propagate_list = mbtree_propagate_list;
 
 #if HAVE_MMX
     x264_mc_init_mmx( cpu, pf );
@@ -536,7 +643,10 @@
 #endif
 
     if( cpu_independent )
+    {
         pf->mbtree_propagate_cost = mbtree_propagate_cost;
+        pf->mbtree_propagate_list = mbtree_propagate_list;
+    }
 }
 
 void x264_frame_filter( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc.c: motion compensation
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -336,6 +336,34 @@
     }
 }
 
+void x264_plane_copy_deinterleave_v210_c( pixel *dsty, intptr_t i_dsty,
+                                          pixel *dstc, intptr_t i_dstc,
+                                          uint32_t *src, intptr_t i_src, int w, int h )
+{
+    for( int l = 0; l < h; l++ )
+    {
+        pixel *dsty0 = dsty;
+        pixel *dstc0 = dstc;
+        uint32_t *src0 = src;
+
+        for( int n = 0; n < w; n += 3 )
+        {
+            *(dstc0++) = *src0 & 0x03FF;
+            *(dsty0++) = ( *src0 >> 10 ) & 0x03FF;
+            *(dstc0++) = ( *src0 >> 20 ) & 0x03FF;
+            src0++;
+            *(dsty0++) = *src0 & 0x03FF;
+            *(dstc0++) = ( *src0 >> 10 ) & 0x03FF;
+            *(dsty0++) = ( *src0 >> 20 ) & 0x03FF;
+            src0++;
+        }
+
+        dsty += i_dsty;
+        dstc += i_dstc;
+        src  += i_src;
+    }
+}
+
 static void store_interleave_chroma( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height )
 {
     for( int y=0; y<height; y++, dst+=i_dst, srcu+=FDEC_STRIDE, srcv+=FDEC_STRIDE )
@@ -455,20 +483,97 @@
 
 /* Estimate the total amount of influence on future quality that could be had if we
  * were to improve the reference samples used to inter predict any given macroblock. */
-static void mbtree_propagate_cost( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
+static void mbtree_propagate_cost( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
                                    uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len )
 {
-    float fps = *fps_factor / 256.f;
+    float fps = *fps_factor;
     for( int i = 0; i < len; i++ )
     {
-        float intra_cost       = intra_costs[i] * inv_qscales[i];
-        float propagate_amount = propagate_in[i] + intra_cost*fps;
-        float propagate_num    = intra_costs[i] - (inter_costs[i] & LOWRES_COST_MASK);
-        float propagate_denom  = intra_costs[i];
-        dst[i] = (int)(propagate_amount * propagate_num / propagate_denom + 0.5f);
+        int intra_cost = intra_costs[i];
+        int inter_cost = X264_MIN(intra_costs[i], inter_costs[i] & LOWRES_COST_MASK);
+        float propagate_intra  = intra_cost * inv_qscales[i];
+        float propagate_amount = propagate_in[i] + propagate_intra*fps;
+        float propagate_num    = intra_cost - inter_cost;
+        float propagate_denom  = intra_cost;
+        dst[i] = X264_MIN((int)(propagate_amount * propagate_num / propagate_denom + 0.5f), 32767);
     }
 }
 
+static void mbtree_propagate_list( x264_t *h, uint16_t *ref_costs, int16_t (*mvs)[2],
+                                   int16_t *propagate_amount, uint16_t *lowres_costs,
+                                   int bipred_weight, int mb_y, int len, int list )
+{
+    unsigned stride = h->mb.i_mb_stride;
+    unsigned width = h->mb.i_mb_width;
+    unsigned height = h->mb.i_mb_height;
+
+    for( unsigned i = 0; i < len; i++ )
+    {
+#define CLIP_ADD(s,x) (s) = X264_MIN((s)+(x),(1<<15)-1)
+        int lists_used = lowres_costs[i]>>LOWRES_COST_SHIFT;
+
+        if( !(lists_used & (1 << list)) )
+            continue;
+
+        int listamount = propagate_amount[i];
+        /* Apply bipred weighting. */
+        if( lists_used == 3 )
+            listamount = (listamount * bipred_weight + 32) >> 6;
+
+        /* Early termination for simple case of mv0. */
+        if( !M32( mvs[i] ) )
+        {
+            CLIP_ADD( ref_costs[mb_y*stride + i], listamount );
+            continue;
+        }
+
+        int x = mvs[i][0];
+        int y = mvs[i][1];
+        unsigned mbx = (x>>5)+i;
+        unsigned mby = (y>>5)+mb_y;
+        unsigned idx0 = mbx + mby * stride;
+        unsigned idx2 = idx0 + stride;
+        x &= 31;
+        y &= 31;
+        int idx0weight = (32-y)*(32-x);
+        int idx1weight = (32-y)*x;
+        int idx2weight = y*(32-x);
+        int idx3weight = y*x;
+        idx0weight = (idx0weight * listamount + 512) >> 10;
+        idx1weight = (idx1weight * listamount + 512) >> 10;
+        idx2weight = (idx2weight * listamount + 512) >> 10;
+        idx3weight = (idx3weight * listamount + 512) >> 10;
+
+        if( mbx < width-1 && mby < height-1 )
+        {
+            CLIP_ADD( ref_costs[idx0+0], idx0weight );
+            CLIP_ADD( ref_costs[idx0+1], idx1weight );
+            CLIP_ADD( ref_costs[idx2+0], idx2weight );
+            CLIP_ADD( ref_costs[idx2+1], idx3weight );
+        }
+        else
+        {
+            /* Note: this takes advantage of unsigned representation to
+             * catch negative mbx/mby. */
+            if( mby < height )
+            {
+                if( mbx < width )
+                    CLIP_ADD( ref_costs[idx0+0], idx0weight );
+                if( mbx+1 < width )
+                    CLIP_ADD( ref_costs[idx0+1], idx1weight );
+            }
+            if( mby+1 < height )
+            {
+                if( mbx < width )
+                    CLIP_ADD( ref_costs[idx2+0], idx2weight );
+                if( mbx+1 < width )
+                    CLIP_ADD( ref_costs[idx2+1], idx3weight );
+            }
+        }
+    }
+#undef CLIP_ADD
+}
+
 void x264_mc_init( int cpu, x264_mc_functions_t *pf, int cpu_independent )
 {
     pf->mc_luma   = mc_luma;
@@ -507,6 +612,7 @@
     pf->plane_copy_interleave = x264_plane_copy_interleave_c;
     pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_c;
     pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_c;
+    pf->plane_copy_deinterleave_v210 = x264_plane_copy_deinterleave_v210_c;
 
     pf->hpel_filter = hpel_filter;
 
@@ -523,6 +629,7 @@
     pf->integral_init8v = integral_init8v;
 
     pf->mbtree_propagate_cost = mbtree_propagate_cost;
+    pf->mbtree_propagate_list = mbtree_propagate_list;
 
 #if HAVE_MMX
     x264_mc_init_mmx( cpu, pf );
@@ -536,7 +643,10 @@
 #endif
 
     if( cpu_independent )
+    {
         pf->mbtree_propagate_cost = mbtree_propagate_cost;
+        pf->mbtree_propagate_list = mbtree_propagate_list;
+    }
 }
 
 void x264_frame_filter( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
​

x264-snapshot-20130723-2245.tar.bz2/common/mc.h -> x264-snapshot-20140321-2245.tar.bz2/common/mc.h Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc.h: motion compensation
  *****************************************************************************
- * Copyright (C) 2004-2013 x264 project
+ * Copyright (C) 2004-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *
@@ -93,6 +93,9 @@
                                      pixel *src,  intptr_t i_src, int w, int h );
     void (*plane_copy_deinterleave_rgb)( pixel *dsta, intptr_t i_dsta, pixel *dstb, intptr_t i_dstb,
                                          pixel *dstc, intptr_t i_dstc, pixel *src,  intptr_t i_src, int pw, int w, int h );
+    void (*plane_copy_deinterleave_v210)( pixel *dsty, intptr_t i_dsty,
+                                          pixel *dstc, intptr_t i_dstc,
+                                          uint32_t *src, intptr_t i_src, int w, int h );
     void (*hpel_filter)( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src,
                          intptr_t i_stride, int i_width, int i_height, int16_t *buf );
 
@@ -119,8 +122,12 @@
     weight_fn_t *offsetsub;
     void (*weight_cache)( x264_t *, x264_weight_t * );
 
-    void (*mbtree_propagate_cost)( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
+    void (*mbtree_propagate_cost)( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
                                    uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
+
+    void (*mbtree_propagate_list)( x264_t *h, uint16_t *ref_costs, int16_t (*mvs)[2],
+                                   int16_t *propagate_amount, uint16_t *lowres_costs,
+                                   int bipred_weight, int mb_y, int len, int list );
 } x264_mc_functions_t;
 
 void x264_mc_init( int cpu, x264_mc_functions_t *pf, int cpu_independent );

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc.h: motion compensation
  *****************************************************************************
- * Copyright (C) 2004-2013 x264 project
+ * Copyright (C) 2004-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *
@@ -93,6 +93,9 @@
                                      pixel *src,  intptr_t i_src, int w, int h );
     void (*plane_copy_deinterleave_rgb)( pixel *dsta, intptr_t i_dsta, pixel *dstb, intptr_t i_dstb,
                                          pixel *dstc, intptr_t i_dstc, pixel *src,  intptr_t i_src, int pw, int w, int h );
+    void (*plane_copy_deinterleave_v210)( pixel *dsty, intptr_t i_dsty,
+                                          pixel *dstc, intptr_t i_dstc,
+                                          uint32_t *src, intptr_t i_src, int w, int h );
     void (*hpel_filter)( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src,
                          intptr_t i_stride, int i_width, int i_height, int16_t *buf );
 
@@ -119,8 +122,12 @@
     weight_fn_t *offsetsub;
     void (*weight_cache)( x264_t *, x264_weight_t * );
 
-    void (*mbtree_propagate_cost)( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
+    void (*mbtree_propagate_cost)( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
                                    uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
+
+    void (*mbtree_propagate_list)( x264_t *h, uint16_t *ref_costs, int16_t (*mvs)[2],
+                                   int16_t *propagate_amount, uint16_t *lowres_costs,
+                                   int bipred_weight, int mb_y, int len, int list );
 } x264_mc_functions_t;
 
 void x264_mc_init( int cpu, x264_mc_functions_t *pf, int cpu_independent );
​

x264-snapshot-20130723-2245.tar.bz2/common/mvpred.c -> x264-snapshot-20140321-2245.tar.bz2/common/mvpred.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mvpred.c: motion vector prediction
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
​

x264-snapshot-20130723-2245.tar.bz2/common/opencl.c -> x264-snapshot-20140321-2245.tar.bz2/common/opencl.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * opencl.c: OpenCL initialization and kernel compilation
  *****************************************************************************
- * Copyright (C) 2012-2013 x264 project
+ * Copyright (C) 2012-2014 x264 project
  *
  * Authors: Steve Borho <sborho@multicorewareinc.com>
  *          Anton Mitrofanov <BugMaster@narod.ru>
@@ -28,7 +28,7 @@
 
 #ifdef _WIN32
 #include <windows.h>
-#define ocl_open LoadLibrary( "OpenCL" )
+#define ocl_open LoadLibraryW( L"OpenCL" )
 #define ocl_close FreeLibrary
 #define ocl_address GetProcAddress
 #else
@@ -119,10 +119,10 @@
 
 /* Try to load the cached compiled program binary, verify the device context is
  * still valid before reuse */
-static cl_program x264_opencl_cache_load( x264_t *h, char *dev_name, char *dev_vendor, char *driver_version )
+static cl_program x264_opencl_cache_load( x264_t *h, const char *dev_name, const char *dev_vendor, const char *driver_version )
 {
     /* try to load cached program binary */
-    FILE *fp = fopen( h->param.psz_clbin_file, "rb" );
+    FILE *fp = x264_fopen( h->param.psz_clbin_file, "rb" );
     if( !fp )
         return NULL;
 
@@ -167,9 +167,9 @@
 
 /* Save the compiled program binary to a file for later reuse.  Device context
  * is also saved in the cache file so we do not reuse stale binaries */
-static void x264_opencl_cache_save( x264_t *h, cl_program program, char *dev_name, char *dev_vendor, char *driver_version )
+static void x264_opencl_cache_save( x264_t *h, cl_program program, const char *dev_name, const char *dev_vendor, const char *driver_version )
 {
-    FILE *fp = fopen( h->param.psz_clbin_file, "wb" );
+    FILE *fp = x264_fopen( h->param.psz_clbin_file, "wb" );
     if( !fp )
     {
         x264_log( h, X264_LOG_INFO, "OpenCL: unable to open clbin file for write\n" );
@@ -304,7 +304,7 @@
         goto fail;
     }
 
-    FILE *log_file = fopen( "x264_kernel_build_log.txt", "w" );
+    FILE *log_file = x264_fopen( "x264_kernel_build_log.txt", "w" );
     if( !log_file )
     {
         x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to create file x264_kernel_build_log.txt\n" );
@@ -672,9 +672,9 @@
     int ret = 0;
 
 #ifdef _WIN32
-    hDLL = LoadLibrary( "atiadlxx.dll" );
+    hDLL = LoadLibraryW( L"atiadlxx.dll" );
     if( !hDLL )
-        hDLL = LoadLibrary( "atiadlxy.dll" );
+        hDLL = LoadLibraryW( L"atiadlxy.dll" );
 #else
     hDLL = dlopen( "libatiadlxx.so", RTLD_LAZY|RTLD_GLOBAL );
 #endif
@@ -685,7 +685,7 @@
     ADL_Main_Control_Destroy         = (ADL_MAIN_CONTROL_DESTROY)adl_address(hDLL, "ADL_Main_Control_Destroy");
     ADL_Adapter_NumberOfAdapters_Get = (ADL_ADAPTER_NUMBEROFADAPTERS_GET)adl_address(hDLL, "ADL_Adapter_NumberOfAdapters_Get");
     ADL_PowerXpress_Scheme_Get       = (ADL_POWERXPRESS_SCHEME_GET)adl_address(hDLL, "ADL_PowerXpress_Scheme_Get");
-    if( !ADL_Main_Control_Destroy || !ADL_Main_Control_Destroy || !ADL_Adapter_NumberOfAdapters_Get ||
+    if( !ADL_Main_Control_Create || !ADL_Main_Control_Destroy || !ADL_Adapter_NumberOfAdapters_Get ||
         !ADL_PowerXpress_Scheme_Get )
         goto fail1;

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * opencl.c: OpenCL initialization and kernel compilation
  *****************************************************************************
- * Copyright (C) 2012-2013 x264 project
+ * Copyright (C) 2012-2014 x264 project
  *
  * Authors: Steve Borho <sborho@multicorewareinc.com>
  *          Anton Mitrofanov <BugMaster@narod.ru>
@@ -28,7 +28,7 @@
 
 #ifdef _WIN32
 #include <windows.h>
-#define ocl_open LoadLibrary( "OpenCL" )
+#define ocl_open LoadLibraryW( L"OpenCL" )
 #define ocl_close FreeLibrary
 #define ocl_address GetProcAddress
 #else
@@ -119,10 +119,10 @@
 
 /* Try to load the cached compiled program binary, verify the device context is
  * still valid before reuse */
-static cl_program x264_opencl_cache_load( x264_t *h, char *dev_name, char *dev_vendor, char *driver_version )
+static cl_program x264_opencl_cache_load( x264_t *h, const char *dev_name, const char *dev_vendor, const char *driver_version )
 {
     /* try to load cached program binary */
-    FILE *fp = fopen( h->param.psz_clbin_file, "rb" );
+    FILE *fp = x264_fopen( h->param.psz_clbin_file, "rb" );
     if( !fp )
         return NULL;
 
@@ -167,9 +167,9 @@
 
 /* Save the compiled program binary to a file for later reuse.  Device context
  * is also saved in the cache file so we do not reuse stale binaries */
-static void x264_opencl_cache_save( x264_t *h, cl_program program, char *dev_name, char *dev_vendor, char *driver_version )
+static void x264_opencl_cache_save( x264_t *h, cl_program program, const char *dev_name, const char *dev_vendor, const char *driver_version )
 {
-    FILE *fp = fopen( h->param.psz_clbin_file, "wb" );
+    FILE *fp = x264_fopen( h->param.psz_clbin_file, "wb" );
     if( !fp )
     {
         x264_log( h, X264_LOG_INFO, "OpenCL: unable to open clbin file for write\n" );
@@ -304,7 +304,7 @@
         goto fail;
     }
 
-    FILE *log_file = fopen( "x264_kernel_build_log.txt", "w" );
+    FILE *log_file = x264_fopen( "x264_kernel_build_log.txt", "w" );
     if( !log_file )
     {
         x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to create file x264_kernel_build_log.txt\n" );
@@ -672,9 +672,9 @@
     int ret = 0;
 
 #ifdef _WIN32
-    hDLL = LoadLibrary( "atiadlxx.dll" );
+    hDLL = LoadLibraryW( L"atiadlxx.dll" );
     if( !hDLL )
-        hDLL = LoadLibrary( "atiadlxy.dll" );
+        hDLL = LoadLibraryW( L"atiadlxy.dll" );
 #else
     hDLL = dlopen( "libatiadlxx.so", RTLD_LAZY|RTLD_GLOBAL );
 #endif
@@ -685,7 +685,7 @@
     ADL_Main_Control_Destroy         = (ADL_MAIN_CONTROL_DESTROY)adl_address(hDLL, "ADL_Main_Control_Destroy");
     ADL_Adapter_NumberOfAdapters_Get = (ADL_ADAPTER_NUMBEROFADAPTERS_GET)adl_address(hDLL, "ADL_Adapter_NumberOfAdapters_Get");
     ADL_PowerXpress_Scheme_Get       = (ADL_POWERXPRESS_SCHEME_GET)adl_address(hDLL, "ADL_PowerXpress_Scheme_Get");
-    if( !ADL_Main_Control_Destroy || !ADL_Main_Control_Destroy || !ADL_Adapter_NumberOfAdapters_Get ||
+    if( !ADL_Main_Control_Create || !ADL_Main_Control_Destroy || !ADL_Adapter_NumberOfAdapters_Get ||
         !ADL_PowerXpress_Scheme_Get )
         goto fail1;
 
​

x264-snapshot-20130723-2245.tar.bz2/common/opencl.h -> x264-snapshot-20140321-2245.tar.bz2/common/opencl.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * opencl.h: OpenCL structures and defines
  *****************************************************************************
- * Copyright (C) 2012-2013 x264 project
+ * Copyright (C) 2012-2014 x264 project
  *
  * Authors: Steve Borho <sborho@multicorewareinc.com>
  *          Anton Mitrofanov <BugMaster@narod.ru>
​

x264-snapshot-20130723-2245.tar.bz2/common/osdep.c -> x264-snapshot-20140321-2245.tar.bz2/common/osdep.c Changed

@@ -1,10 +1,11 @@
 /*****************************************************************************
  * osdep.c: platform-specific code
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *          Laurent Aimar <fenrir@via.ecp.fr>
+ *          Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -26,6 +27,11 @@
 
 #include "common.h"
 
+#ifdef _WIN32
+#include <windows.h>
+#include <io.h>
+#endif
+
 #if SYS_WINDOWS
 #include <sys/types.h>
 #include <sys/timeb.h>
@@ -35,8 +41,6 @@
 #include <time.h>
 
 #if PTW32_STATIC_LIB
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
 /* this is a global in pthread-win32 to indicate if it has been initialized or not */
 extern int ptw32_processInitialized;
 #endif
@@ -134,3 +138,73 @@
 {}
 #endif
 #endif
+
+#ifdef _WIN32
+/* Functions for dealing with Unicode on Windows. */
+FILE *x264_fopen( const char *filename, const char *mode )
+{
+    wchar_t filename_utf16[MAX_PATH];
+    wchar_t mode_utf16[16];
+    if( utf8_to_utf16( filename, filename_utf16 ) && utf8_to_utf16( mode, mode_utf16 ) )
+        return _wfopen( filename_utf16, mode_utf16 );
+    return NULL;
+}
+
+int x264_rename( const char *oldname, const char *newname )
+{
+    wchar_t oldname_utf16[MAX_PATH];
+    wchar_t newname_utf16[MAX_PATH];
+    if( utf8_to_utf16( oldname, oldname_utf16 ) && utf8_to_utf16( newname, newname_utf16 ) )
+    {
+        /* POSIX says that rename() removes the destination, but Win32 doesn't. */
+        _wunlink( newname_utf16 );
+        return _wrename( oldname_utf16, newname_utf16 );
+    }
+    return -1;
+}
+
+int x264_stat( const char *path, x264_struct_stat *buf )
+{
+    wchar_t path_utf16[MAX_PATH];
+    if( utf8_to_utf16( path, path_utf16 ) )
+        return _wstati64( path_utf16, buf );
+    return -1;
+}
+
+int x264_vfprintf( FILE *stream, const char *format, va_list arg )
+{
+    HANDLE console = NULL;
+    DWORD mode;
+
+    if( stream == stdout )
+        console = GetStdHandle( STD_OUTPUT_HANDLE );
+    else if( stream == stderr )
+        console = GetStdHandle( STD_ERROR_HANDLE );
+
+    /* Only attempt to convert to UTF-16 when writing to a non-redirected console screen buffer. */
+    if( GetConsoleMode( console, &mode ) )
+    {
+        char buf[4096];
+        wchar_t buf_utf16[4096];
+
+        int length = vsnprintf( buf, sizeof(buf), format, arg );
+        if( length > 0 && length < sizeof(buf) )
+        {
+            /* WriteConsoleW is the most reliable way to output Unicode to a console. */
+            int length_utf16 = MultiByteToWideChar( CP_UTF8, 0, buf, length, buf_utf16, sizeof(buf_utf16)/sizeof(wchar_t) );
+            DWORD written;
+            WriteConsoleW( console, buf_utf16, length_utf16, &written, NULL );
+            return length;
+        }
+    }
+    return vfprintf( stream, format, arg );
+}
+
+int x264_is_pipe( const char *path )
+{
+    wchar_t path_utf16[MAX_PATH];
+    if( utf8_to_utf16( path, path_utf16 ) )
+        return WaitNamedPipeW( path_utf16, 0 );
+    return 0;
+}
+#endif

 
@@ -1,10 +1,11 @@
 /*****************************************************************************
  * osdep.c: platform-specific code
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *          Laurent Aimar <fenrir@via.ecp.fr>
+ *          Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -26,6 +27,11 @@
 
 #include "common.h"
 
+#ifdef _WIN32
+#include <windows.h>
+#include <io.h>
+#endif
+
 #if SYS_WINDOWS
 #include <sys/types.h>
 #include <sys/timeb.h>
@@ -35,8 +41,6 @@
 #include <time.h>
 
 #if PTW32_STATIC_LIB
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
 /* this is a global in pthread-win32 to indicate if it has been initialized or not */
 extern int ptw32_processInitialized;
 #endif
@@ -134,3 +138,73 @@
 {}
 #endif
 #endif
+
+#ifdef _WIN32
+/* Functions for dealing with Unicode on Windows. */
+FILE *x264_fopen( const char *filename, const char *mode )
+{
+    wchar_t filename_utf16[MAX_PATH];
+    wchar_t mode_utf16[16];
+    if( utf8_to_utf16( filename, filename_utf16 ) && utf8_to_utf16( mode, mode_utf16 ) )
+        return _wfopen( filename_utf16, mode_utf16 );
+    return NULL;
+}
+
+int x264_rename( const char *oldname, const char *newname )
+{
+    wchar_t oldname_utf16[MAX_PATH];
+    wchar_t newname_utf16[MAX_PATH];
+    if( utf8_to_utf16( oldname, oldname_utf16 ) && utf8_to_utf16( newname, newname_utf16 ) )
+    {
+        /* POSIX says that rename() removes the destination, but Win32 doesn't. */
+        _wunlink( newname_utf16 );
+        return _wrename( oldname_utf16, newname_utf16 );
+    }
+    return -1;
+}
+
+int x264_stat( const char *path, x264_struct_stat *buf )
+{
+    wchar_t path_utf16[MAX_PATH];
+    if( utf8_to_utf16( path, path_utf16 ) )
+        return _wstati64( path_utf16, buf );
+    return -1;
+}
+
+int x264_vfprintf( FILE *stream, const char *format, va_list arg )
+{
+    HANDLE console = NULL;
+    DWORD mode;
+
+    if( stream == stdout )
+        console = GetStdHandle( STD_OUTPUT_HANDLE );
+    else if( stream == stderr )
+        console = GetStdHandle( STD_ERROR_HANDLE );
+
+    /* Only attempt to convert to UTF-16 when writing to a non-redirected console screen buffer. */
+    if( GetConsoleMode( console, &mode ) )
+    {
+        char buf[4096];
+        wchar_t buf_utf16[4096];
+
+        int length = vsnprintf( buf, sizeof(buf), format, arg );
+        if( length > 0 && length < sizeof(buf) )
+        {
+            /* WriteConsoleW is the most reliable way to output Unicode to a console. */
+            int length_utf16 = MultiByteToWideChar( CP_UTF8, 0, buf, length, buf_utf16, sizeof(buf_utf16)/sizeof(wchar_t) );
+            DWORD written;
+            WriteConsoleW( console, buf_utf16, length_utf16, &written, NULL );
+            return length;
+        }
+    }
+    return vfprintf( stream, format, arg );
+}
+
+int x264_is_pipe( const char *path )
+{
+    wchar_t path_utf16[MAX_PATH];
+    if( utf8_to_utf16( path, path_utf16 ) )
+        return WaitNamedPipeW( path_utf16, 0 );
+    return 0;
+}
+#endif
​

x264-snapshot-20130723-2245.tar.bz2/common/osdep.h -> x264-snapshot-20140321-2245.tar.bz2/common/osdep.h Changed

@@ -1,10 +1,11 @@
 /*****************************************************************************
  * osdep.h: platform-specific code
  *****************************************************************************
- * Copyright (C) 2007-2013 x264 project
+ * Copyright (C) 2007-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
+ *          Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -32,19 +33,21 @@
 #include <stdio.h>
 #include <sys/stat.h>
 #include <inttypes.h>
+#include <stdarg.h>
 
 #include "config.h"
 
+#ifdef __INTEL_COMPILER
+#include <mathimf.h>
+#else
+#include <math.h>
+#endif
+
 #if !HAVE_LOG2F
 #define log2f(x) (logf(x)/0.693147180559945f)
 #define log2(x) (log(x)/0.693147180559945)
 #endif
 
-#ifdef _WIN32
-#include <io.h>    // _setmode()
-#include <fcntl.h> // _O_BINARY
-#endif
-
 #ifdef __ICL
 #define inline __inline
 #define strcasecmp _stricmp
@@ -54,12 +57,6 @@
 #define S_ISREG(x) (((x) & S_IFMT) == S_IFREG)
 #endif
 
-#ifdef __INTEL_COMPILER
-#include <mathimf.h>
-#else
-#include <math.h>
-#endif
-
 #if (defined(__GNUC__) || defined(__INTEL_COMPILER)) && (ARCH_X86 || ARCH_X86_64)
 #define HAVE_X86_INLINE_ASM 1
 #endif
@@ -67,11 +64,29 @@
 #if !defined(isfinite) && (SYS_OPENBSD || SYS_SunOS)
 #define isfinite finite
 #endif
+
 #ifdef _WIN32
-#define rename(src,dst) (unlink(dst), rename(src,dst)) // POSIX says that rename() removes the destination, but win32 doesn't.
 #ifndef strtok_r
 #define strtok_r(str,delim,save) strtok(str,delim)
 #endif
+
+#define utf8_to_utf16( utf8, utf16 )\
+    MultiByteToWideChar( CP_UTF8, MB_ERR_INVALID_CHARS, utf8, -1, utf16, sizeof(utf16)/sizeof(wchar_t) )
+FILE *x264_fopen( const char *filename, const char *mode );
+int x264_rename( const char *oldname, const char *newname );
+#define x264_struct_stat struct _stati64
+#define x264_fstat _fstati64
+int x264_stat( const char *path, x264_struct_stat *buf );
+int x264_vfprintf( FILE *stream, const char *format, va_list arg );
+int x264_is_pipe( const char *path );
+#else
+#define x264_fopen       fopen
+#define x264_rename      rename
+#define x264_struct_stat struct stat
+#define x264_fstat       fstat
+#define x264_stat        stat
+#define x264_vfprintf    vfprintf
+#define x264_is_pipe(x)  0
 #endif
 
 #ifdef __ICL
@@ -111,7 +126,7 @@
 
 #define EXPAND(x) x
 
-#if HAVE_32B_STACK_ALIGNMENT
+#if STACK_ALIGNMENT >= 32
 #define ALIGNED_ARRAY_32( type, name, sub1, ... )\
     ALIGNED_32( type name sub1 __VA_ARGS__ )
 #else
@@ -364,19 +379,19 @@
 #define x264_lower_thread_priority(p)
 #endif
 
-static inline uint8_t x264_is_regular_file( FILE *filehandle )
+static inline int x264_is_regular_file( FILE *filehandle )
 {
-    struct stat file_stat;
-    if( fstat( fileno( filehandle ), &file_stat ) )
-        return -1;
+    x264_struct_stat file_stat;
+    if( x264_fstat( fileno( filehandle ), &file_stat ) )
+        return 1;
     return S_ISREG( file_stat.st_mode );
 }
 
-static inline uint8_t x264_is_regular_file_path( const char *filename )
+static inline int x264_is_regular_file_path( const char *filename )
 {
-    struct stat file_stat;
-    if( stat( filename, &file_stat ) )
-        return -1;
+    x264_struct_stat file_stat;
+    if( x264_stat( filename, &file_stat ) )
+        return !x264_is_pipe( filename );
     return S_ISREG( file_stat.st_mode );
 }

 
@@ -1,10 +1,11 @@
 /*****************************************************************************
  * osdep.h: platform-specific code
  *****************************************************************************
- * Copyright (C) 2007-2013 x264 project
+ * Copyright (C) 2007-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
+ *          Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -32,19 +33,21 @@
 #include <stdio.h>
 #include <sys/stat.h>
 #include <inttypes.h>
+#include <stdarg.h>
 
 #include "config.h"
 
+#ifdef __INTEL_COMPILER
+#include <mathimf.h>
+#else
+#include <math.h>
+#endif
+
 #if !HAVE_LOG2F
 #define log2f(x) (logf(x)/0.693147180559945f)
 #define log2(x) (log(x)/0.693147180559945)
 #endif
 
-#ifdef _WIN32
-#include <io.h>    // _setmode()
-#include <fcntl.h> // _O_BINARY
-#endif
-
 #ifdef __ICL
 #define inline __inline
 #define strcasecmp _stricmp
@@ -54,12 +57,6 @@
 #define S_ISREG(x) (((x) & S_IFMT) == S_IFREG)
 #endif
 
-#ifdef __INTEL_COMPILER
-#include <mathimf.h>
-#else
-#include <math.h>
-#endif
-
 #if (defined(__GNUC__) || defined(__INTEL_COMPILER)) && (ARCH_X86 || ARCH_X86_64)
 #define HAVE_X86_INLINE_ASM 1
 #endif
@@ -67,11 +64,29 @@
 #if !defined(isfinite) && (SYS_OPENBSD || SYS_SunOS)
 #define isfinite finite
 #endif
+
 #ifdef _WIN32
-#define rename(src,dst) (unlink(dst), rename(src,dst)) // POSIX says that rename() removes the destination, but win32 doesn't.
 #ifndef strtok_r
 #define strtok_r(str,delim,save) strtok(str,delim)
 #endif
+
+#define utf8_to_utf16( utf8, utf16 )\
+    MultiByteToWideChar( CP_UTF8, MB_ERR_INVALID_CHARS, utf8, -1, utf16, sizeof(utf16)/sizeof(wchar_t) )
+FILE *x264_fopen( const char *filename, const char *mode );
+int x264_rename( const char *oldname, const char *newname );
+#define x264_struct_stat struct _stati64
+#define x264_fstat _fstati64
+int x264_stat( const char *path, x264_struct_stat *buf );
+int x264_vfprintf( FILE *stream, const char *format, va_list arg );
+int x264_is_pipe( const char *path );
+#else
+#define x264_fopen       fopen
+#define x264_rename      rename
+#define x264_struct_stat struct stat
+#define x264_fstat       fstat
+#define x264_stat        stat
+#define x264_vfprintf    vfprintf
+#define x264_is_pipe(x)  0
 #endif
 
 #ifdef __ICL
@@ -111,7 +126,7 @@
 
 #define EXPAND(x) x
 
-#if HAVE_32B_STACK_ALIGNMENT
+#if STACK_ALIGNMENT >= 32
 #define ALIGNED_ARRAY_32( type, name, sub1, ... )\
     ALIGNED_32( type name sub1 __VA_ARGS__ )
 #else
@@ -364,19 +379,19 @@
 #define x264_lower_thread_priority(p)
 #endif
 
-static inline uint8_t x264_is_regular_file( FILE *filehandle )
+static inline int x264_is_regular_file( FILE *filehandle )
 {
-    struct stat file_stat;
-    if( fstat( fileno( filehandle ), &file_stat ) )
-        return -1;
+    x264_struct_stat file_stat;
+    if( x264_fstat( fileno( filehandle ), &file_stat ) )
+        return 1;
     return S_ISREG( file_stat.st_mode );
 }
 
-static inline uint8_t x264_is_regular_file_path( const char *filename )
+static inline int x264_is_regular_file_path( const char *filename )
 {
-    struct stat file_stat;
-    if( stat( filename, &file_stat ) )
-        return -1;
+    x264_struct_stat file_stat;
+    if( x264_stat( filename, &file_stat ) )
+        return !x264_is_pipe( filename );
     return S_ISREG( file_stat.st_mode );
 }
 
​

x264-snapshot-20130723-2245.tar.bz2/common/pixel.c -> x264-snapshot-20140321-2245.tar.bz2/common/pixel.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * pixel.c: pixel metrics
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -36,6 +36,7 @@
 #endif
 #if ARCH_ARM
 #   include "arm/pixel.h"
+#   include "arm/predict.h"
 #endif
 #if ARCH_UltraSPARC
 #   include "sparc/pixel.h"
@@ -532,6 +533,10 @@
 INTRA_MBCMP_8x8( sad, _mmx2,  _c )
 INTRA_MBCMP_8x8(sa8d, _sse2,  _sse2 )
 #endif
+#if !HIGH_BIT_DEPTH && HAVE_ARMV6
+INTRA_MBCMP_8x8( sad, _neon, _neon )
+INTRA_MBCMP_8x8(sa8d, _neon, _neon )
+#endif
 
 #define INTRA_MBCMP( mbcmp, size, pred1, pred2, pred3, chroma, cpu, cpu2 )\
 void x264_intra_##mbcmp##_x3_##size##chroma##cpu( pixel *fenc, pixel *fdec, int res[3] )\
@@ -555,16 +560,26 @@
 
 #if HAVE_MMX
 #if HIGH_BIT_DEPTH
+#define x264_predict_8x8c_v_mmx2 x264_predict_8x8c_v_mmx
+#define x264_predict_8x16c_v_mmx2 x264_predict_8x16c_v_c
 #define x264_predict_8x8c_v_sse2 x264_predict_8x8c_v_sse
 #define x264_predict_8x16c_v_sse2 x264_predict_8x16c_v_sse
 #define x264_predict_16x16_v_sse2 x264_predict_16x16_v_sse
 INTRA_MBCMP( sad,  4x4,   v, h, dc,  , _mmx2, _c )
-INTRA_MBCMP( sad,  8x8,  dc, h,  v, c, _mmx2, _c )
+INTRA_MBCMP( sad,  8x8,  dc, h,  v, c, _mmx2, _mmx2 )
+INTRA_MBCMP( sad,  8x16, dc, h,  v, c, _mmx2, _mmx2 )
+INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _mmx2, _mmx2 )
 INTRA_MBCMP( sad, 16x16,  v, h, dc,  , _mmx2, _mmx2 )
 INTRA_MBCMP( sad,  8x8,  dc, h,  v, c, _sse2, _sse2 )
+INTRA_MBCMP( sad,  8x16, dc, h,  v, c, _sse2, _sse2 )
+INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _sse2, _sse2 )
 INTRA_MBCMP( sad, 16x16,  v, h, dc,  , _sse2, _sse2 )
 INTRA_MBCMP( sad,  8x8,  dc, h,  v, c, _ssse3, _sse2 )
+INTRA_MBCMP( sad,  8x16, dc, h,  v, c, _ssse3, _sse2 )
+INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _ssse3, _sse2 )
 INTRA_MBCMP( sad, 16x16,  v, h, dc,  , _ssse3, _sse2 )
+INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _sse4, _sse2 )
+INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _avx, _sse2 )
 #else
 #define x264_predict_8x16c_v_mmx2 x264_predict_8x16c_v_mmx
 INTRA_MBCMP( sad,  8x16, dc, h,  v, c, _mmx2, _mmx2 )
@@ -577,6 +592,16 @@
 INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _xop, _mmx2 )
 #endif
 #endif
+#if !HIGH_BIT_DEPTH && HAVE_ARMV6
+INTRA_MBCMP( sad,  4x4,   v, h, dc,  , _neon, _c )
+INTRA_MBCMP(satd,  4x4,   v, h, dc,  , _neon, _c )
+INTRA_MBCMP( sad,  8x8,  dc, h,  v, c, _neon, _neon )
+INTRA_MBCMP(satd,  8x8,  dc, h,  v, c, _neon, _neon )
+INTRA_MBCMP( sad,  8x16, dc, h,  v, c, _neon, _c )
+INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _neon, _c )
+INTRA_MBCMP( sad, 16x16,  v, h, dc,  , _neon, _neon )
+INTRA_MBCMP(satd, 16x16,  v, h, dc,  , _neon, _neon )
+#endif
 
 // No C implementation of intra_satd_x9. See checkasm for its behavior,
 // or see x264_mb_analyse_intra for the entirely different algorithm we
@@ -868,6 +893,8 @@
         pixf->intra_sad_x3_8x8    = x264_intra_sad_x3_8x8_mmx2;
         pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c_mmx2;
         pixf->intra_satd_x3_8x8c  = x264_intra_satd_x3_8x8c_mmx2;
+        pixf->intra_sad_x3_8x16c  = x264_intra_sad_x3_8x16c_mmx2;
+        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_mmx2;
         pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16_mmx2;
         pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_mmx2;
     }
@@ -909,6 +936,8 @@
         pixf->asd8 = x264_pixel_asd8_sse2;
         pixf->intra_sad_x3_8x8    = x264_intra_sad_x3_8x8_sse2;
         pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c_sse2;
+        pixf->intra_sad_x3_8x16c  = x264_intra_sad_x3_8x16c_sse2;
+        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_sse2;
         pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16_sse2;
     }
     if( cpu&X264_CPU_SSE2_IS_FAST )
@@ -948,6 +977,8 @@
         pixf->intra_sad_x3_4x4    = x264_intra_sad_x3_4x4_ssse3;
         pixf->intra_sad_x3_8x8    = x264_intra_sad_x3_8x8_ssse3;
         pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c_ssse3;
+        pixf->intra_sad_x3_8x16c  = x264_intra_sad_x3_8x16c_ssse3;
+        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_ssse3;
         pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16_ssse3;
     }
     if( cpu&X264_CPU_SSE4 )
@@ -963,6 +994,7 @@
 #if ARCH_X86_64
         pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_sse4;
 #endif
+        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_sse4;
     }
     if( cpu&X264_CPU_AVX )
     {
@@ -985,6 +1017,7 @@
 #if ARCH_X86_64
         pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_avx;
 #endif
+        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_avx;
     }
     if( cpu&X264_CPU_XOP )
     {
@@ -1119,12 +1152,6 @@
                pixf->sad_x4[PIXEL_8x16] = x264_pixel_sad_x4_8x16_cache64_sse2;
            }
         }
-
-        if( cpu&X264_CPU_SSE_MISALIGN )
-        {
-            INIT2( sad_x3, _sse2_misalign );
-            INIT2( sad_x4, _sse2_misalign );
-        }
     }
 
     if( cpu&X264_CPU_SSE2_IS_FAST && !(cpu&X264_CPU_CACHELINE_64) )
@@ -1201,9 +1228,8 @@
         }
         else
         {
-            pixf->sad_x4[PIXEL_8x4] = x264_pixel_sad_x4_8x4_ssse3;
-            pixf->sad_x4[PIXEL_8x8] = x264_pixel_sad_x4_8x8_ssse3;
-            pixf->sad_x4[PIXEL_8x16] = x264_pixel_sad_x4_8x16_ssse3;
+            INIT2( sad_x3, _ssse3 );
+            INIT5( sad_x4, _ssse3 );
         }
         if( (cpu&X264_CPU_SLOW_ATOM) || (cpu&X264_CPU_SLOW_SHUFFLE) )
         {
@@ -1237,6 +1263,8 @@
     if( cpu&X264_CPU_AVX )
     {
         INIT2_NAME( sad_aligned, sad, _sse2 ); /* AVX-capable CPUs doesn't benefit from an aligned version */
+        INIT2( sad_x3, _avx );
+        INIT2( sad_x4, _avx );
         INIT8( satd, _avx );
         INIT7( satd_x3, _avx );
         INIT7( satd_x4, _avx );
@@ -1334,8 +1362,21 @@
         pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_neon;
         pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_neon;
         pixf->var[PIXEL_8x8]    = x264_pixel_var_8x8_neon;
+        pixf->var[PIXEL_8x16]   = x264_pixel_var_8x16_neon;
         pixf->var[PIXEL_16x16]  = x264_pixel_var_16x16_neon;
         pixf->var2[PIXEL_8x8]   = x264_pixel_var2_8x8_neon;
+        pixf->var2[PIXEL_8x16]  = x264_pixel_var2_8x16_neon;
+
+        pixf->intra_sad_x3_4x4    = x264_intra_sad_x3_4x4_neon;
+        pixf->intra_satd_x3_4x4   = x264_intra_satd_x3_4x4_neon;
+        pixf->intra_sad_x3_8x8    = x264_intra_sad_x3_8x8_neon;
+        pixf->intra_sa8d_x3_8x8   = x264_intra_sa8d_x3_8x8_neon;
+        pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c_neon;
+        pixf->intra_satd_x3_8x8c  = x264_intra_satd_x3_8x8c_neon;
+        pixf->intra_sad_x3_8x16c  = x264_intra_sad_x3_8x16c_neon;
+        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_neon;
+        pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16_neon;
+        pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_neon;
 
         pixf->ssim_4x4x2_core   = x264_pixel_ssim_4x4x2_core_neon;
         pixf->ssim_end4         = x264_pixel_ssim_end4_neon;

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * pixel.c: pixel metrics
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -36,6 +36,7 @@
 #endif
 #if ARCH_ARM
 #   include "arm/pixel.h"
+#   include "arm/predict.h"
 #endif
 #if ARCH_UltraSPARC
 #   include "sparc/pixel.h"
@@ -532,6 +533,10 @@
 INTRA_MBCMP_8x8( sad, _mmx2,  _c )
 INTRA_MBCMP_8x8(sa8d, _sse2,  _sse2 )
 #endif
+#if !HIGH_BIT_DEPTH && HAVE_ARMV6
+INTRA_MBCMP_8x8( sad, _neon, _neon )
+INTRA_MBCMP_8x8(sa8d, _neon, _neon )
+#endif
 
 #define INTRA_MBCMP( mbcmp, size, pred1, pred2, pred3, chroma, cpu, cpu2 )\
 void x264_intra_##mbcmp##_x3_##size##chroma##cpu( pixel *fenc, pixel *fdec, int res[3] )\
@@ -555,16 +560,26 @@
 
 #if HAVE_MMX
 #if HIGH_BIT_DEPTH
+#define x264_predict_8x8c_v_mmx2 x264_predict_8x8c_v_mmx
+#define x264_predict_8x16c_v_mmx2 x264_predict_8x16c_v_c
 #define x264_predict_8x8c_v_sse2 x264_predict_8x8c_v_sse
 #define x264_predict_8x16c_v_sse2 x264_predict_8x16c_v_sse
 #define x264_predict_16x16_v_sse2 x264_predict_16x16_v_sse
 INTRA_MBCMP( sad,  4x4,   v, h, dc,  , _mmx2, _c )
-INTRA_MBCMP( sad,  8x8,  dc, h,  v, c, _mmx2, _c )
+INTRA_MBCMP( sad,  8x8,  dc, h,  v, c, _mmx2, _mmx2 )
+INTRA_MBCMP( sad,  8x16, dc, h,  v, c, _mmx2, _mmx2 )
+INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _mmx2, _mmx2 )
 INTRA_MBCMP( sad, 16x16,  v, h, dc,  , _mmx2, _mmx2 )
 INTRA_MBCMP( sad,  8x8,  dc, h,  v, c, _sse2, _sse2 )
+INTRA_MBCMP( sad,  8x16, dc, h,  v, c, _sse2, _sse2 )
+INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _sse2, _sse2 )
 INTRA_MBCMP( sad, 16x16,  v, h, dc,  , _sse2, _sse2 )
 INTRA_MBCMP( sad,  8x8,  dc, h,  v, c, _ssse3, _sse2 )
+INTRA_MBCMP( sad,  8x16, dc, h,  v, c, _ssse3, _sse2 )
+INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _ssse3, _sse2 )
 INTRA_MBCMP( sad, 16x16,  v, h, dc,  , _ssse3, _sse2 )
+INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _sse4, _sse2 )
+INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _avx, _sse2 )
 #else
 #define x264_predict_8x16c_v_mmx2 x264_predict_8x16c_v_mmx
 INTRA_MBCMP( sad,  8x16, dc, h,  v, c, _mmx2, _mmx2 )
@@ -577,6 +592,16 @@
 INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _xop, _mmx2 )
 #endif
 #endif
+#if !HIGH_BIT_DEPTH && HAVE_ARMV6
+INTRA_MBCMP( sad,  4x4,   v, h, dc,  , _neon, _c )
+INTRA_MBCMP(satd,  4x4,   v, h, dc,  , _neon, _c )
+INTRA_MBCMP( sad,  8x8,  dc, h,  v, c, _neon, _neon )
+INTRA_MBCMP(satd,  8x8,  dc, h,  v, c, _neon, _neon )
+INTRA_MBCMP( sad,  8x16, dc, h,  v, c, _neon, _c )
+INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _neon, _c )
+INTRA_MBCMP( sad, 16x16,  v, h, dc,  , _neon, _neon )
+INTRA_MBCMP(satd, 16x16,  v, h, dc,  , _neon, _neon )
+#endif
 
 // No C implementation of intra_satd_x9. See checkasm for its behavior,
 // or see x264_mb_analyse_intra for the entirely different algorithm we
@@ -868,6 +893,8 @@
         pixf->intra_sad_x3_8x8    = x264_intra_sad_x3_8x8_mmx2;
         pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c_mmx2;
         pixf->intra_satd_x3_8x8c  = x264_intra_satd_x3_8x8c_mmx2;
+        pixf->intra_sad_x3_8x16c  = x264_intra_sad_x3_8x16c_mmx2;
+        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_mmx2;
         pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16_mmx2;
         pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_mmx2;
     }
@@ -909,6 +936,8 @@
         pixf->asd8 = x264_pixel_asd8_sse2;
         pixf->intra_sad_x3_8x8    = x264_intra_sad_x3_8x8_sse2;
         pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c_sse2;
+        pixf->intra_sad_x3_8x16c  = x264_intra_sad_x3_8x16c_sse2;
+        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_sse2;
         pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16_sse2;
     }
     if( cpu&X264_CPU_SSE2_IS_FAST )
@@ -948,6 +977,8 @@
         pixf->intra_sad_x3_4x4    = x264_intra_sad_x3_4x4_ssse3;
         pixf->intra_sad_x3_8x8    = x264_intra_sad_x3_8x8_ssse3;
         pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c_ssse3;
+        pixf->intra_sad_x3_8x16c  = x264_intra_sad_x3_8x16c_ssse3;
+        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_ssse3;
         pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16_ssse3;
     }
     if( cpu&X264_CPU_SSE4 )
@@ -963,6 +994,7 @@
 #if ARCH_X86_64
         pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_sse4;
 #endif
+        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_sse4;
     }
     if( cpu&X264_CPU_AVX )
     {
@@ -985,6 +1017,7 @@
 #if ARCH_X86_64
         pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_avx;
 #endif
+        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_avx;
     }
     if( cpu&X264_CPU_XOP )
     {
@@ -1119,12 +1152,6 @@
                pixf->sad_x4[PIXEL_8x16] = x264_pixel_sad_x4_8x16_cache64_sse2;
            }
         }
-
-        if( cpu&X264_CPU_SSE_MISALIGN )
-        {
-            INIT2( sad_x3, _sse2_misalign );
-            INIT2( sad_x4, _sse2_misalign );
-        }
     }
 
     if( cpu&X264_CPU_SSE2_IS_FAST && !(cpu&X264_CPU_CACHELINE_64) )
@@ -1201,9 +1228,8 @@
         }
         else
         {
-            pixf->sad_x4[PIXEL_8x4] = x264_pixel_sad_x4_8x4_ssse3;
-            pixf->sad_x4[PIXEL_8x8] = x264_pixel_sad_x4_8x8_ssse3;
-            pixf->sad_x4[PIXEL_8x16] = x264_pixel_sad_x4_8x16_ssse3;
+            INIT2( sad_x3, _ssse3 );
+            INIT5( sad_x4, _ssse3 );
         }
         if( (cpu&X264_CPU_SLOW_ATOM) || (cpu&X264_CPU_SLOW_SHUFFLE) )
         {
@@ -1237,6 +1263,8 @@
     if( cpu&X264_CPU_AVX )
     {
         INIT2_NAME( sad_aligned, sad, _sse2 ); /* AVX-capable CPUs doesn't benefit from an aligned version */
+        INIT2( sad_x3, _avx );
+        INIT2( sad_x4, _avx );
         INIT8( satd, _avx );
         INIT7( satd_x3, _avx );
         INIT7( satd_x4, _avx );
@@ -1334,8 +1362,21 @@
         pixf->sa8d[PIXEL_8x8]   = x264_pixel_sa8d_8x8_neon;
         pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_neon;
         pixf->var[PIXEL_8x8]    = x264_pixel_var_8x8_neon;
+        pixf->var[PIXEL_8x16]   = x264_pixel_var_8x16_neon;
         pixf->var[PIXEL_16x16]  = x264_pixel_var_16x16_neon;
         pixf->var2[PIXEL_8x8]   = x264_pixel_var2_8x8_neon;
+        pixf->var2[PIXEL_8x16]  = x264_pixel_var2_8x16_neon;
+
+        pixf->intra_sad_x3_4x4    = x264_intra_sad_x3_4x4_neon;
+        pixf->intra_satd_x3_4x4   = x264_intra_satd_x3_4x4_neon;
+        pixf->intra_sad_x3_8x8    = x264_intra_sad_x3_8x8_neon;
+        pixf->intra_sa8d_x3_8x8   = x264_intra_sa8d_x3_8x8_neon;
+        pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c_neon;
+        pixf->intra_satd_x3_8x8c  = x264_intra_satd_x3_8x8c_neon;
+        pixf->intra_sad_x3_8x16c  = x264_intra_sad_x3_8x16c_neon;
+        pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_neon;
+        pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16_neon;
+        pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_neon;
 
         pixf->ssim_4x4x2_core   = x264_pixel_ssim_4x4x2_core_neon;
         pixf->ssim_end4         = x264_pixel_ssim_end4_neon;
​

x264-snapshot-20130723-2245.tar.bz2/common/pixel.h -> x264-snapshot-20140321-2245.tar.bz2/common/pixel.h Changed

 
@@ -1,11 +1,11 @@
 /*****************************************************************************
  * pixel.c: pixel metrics
  *****************************************************************************
- * Copyright (C) 2004-2013 x264 project
+ * Copyright (C) 2004-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
-            Henrik Gramner <hengar-6@student.ltu.se>
+            Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
​

x264-snapshot-20130723-2245.tar.bz2/common/ppc/dct.c -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/dct.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * dct.c: ppc transform and zigzag
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Guillaume Poirier <gpoirier@mplayerhq.hu>
  *          Eric Petit <eric.petit@lapsus.org>
​

x264-snapshot-20130723-2245.tar.bz2/common/ppc/dct.h -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/dct.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * dct.h: ppc transform and zigzag
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Eric Petit <eric.petit@lapsus.org>
  *          Guillaume Poirier <gpoirier@mplayerhq.hu>
​

x264-snapshot-20130723-2245.tar.bz2/common/ppc/deblock.c -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/deblock.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * deblock.c: ppc deblocking
  *****************************************************************************
- * Copyright (C) 2007-2013 x264 project
+ * Copyright (C) 2007-2014 x264 project
  *
  * Authors: Guillaume Poirier <gpoirier@mplayerhq.hu>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/ppc/mc.c -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/mc.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc.c: ppc motion compensation
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Eric Petit <eric.petit@lapsus.org>
  *          Guillaume Poirier <gpoirier@mplayerhq.hu>
​

x264-snapshot-20130723-2245.tar.bz2/common/ppc/mc.h -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/mc.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc.h: ppc motion compensation
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Eric Petit <eric.petit@lapsus.org>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/ppc/pixel.c -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/pixel.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * pixel.c: ppc pixel metrics
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Eric Petit <eric.petit@lapsus.org>
  *          Guillaume Poirier <gpoirier@mplayerhq.hu>
​

x264-snapshot-20130723-2245.tar.bz2/common/ppc/pixel.h -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/pixel.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * pixel.h: ppc pixel metrics
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Eric Petit <eric.petit@lapsus.org>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/ppc/ppccommon.h -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/ppccommon.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * ppccommon.h: ppc utility macros
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Eric Petit <eric.petit@lapsus.org>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/ppc/predict.c -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/predict.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * predict.c: ppc intra prediction
  *****************************************************************************
- * Copyright (C) 2007-2013 x264 project
+ * Copyright (C) 2007-2014 x264 project
  *
  * Authors: Guillaume Poirier <gpoirier@mplayerhq.hu>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/ppc/predict.h -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/predict.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * predict.h: ppc intra prediction
  *****************************************************************************
- * Copyright (C) 2007-2013 x264 project
+ * Copyright (C) 2007-2014 x264 project
  *
  * Authors: Guillaume Poirier <gpoirier@mplayerhq.hu>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/ppc/quant.c -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/quant.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * quant.c: ppc quantization
  *****************************************************************************
- * Copyright (C) 2007-2013 x264 project
+ * Copyright (C) 2007-2014 x264 project
  *
  * Authors: Guillaume Poirier <gpoirier@mplayerhq.hu>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/ppc/quant.h -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/quant.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * quant.c: ppc quantization
  *****************************************************************************
- * Copyright (C) 2007-2013 x264 project
+ * Copyright (C) 2007-2014 x264 project
  *
  * Authors: Guillaume Poirier <gpoirier@mplayerhq.hu>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/predict.c -> x264-snapshot-20140321-2245.tar.bz2/common/predict.c Changed

 
@@ -1,12 +1,12 @@
 /*****************************************************************************
  * predict.c: intra prediction
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
- *          Henrik Gramner <hengar-6@student.ltu.se>
+ *          Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
​

x264-snapshot-20130723-2245.tar.bz2/common/predict.h -> x264-snapshot-20140321-2245.tar.bz2/common/predict.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * predict.h: intra prediction
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
​

x264-snapshot-20130723-2245.tar.bz2/common/quant.c -> x264-snapshot-20140321-2245.tar.bz2/common/quant.c Changed

 
@@ -1,12 +1,12 @@
 /*****************************************************************************
  * quant.c: quantization and level-run
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
  *          Christian Heine <sennindemokrit@gmx.net>
- *          Henrik Gramner <hengar-6@student.ltu.se>
+ *          Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
​

x264-snapshot-20130723-2245.tar.bz2/common/quant.h -> x264-snapshot-20140321-2245.tar.bz2/common/quant.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * quant.h: quantization and level-run
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
​

x264-snapshot-20130723-2245.tar.bz2/common/rectangle.c -> x264-snapshot-20140321-2245.tar.bz2/common/rectangle.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * rectangle.c: rectangle filling
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Jason Garrett-Glaser <darkshikari@gmail.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/rectangle.h -> x264-snapshot-20140321-2245.tar.bz2/common/rectangle.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * rectangle.h: rectangle filling
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Jason Garrett-Glaser <darkshikari@gmail.com>
  *          Loren Merritt <lorenm@u.washington.edu>
​

x264-snapshot-20130723-2245.tar.bz2/common/set.c -> x264-snapshot-20140321-2245.tar.bz2/common/set.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * set.c: quantization init
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *
@@ -105,9 +105,9 @@
         }\
         else\
         {\
-            CHECKED_MALLOC( h->  quant##w##_mf[i], (QP_MAX+1)*size*sizeof(udctcoef) );\
+            CHECKED_MALLOC( h->  quant##w##_mf[i], (QP_MAX_SPEC+1)*size*sizeof(udctcoef) );\
             CHECKED_MALLOC( h->dequant##w##_mf[i],  6*size*sizeof(int) );\
-            CHECKED_MALLOC( h->unquant##w##_mf[i], (QP_MAX+1)*size*sizeof(int) );\
+            CHECKED_MALLOC( h->unquant##w##_mf[i], (QP_MAX_SPEC+1)*size*sizeof(int) );\
         }\
         for( j = 0; j < i; j++ )\
             if( deadzone[j] == deadzone[i] &&\
@@ -120,8 +120,8 @@
         }\
         else\
         {\
-            CHECKED_MALLOC( h->quant##w##_bias[i], (QP_MAX+1)*size*sizeof(udctcoef) );\
-            CHECKED_MALLOC( h->quant##w##_bias0[i], (QP_MAX+1)*size*sizeof(udctcoef) );\
+            CHECKED_MALLOC( h->quant##w##_bias[i], (QP_MAX_SPEC+1)*size*sizeof(udctcoef) );\
+            CHECKED_MALLOC( h->quant##w##_bias0[i], (QP_MAX_SPEC+1)*size*sizeof(udctcoef) );\
         }\
     }
 
@@ -159,7 +159,7 @@
                      quant8_mf[i_list][q][i] = DIV(def_quant8[q][i] * 16, h->pps->scaling_list[4+i_list][i]);
             }
     }
-    for( int q = 0; q < QP_MAX+1; q++ )
+    for( int q = 0; q <= QP_MAX_SPEC; q++ )
     {
         int j;
         for( int i_list = 0; i_list < 4; i_list++ )

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * set.c: quantization init
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *
@@ -105,9 +105,9 @@
         }\
         else\
         {\
-            CHECKED_MALLOC( h->  quant##w##_mf[i], (QP_MAX+1)*size*sizeof(udctcoef) );\
+            CHECKED_MALLOC( h->  quant##w##_mf[i], (QP_MAX_SPEC+1)*size*sizeof(udctcoef) );\
             CHECKED_MALLOC( h->dequant##w##_mf[i],  6*size*sizeof(int) );\
-            CHECKED_MALLOC( h->unquant##w##_mf[i], (QP_MAX+1)*size*sizeof(int) );\
+            CHECKED_MALLOC( h->unquant##w##_mf[i], (QP_MAX_SPEC+1)*size*sizeof(int) );\
         }\
         for( j = 0; j < i; j++ )\
             if( deadzone[j] == deadzone[i] &&\
@@ -120,8 +120,8 @@
         }\
         else\
         {\
-            CHECKED_MALLOC( h->quant##w##_bias[i], (QP_MAX+1)*size*sizeof(udctcoef) );\
-            CHECKED_MALLOC( h->quant##w##_bias0[i], (QP_MAX+1)*size*sizeof(udctcoef) );\
+            CHECKED_MALLOC( h->quant##w##_bias[i], (QP_MAX_SPEC+1)*size*sizeof(udctcoef) );\
+            CHECKED_MALLOC( h->quant##w##_bias0[i], (QP_MAX_SPEC+1)*size*sizeof(udctcoef) );\
         }\
     }
 
@@ -159,7 +159,7 @@
                      quant8_mf[i_list][q][i] = DIV(def_quant8[q][i] * 16, h->pps->scaling_list[4+i_list][i]);
             }
     }
-    for( int q = 0; q < QP_MAX+1; q++ )
+    for( int q = 0; q <= QP_MAX_SPEC; q++ )
     {
         int j;
         for( int i_list = 0; i_list < 4; i_list++ )
​

x264-snapshot-20130723-2245.tar.bz2/common/set.h -> x264-snapshot-20140321-2245.tar.bz2/common/set.h Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * set.h: quantization init
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -248,6 +248,98 @@
     x264_cqm_jvt8i, x264_cqm_jvt8p
 };
 
+// 1080i25_avci50, 1080p25_avci50
+static const uint8_t x264_cqm_avci50_4ic[16] =
+{
+    16,22,28,40,
+    22,28,40,44,
+    28,40,44,48,
+    40,44,48,60
+};
+
+//  1080i25_avci50,
+static const uint8_t x264_cqm_avci50_1080i_8iy[64] =
+{
+    16,18,19,21,27,33,81,87,
+    18,19,21,24,30,33,81,87,
+    19,21,24,27,30,78,84,90,
+    21,24,27,30,33,78,84,90,
+    24,27,30,33,78,81,84,90,
+    24,27,30,33,78,81,84,93,
+    27,30,33,78,78,81,87,93,
+    30,33,33,78,81,84,87,96
+};
+
+//  1080p25_avci50, 720p25_avci50, 720p50_avci50
+static const uint8_t x264_cqm_avci50_p_8iy[64] =
+{
+    16,18,19,21,24,27,30,33,
+    18,19,21,24,27,30,33,78,
+    19,21,24,27,30,33,78,81,
+    21,24,27,30,33,78,81,84,
+    24,27,30,33,78,81,84,87,
+    27,30,33,78,81,84,87,90,
+    30,33,78,81,84,87,90,93,
+    33,78,81,84,87,90,93,96
+};
+
+//  1080i25_avci100, 1080p25_avci100
+static const uint8_t x264_cqm_avci100_1080_4ic[16] =
+{
+    16,20,26,32,
+    20,26,32,38,
+    26,32,38,44,
+    32,38,44,50
+};
+
+// 720p25_avci100, 720p50_avci100
+static const uint8_t x264_cqm_avci100_720p_4ic[16] =
+{
+    16,21,27,34,
+    21,27,34,41,
+    27,34,41,46,
+    34,41,46,54
+};
+
+//  1080i25_avci100,
+static const uint8_t x264_cqm_avci100_1080i_8iy[64] =
+{
+    16,19,20,23,24,26,32,42,
+    18,19,22,24,26,32,36,42,
+    18,20,23,24,26,32,36,63,
+    19,20,23,26,32,36,42,63,
+    20,22,24,26,32,36,59,63,
+    22,23,24,26,32,36,59,68,
+    22,23,24,26,32,42,59,68,
+    22,23,24,26,36,42,59,72
+};
+
+// 1080p25_avci100,
+static const uint8_t x264_cqm_avci100_1080p_8iy[64] =
+{
+    16,18,19,20,22,23,24,26,
+    18,19,20,22,23,24,26,32,
+    19,20,22,23,24,26,32,36,
+    20,22,23,24,26,32,36,42,
+    22,23,24,26,32,36,42,59,
+    23,24,26,32,36,42,59,63,
+    24,26,32,36,42,59,63,68,
+    26,32,36,42,59,63,68,72
+};
+
+// 720p25_avci100, 720p50_avci100
+static const uint8_t x264_cqm_avci100_720p_8iy[64] =
+{
+    16,18,19,21,22,24,26,32,
+    18,19,19,21,22,24,26,32,
+    19,19,21,22,22,24,26,32,
+    21,21,22,22,23,24,26,34,
+    22,22,22,23,24,25,26,34,
+    24,24,24,24,25,26,34,36,
+    26,26,26,26,26,34,36,38,
+    32,32,32,34,34,36,38,42
+};
+
 int  x264_cqm_init( x264_t *h );
 void x264_cqm_delete( x264_t *h );
 int  x264_cqm_parse_file( x264_t *h, const char *filename );

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * set.h: quantization init
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -248,6 +248,98 @@
     x264_cqm_jvt8i, x264_cqm_jvt8p
 };
 
+// 1080i25_avci50, 1080p25_avci50
+static const uint8_t x264_cqm_avci50_4ic[16] =
+{
+    16,22,28,40,
+    22,28,40,44,
+    28,40,44,48,
+    40,44,48,60
+};
+
+//  1080i25_avci50,
+static const uint8_t x264_cqm_avci50_1080i_8iy[64] =
+{
+    16,18,19,21,27,33,81,87,
+    18,19,21,24,30,33,81,87,
+    19,21,24,27,30,78,84,90,
+    21,24,27,30,33,78,84,90,
+    24,27,30,33,78,81,84,90,
+    24,27,30,33,78,81,84,93,
+    27,30,33,78,78,81,87,93,
+    30,33,33,78,81,84,87,96
+};
+
+//  1080p25_avci50, 720p25_avci50, 720p50_avci50
+static const uint8_t x264_cqm_avci50_p_8iy[64] =
+{
+    16,18,19,21,24,27,30,33,
+    18,19,21,24,27,30,33,78,
+    19,21,24,27,30,33,78,81,
+    21,24,27,30,33,78,81,84,
+    24,27,30,33,78,81,84,87,
+    27,30,33,78,81,84,87,90,
+    30,33,78,81,84,87,90,93,
+    33,78,81,84,87,90,93,96
+};
+
+//  1080i25_avci100, 1080p25_avci100
+static const uint8_t x264_cqm_avci100_1080_4ic[16] =
+{
+    16,20,26,32,
+    20,26,32,38,
+    26,32,38,44,
+    32,38,44,50
+};
+
+// 720p25_avci100, 720p50_avci100
+static const uint8_t x264_cqm_avci100_720p_4ic[16] =
+{
+    16,21,27,34,
+    21,27,34,41,
+    27,34,41,46,
+    34,41,46,54
+};
+
+//  1080i25_avci100,
+static const uint8_t x264_cqm_avci100_1080i_8iy[64] =
+{
+    16,19,20,23,24,26,32,42,
+    18,19,22,24,26,32,36,42,
+    18,20,23,24,26,32,36,63,
+    19,20,23,26,32,36,42,63,
+    20,22,24,26,32,36,59,63,
+    22,23,24,26,32,36,59,68,
+    22,23,24,26,32,42,59,68,
+    22,23,24,26,36,42,59,72
+};
+
+// 1080p25_avci100,
+static const uint8_t x264_cqm_avci100_1080p_8iy[64] =
+{
+    16,18,19,20,22,23,24,26,
+    18,19,20,22,23,24,26,32,
+    19,20,22,23,24,26,32,36,
+    20,22,23,24,26,32,36,42,
+    22,23,24,26,32,36,42,59,
+    23,24,26,32,36,42,59,63,
+    24,26,32,36,42,59,63,68,
+    26,32,36,42,59,63,68,72
+};
+
+// 720p25_avci100, 720p50_avci100
+static const uint8_t x264_cqm_avci100_720p_8iy[64] =
+{
+    16,18,19,21,22,24,26,32,
+    18,19,19,21,22,24,26,32,
+    19,19,21,22,22,24,26,32,
+    21,21,22,22,23,24,26,34,
+    22,22,22,23,24,25,26,34,
+    24,24,24,24,25,26,34,36,
+    26,26,26,26,26,34,36,38,
+    32,32,32,34,34,36,38,42
+};
+
 int  x264_cqm_init( x264_t *h );
 void x264_cqm_delete( x264_t *h );
 int  x264_cqm_parse_file( x264_t *h, const char *filename );
​

x264-snapshot-20130723-2245.tar.bz2/common/sparc/pixel.asm -> x264-snapshot-20140321-2245.tar.bz2/common/sparc/pixel.asm Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * pixel.asm: sparc pixel metrics
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: Phil Jensen <philj@csufresno.edu>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/sparc/pixel.h -> x264-snapshot-20140321-2245.tar.bz2/common/sparc/pixel.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * pixel.h: sparc pixel metrics
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: Phil Jensen <philj@csufresno.edu>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/threadpool.c -> x264-snapshot-20140321-2245.tar.bz2/common/threadpool.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * threadpool.c: thread pooling
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/threadpool.h -> x264-snapshot-20140321-2245.tar.bz2/common/threadpool.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * threadpool.h: thread pooling
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/common/vlc.c -> x264-snapshot-20140321-2245.tar.bz2/common/vlc.c Changed

 
@@ -1,11 +1,11 @@
 /*****************************************************************************
  * vlc.c : vlc tables
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
- *          Henrik Gramner <hengar-6@student.ltu.se>
+ *          Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
​

x264-snapshot-20130723-2245.tar.bz2/common/win32thread.c -> x264-snapshot-20140321-2245.tar.bz2/common/win32thread.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * win32thread.c: windows threading
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *          Pegasys Inc. <http://www.pegasys-inc.com>
@@ -261,7 +261,7 @@
 int x264_win32_threading_init( void )
 {
     /* find function pointers to API functions, if they exist */
-    HANDLE kernel_dll = GetModuleHandle( TEXT( "kernel32.dll" ) );
+    HANDLE kernel_dll = GetModuleHandleW( L"kernel32.dll" );
     thread_control.cond_init = (void*)GetProcAddress( kernel_dll, "InitializeConditionVariable" );
     if( thread_control.cond_init )
     {
@@ -288,7 +288,7 @@
      * On platforms that support processor grouping, use GetThreadGroupAffinity to get the current thread's affinity instead. */
 #if ARCH_X86_64
     /* find function pointers to API functions specific to x86_64 platforms, if they exist */
-    HANDLE kernel_dll = GetModuleHandle( TEXT( "kernel32.dll" ) );
+    HANDLE kernel_dll = GetModuleHandleW( L"kernel32.dll" );
     BOOL (*get_thread_affinity)( HANDLE thread, x264_group_affinity_t *group_affinity ) = (void*)GetProcAddress( kernel_dll, "GetThreadGroupAffinity" );
     if( get_thread_affinity )
     {

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * win32thread.c: windows threading
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *          Pegasys Inc. <http://www.pegasys-inc.com>
@@ -261,7 +261,7 @@
 int x264_win32_threading_init( void )
 {
     /* find function pointers to API functions, if they exist */
-    HANDLE kernel_dll = GetModuleHandle( TEXT( "kernel32.dll" ) );
+    HANDLE kernel_dll = GetModuleHandleW( L"kernel32.dll" );
     thread_control.cond_init = (void*)GetProcAddress( kernel_dll, "InitializeConditionVariable" );
     if( thread_control.cond_init )
     {
@@ -288,7 +288,7 @@
      * On platforms that support processor grouping, use GetThreadGroupAffinity to get the current thread's affinity instead. */
 #if ARCH_X86_64
     /* find function pointers to API functions specific to x86_64 platforms, if they exist */
-    HANDLE kernel_dll = GetModuleHandle( TEXT( "kernel32.dll" ) );
+    HANDLE kernel_dll = GetModuleHandleW( L"kernel32.dll" );
     BOOL (*get_thread_affinity)( HANDLE thread, x264_group_affinity_t *group_affinity ) = (void*)GetProcAddress( kernel_dll, "GetThreadGroupAffinity" );
     if( get_thread_affinity )
     {
​

x264-snapshot-20130723-2245.tar.bz2/common/win32thread.h -> x264-snapshot-20140321-2245.tar.bz2/common/win32thread.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * win32thread.h: windows threading
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
@@ -26,7 +26,6 @@
 #ifndef X264_WIN32THREAD_H
 #define X264_WIN32THREAD_H
 
-#define WIN32_LEAN_AND_MEAN
 #include <windows.h>
 /* the following macro is used within x264 */
 #undef ERROR
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/bitstream-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/bitstream-a.asm Changed

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* bitstream-a.asm: x86 bitstream functions
 ;*****************************************************************************
-;* Copyright (C) 2010-2013 x264 project
+;* Copyright (C) 2010-2014 x264 project
 ;*
 ;* Authors: Jason Garrett-Glaser <darkshikari@gmail.com>
 ;*          Henrik Gramner <henrik@gramner.com>
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/cabac-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/cabac-a.asm Changed

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* cabac-a.asm: x86 cabac
 ;*****************************************************************************
-;* Copyright (C) 2008-2013 x264 project
+;* Copyright (C) 2008-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Jason Garrett-Glaser <darkshikari@gmail.com>
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/const-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/const-a.asm Changed

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* const-a.asm: x86 global constants
 ;*****************************************************************************
-;* Copyright (C) 2010-2013 x264 project
+;* Copyright (C) 2010-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Jason Garrett-Glaser <darkshikari@gmail.com>
@@ -36,6 +36,7 @@
 const pw_512,      times 16 dw 512
 const pw_00ff,     times 16 dw 0x00ff
 const pw_pixel_max,times 16 dw ((1 << BIT_DEPTH)-1)
+const pw_0to15,    dw 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
 const pd_1,        times 8 dd 1
 const deinterleave_shufd, dd 0,4,1,5,2,6,3,7
 const pb_unpackbd1, times 2 db 0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/cpu-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/cpu-a.asm Changed

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* cpu-a.asm: x86 cpu utilities
 ;*****************************************************************************
-;* Copyright (C) 2003-2013 x264 project
+;* Copyright (C) 2003-2014 x264 project
 ;*
 ;* Authors: Laurent Aimar <fenrir@via.ecp.fr>
 ;*          Loren Merritt <lorenm@u.washington.edu>
@@ -146,17 +146,6 @@
     sfence
     ret
 
-;-----------------------------------------------------------------------------
-; void cpu_mask_misalign_sse( void )
-;-----------------------------------------------------------------------------
-cglobal cpu_mask_misalign_sse
-    sub   rsp, 4
-    stmxcsr [rsp]
-    or dword [rsp], 1<<17
-    ldmxcsr [rsp]
-    add   rsp, 4
-    ret
-
 cextern intel_cpu_indicator_init
 
 ;-----------------------------------------------------------------------------
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/dct-32.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/dct-32.asm Changed

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* dct-32.asm: x86_32 transform and zigzag
 ;*****************************************************************************
-;* Copyright (C) 2003-2013 x264 project
+;* Copyright (C) 2003-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Holger Lubitz <holger@lubitz.org>
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/dct-64.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/dct-64.asm Changed

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* dct-64.asm: x86_64 transform and zigzag
 ;*****************************************************************************
-;* Copyright (C) 2003-2013 x264 project
+;* Copyright (C) 2003-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Holger Lubitz <holger@lubitz.org>
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/dct-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/dct-a.asm Changed

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* dct-a.asm: x86 transform and zigzag
 ;*****************************************************************************
-;* Copyright (C) 2003-2013 x264 project
+;* Copyright (C) 2003-2014 x264 project
 ;*
 ;* Authors: Holger Lubitz <holger@lubitz.org>
 ;*          Loren Merritt <lorenm@u.washington.edu>
@@ -675,7 +675,7 @@
     mova        m6, [pw_pixel_max]
     mova        m7, [pd_32]
     pxor        m5, m5
-.loop
+.loop:
     mova        m3, [r1]
     paddd       m3, m7
     psrad       m3, 6         ; dc0   0 dc1   0 dc2   0 dc3   0
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/dct.h -> x264-snapshot-20140321-2245.tar.bz2/common/x86/dct.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * dct.h: x86 transform and zigzag
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/deblock-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/deblock-a.asm Changed

@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* deblock-a.asm: x86 deblocking
 ;*****************************************************************************
-;* Copyright (C) 2005-2013 x264 project
+;* Copyright (C) 2005-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Jason Garrett-Glaser <darkshikari@gmail.com>
@@ -621,7 +621,7 @@
     mov     r6, 2
     mova    m0, [pw_2]
     LOAD_AB aa, bb, r2d, r3d
-.loop
+.loop:
     mova    p2, [r4+r1]
     mova    p1, [r4+2*r1]
     mova    p0, [r4+r5]
@@ -671,7 +671,7 @@
     add     r4, r0     ; pix+4*stride
     mov     r6, 2
     mova    m0, [pw_2]
-.loop
+.loop:
     movu    q3, [r0-8]
     movu    q2, [r0+r1-8]
     movu    q1, [r0+r1*2-8]
@@ -804,35 +804,6 @@
 %define PASS8ROWS(base, base3, stride, stride3, offset) \
     PASS8ROWS(base+offset, base3+offset, stride, stride3)
 
-; in: 8 rows of 4 bytes in %4..%11
-; out: 4 rows of 8 bytes in m0..m3
-%macro TRANSPOSE4x8_LOAD 11
-    movh       m0, %4
-    movh       m2, %5
-    movh       m1, %6
-    movh       m3, %7
-    punpckl%1  m0, m2
-    punpckl%1  m1, m3
-    mova       m2, m0
-    punpckl%2  m0, m1
-    punpckh%2  m2, m1
-
-    movh       m4, %8
-    movh       m6, %9
-    movh       m5, %10
-    movh       m7, %11
-    punpckl%1  m4, m6
-    punpckl%1  m5, m7
-    mova       m6, m4
-    punpckl%2  m4, m5
-    punpckh%2  m6, m5
-
-    punpckh%3  m1, m0, m4
-    punpckh%3  m3, m2, m6
-    punpckl%3  m0, m4
-    punpckl%3  m2, m6
-%endmacro
-
 ; in: 4 rows of 8 bytes in m0..m3
 ; out: 8 rows of 4 bytes in %1..%8
 %macro TRANSPOSE8x4B_STORE 8
@@ -844,24 +815,24 @@
     punpcklbw  m2, m3
     punpcklwd  m1, m0, m2
     punpckhwd  m0, m2
-    movh       %1, m1
+    movd       %1, m1
     punpckhdq  m1, m1
-    movh       %2, m1
-    movh       %3, m0
+    movd       %2, m1
+    movd       %3, m0
     punpckhdq  m0, m0
-    movh       %4, m0
+    movd       %4, m0
 
     punpckhdq  m3, m3
     punpcklbw  m4, m5
     punpcklbw  m6, m3
     punpcklwd  m5, m4, m6
     punpckhwd  m4, m6
-    movh       %5, m5
+    movd       %5, m5
     punpckhdq  m5, m5
-    movh       %6, m5
-    movh       %7, m4
+    movd       %6, m5
+    movd       %7, m4
     punpckhdq  m4, m4
-    movh       %8, m4
+    movd       %8, m4
 %endmacro
 
 ; in: 8 rows of 4 bytes in %9..%10
@@ -877,34 +848,94 @@
     pextrd %8, %10, 3
 %endmacro
 
-%macro TRANSPOSE4x8B_LOAD 8
-    TRANSPOSE4x8_LOAD bw, wd, dq, %1, %2, %3, %4, %5, %6, %7, %8
-%endmacro
-
-%macro TRANSPOSE4x8W_LOAD 8
-%if mmsize==16
-    TRANSPOSE4x8_LOAD wd, dq, qdq, %1, %2, %3, %4, %5, %6, %7, %8
-%else
+; in: 4 rows of 4 words in %1..%4
+; out: 4 rows of 4 word in m0..m3
+; clobbers: m4
+%macro TRANSPOSE4x4W_LOAD 4-8
+%if mmsize==8
     SWAP  1, 4, 2, 3
-    mova  m0, [t5]
-    mova  m1, [t5+r1]
-    mova  m2, [t5+r1*2]
-    mova  m3, [t5+t6]
+    movq  m0, %1
+    movq  m1, %2
+    movq  m2, %3
+    movq  m3, %4
     TRANSPOSE4x4W 0, 1, 2, 3, 4
+%else
+    movq       m0, %1
+    movq       m2, %2
+    movq       m1, %3
+    movq       m3, %4
+    punpcklwd  m0, m2
+    punpcklwd  m1, m3
+    mova       m2, m0
+    punpckldq  m0, m1
+    punpckhdq  m2, m1
+    movhlps    m1, m0
+    movhlps    m3, m2
 %endif
 %endmacro
 
-%macro TRANSPOSE8x2W_STORE 8
+; in: 2 rows of 4 words in m1..m2
+; out: 4 rows of 2 words in %1..%4
+; clobbers: m0, m1
+%macro TRANSPOSE4x2W_STORE 4-8
+%if mmsize==8
     punpckhwd  m0, m1, m2
     punpcklwd  m1, m2
-%if mmsize==8
+%else
+    punpcklwd  m1, m2
+    movhlps    m0, m1
+%endif
     movd       %3, m0
     movd       %1, m1
     psrlq      m1, 32
     psrlq      m0, 32
     movd       %2, m1
     movd       %4, m0
+%endmacro
+
+; in: 4/8 rows of 4 words in %1..%8
+; out: 4 rows of 4/8 word in m0..m3
+; clobbers: m4, m5, m6, m7
+%macro TRANSPOSE4x8W_LOAD 8
+%if mmsize==8
+    TRANSPOSE4x4W_LOAD %1, %2, %3, %4
+%else
+    movq       m0, %1
+    movq       m2, %2
+    movq       m1, %3
+    movq       m3, %4
+    punpcklwd  m0, m2
+    punpcklwd  m1, m3
+    mova       m2, m0
+    punpckldq  m0, m1
+    punpckhdq  m2, m1
+
+    movq       m4, %5
+    movq       m6, %6
+    movq       m5, %7
+    movq       m7, %8
+    punpcklwd  m4, m6
+    punpcklwd  m5, m7
+    mova       m6, m4
+    punpckldq  m4, m5
+    punpckhdq  m6, m5
+
+    punpckhqdq m1, m0, m4
+    punpckhqdq m3, m2, m6
+    punpcklqdq m0, m4
+    punpcklqdq m2, m6
+%endif
+%endmacro
+
+; in: 2 rows of 4/8 words in m1..m2
+; out: 4/8 rows of 2 words in %1..%8
+; clobbers: m0, m1
+%macro TRANSPOSE8x2W_STORE 8
+%if mmsize==8
+    TRANSPOSE4x2W_STORE %1, %2, %3, %4
 %else
+    punpckhwd  m0, m1, m2
+    punpcklwd  m1, m2
     movd       %5, m0
     movd       %1, m1
     psrldq     m1, 4
@@ -1118,7 +1149,7 @@
 %endif
     mova     m6, [pb_1]
     psubusb  m4, m6              ; alpha - 1
-    psubusb  m5, m6              ; alpha - 2
+    psubusb  m5, m6              ; beta - 1
 %if %0>2
     mova     %3, m4
 %endif
@@ -1361,19 +1392,18 @@
 ;-----------------------------------------------------------------------------
 ; void deblock_h_luma( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
 ;-----------------------------------------------------------------------------
-
 %if cpuflag(avx)
 INIT_XMM cpuname
 %else
 INIT_MMX cpuname
 %endif
-cglobal deblock_h_luma, 0,5,8,0x60+HAVE_ALIGNED_STACK*12
-    mov    r0, r0mp
+cglobal deblock_h_luma, 1,5,8,0x60+12
     mov    r3, r1m
     lea    r4, [r3*3]
     sub    r0, 4
     lea    r1, [r0+r4]
-    %define pix_tmp esp+12*HAVE_ALIGNED_STACK
+    %define pix_tmp esp+12
+    ; esp is intentionally misaligned to make it aligned after pushing the arguments for deblock_%1_luma.
 
     ; transpose 6x16 -> tmp space
     TRANSPOSE6x8_MEM  PASS8ROWS(r0, r1, r3, r4), pix_tmp
@@ -2098,17 +2128,14 @@
 ;-----------------------------------------------------------------------------
 %macro DEBLOCK_H_CHROMA_420_MBAFF 0
 cglobal deblock_h_chroma_mbaff, 5,7,8
-    sub    r0, 4
-    lea    t6, [r1*3]
-    mov    t5, r0
-    add    r0, t6
-    TRANSPOSE4x8W_LOAD PASS8ROWS(t5, r0, r1, t6)
+    CHROMA_H_START
+    TRANSPOSE4x4W_LOAD PASS8ROWS(t5, r0, r1, t6)
     LOAD_MASK  r2d, r3d
     movd       m6, [r4] ; tc0
     punpcklbw  m6, m6
     pand       m7, m6
     DEBLOCK_P0_Q0
-    TRANSPOSE8x2W_STORE PASS8ROWS(t5, r0, r1, t6, 2)
+    TRANSPOSE4x2W_STORE PASS8ROWS(t5, r0, r1, t6, 2)
     RET
 %endmacro
 
@@ -2249,9 +2276,9 @@
 INIT_MMX mmx2
 cglobal deblock_h_chroma_intra_mbaff, 4,6,8
     CHROMA_H_START
-    TRANSPOSE4x8W_LOAD  PASS8ROWS(t5, r0, r1, t6)
+    TRANSPOSE4x4W_LOAD  PASS8ROWS(t5, r0, r1, t6)
     call chroma_intra_body
-    TRANSPOSE8x2W_STORE PASS8ROWS(t5, r0, r1, t6, 2)
+    TRANSPOSE4x2W_STORE PASS8ROWS(t5, r0, r1, t6, 2)
     RET
 %endif ; !HIGH_BIT_DEPTH

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* deblock-a.asm: x86 deblocking
 ;*****************************************************************************
-;* Copyright (C) 2005-2013 x264 project
+;* Copyright (C) 2005-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Jason Garrett-Glaser <darkshikari@gmail.com>
@@ -621,7 +621,7 @@
     mov     r6, 2
     mova    m0, [pw_2]
     LOAD_AB aa, bb, r2d, r3d
-.loop
+.loop:
     mova    p2, [r4+r1]
     mova    p1, [r4+2*r1]
     mova    p0, [r4+r5]
@@ -671,7 +671,7 @@
     add     r4, r0     ; pix+4*stride
     mov     r6, 2
     mova    m0, [pw_2]
-.loop
+.loop:
     movu    q3, [r0-8]
     movu    q2, [r0+r1-8]
     movu    q1, [r0+r1*2-8]
@@ -804,35 +804,6 @@
 %define PASS8ROWS(base, base3, stride, stride3, offset) \
     PASS8ROWS(base+offset, base3+offset, stride, stride3)
 
-; in: 8 rows of 4 bytes in %4..%11
-; out: 4 rows of 8 bytes in m0..m3
-%macro TRANSPOSE4x8_LOAD 11
-    movh       m0, %4
-    movh       m2, %5
-    movh       m1, %6
-    movh       m3, %7
-    punpckl%1  m0, m2
-    punpckl%1  m1, m3
-    mova       m2, m0
-    punpckl%2  m0, m1
-    punpckh%2  m2, m1
-
-    movh       m4, %8
-    movh       m6, %9
-    movh       m5, %10
-    movh       m7, %11
-    punpckl%1  m4, m6
-    punpckl%1  m5, m7
-    mova       m6, m4
-    punpckl%2  m4, m5
-    punpckh%2  m6, m5
-
-    punpckh%3  m1, m0, m4
-    punpckh%3  m3, m2, m6
-    punpckl%3  m0, m4
-    punpckl%3  m2, m6
-%endmacro
-
 ; in: 4 rows of 8 bytes in m0..m3
 ; out: 8 rows of 4 bytes in %1..%8
 %macro TRANSPOSE8x4B_STORE 8
@@ -844,24 +815,24 @@
     punpcklbw  m2, m3
     punpcklwd  m1, m0, m2
     punpckhwd  m0, m2
-    movh       %1, m1
+    movd       %1, m1
     punpckhdq  m1, m1
-    movh       %2, m1
-    movh       %3, m0
+    movd       %2, m1
+    movd       %3, m0
     punpckhdq  m0, m0
-    movh       %4, m0
+    movd       %4, m0
 
     punpckhdq  m3, m3
     punpcklbw  m4, m5
     punpcklbw  m6, m3
     punpcklwd  m5, m4, m6
     punpckhwd  m4, m6
-    movh       %5, m5
+    movd       %5, m5
     punpckhdq  m5, m5
-    movh       %6, m5
-    movh       %7, m4
+    movd       %6, m5
+    movd       %7, m4
     punpckhdq  m4, m4
-    movh       %8, m4
+    movd       %8, m4
 %endmacro
 
 ; in: 8 rows of 4 bytes in %9..%10
@@ -877,34 +848,94 @@
     pextrd %8, %10, 3
 %endmacro
 
-%macro TRANSPOSE4x8B_LOAD 8
-    TRANSPOSE4x8_LOAD bw, wd, dq, %1, %2, %3, %4, %5, %6, %7, %8
-%endmacro
-
-%macro TRANSPOSE4x8W_LOAD 8
-%if mmsize==16
-    TRANSPOSE4x8_LOAD wd, dq, qdq, %1, %2, %3, %4, %5, %6, %7, %8
-%else
+; in: 4 rows of 4 words in %1..%4
+; out: 4 rows of 4 word in m0..m3
+; clobbers: m4
+%macro TRANSPOSE4x4W_LOAD 4-8
+%if mmsize==8
     SWAP  1, 4, 2, 3
-    mova  m0, [t5]
-    mova  m1, [t5+r1]
-    mova  m2, [t5+r1*2]
-    mova  m3, [t5+t6]
+    movq  m0, %1
+    movq  m1, %2
+    movq  m2, %3
+    movq  m3, %4
     TRANSPOSE4x4W 0, 1, 2, 3, 4
+%else
+    movq       m0, %1
+    movq       m2, %2
+    movq       m1, %3
+    movq       m3, %4
+    punpcklwd  m0, m2
+    punpcklwd  m1, m3
+    mova       m2, m0
+    punpckldq  m0, m1
+    punpckhdq  m2, m1
+    movhlps    m1, m0
+    movhlps    m3, m2
 %endif
 %endmacro
 
-%macro TRANSPOSE8x2W_STORE 8
+; in: 2 rows of 4 words in m1..m2
+; out: 4 rows of 2 words in %1..%4
+; clobbers: m0, m1
+%macro TRANSPOSE4x2W_STORE 4-8
+%if mmsize==8
     punpckhwd  m0, m1, m2
     punpcklwd  m1, m2
-%if mmsize==8
+%else
+    punpcklwd  m1, m2
+    movhlps    m0, m1
+%endif
     movd       %3, m0
     movd       %1, m1
     psrlq      m1, 32
     psrlq      m0, 32
     movd       %2, m1
     movd       %4, m0
+%endmacro
+
+; in: 4/8 rows of 4 words in %1..%8
+; out: 4 rows of 4/8 word in m0..m3
+; clobbers: m4, m5, m6, m7
+%macro TRANSPOSE4x8W_LOAD 8
+%if mmsize==8
+    TRANSPOSE4x4W_LOAD %1, %2, %3, %4
+%else
+    movq       m0, %1
+    movq       m2, %2
+    movq       m1, %3
+    movq       m3, %4
+    punpcklwd  m0, m2
+    punpcklwd  m1, m3
+    mova       m2, m0
+    punpckldq  m0, m1
+    punpckhdq  m2, m1
+
+    movq       m4, %5
+    movq       m6, %6
+    movq       m5, %7
+    movq       m7, %8
+    punpcklwd  m4, m6
+    punpcklwd  m5, m7
+    mova       m6, m4
+    punpckldq  m4, m5
+    punpckhdq  m6, m5
+
+    punpckhqdq m1, m0, m4
+    punpckhqdq m3, m2, m6
+    punpcklqdq m0, m4
+    punpcklqdq m2, m6
+%endif
+%endmacro
+
+; in: 2 rows of 4/8 words in m1..m2
+; out: 4/8 rows of 2 words in %1..%8
+; clobbers: m0, m1
+%macro TRANSPOSE8x2W_STORE 8
+%if mmsize==8
+    TRANSPOSE4x2W_STORE %1, %2, %3, %4
 %else
+    punpckhwd  m0, m1, m2
+    punpcklwd  m1, m2
     movd       %5, m0
     movd       %1, m1
     psrldq     m1, 4
@@ -1118,7 +1149,7 @@
 %endif
     mova     m6, [pb_1]
     psubusb  m4, m6              ; alpha - 1
-    psubusb  m5, m6              ; alpha - 2
+    psubusb  m5, m6              ; beta - 1
 %if %0>2
     mova     %3, m4
 %endif
@@ -1361,19 +1392,18 @@
 ;-----------------------------------------------------------------------------
 ; void deblock_h_luma( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
 ;-----------------------------------------------------------------------------
-
 %if cpuflag(avx)
 INIT_XMM cpuname
 %else
 INIT_MMX cpuname
 %endif
-cglobal deblock_h_luma, 0,5,8,0x60+HAVE_ALIGNED_STACK*12
-    mov    r0, r0mp
+cglobal deblock_h_luma, 1,5,8,0x60+12
     mov    r3, r1m
     lea    r4, [r3*3]
     sub    r0, 4
     lea    r1, [r0+r4]
-    %define pix_tmp esp+12*HAVE_ALIGNED_STACK
+    %define pix_tmp esp+12
+    ; esp is intentionally misaligned to make it aligned after pushing the arguments for deblock_%1_luma.
 
     ; transpose 6x16 -> tmp space
     TRANSPOSE6x8_MEM  PASS8ROWS(r0, r1, r3, r4), pix_tmp
@@ -2098,17 +2128,14 @@
 ;-----------------------------------------------------------------------------
 %macro DEBLOCK_H_CHROMA_420_MBAFF 0
 cglobal deblock_h_chroma_mbaff, 5,7,8
-    sub    r0, 4
-    lea    t6, [r1*3]
-    mov    t5, r0
-    add    r0, t6
-    TRANSPOSE4x8W_LOAD PASS8ROWS(t5, r0, r1, t6)
+    CHROMA_H_START
+    TRANSPOSE4x4W_LOAD PASS8ROWS(t5, r0, r1, t6)
     LOAD_MASK  r2d, r3d
     movd       m6, [r4] ; tc0
     punpcklbw  m6, m6
     pand       m7, m6
     DEBLOCK_P0_Q0
-    TRANSPOSE8x2W_STORE PASS8ROWS(t5, r0, r1, t6, 2)
+    TRANSPOSE4x2W_STORE PASS8ROWS(t5, r0, r1, t6, 2)
     RET
 %endmacro
 
@@ -2249,9 +2276,9 @@
 INIT_MMX mmx2
 cglobal deblock_h_chroma_intra_mbaff, 4,6,8
     CHROMA_H_START
-    TRANSPOSE4x8W_LOAD  PASS8ROWS(t5, r0, r1, t6)
+    TRANSPOSE4x4W_LOAD  PASS8ROWS(t5, r0, r1, t6)
     call chroma_intra_body
-    TRANSPOSE8x2W_STORE PASS8ROWS(t5, r0, r1, t6, 2)
+    TRANSPOSE4x2W_STORE PASS8ROWS(t5, r0, r1, t6, 2)
     RET
 %endif ; !HIGH_BIT_DEPTH
 
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/mc-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/mc-a.asm Changed

@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* mc-a.asm: x86 motion compensation
 ;*****************************************************************************
-;* Copyright (C) 2003-2013 x264 project
+;* Copyright (C) 2003-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Jason Garrett-Glaser <darkshikari@gmail.com>
@@ -1029,59 +1029,48 @@
     jg     .height_loop
     RET
 
+INIT_XMM
 cglobal pixel_avg2_w16_sse2, 6,7
     sub    r4, r2
     lea    r6, [r4+r3]
 .height_loop:
-    movdqu xmm0, [r2]
-    movdqu xmm2, [r2+r3]
-    movdqu xmm1, [r2+r4]
-    movdqu xmm3, [r2+r6]
+    movu   m0, [r2]
+    movu   m2, [r2+r3]
+    movu   m1, [r2+r4]
+    movu   m3, [r2+r6]
     lea    r2, [r2+r3*2]
-    pavgb  xmm0, xmm1
-    pavgb  xmm2, xmm3
-    movdqa [r0], xmm0
-    movdqa [r0+r1], xmm2
+    pavgb  m0, m1
+    pavgb  m2, m3
+    mova [r0], m0
+    mova [r0+r1], m2
     lea    r0, [r0+r1*2]
-    sub    r5d, 2
-    jg     .height_loop
+    sub   r5d, 2
+    jg .height_loop
     RET
 
-%macro AVG2_W20 1
-cglobal pixel_avg2_w20_%1, 6,7
+cglobal pixel_avg2_w20_sse2, 6,7
     sub    r2, r4
     lea    r6, [r2+r3]
 .height_loop:
-    movdqu xmm0, [r4]
-    movdqu xmm2, [r4+r3]
-%ifidn %1, sse2_misalign
-    movd   mm4,  [r4+16]
-    movd   mm5,  [r4+r3+16]
-    pavgb  xmm0, [r4+r2]
-    pavgb  xmm2, [r4+r6]
-%else
-    movdqu xmm1, [r4+r2]
-    movdqu xmm3, [r4+r6]
-    movd   mm4,  [r4+16]
-    movd   mm5,  [r4+r3+16]
-    pavgb  xmm0, xmm1
-    pavgb  xmm2, xmm3
-%endif
-    pavgb  mm4,  [r4+r2+16]
-    pavgb  mm5,  [r4+r6+16]
+    movu   m0, [r4]
+    movu   m2, [r4+r3]
+    movu   m1, [r4+r2]
+    movu   m3, [r4+r6]
+    movd  mm4, [r4+16]
+    movd  mm5, [r4+r3+16]
+    pavgb  m0, m1
+    pavgb  m2, m3
+    pavgb mm4, [r4+r2+16]
+    pavgb mm5, [r4+r6+16]
     lea    r4, [r4+r3*2]
-    movdqa [r0], xmm0
-    movd   [r0+16], mm4
-    movdqa [r0+r1], xmm2
-    movd   [r0+r1+16], mm5
+    mova [r0], m0
+    mova [r0+r1], m2
+    movd [r0+16], mm4
+    movd [r0+r1+16], mm5
     lea    r0, [r0+r1*2]
-    sub    r5d, 2
-    jg     .height_loop
+    sub   r5d, 2
+    jg .height_loop
     RET
-%endmacro
-
-AVG2_W20 sse2
-AVG2_W20 sse2_misalign
 
 INIT_YMM avx2
 cglobal pixel_avg2_w20, 6,7
@@ -1524,7 +1513,7 @@
 %endmacro
 %else ; !HIGH_BIT_DEPTH
 %macro UNPACK_UNALIGNED 3
-%if mmsize == 8 || cpuflag(misalign)
+%if mmsize == 8
     punpcklwd  %1, %3
 %else
     movh       %2, %3
@@ -2130,8 +2119,6 @@
 %else ; !HIGH_BIT_DEPTH
 INIT_MMX mmx2
 MC_CHROMA
-INIT_XMM sse2, misalign
-MC_CHROMA
 INIT_XMM sse2
 MC_CHROMA
 INIT_XMM ssse3

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* mc-a.asm: x86 motion compensation
 ;*****************************************************************************
-;* Copyright (C) 2003-2013 x264 project
+;* Copyright (C) 2003-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Jason Garrett-Glaser <darkshikari@gmail.com>
@@ -1029,59 +1029,48 @@
     jg     .height_loop
     RET
 
+INIT_XMM
 cglobal pixel_avg2_w16_sse2, 6,7
     sub    r4, r2
     lea    r6, [r4+r3]
 .height_loop:
-    movdqu xmm0, [r2]
-    movdqu xmm2, [r2+r3]
-    movdqu xmm1, [r2+r4]
-    movdqu xmm3, [r2+r6]
+    movu   m0, [r2]
+    movu   m2, [r2+r3]
+    movu   m1, [r2+r4]
+    movu   m3, [r2+r6]
     lea    r2, [r2+r3*2]
-    pavgb  xmm0, xmm1
-    pavgb  xmm2, xmm3
-    movdqa [r0], xmm0
-    movdqa [r0+r1], xmm2
+    pavgb  m0, m1
+    pavgb  m2, m3
+    mova [r0], m0
+    mova [r0+r1], m2
     lea    r0, [r0+r1*2]
-    sub    r5d, 2
-    jg     .height_loop
+    sub   r5d, 2
+    jg .height_loop
     RET
 
-%macro AVG2_W20 1
-cglobal pixel_avg2_w20_%1, 6,7
+cglobal pixel_avg2_w20_sse2, 6,7
     sub    r2, r4
     lea    r6, [r2+r3]
 .height_loop:
-    movdqu xmm0, [r4]
-    movdqu xmm2, [r4+r3]
-%ifidn %1, sse2_misalign
-    movd   mm4,  [r4+16]
-    movd   mm5,  [r4+r3+16]
-    pavgb  xmm0, [r4+r2]
-    pavgb  xmm2, [r4+r6]
-%else
-    movdqu xmm1, [r4+r2]
-    movdqu xmm3, [r4+r6]
-    movd   mm4,  [r4+16]
-    movd   mm5,  [r4+r3+16]
-    pavgb  xmm0, xmm1
-    pavgb  xmm2, xmm3
-%endif
-    pavgb  mm4,  [r4+r2+16]
-    pavgb  mm5,  [r4+r6+16]
+    movu   m0, [r4]
+    movu   m2, [r4+r3]
+    movu   m1, [r4+r2]
+    movu   m3, [r4+r6]
+    movd  mm4, [r4+16]
+    movd  mm5, [r4+r3+16]
+    pavgb  m0, m1
+    pavgb  m2, m3
+    pavgb mm4, [r4+r2+16]
+    pavgb mm5, [r4+r6+16]
     lea    r4, [r4+r3*2]
-    movdqa [r0], xmm0
-    movd   [r0+16], mm4
-    movdqa [r0+r1], xmm2
-    movd   [r0+r1+16], mm5
+    mova [r0], m0
+    mova [r0+r1], m2
+    movd [r0+16], mm4
+    movd [r0+r1+16], mm5
     lea    r0, [r0+r1*2]
-    sub    r5d, 2
-    jg     .height_loop
+    sub   r5d, 2
+    jg .height_loop
     RET
-%endmacro
-
-AVG2_W20 sse2
-AVG2_W20 sse2_misalign
 
 INIT_YMM avx2
 cglobal pixel_avg2_w20, 6,7
@@ -1524,7 +1513,7 @@
 %endmacro
 %else ; !HIGH_BIT_DEPTH
 %macro UNPACK_UNALIGNED 3
-%if mmsize == 8 || cpuflag(misalign)
+%if mmsize == 8
     punpcklwd  %1, %3
 %else
     movh       %2, %3
@@ -2130,8 +2119,6 @@
 %else ; !HIGH_BIT_DEPTH
 INIT_MMX mmx2
 MC_CHROMA
-INIT_XMM sse2, misalign
-MC_CHROMA
 INIT_XMM sse2
 MC_CHROMA
 INIT_XMM ssse3
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/mc-a2.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/mc-a2.asm Changed

@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* mc-a2.asm: x86 motion compensation
 ;*****************************************************************************
-;* Copyright (C) 2005-2013 x264 project
+;* Copyright (C) 2005-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Jason Garrett-Glaser <darkshikari@gmail.com>
@@ -32,6 +32,7 @@
 
 SECTION_RODATA 32
 
+pw_1024: times 16 dw 1024
 filt_mul20: times 32 db 20
 filt_mul15: times 16 db 1, -5
 filt_mul51: times 16 db -5, 1
@@ -39,17 +40,25 @@
 deinterleave_shuf: times 2 db 0,2,4,6,8,10,12,14,1,3,5,7,9,11,13,15
 
 %if HIGH_BIT_DEPTH
+v210_mask: times 4 dq 0xc00ffc003ff003ff
+v210_luma_shuf: times 2 db 1,2,4,5,6,7,9,10,12,13,14,15,12,13,14,15
+v210_chroma_shuf: times 2 db 0,1,2,3,5,6,8,9,10,11,13,14,10,11,13,14
+; vpermd indices {0,1,2,4,5,7,_,_} merged in the 3 lsb of each dword to save a register
+v210_mult: dw 0x2000,0x7fff,0x0801,0x2000,0x7ffa,0x0800,0x7ffc,0x0800
+           dw 0x1ffd,0x7fff,0x07ff,0x2000,0x7fff,0x0800,0x7fff,0x0800
+
 deinterleave_shuf32a: SHUFFLE_MASK_W 0,2,4,6,8,10,12,14
 deinterleave_shuf32b: SHUFFLE_MASK_W 1,3,5,7,9,11,13,15
 %else
+deinterleave_rgb_shuf: db 0,3,6,9,1,4,7,10,2,5,8,11,-1,-1,-1,-1
+                       db 0,4,8,12,1,5,9,13,2,6,10,14,-1,-1,-1,-1
+
 deinterleave_shuf32a: db 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30
 deinterleave_shuf32b: db 1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31
-%endif
-pw_1024: times 16 dw 1024
+%endif ; !HIGH_BIT_DEPTH
 
 pd_16: times 4 dd 16
 pd_0f: times 4 dd 0xffff
-pf_inv256: times 8 dd 0.00390625
 
 pad10: times 8 dw    10*PIXEL_MAX
 pad20: times 8 dw    20*PIXEL_MAX
@@ -60,16 +69,22 @@
 tap2: times 4 dw 20, 20
 tap3: times 4 dw -5,  1
 
+pw_0xc000: times 8 dw 0xc000
+pw_31: times 8 dw 31
+pd_4: times 4 dd 4
+
 SECTION .text
 
 cextern pb_0
 cextern pw_1
+cextern pw_8
 cextern pw_16
 cextern pw_32
 cextern pw_512
 cextern pw_00ff
 cextern pw_3fff
 cextern pw_pixel_max
+cextern pw_0to15
 cextern pd_ffff
 
 %macro LOAD_ADD 4
@@ -482,7 +497,7 @@
     %define pw_rnd [pw_32]
 %endif
 ; This doesn't seem to be faster (with AVX) on Sandy Bridge or Bulldozer...
-%if cpuflag(misalign) || mmsize==32
+%if mmsize==32
 .loop:
     movu    m4, [src-4]
     movu    m5, [src-2]
@@ -630,8 +645,6 @@
 HPEL_V 0
 INIT_XMM sse2
 HPEL_V 8
-INIT_XMM sse2, misalign
-HPEL_C
 %if ARCH_X86_64 == 0
 INIT_XMM sse2
 HPEL_C
@@ -1197,6 +1210,163 @@
     RET
 %endmacro ; PLANE_DEINTERLEAVE
 
+%macro PLANE_DEINTERLEAVE_RGB_CORE 9 ; pw, i_dsta, i_dstb, i_dstc, i_src, w, h, tmp1, tmp2
+%if cpuflag(ssse3)
+    mova        m3, [deinterleave_rgb_shuf+(%1-3)*16]
+%endif
+%%loopy:
+    mov         %8, r6
+    mov         %9, %6
+%%loopx:
+    movu        m0, [%8]
+    movu        m1, [%8+%1*mmsize/4]
+%if cpuflag(ssse3)
+    pshufb      m0, m3        ; b0 b1 b2 b3 g0 g1 g2 g3 r0 r1 r2 r3
+    pshufb      m1, m3        ; b4 b5 b6 b7 g4 g5 g6 g7 r4 r5 r6 r7
+%elif %1 == 3
+    psrldq      m2, m0, 6
+    punpcklqdq  m0, m1        ; b0 g0 r0 b1 g1 r1 __ __ b4 g4 r4 b5 g5 r5
+    psrldq      m1, 6
+    punpcklqdq  m2, m1        ; b2 g2 r2 b3 g3 r3 __ __ b6 g6 r6 b7 g7 r7
+    psrlq       m3, m0, 24
+    psrlq       m4, m2, 24
+    punpckhbw   m1, m0, m3    ; b4 b5 g4 g5 r4 r5
+    punpcklbw   m0, m3        ; b0 b1 g0 g1 r0 r1
+    punpckhbw   m3, m2, m4    ; b6 b7 g6 g7 r6 r7
+    punpcklbw   m2, m4        ; b2 b3 g2 g3 r2 r3
+    punpcklwd   m0, m2        ; b0 b1 b2 b3 g0 g1 g2 g3 r0 r1 r2 r3
+    punpcklwd   m1, m3        ; b4 b5 b6 b7 g4 g5 g6 g7 r4 r5 r6 r7
+%else
+    pshufd      m3, m0, q2301
+    pshufd      m4, m1, q2301
+    punpckhbw   m2, m0, m3    ; b2 b3 g2 g3 r2 r3
+    punpcklbw   m0, m3        ; b0 b1 g0 g1 r0 r1
+    punpckhbw   m3, m1, m4    ; b6 b7 g6 g7 r6 r7
+    punpcklbw   m1, m4        ; b4 b5 g4 g5 r4 r5
+    punpcklwd   m0, m2        ; b0 b1 b2 b3 g0 g1 g2 g3 r0 r1 r2 r3
+    punpcklwd   m1, m3        ; b4 b5 b6 b7 g4 g5 g6 g7 r4 r5 r6 r7
+%endif
+    punpckldq   m2, m0, m1    ; b0 b1 b2 b3 b4 b5 b6 b7 g0 g1 g2 g3 g4 g5 g6 g7
+    punpckhdq   m0, m1        ; r0 r1 r2 r3 r4 r5 r6 r7
+    movh   [r0+%9], m2
+    movhps [r2+%9], m2
+    movh   [r4+%9], m0
+    add         %8, %1*mmsize/2
+    add         %9, mmsize/2
+    jl %%loopx
+    add         r0, %2
+    add         r2, %3
+    add         r4, %4
+    add         r6, %5
+    dec        %7d
+    jg %%loopy
+%endmacro
+
+%macro PLANE_DEINTERLEAVE_RGB 0
+;-----------------------------------------------------------------------------
+; void x264_plane_copy_deinterleave_rgb( pixel *dsta, intptr_t i_dsta,
+;                                        pixel *dstb, intptr_t i_dstb,
+;                                        pixel *dstc, intptr_t i_dstc,
+;                                        pixel *src,  intptr_t i_src, int pw, int w, int h )
+;-----------------------------------------------------------------------------
+%if ARCH_X86_64
+cglobal plane_copy_deinterleave_rgb, 8,12
+    %define %%args r1, r3, r5, r7, r8, r9, r10, r11
+    mov        r8d, r9m
+    mov        r9d, r10m
+    add         r0, r8
+    add         r2, r8
+    add         r4, r8
+    neg         r8
+%else
+cglobal plane_copy_deinterleave_rgb, 1,7
+    %define %%args r1m, r3m, r5m, r7m, r9m, r1, r3, r5
+    mov         r1, r9m
+    mov         r2, r2m
+    mov         r4, r4m
+    mov         r6, r6m
+    add         r0, r1
+    add         r2, r1
+    add         r4, r1
+    neg         r1
+    mov        r9m, r1
+    mov         r1, r10m
+%endif
+    cmp  dword r8m, 4
+    je .pw4
+    PLANE_DEINTERLEAVE_RGB_CORE 3, %%args ; BGR
+    jmp .ret
+.pw4:
+    PLANE_DEINTERLEAVE_RGB_CORE 4, %%args ; BGRA
+.ret:
+    REP_RET
+%endmacro
+
+%if HIGH_BIT_DEPTH == 0
+INIT_XMM sse2
+PLANE_DEINTERLEAVE_RGB
+INIT_XMM ssse3
+PLANE_DEINTERLEAVE_RGB
+%endif ; !HIGH_BIT_DEPTH
+
+%macro PLANE_DEINTERLEAVE_V210 0
+;-----------------------------------------------------------------------------
+; void x264_plane_copy_deinterleave_v210( uint16_t *dsty, intptr_t i_dsty,
+;                                         uint16_t *dstc, intptr_t i_dstc,
+;                                         uint32_t *src, intptr_t i_src, int w, int h )
+;-----------------------------------------------------------------------------
+%if ARCH_X86_64
+cglobal plane_copy_deinterleave_v210, 8,10,7
+%define src   r8
+%define org_w r9
+%define h     r7d
+%else
+cglobal plane_copy_deinterleave_v210, 7,7,7
+%define src   r4m
+%define org_w r6m
+%define h     dword r7m
+%endif
+    FIX_STRIDES r1, r3, r6d
+    shl    r5, 2
+    add    r0, r6
+    add    r2, r6
+    neg    r6
+    mov   src, r4
+    mov org_w, r6
+    mova   m2, [v210_mask]
+    mova   m3, [v210_luma_shuf]
+    mova   m4, [v210_chroma_shuf]
+    mova   m5, [v210_mult] ; also functions as vpermd index for avx2
+    pshufd m6, m5, q1102
+
+ALIGN 16
+.loop:
+    movu   m1, [r4]
+    pandn  m0, m2, m1
+    pand   m1, m2
+    pshufb m0, m3
+    pshufb m1, m4
+    pmulhrsw m0, m5 ; y0 y1 y2 y3 y4 y5 __ __
+    pmulhrsw m1, m6 ; u0 v0 u1 v1 u2 v2 __ __
+%if mmsize == 32
+    vpermd m0, m5, m0
+    vpermd m1, m5, m1
+%endif
+    movu [r0+r6], m0
+    movu [r2+r6], m1
+    add    r4, mmsize
+    add    r6, 3*mmsize/4
+    jl .loop
+    add    r0, r1
+    add    r2, r3
+    add   src, r5
+    mov    r4, src
+    mov    r6, org_w
+    dec     h
+    jg .loop
+    RET
+%endmacro ; PLANE_DEINTERLEAVE_V210
+
 %if HIGH_BIT_DEPTH
 INIT_MMX mmx2
 PLANE_INTERLEAVE
@@ -1205,9 +1375,14 @@
 INIT_XMM sse2
 PLANE_INTERLEAVE
 PLANE_DEINTERLEAVE
+INIT_XMM ssse3
+PLANE_DEINTERLEAVE_V210
 INIT_XMM avx
 PLANE_INTERLEAVE
 PLANE_DEINTERLEAVE
+PLANE_DEINTERLEAVE_V210
+INIT_YMM avx2
+PLANE_DEINTERLEAVE_V210
 %else
 INIT_MMX mmx2
 PLANE_INTERLEAVE
@@ -1813,62 +1988,64 @@
 ;                             uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len )
 ;-----------------------------------------------------------------------------
 %macro MBTREE 0
-cglobal mbtree_propagate_cost, 7,7,7
-    add        r6d, r6d
-    lea         r0, [r0+r6*2]
-    add         r1, r6
-    add         r2, r6
-    add         r3, r6
-    add         r4, r6
-    neg         r6
-    pxor      xmm4, xmm4
-    movss     xmm6, [r5]
-    shufps    xmm6, xmm6, 0
-    mulps     xmm6, [pf_inv256]
-    movdqa    xmm5, [pw_3fff]
+cglobal mbtree_propagate_cost, 6,6,7 ; produces 4 int16 results per loop iteration; len is fetched from r6m below
+    movss     m6, [r5]        ; fps_factor (r5 is reused as the loop offset afterwards)
+    mov      r5d, r6m         ; len
+    lea       r0, [r0+r5*2]   ; dst += len elements (2 bytes each)
+    add      r5d, r5d         ; len in bytes for the 16-bit input arrays
+    add       r1, r5          ; point every input at its end
+    add       r2, r5
+    add       r3, r5
+    add       r4, r5
+    neg       r5              ; negative byte offset that counts up to 0
+    pxor      m4, m4          ; zero register for word->dword unpacking
+    shufps    m6, m6, 0       ; broadcast fps_factor to all lanes
+    mova      m5, [pw_3fff]   ; mask applied to the inter costs
 .loop:
-    movq      xmm2, [r2+r6] ; intra
-    movq      xmm0, [r4+r6] ; invq
-    movq      xmm3, [r3+r6] ; inter
-    movq      xmm1, [r1+r6] ; prop
-    punpcklwd xmm2, xmm4
-    punpcklwd xmm0, xmm4
-    pmaddwd   xmm0, xmm2
-    pand      xmm3, xmm5
-    punpcklwd xmm1, xmm4
-    punpcklwd xmm3, xmm4
+    movq      m2, [r2+r5] ; intra
+    movq      m0, [r4+r5] ; invq
+    movq      m3, [r3+r5] ; inter
+    movq      m1, [r1+r5] ; prop
+    pand      m3, m5      ; mask inter with pw_3fff
+    pminsw    m3, m2      ; inter = min(inter, intra) as signed words
+    punpcklwd m2, m4      ; zero-extend intra to dwords
+    punpcklwd m0, m4      ; zero-extend invq to dwords
+    pmaddwd   m0, m2      ; intra*invq
+    punpcklwd m1, m4      ; zero-extend prop to dwords
+    punpcklwd m3, m4      ; zero-extend inter to dwords
 %if cpuflag(fma4)
-    cvtdq2ps  xmm0, xmm0
-    cvtdq2ps  xmm1, xmm1
-    fmaddps   xmm0, xmm0, xmm6, xmm1
-    cvtdq2ps  xmm1, xmm2
-    psubd     xmm2, xmm3
-    cvtdq2ps  xmm2, xmm2
-    rcpps     xmm3, xmm1
-    mulps     xmm1, xmm3
-    mulps     xmm0, xmm2
-    addps     xmm2, xmm3, xmm3
-    fnmaddps  xmm3, xmm1, xmm3, xmm2
-    mulps     xmm0, xmm3
+    cvtdq2ps  m0, m0          ; intra*invq as float
+    cvtdq2ps  m1, m1          ; prop
+    fmaddps   m0, m0, m6, m1  ; prop + intra*invq*fps_factor
+    cvtdq2ps  m1, m2          ; intra
+    psubd     m2, m3          ; intra - inter
+    cvtdq2ps  m2, m2          ; intra - inter
+    rcpps     m3, m1          ; 1 / intra 1st approximation
+    mulps     m1, m3          ; intra * (1/intra 1st approx)
+    mulps     m0, m2          ; (prop + intra*invq*fps_factor) * (intra - inter)
+    addps     m2, m3, m3      ; 2 * (1/intra 1st approx)
+    fnmaddps  m3, m1, m3, m2  ; 2nd approximation for 1/intra: 2*approx - intra*approx^2
+    mulps     m0, m3          ; / intra
 %else
-    cvtdq2ps  xmm0, xmm0
-    mulps     xmm0, xmm6    ; intra*invq*fps_factor>>8
-    cvtdq2ps  xmm1, xmm1    ; prop
-    addps     xmm0, xmm1    ; prop + (intra*invq*fps_factor>>8)
-    cvtdq2ps  xmm1, xmm2    ; intra
-    psubd     xmm2, xmm3    ; intra - inter
-    cvtdq2ps  xmm2, xmm2    ; intra - inter
-    rcpps     xmm3, xmm1    ; 1 / intra 1st approximation
-    mulps     xmm1, xmm3    ; intra * (1/intra 1st approx)
-    mulps     xmm1, xmm3    ; intra * (1/intra 1st approx)^2
-    mulps     xmm0, xmm2    ; (prop + (intra*invq*fps_factor>>8)) * (intra - inter)
-    addps     xmm3, xmm3    ; 2 * (1/intra 1st approx)
-    subps     xmm3, xmm1    ; 2nd approximation for 1/intra
-    mulps     xmm0, xmm3    ; / intra
-%endif
-    cvtps2dq  xmm0, xmm0
-    movdqa [r0+r6*2], xmm0
-    add         r6, 8
+    cvtdq2ps  m0, m0
+    mulps     m0, m6    ; intra*invq*fps_factor>>8
+    cvtdq2ps  m1, m1    ; prop
+    addps     m0, m1    ; prop + (intra*invq*fps_factor>>8)
+    cvtdq2ps  m1, m2    ; intra
+    psubd     m2, m3    ; intra - inter
+    cvtdq2ps  m2, m2    ; intra - inter
+    rcpps     m3, m1    ; 1 / intra 1st approximation
+    mulps     m1, m3    ; intra * (1/intra 1st approx)
+    mulps     m1, m3    ; intra * (1/intra 1st approx)^2
+    mulps     m0, m2    ; (prop + (intra*invq*fps_factor>>8)) * (intra - inter)
+    addps     m3, m3    ; 2 * (1/intra 1st approx)
+    subps     m3, m1    ; 2nd approximation for 1/intra
+    mulps     m0, m3    ; / intra
+%endif
+    cvtps2dq  m0, m0    ; back to 32-bit integers
+    packssdw  m0, m0    ; narrow to int16 with signed saturation
+    movh [r0+r5], m0    ; store the 4 int16 results
+    add       r5, 8     ; 4 elements * 2 bytes
     jl .loop
     RET
 %endmacro
@@ -1880,34 +2057,35 @@
 MBTREE
 
 %macro INT16_UNPACK 1
-    vpunpckhwd   xm4, xm%1, xm7
-    vpunpcklwd  xm%1, xm7
-    vinsertf128  m%1, m%1, xm4, 1
+    punpckhwd   xm4, xm%1, xm7
+    punpcklwd  xm%1, xm7
+    vinsertf128 m%1, m%1, xm4, 1
 %endmacro
 
-; FIXME: align loads/stores to 16 bytes
-%macro MBTREE_AVX 0
-cglobal mbtree_propagate_cost, 7,7,8
-    add          r6d, r6d
-    lea           r0, [r0+r6*2]
-    add           r1, r6
-    add           r2, r6
-    add           r3, r6
-    add           r4, r6
-    neg           r6
-    mova         xm5, [pw_3fff]
-    vbroadcastss  m6, [r5]
-    mulps         m6, [pf_inv256]
+; FIXME: align loads to 16 bytes
+%macro MBTREE_AVX 1
+cglobal mbtree_propagate_cost, 6,6,%1
+    vbroadcastss m6, [r5]
+    mov         r5d, r6m
+    lea          r0, [r0+r5*2]
+    add         r5d, r5d
+    add          r1, r5
+    add          r2, r5
+    add          r3, r5
+    add          r4, r5
+    neg          r5
+    mova        xm5, [pw_3fff]
 %if notcpuflag(avx2)
-    pxor         xm7, xm7
+    pxor        xm7, xm7
 %endif
 .loop:
 %if cpuflag(avx2)
-    pmovzxwd     m0, [r2+r6]      ; intra
-    pmovzxwd     m1, [r4+r6]      ; invq
-    pmovzxwd     m2, [r1+r6]      ; prop
-    pand        xm3, xm5, [r3+r6] ; inter
+    pmovzxwd     m0, [r2+r5]      ; intra
+    pmovzxwd     m1, [r4+r5]      ; invq
+    pmovzxwd     m2, [r1+r5]      ; prop
+    pand        xm3, xm5, [r3+r5] ; inter
     pmovzxwd     m3, xm3
+    pminsd       m3, m0
     pmaddwd      m1, m0
     psubd        m4, m0, m3
     cvtdq2ps     m0, m0
@@ -1922,10 +2100,11 @@
     fnmaddps     m4, m2, m3, m4
     mulps        m1, m4
 %else
-    movu        xm0, [r2+r6]
-    movu        xm1, [r4+r6]
-    movu        xm2, [r1+r6]
-    pand        xm3, xm5, [r3+r6]
+    movu        xm0, [r2+r5]
+    movu        xm1, [r4+r5]
+    movu        xm2, [r1+r5]
+    pand        xm3, xm5, [r3+r5]
+    pminsw      xm3, xm0
     INT16_UNPACK 0
     INT16_UNPACK 1
     INT16_UNPACK 2
@@ -1947,13 +2126,107 @@
     mulps        m1, m3         ; / intra
 %endif
     vcvtps2dq    m1, m1
-    movu  [r0+r6*2], m1
-    add          r6, 16
+    vextractf128 xm2, m1, 1
+    packssdw    xm1, xm2
+    mova    [r0+r5], xm1
+    add          r5, 16
     jl .loop
     RET
 %endmacro
 
 INIT_YMM avx
-MBTREE_AVX
+MBTREE_AVX 8
 INIT_YMM avx2,fma3
-MBTREE_AVX
+MBTREE_AVX 7
+
+%macro MBTREE_PROPAGATE_LIST 0
+;-----------------------------------------------------------------------------
+; void mbtree_propagate_list_internal( int16_t (*mvs)[2], int16_t *propagate_amount, uint16_t *lowres_costs,
+;                                      int16_t *output, int bipred_weight, int mb_y, int len )
+;-----------------------------------------------------------------------------
+cglobal mbtree_propagate_list_internal, 4,6,8
+    movh     m6, [pw_0to15] ; mb_x
+    movd     m7, r5m
+    pshuflw  m7, m7, 0
+    punpcklwd m6, m7       ; 0 y 1 y 2 y 3 y
+    movd     m7, r4m
+    SPLATW   m7, m7        ; bipred_weight
+    psllw    m7, 9         ; bipred_weight << 9
+
+    mov     r5d, r6m
+    xor     r4d, r4d
+.loop:
+    mova     m3, [r1+r4*2]
+    movu     m4, [r2+r4*2]
+    mova     m5, [pw_0xc000]
+    pand     m4, m5
+    pcmpeqw  m4, m5
+    pmulhrsw m5, m3, m7    ; propagate_amount = (propagate_amount * bipred_weight + 32) >> 6
+%if cpuflag(avx)
+    pblendvb m5, m3, m5, m4
+%else
+    pand     m5, m4
+    pandn    m4, m3
+    por      m5, m4        ; if( lists_used == 3 )
+                           ;     propagate_amount = (propagate_amount * bipred_weight + 32) >> 6
+%endif
+
+    movu     m0, [r0+r4*4] ; x,y
+    movu     m1, [r0+r4*4+mmsize]
+
+    psraw    m2, m0, 5
+    psraw    m3, m1, 5
+    mova     m4, [pd_4]
+    paddw    m2, m6        ; {mbx, mby} = ({x,y}>>5)+{h->mb.i_mb_x,h->mb.i_mb_y}
+    paddw    m6, m4        ; {mbx, mby} += {4, 0}
+    paddw    m3, m6        ; {mbx, mby} = ({x,y}>>5)+{h->mb.i_mb_x,h->mb.i_mb_y}
+    paddw    m6, m4        ; {mbx, mby} += {4, 0}
+
+    mova [r3+mmsize*0], m2
+    mova [r3+mmsize*1], m3
+
+    mova     m3, [pw_31]
+    pand     m0, m3        ; x &= 31
+    pand     m1, m3        ; y &= 31
+    packuswb m0, m1
+    psrlw    m1, m0, 3
+    pand     m0, m3        ; x
+    SWAP      1, 3
+    pandn    m1, m3        ; y premultiplied by (1<<5) for later use of pmulhrsw
+
+    mova     m3, [pw_32]
+    psubw    m3, m0        ; 32 - x
+    mova     m4, [pw_1024]
+    psubw    m4, m1        ; (32 - y) << 5
+
+    pmullw   m2, m3, m4    ; idx0weight = (32-y)*(32-x) << 5
+    pmullw   m4, m0        ; idx1weight = (32-y)*x << 5
+    pmullw   m0, m1        ; idx3weight = y*x << 5
+    pmullw   m1, m3        ; idx2weight = y*(32-x) << 5
+
+    ; avoid overflow in the input to pmulhrsw
+    psrlw    m3, m2, 15
+    psubw    m2, m3        ; idx0weight -= (idx0weight == 32768)
+
+    pmulhrsw m2, m5        ; idx0weight * propagate_amount + 512 >> 10
+    pmulhrsw m4, m5        ; idx1weight * propagate_amount + 512 >> 10
+    pmulhrsw m1, m5        ; idx2weight * propagate_amount + 512 >> 10
+    pmulhrsw m0, m5        ; idx3weight * propagate_amount + 512 >> 10
+
+    SBUTTERFLY wd, 2, 4, 3
+    SBUTTERFLY wd, 1, 0, 3
+    mova [r3+mmsize*2], m2
+    mova [r3+mmsize*3], m4
+    mova [r3+mmsize*4], m1
+    mova [r3+mmsize*5], m0
+    add     r4d, mmsize/2
+    add      r3, mmsize*6
+    cmp     r4d, r5d
+    jl .loop
+    REP_RET
+%endmacro
+
+INIT_XMM ssse3
+MBTREE_PROPAGATE_LIST
+INIT_XMM avx
+MBTREE_PROPAGATE_LIST

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* mc-a2.asm: x86 motion compensation
 ;*****************************************************************************
-;* Copyright (C) 2005-2013 x264 project
+;* Copyright (C) 2005-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Jason Garrett-Glaser <darkshikari@gmail.com>
@@ -32,6 +32,7 @@
 
 SECTION_RODATA 32
 
+pw_1024: times 16 dw 1024
 filt_mul20: times 32 db 20
 filt_mul15: times 16 db 1, -5
 filt_mul51: times 16 db -5, 1
@@ -39,17 +40,25 @@
 deinterleave_shuf: times 2 db 0,2,4,6,8,10,12,14,1,3,5,7,9,11,13,15
 
 %if HIGH_BIT_DEPTH
+v210_mask: times 4 dq 0xc00ffc003ff003ff
+v210_luma_shuf: times 2 db 1,2,4,5,6,7,9,10,12,13,14,15,12,13,14,15
+v210_chroma_shuf: times 2 db 0,1,2,3,5,6,8,9,10,11,13,14,10,11,13,14
+; vpermd indices {0,1,2,4,5,7,_,_} merged in the 3 lsb of each dword to save a register
+v210_mult: dw 0x2000,0x7fff,0x0801,0x2000,0x7ffa,0x0800,0x7ffc,0x0800
+           dw 0x1ffd,0x7fff,0x07ff,0x2000,0x7fff,0x0800,0x7fff,0x0800
+
 deinterleave_shuf32a: SHUFFLE_MASK_W 0,2,4,6,8,10,12,14
 deinterleave_shuf32b: SHUFFLE_MASK_W 1,3,5,7,9,11,13,15
 %else
+deinterleave_rgb_shuf: db 0,3,6,9,1,4,7,10,2,5,8,11,-1,-1,-1,-1
+                       db 0,4,8,12,1,5,9,13,2,6,10,14,-1,-1,-1,-1
+
 deinterleave_shuf32a: db 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30
 deinterleave_shuf32b: db 1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31
-%endif
-pw_1024: times 16 dw 1024
+%endif ; !HIGH_BIT_DEPTH
 
 pd_16: times 4 dd 16
 pd_0f: times 4 dd 0xffff
-pf_inv256: times 8 dd 0.00390625
 
 pad10: times 8 dw    10*PIXEL_MAX
 pad20: times 8 dw    20*PIXEL_MAX
@@ -60,16 +69,22 @@
 tap2: times 4 dw 20, 20
 tap3: times 4 dw -5,  1
 
+pw_0xc000: times 8 dw 0xc000
+pw_31: times 8 dw 31
+pd_4: times 4 dd 4
+
 SECTION .text
 
 cextern pb_0
 cextern pw_1
+cextern pw_8
 cextern pw_16
 cextern pw_32
 cextern pw_512
 cextern pw_00ff
 cextern pw_3fff
 cextern pw_pixel_max
+cextern pw_0to15
 cextern pd_ffff
 
 %macro LOAD_ADD 4
@@ -482,7 +497,7 @@
     %define pw_rnd [pw_32]
 %endif
 ; This doesn't seem to be faster (with AVX) on Sandy Bridge or Bulldozer...
-%if cpuflag(misalign) || mmsize==32
+%if mmsize==32
 .loop:
     movu    m4, [src-4]
     movu    m5, [src-2]
@@ -630,8 +645,6 @@
 HPEL_V 0
 INIT_XMM sse2
 HPEL_V 8
-INIT_XMM sse2, misalign
-HPEL_C
 %if ARCH_X86_64 == 0
 INIT_XMM sse2
 HPEL_C
@@ -1197,6 +1210,163 @@
     RET
 %endmacro ; PLANE_DEINTERLEAVE
 
+%macro PLANE_DEINTERLEAVE_RGB_CORE 9 ; pw, i_dsta, i_dstb, i_dstc, i_src, w, h, tmp1, tmp2
+%if cpuflag(ssse3)
+    mova        m3, [deinterleave_rgb_shuf+(%1-3)*16]
+%endif
+%%loopy:
+    mov         %8, r6
+    mov         %9, %6
+%%loopx:
+    movu        m0, [%8]
+    movu        m1, [%8+%1*mmsize/4]
+%if cpuflag(ssse3)
+    pshufb      m0, m3        ; b0 b1 b2 b3 g0 g1 g2 g3 r0 r1 r2 r3
+    pshufb      m1, m3        ; b4 b5 b6 b7 g4 g5 g6 g7 r4 r5 r6 r7
+%elif %1 == 3
+    psrldq      m2, m0, 6
+    punpcklqdq  m0, m1        ; b0 g0 r0 b1 g1 r1 __ __ b4 g4 r4 b5 g5 r5
+    psrldq      m1, 6
+    punpcklqdq  m2, m1        ; b2 g2 r2 b3 g3 r3 __ __ b6 g6 r6 b7 g7 r7
+    psrlq       m3, m0, 24
+    psrlq       m4, m2, 24
+    punpckhbw   m1, m0, m3    ; b4 b5 g4 g5 r4 r5
+    punpcklbw   m0, m3        ; b0 b1 g0 g1 r0 r1
+    punpckhbw   m3, m2, m4    ; b6 b7 g6 g7 r6 r7
+    punpcklbw   m2, m4        ; b2 b3 g2 g3 r2 r3
+    punpcklwd   m0, m2        ; b0 b1 b2 b3 g0 g1 g2 g3 r0 r1 r2 r3
+    punpcklwd   m1, m3        ; b4 b5 b6 b7 g4 g5 g6 g7 r4 r5 r6 r7
+%else
+    pshufd      m3, m0, q2301
+    pshufd      m4, m1, q2301
+    punpckhbw   m2, m0, m3    ; b2 b3 g2 g3 r2 r3
+    punpcklbw   m0, m3        ; b0 b1 g0 g1 r0 r1
+    punpckhbw   m3, m1, m4    ; b6 b7 g6 g7 r6 r7
+    punpcklbw   m1, m4        ; b4 b5 g4 g5 r4 r5
+    punpcklwd   m0, m2        ; b0 b1 b2 b3 g0 g1 g2 g3 r0 r1 r2 r3
+    punpcklwd   m1, m3        ; b4 b5 b6 b7 g4 g5 g6 g7 r4 r5 r6 r7
+%endif
+    punpckldq   m2, m0, m1    ; b0 b1 b2 b3 b4 b5 b6 b7 g0 g1 g2 g3 g4 g5 g6 g7
+    punpckhdq   m0, m1        ; r0 r1 r2 r3 r4 r5 r6 r7
+    movh   [r0+%9], m2
+    movhps [r2+%9], m2
+    movh   [r4+%9], m0
+    add         %8, %1*mmsize/2
+    add         %9, mmsize/2
+    jl %%loopx
+    add         r0, %2
+    add         r2, %3
+    add         r4, %4
+    add         r6, %5
+    dec        %7d
+    jg %%loopy
+%endmacro
+
+%macro PLANE_DEINTERLEAVE_RGB 0
+;-----------------------------------------------------------------------------
+; void x264_plane_copy_deinterleave_rgb( pixel *dsta, intptr_t i_dsta,
+;                                        pixel *dstb, intptr_t i_dstb,
+;                                        pixel *dstc, intptr_t i_dstc,
+;                                        pixel *src,  intptr_t i_src, int pw, int w, int h )
+;-----------------------------------------------------------------------------
+%if ARCH_X86_64
+cglobal plane_copy_deinterleave_rgb, 8,12
+    %define %%args r1, r3, r5, r7, r8, r9, r10, r11
+    mov        r8d, r9m
+    mov        r9d, r10m
+    add         r0, r8
+    add         r2, r8
+    add         r4, r8
+    neg         r8
+%else
+cglobal plane_copy_deinterleave_rgb, 1,7
+    %define %%args r1m, r3m, r5m, r7m, r9m, r1, r3, r5
+    mov         r1, r9m
+    mov         r2, r2m
+    mov         r4, r4m
+    mov         r6, r6m
+    add         r0, r1
+    add         r2, r1
+    add         r4, r1
+    neg         r1
+    mov        r9m, r1
+    mov         r1, r10m
+%endif
+    cmp  dword r8m, 4
+    je .pw4
+    PLANE_DEINTERLEAVE_RGB_CORE 3, %%args ; BGR
+    jmp .ret
+.pw4:
+    PLANE_DEINTERLEAVE_RGB_CORE 4, %%args ; BGRA
+.ret:
+    REP_RET
+%endmacro
+
+%if HIGH_BIT_DEPTH == 0
+INIT_XMM sse2
+PLANE_DEINTERLEAVE_RGB
+INIT_XMM ssse3
+PLANE_DEINTERLEAVE_RGB
+%endif ; !HIGH_BIT_DEPTH
+
+%macro PLANE_DEINTERLEAVE_V210 0 ; splits each row of packed v210 src into a luma row (dsty) and a chroma row (dstc)
+;-----------------------------------------------------------------------------
+; void x264_plane_copy_deinterleave_v210( uint16_t *dsty, intptr_t i_dsty,
+;                                         uint16_t *dstc, intptr_t i_dstc,
+;                                         uint32_t *src, intptr_t i_src, int w, int h )
+;-----------------------------------------------------------------------------
+%if ARCH_X86_64
+cglobal plane_copy_deinterleave_v210, 8,10,7
+%define src   r8
+%define org_w r9
+%define h     r7d
+%else
+cglobal plane_copy_deinterleave_v210, 7,7,7
+%define src   r4m
+%define org_w r6m
+%define h     dword r7m
+%endif
+    FIX_STRIDES r1, r3, r6d ; NOTE(review): macro defined outside this view; presumably converts i_dsty, i_dstc and w to bytes - confirm
+    shl    r5, 2            ; i_src *= 4 (src is uint32_t*)
+    add    r0, r6           ; point dsty at the end of the row
+    add    r2, r6           ; point dstc at the end of the row
+    neg    r6               ; negative offset that counts up to 0
+    mov   src, r4           ; remember where the current source row starts
+    mov org_w, r6           ; remember the negated width to restart later rows
+    mova   m2, [v210_mask]        ; mask used to split each input vector in two
+    mova   m3, [v210_luma_shuf]   ; byte shuffle for the luma half
+    mova   m4, [v210_chroma_shuf] ; byte shuffle for the chroma half
+    mova   m5, [v210_mult] ; also functions as vpermd index for avx2
+    pshufd m6, m5, q1102   ; dword-permuted copy of the multipliers, used for the chroma half
+
+ALIGN 16
+.loop:
+    movu   m1, [r4]        ; load mmsize bytes of src
+    pandn  m0, m2, m1      ; m0 = src & ~mask (luma path)
+    pand   m1, m2          ; m1 = src &  mask (chroma path)
+    pshufb m0, m3
+    pshufb m1, m4
+    pmulhrsw m0, m5 ; y0 y1 y2 y3 y4 y5 __ __
+    pmulhrsw m1, m6 ; u0 v0 u1 v1 u2 v2 __ __
+%if mmsize == 32
+    vpermd m0, m5, m0      ; gather the valid dwords across lanes (indices are the low bits of v210_mult)
+    vpermd m1, m5, m1
+%endif
+    movu [r0+r6], m0       ; full-vector store; the unused tail is overwritten by the next iteration
+    movu [r2+r6], m1
+    add    r4, mmsize      ; src advances by a whole vector
+    add    r6, 3*mmsize/4  ; output advances by only 3/4 of a vector
+    jl .loop               ; until the row offset reaches 0
+    add    r0, r1          ; dsty += i_dsty
+    add    r2, r3          ; dstc += i_dstc
+    add   src, r5          ; next source row
+    mov    r4, src
+    mov    r6, org_w       ; restart the offset at -width
+    dec     h              ; one row done
+    jg .loop
+    RET
+%endmacro ; PLANE_DEINTERLEAVE_V210
+
 %if HIGH_BIT_DEPTH
 INIT_MMX mmx2
 PLANE_INTERLEAVE
@@ -1205,9 +1375,14 @@
 INIT_XMM sse2
 PLANE_INTERLEAVE
 PLANE_DEINTERLEAVE
+INIT_XMM ssse3
+PLANE_DEINTERLEAVE_V210
 INIT_XMM avx
 PLANE_INTERLEAVE
 PLANE_DEINTERLEAVE
+PLANE_DEINTERLEAVE_V210
+INIT_YMM avx2
+PLANE_DEINTERLEAVE_V210
 %else
 INIT_MMX mmx2
 PLANE_INTERLEAVE
@@ -1813,62 +1988,64 @@
 ;                             uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len )
 ;-----------------------------------------------------------------------------
 %macro MBTREE 0
-cglobal mbtree_propagate_cost, 7,7,7
-    add        r6d, r6d
-    lea         r0, [r0+r6*2]
-    add         r1, r6
-    add         r2, r6
-    add         r3, r6
-    add         r4, r6
-    neg         r6
-    pxor      xmm4, xmm4
-    movss     xmm6, [r5]
-    shufps    xmm6, xmm6, 0
-    mulps     xmm6, [pf_inv256]
-    movdqa    xmm5, [pw_3fff]
+cglobal mbtree_propagate_cost, 6,6,7 ; produces 4 int16 results per loop iteration; len is fetched from r6m below
+    movss     m6, [r5]        ; fps_factor (r5 is reused as the loop offset afterwards)
+    mov      r5d, r6m         ; len
+    lea       r0, [r0+r5*2]   ; dst += len elements (2 bytes each)
+    add      r5d, r5d         ; len in bytes for the 16-bit input arrays
+    add       r1, r5          ; point every input at its end
+    add       r2, r5
+    add       r3, r5
+    add       r4, r5
+    neg       r5              ; negative byte offset that counts up to 0
+    pxor      m4, m4          ; zero register for word->dword unpacking
+    shufps    m6, m6, 0       ; broadcast fps_factor to all lanes
+    mova      m5, [pw_3fff]   ; mask applied to the inter costs
 .loop:
-    movq      xmm2, [r2+r6] ; intra
-    movq      xmm0, [r4+r6] ; invq
-    movq      xmm3, [r3+r6] ; inter
-    movq      xmm1, [r1+r6] ; prop
-    punpcklwd xmm2, xmm4
-    punpcklwd xmm0, xmm4
-    pmaddwd   xmm0, xmm2
-    pand      xmm3, xmm5
-    punpcklwd xmm1, xmm4
-    punpcklwd xmm3, xmm4
+    movq      m2, [r2+r5] ; intra
+    movq      m0, [r4+r5] ; invq
+    movq      m3, [r3+r5] ; inter
+    movq      m1, [r1+r5] ; prop
+    pand      m3, m5      ; mask inter with pw_3fff
+    pminsw    m3, m2      ; inter = min(inter, intra) as signed words
+    punpcklwd m2, m4      ; zero-extend intra to dwords
+    punpcklwd m0, m4      ; zero-extend invq to dwords
+    pmaddwd   m0, m2      ; intra*invq
+    punpcklwd m1, m4      ; zero-extend prop to dwords
+    punpcklwd m3, m4      ; zero-extend inter to dwords
 %if cpuflag(fma4)
-    cvtdq2ps  xmm0, xmm0
-    cvtdq2ps  xmm1, xmm1
-    fmaddps   xmm0, xmm0, xmm6, xmm1
-    cvtdq2ps  xmm1, xmm2
-    psubd     xmm2, xmm3
-    cvtdq2ps  xmm2, xmm2
-    rcpps     xmm3, xmm1
-    mulps     xmm1, xmm3
-    mulps     xmm0, xmm2
-    addps     xmm2, xmm3, xmm3
-    fnmaddps  xmm3, xmm1, xmm3, xmm2
-    mulps     xmm0, xmm3
+    cvtdq2ps  m0, m0          ; intra*invq as float
+    cvtdq2ps  m1, m1          ; prop
+    fmaddps   m0, m0, m6, m1  ; prop + intra*invq*fps_factor
+    cvtdq2ps  m1, m2          ; intra
+    psubd     m2, m3          ; intra - inter
+    cvtdq2ps  m2, m2          ; intra - inter
+    rcpps     m3, m1          ; 1 / intra 1st approximation
+    mulps     m1, m3          ; intra * (1/intra 1st approx)
+    mulps     m0, m2          ; (prop + intra*invq*fps_factor) * (intra - inter)
+    addps     m2, m3, m3      ; 2 * (1/intra 1st approx)
+    fnmaddps  m3, m1, m3, m2  ; 2nd approximation for 1/intra: 2*approx - intra*approx^2
+    mulps     m0, m3          ; / intra
 %else
-    cvtdq2ps  xmm0, xmm0
-    mulps     xmm0, xmm6    ; intra*invq*fps_factor>>8
-    cvtdq2ps  xmm1, xmm1    ; prop
-    addps     xmm0, xmm1    ; prop + (intra*invq*fps_factor>>8)
-    cvtdq2ps  xmm1, xmm2    ; intra
-    psubd     xmm2, xmm3    ; intra - inter
-    cvtdq2ps  xmm2, xmm2    ; intra - inter
-    rcpps     xmm3, xmm1    ; 1 / intra 1st approximation
-    mulps     xmm1, xmm3    ; intra * (1/intra 1st approx)
-    mulps     xmm1, xmm3    ; intra * (1/intra 1st approx)^2
-    mulps     xmm0, xmm2    ; (prop + (intra*invq*fps_factor>>8)) * (intra - inter)
-    addps     xmm3, xmm3    ; 2 * (1/intra 1st approx)
-    subps     xmm3, xmm1    ; 2nd approximation for 1/intra
-    mulps     xmm0, xmm3    ; / intra
-%endif
-    cvtps2dq  xmm0, xmm0
-    movdqa [r0+r6*2], xmm0
-    add         r6, 8
+    cvtdq2ps  m0, m0
+    mulps     m0, m6    ; intra*invq*fps_factor>>8
+    cvtdq2ps  m1, m1    ; prop
+    addps     m0, m1    ; prop + (intra*invq*fps_factor>>8)
+    cvtdq2ps  m1, m2    ; intra
+    psubd     m2, m3    ; intra - inter
+    cvtdq2ps  m2, m2    ; intra - inter
+    rcpps     m3, m1    ; 1 / intra 1st approximation
+    mulps     m1, m3    ; intra * (1/intra 1st approx)
+    mulps     m1, m3    ; intra * (1/intra 1st approx)^2
+    mulps     m0, m2    ; (prop + (intra*invq*fps_factor>>8)) * (intra - inter)
+    addps     m3, m3    ; 2 * (1/intra 1st approx)
+    subps     m3, m1    ; 2nd approximation for 1/intra
+    mulps     m0, m3    ; / intra
+%endif
+    cvtps2dq  m0, m0    ; back to 32-bit integers
+    packssdw  m0, m0    ; narrow to int16 with signed saturation
+    movh [r0+r5], m0    ; store the 4 int16 results
+    add       r5, 8     ; 4 elements * 2 bytes
     jl .loop
     RET
 %endmacro
@@ -1880,34 +2057,35 @@
 MBTREE
 
 %macro INT16_UNPACK 1
-    vpunpckhwd   xm4, xm%1, xm7
-    vpunpcklwd  xm%1, xm7
-    vinsertf128  m%1, m%1, xm4, 1
+    punpckhwd   xm4, xm%1, xm7
+    punpcklwd  xm%1, xm7
+    vinsertf128 m%1, m%1, xm4, 1
 %endmacro
 
-; FIXME: align loads/stores to 16 bytes
-%macro MBTREE_AVX 0
-cglobal mbtree_propagate_cost, 7,7,8
-    add          r6d, r6d
-    lea           r0, [r0+r6*2]
-    add           r1, r6
-    add           r2, r6
-    add           r3, r6
-    add           r4, r6
-    neg           r6
-    mova         xm5, [pw_3fff]
-    vbroadcastss  m6, [r5]
-    mulps         m6, [pf_inv256]
+; FIXME: align loads to 16 bytes
+%macro MBTREE_AVX 1
+cglobal mbtree_propagate_cost, 6,6,%1
+    vbroadcastss m6, [r5]
+    mov         r5d, r6m
+    lea          r0, [r0+r5*2]
+    add         r5d, r5d
+    add          r1, r5
+    add          r2, r5
+    add          r3, r5
+    add          r4, r5
+    neg          r5
+    mova        xm5, [pw_3fff]
 %if notcpuflag(avx2)
-    pxor         xm7, xm7
+    pxor        xm7, xm7
 %endif
 .loop:
 %if cpuflag(avx2)
-    pmovzxwd     m0, [r2+r6]      ; intra
-    pmovzxwd     m1, [r4+r6]      ; invq
-    pmovzxwd     m2, [r1+r6]      ; prop
-    pand        xm3, xm5, [r3+r6] ; inter
+    pmovzxwd     m0, [r2+r5]      ; intra
+    pmovzxwd     m1, [r4+r5]      ; invq
+    pmovzxwd     m2, [r1+r5]      ; prop
+    pand        xm3, xm5, [r3+r5] ; inter
     pmovzxwd     m3, xm3
+    pminsd       m3, m0
     pmaddwd      m1, m0
     psubd        m4, m0, m3
     cvtdq2ps     m0, m0
@@ -1922,10 +2100,11 @@
     fnmaddps     m4, m2, m3, m4
     mulps        m1, m4
 %else
-    movu        xm0, [r2+r6]
-    movu        xm1, [r4+r6]
-    movu        xm2, [r1+r6]
-    pand        xm3, xm5, [r3+r6]
+    movu        xm0, [r2+r5]
+    movu        xm1, [r4+r5]
+    movu        xm2, [r1+r5]
+    pand        xm3, xm5, [r3+r5]
+    pminsw      xm3, xm0
     INT16_UNPACK 0
     INT16_UNPACK 1
     INT16_UNPACK 2
@@ -1947,13 +2126,107 @@
     mulps        m1, m3         ; / intra
 %endif
     vcvtps2dq    m1, m1
-    movu  [r0+r6*2], m1
-    add          r6, 16
+    vextractf128 xm2, m1, 1
+    packssdw    xm1, xm2
+    mova    [r0+r5], xm1
+    add          r5, 16
     jl .loop
     RET
 %endmacro
 
 INIT_YMM avx
-MBTREE_AVX
+MBTREE_AVX 8
 INIT_YMM avx2,fma3
-MBTREE_AVX
+MBTREE_AVX 7
+
+%macro MBTREE_PROPAGATE_LIST 0
+;-----------------------------------------------------------------------------
+; void mbtree_propagate_list_internal( int16_t (*mvs)[2], int16_t *propagate_amount, uint16_t *lowres_costs,
+;                                      int16_t *output, int bipred_weight, int mb_y, int len )
+;-----------------------------------------------------------------------------
+cglobal mbtree_propagate_list_internal, 4,6,8
+    movh     m6, [pw_0to15] ; mb_x
+    movd     m7, r5m
+    pshuflw  m7, m7, 0
+    punpcklwd m6, m7       ; 0 y 1 y 2 y 3 y
+    movd     m7, r4m
+    SPLATW   m7, m7        ; bipred_weight
+    psllw    m7, 9         ; bipred_weight << 9
+
+    mov     r5d, r6m
+    xor     r4d, r4d
+.loop:
+    mova     m3, [r1+r4*2]
+    movu     m4, [r2+r4*2]
+    mova     m5, [pw_0xc000]
+    pand     m4, m5
+    pcmpeqw  m4, m5
+    pmulhrsw m5, m3, m7    ; propagate_amount = (propagate_amount * bipred_weight + 32) >> 6
+%if cpuflag(avx)
+    pblendvb m5, m3, m5, m4
+%else
+    pand     m5, m4
+    pandn    m4, m3
+    por      m5, m4        ; if( lists_used == 3 )
+                           ;     propagate_amount = (propagate_amount * bipred_weight + 32) >> 6
+%endif
+
+    movu     m0, [r0+r4*4] ; x,y
+    movu     m1, [r0+r4*4+mmsize]
+
+    psraw    m2, m0, 5
+    psraw    m3, m1, 5
+    mova     m4, [pd_4]
+    paddw    m2, m6        ; {mbx, mby} = ({x,y}>>5)+{h->mb.i_mb_x,h->mb.i_mb_y}
+    paddw    m6, m4        ; {mbx, mby} += {4, 0}
+    paddw    m3, m6        ; {mbx, mby} = ({x,y}>>5)+{h->mb.i_mb_x,h->mb.i_mb_y}
+    paddw    m6, m4        ; {mbx, mby} += {4, 0}
+
+    mova [r3+mmsize*0], m2
+    mova [r3+mmsize*1], m3
+
+    mova     m3, [pw_31]
+    pand     m0, m3        ; x &= 31
+    pand     m1, m3        ; y &= 31
+    packuswb m0, m1
+    psrlw    m1, m0, 3
+    pand     m0, m3        ; x
+    SWAP      1, 3
+    pandn    m1, m3        ; y premultiplied by (1<<5) for later use of pmulhrsw
+
+    mova     m3, [pw_32]
+    psubw    m3, m0        ; 32 - x
+    mova     m4, [pw_1024]
+    psubw    m4, m1        ; (32 - y) << 5
+
+    pmullw   m2, m3, m4    ; idx0weight = (32-y)*(32-x) << 5
+    pmullw   m4, m0        ; idx1weight = (32-y)*x << 5
+    pmullw   m0, m1        ; idx3weight = y*x << 5
+    pmullw   m1, m3        ; idx2weight = y*(32-x) << 5
+
+    ; avoid overflow in the input to pmulhrsw
+    psrlw    m3, m2, 15
+    psubw    m2, m3        ; idx0weight -= (idx0weight == 32768)
+
+    pmulhrsw m2, m5        ; idx0weight * propagate_amount + 512 >> 10
+    pmulhrsw m4, m5        ; idx1weight * propagate_amount + 512 >> 10
+    pmulhrsw m1, m5        ; idx2weight * propagate_amount + 512 >> 10
+    pmulhrsw m0, m5        ; idx3weight * propagate_amount + 512 >> 10
+
+    SBUTTERFLY wd, 2, 4, 3
+    SBUTTERFLY wd, 1, 0, 3
+    mova [r3+mmsize*2], m2
+    mova [r3+mmsize*3], m4
+    mova [r3+mmsize*4], m1
+    mova [r3+mmsize*5], m0
+    add     r4d, mmsize/2
+    add      r3, mmsize*6
+    cmp     r4d, r5d
+    jl .loop
+    REP_RET
+%endmacro
+
+INIT_XMM ssse3
+MBTREE_PROPAGATE_LIST
+INIT_XMM avx
+MBTREE_PROPAGATE_LIST
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/mc-c.c -> x264-snapshot-20140321-2245.tar.bz2/common/x86/mc-c.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc-c.c: x86 motion compensation
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -116,6 +116,23 @@
 void x264_plane_copy_deinterleave_avx( uint16_t *dstu, intptr_t i_dstu,
                                        uint16_t *dstv, intptr_t i_dstv,
                                        uint16_t *src,  intptr_t i_src, int w, int h );
+void x264_plane_copy_deinterleave_rgb_sse2 ( pixel *dsta, intptr_t i_dsta,
+                                             pixel *dstb, intptr_t i_dstb,
+                                             pixel *dstc, intptr_t i_dstc,
+                                             pixel *src,  intptr_t i_src, int pw, int w, int h );
+void x264_plane_copy_deinterleave_rgb_ssse3( pixel *dsta, intptr_t i_dsta,
+                                             pixel *dstb, intptr_t i_dstb,
+                                             pixel *dstc, intptr_t i_dstc,
+                                             pixel *src,  intptr_t i_src, int pw, int w, int h );
+void x264_plane_copy_deinterleave_v210_ssse3( uint16_t *dstu, intptr_t i_dstu,
+                                              uint16_t *dstv, intptr_t i_dstv,
+                                              uint32_t *src,  intptr_t i_src, int w, int h );
+void x264_plane_copy_deinterleave_v210_avx  ( uint16_t *dstu, intptr_t i_dstu,
+                                              uint16_t *dstv, intptr_t i_dstv,
+                                              uint32_t *src,  intptr_t i_src, int w, int h );
+void x264_plane_copy_deinterleave_v210_avx2 ( uint16_t *dstu, intptr_t i_dstu,
+                                              uint16_t *dstv, intptr_t i_dstv,
+                                              uint32_t *src,  intptr_t i_src, int w, int h );
 void x264_store_interleave_chroma_mmx2( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
 void x264_store_interleave_chroma_sse2( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
 void x264_store_interleave_chroma_avx ( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
@@ -144,13 +161,13 @@
 void x264_integral_init8v_mmx ( uint16_t *sum8, intptr_t stride );
 void x264_integral_init8v_sse2( uint16_t *sum8, intptr_t stride );
 void x264_integral_init8v_avx2( uint16_t *sum8, intptr_t stride );
-void x264_mbtree_propagate_cost_sse2( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
+void x264_mbtree_propagate_cost_sse2( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
                                       uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
-void x264_mbtree_propagate_cost_avx ( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
+void x264_mbtree_propagate_cost_avx ( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
                                       uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
-void x264_mbtree_propagate_cost_fma4( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
+void x264_mbtree_propagate_cost_fma4( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
                                       uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
-void x264_mbtree_propagate_cost_avx2_fma3( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
+void x264_mbtree_propagate_cost_avx2_fma3( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
                                            uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
 
 #define MC_CHROMA(cpu)\
@@ -158,7 +175,6 @@
                            int dx, int dy, int i_width, int i_height );
 MC_CHROMA(mmx2)
 MC_CHROMA(sse2)
-MC_CHROMA(sse2_misalign)
 MC_CHROMA(ssse3)
 MC_CHROMA(ssse3_cache64)
 MC_CHROMA(avx)
@@ -186,7 +202,6 @@
 PIXEL_AVG_WALL(cache64_mmx2)
 PIXEL_AVG_WALL(cache64_sse2)
 PIXEL_AVG_WALL(sse2)
-PIXEL_AVG_WALL(sse2_misalign)
 PIXEL_AVG_WALL(cache64_ssse3)
 PIXEL_AVG_WALL(avx2)
 
@@ -227,7 +242,6 @@
 PIXEL_AVG_WTAB(cache64_mmx2, mmx2, cache64_mmx2, cache64_mmx2, cache64_mmx2, cache64_mmx2)
 #endif
 PIXEL_AVG_WTAB(sse2, mmx2, mmx2, sse2, sse2, sse2)
-PIXEL_AVG_WTAB(sse2_misalign, mmx2, mmx2, sse2, sse2, sse2_misalign)
 PIXEL_AVG_WTAB(cache64_sse2, mmx2, cache64_mmx2, cache64_sse2, cache64_sse2, cache64_sse2)
 PIXEL_AVG_WTAB(cache64_ssse3, mmx2, cache64_mmx2, cache64_ssse3, cache64_ssse3, cache64_sse2)
 PIXEL_AVG_WTAB(cache64_ssse3_atom, mmx2, mmx2, cache64_ssse3, cache64_ssse3, sse2)
@@ -429,7 +443,6 @@
 GET_REF(cache32_mmx2)
 GET_REF(cache64_mmx2)
 #endif
-GET_REF(sse2_misalign)
 GET_REF(cache64_sse2)
 GET_REF(cache64_ssse3)
 GET_REF(cache64_ssse3_atom)
@@ -477,7 +490,6 @@
 HPEL(16, avx, avx, avx, avx)
 HPEL(32, avx2, avx2, avx2, avx2)
 #endif
-HPEL(16, sse2_misalign, sse2, sse2_misalign, sse2)
 #endif // HIGH_BIT_DEPTH
 
 static void x264_plane_copy_mmx2( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h )
@@ -521,6 +533,113 @@
 PLANE_INTERLEAVE(avx)
 #endif
 
+#if HAVE_X86_INLINE_ASM
+#define CLIP_ADD(s,x)\
+do\
+{\
+    int temp;\
+    asm("movd       %0, %%xmm0     \n"\
+        "movd       %2, %%xmm1     \n"\
+        "paddsw %%xmm1, %%xmm0     \n"\
+        "movd   %%xmm0, %1         \n"\
+        :"+m"(s), "=&r"(temp)\
+        :"m"(x)\
+    );\
+    s = temp;\
+} while(0)
+
+#define CLIP_ADD2(s,x)\
+do\
+{\
+    asm("movd       %0, %%xmm0     \n"\
+        "movd       %1, %%xmm1     \n"\
+        "paddsw %%xmm1, %%xmm0     \n"\
+        "movd   %%xmm0, %0         \n"\
+        :"+m"(M32(s))\
+        :"m"(M32(x))\
+    );\
+} while(0)
+#else
+#define CLIP_ADD(s,x) (s) = X264_MIN((s)+(x),(1<<15)-1)
+#define CLIP_ADD2(s,x)\
+do\
+{\
+    CLIP_ADD((s)[0], (x)[0]);\
+    CLIP_ADD((s)[1], (x)[1]);\
+} while(0)
+#endif
+
+#define PROPAGATE_LIST(cpu)\
+void x264_mbtree_propagate_list_internal_##cpu( int16_t (*mvs)[2], int16_t *propagate_amount,\
+                                                uint16_t *lowres_costs, int16_t *output,\
+                                                int bipred_weight, int mb_y, int len );\
+\
+static void x264_mbtree_propagate_list_##cpu( x264_t *h, uint16_t *ref_costs, int16_t (*mvs)[2],\
+                                              int16_t *propagate_amount, uint16_t *lowres_costs,\
+                                              int bipred_weight, int mb_y, int len, int list )\
+{\
+    int16_t *current = h->scratch_buffer2;\
+\
+    x264_mbtree_propagate_list_internal_##cpu( mvs, propagate_amount, lowres_costs,\
+                                               current, bipred_weight, mb_y, len );\
+\
+    unsigned stride = h->mb.i_mb_stride;\
+    unsigned width = h->mb.i_mb_width;\
+    unsigned height = h->mb.i_mb_height;\
+\
+    for( unsigned i = 0; i < len; current += 32 )\
+    {\
+        int end = X264_MIN( i+8, len );\
+        for( ; i < end; i++, current += 2 )\
+        {\
+            if( !(lowres_costs[i] & (1 << (list+LOWRES_COST_SHIFT))) )\
+                continue;\
+\
+            unsigned mbx = current[0];\
+            unsigned mby = current[1];\
+            unsigned idx0 = mbx + mby * stride;\
+            unsigned idx2 = idx0 + stride;\
+\
+            /* Shortcut for the simple/common case of zero MV */\
+            if( !M32( mvs[i] ) )\
+            {\
+                CLIP_ADD( ref_costs[idx0], current[16] );\
+                continue;\
+            }\
+\
+            if( mbx < width-1 && mby < height-1 )\
+            {\
+                CLIP_ADD2( ref_costs+idx0, current+16 );\
+                CLIP_ADD2( ref_costs+idx2, current+32 );\
+            }\
+            else\
+            {\
+                /* Note: this takes advantage of unsigned representation to\
+                 * catch negative mbx/mby. */\
+                if( mby < height )\
+                {\
+                    if( mbx < width )\
+                        CLIP_ADD( ref_costs[idx0+0], current[16] );\
+                    if( mbx+1 < width )\
+                        CLIP_ADD( ref_costs[idx0+1], current[17] );\
+                }\
+                if( mby+1 < height )\
+                {\
+                    if( mbx < width )\
+                        CLIP_ADD( ref_costs[idx2+0], current[32] );\
+                    if( mbx+1 < width )\
+                        CLIP_ADD( ref_costs[idx2+1], current[33] );\
+                }\
+            }\
+        }\
+    }\
+}
+
+PROPAGATE_LIST(ssse3)
+PROPAGATE_LIST(avx)
+#undef CLIP_ADD
+#undef CLIP_ADD2
+
 void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
 {
     if( !(cpu&X264_CPU_MMX) )
@@ -632,6 +751,8 @@
         return;
 
     pf->frame_init_lowres_core = x264_frame_init_lowres_core_ssse3;
+    pf->plane_copy_deinterleave_v210 = x264_plane_copy_deinterleave_v210_ssse3;
+    pf->mbtree_propagate_list = x264_mbtree_propagate_list_ssse3;
 
     if( !(cpu&(X264_CPU_SLOW_SHUFFLE|X264_CPU_SLOW_ATOM|X264_CPU_SLOW_PALIGNR)) )
         pf->integral_init4v = x264_integral_init4v_ssse3;
@@ -644,6 +765,7 @@
     pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_avx;
     pf->plane_copy_interleave        = x264_plane_copy_interleave_avx;
     pf->plane_copy_deinterleave      = x264_plane_copy_deinterleave_avx;
+    pf->plane_copy_deinterleave_v210 = x264_plane_copy_deinterleave_v210_avx;
     pf->store_interleave_chroma      = x264_store_interleave_chroma_avx;
     pf->copy[PIXEL_16x16]            = x264_mc_copy_w16_aligned_avx;
 
@@ -654,7 +776,10 @@
         pf->frame_init_lowres_core = x264_frame_init_lowres_core_xop;
 
     if( cpu&X264_CPU_AVX2 )
+    {
         pf->mc_luma = mc_luma_avx2;
+        pf->plane_copy_deinterleave_v210 = x264_plane_copy_deinterleave_v210_avx2;
+    }
 #else // !HIGH_BIT_DEPTH
 
 #if ARCH_X86 // all x86_64 cpus with cacheline split issues use sse2 instead
@@ -679,6 +804,7 @@
     pf->integral_init8v = x264_integral_init8v_sse2;
     pf->hpel_filter = x264_hpel_filter_sse2_amd;
     pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_sse2;
+    pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_sse2;
 
     if( !(cpu&X264_CPU_SSE2_IS_SLOW) )
     {
@@ -696,8 +822,6 @@
         pf->avg[PIXEL_8x8]  = x264_pixel_avg_8x8_sse2;
         pf->avg[PIXEL_8x4]  = x264_pixel_avg_8x4_sse2;
         pf->hpel_filter = x264_hpel_filter_sse2;
-        if( cpu&X264_CPU_SSE_MISALIGN )
-            pf->hpel_filter = x264_hpel_filter_sse2_misalign;
         pf->frame_init_lowres_core = x264_frame_init_lowres_core_sse2;
         if( !(cpu&X264_CPU_STACK_MOD4) )
             pf->mc_chroma = x264_mc_chroma_sse2;
@@ -716,12 +840,6 @@
                 pf->mc_luma = mc_luma_cache64_sse2;
                 pf->get_ref = get_ref_cache64_sse2;
             }
-            if( cpu&X264_CPU_SSE_MISALIGN )
-            {
-                pf->get_ref = get_ref_sse2_misalign;
-                if( !(cpu&X264_CPU_STACK_MOD4) )
-                    pf->mc_chroma = x264_mc_chroma_sse2_misalign;
-            }
         }
     }
 
@@ -737,6 +855,8 @@
     pf->avg[PIXEL_4x8]   = x264_pixel_avg_4x8_ssse3;
     pf->avg[PIXEL_4x4]   = x264_pixel_avg_4x4_ssse3;
     pf->avg[PIXEL_4x2]   = x264_pixel_avg_4x2_ssse3;
+    pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_ssse3;
+    pf->mbtree_propagate_list = x264_mbtree_propagate_list_ssse3;
 
     if( !(cpu&X264_CPU_SLOW_PSHUFB) )
     {
@@ -813,6 +933,7 @@
         return;
     pf->memzero_aligned = x264_memzero_aligned_avx;
     pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_avx;
+    pf->mbtree_propagate_list = x264_mbtree_propagate_list_avx;
 
     if( cpu&X264_CPU_FMA4 )
         pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_fma4;

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc-c.c: x86 motion compensation
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -116,6 +116,23 @@
 void x264_plane_copy_deinterleave_avx( uint16_t *dstu, intptr_t i_dstu,
                                        uint16_t *dstv, intptr_t i_dstv,
                                        uint16_t *src,  intptr_t i_src, int w, int h );
+void x264_plane_copy_deinterleave_rgb_sse2 ( pixel *dsta, intptr_t i_dsta,
+                                             pixel *dstb, intptr_t i_dstb,
+                                             pixel *dstc, intptr_t i_dstc,
+                                             pixel *src,  intptr_t i_src, int pw, int w, int h );
+void x264_plane_copy_deinterleave_rgb_ssse3( pixel *dsta, intptr_t i_dsta,
+                                             pixel *dstb, intptr_t i_dstb,
+                                             pixel *dstc, intptr_t i_dstc,
+                                             pixel *src,  intptr_t i_src, int pw, int w, int h );
+void x264_plane_copy_deinterleave_v210_ssse3( uint16_t *dstu, intptr_t i_dstu,
+                                              uint16_t *dstv, intptr_t i_dstv,
+                                              uint32_t *src,  intptr_t i_src, int w, int h );
+void x264_plane_copy_deinterleave_v210_avx  ( uint16_t *dstu, intptr_t i_dstu,
+                                              uint16_t *dstv, intptr_t i_dstv,
+                                              uint32_t *src,  intptr_t i_src, int w, int h );
+void x264_plane_copy_deinterleave_v210_avx2 ( uint16_t *dstu, intptr_t i_dstu,
+                                              uint16_t *dstv, intptr_t i_dstv,
+                                              uint32_t *src,  intptr_t i_src, int w, int h );
 void x264_store_interleave_chroma_mmx2( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
 void x264_store_interleave_chroma_sse2( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
 void x264_store_interleave_chroma_avx ( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
@@ -144,13 +161,13 @@
 void x264_integral_init8v_mmx ( uint16_t *sum8, intptr_t stride );
 void x264_integral_init8v_sse2( uint16_t *sum8, intptr_t stride );
 void x264_integral_init8v_avx2( uint16_t *sum8, intptr_t stride );
-void x264_mbtree_propagate_cost_sse2( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
+void x264_mbtree_propagate_cost_sse2( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
                                       uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
-void x264_mbtree_propagate_cost_avx ( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
+void x264_mbtree_propagate_cost_avx ( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
                                       uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
-void x264_mbtree_propagate_cost_fma4( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
+void x264_mbtree_propagate_cost_fma4( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
                                       uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
-void x264_mbtree_propagate_cost_avx2_fma3( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
+void x264_mbtree_propagate_cost_avx2_fma3( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
                                            uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
 
 #define MC_CHROMA(cpu)\
@@ -158,7 +175,6 @@
                            int dx, int dy, int i_width, int i_height );
 MC_CHROMA(mmx2)
 MC_CHROMA(sse2)
-MC_CHROMA(sse2_misalign)
 MC_CHROMA(ssse3)
 MC_CHROMA(ssse3_cache64)
 MC_CHROMA(avx)
@@ -186,7 +202,6 @@
 PIXEL_AVG_WALL(cache64_mmx2)
 PIXEL_AVG_WALL(cache64_sse2)
 PIXEL_AVG_WALL(sse2)
-PIXEL_AVG_WALL(sse2_misalign)
 PIXEL_AVG_WALL(cache64_ssse3)
 PIXEL_AVG_WALL(avx2)
 
@@ -227,7 +242,6 @@
 PIXEL_AVG_WTAB(cache64_mmx2, mmx2, cache64_mmx2, cache64_mmx2, cache64_mmx2, cache64_mmx2)
 #endif
 PIXEL_AVG_WTAB(sse2, mmx2, mmx2, sse2, sse2, sse2)
-PIXEL_AVG_WTAB(sse2_misalign, mmx2, mmx2, sse2, sse2, sse2_misalign)
 PIXEL_AVG_WTAB(cache64_sse2, mmx2, cache64_mmx2, cache64_sse2, cache64_sse2, cache64_sse2)
 PIXEL_AVG_WTAB(cache64_ssse3, mmx2, cache64_mmx2, cache64_ssse3, cache64_ssse3, cache64_sse2)
 PIXEL_AVG_WTAB(cache64_ssse3_atom, mmx2, mmx2, cache64_ssse3, cache64_ssse3, sse2)
@@ -429,7 +443,6 @@
 GET_REF(cache32_mmx2)
 GET_REF(cache64_mmx2)
 #endif
-GET_REF(sse2_misalign)
 GET_REF(cache64_sse2)
 GET_REF(cache64_ssse3)
 GET_REF(cache64_ssse3_atom)
@@ -477,7 +490,6 @@
 HPEL(16, avx, avx, avx, avx)
 HPEL(32, avx2, avx2, avx2, avx2)
 #endif
-HPEL(16, sse2_misalign, sse2, sse2_misalign, sse2)
 #endif // HIGH_BIT_DEPTH
 
 static void x264_plane_copy_mmx2( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h )
@@ -521,6 +533,113 @@
 PLANE_INTERLEAVE(avx)
 #endif
 
+#if HAVE_X86_INLINE_ASM
+#define CLIP_ADD(s,x)\
+do\
+{\
+    int temp;\
+    asm("movd       %0, %%xmm0     \n"\
+        "movd       %2, %%xmm1     \n"\
+        "paddsw %%xmm1, %%xmm0     \n"\
+        "movd   %%xmm0, %1         \n"\
+        :"+m"(s), "=&r"(temp)\
+        :"m"(x)\
+    );\
+    s = temp;\
+} while(0)
+
+#define CLIP_ADD2(s,x)\
+do\
+{\
+    asm("movd       %0, %%xmm0     \n"\
+        "movd       %1, %%xmm1     \n"\
+        "paddsw %%xmm1, %%xmm0     \n"\
+        "movd   %%xmm0, %0         \n"\
+        :"+m"(M32(s))\
+        :"m"(M32(x))\
+    );\
+} while(0)
+#else
+#define CLIP_ADD(s,x) (s) = X264_MIN((s)+(x),(1<<15)-1)
+#define CLIP_ADD2(s,x)\
+do\
+{\
+    CLIP_ADD((s)[0], (x)[0]);\
+    CLIP_ADD((s)[1], (x)[1]);\
+} while(0)
+#endif
+
+#define PROPAGATE_LIST(cpu)\
+void x264_mbtree_propagate_list_internal_##cpu( int16_t (*mvs)[2], int16_t *propagate_amount,\
+                                                uint16_t *lowres_costs, int16_t *output,\
+                                                int bipred_weight, int mb_y, int len );\
+\
+static void x264_mbtree_propagate_list_##cpu( x264_t *h, uint16_t *ref_costs, int16_t (*mvs)[2],\
+                                              int16_t *propagate_amount, uint16_t *lowres_costs,\
+                                              int bipred_weight, int mb_y, int len, int list )\
+{\
+    int16_t *current = h->scratch_buffer2;\
+\
+    x264_mbtree_propagate_list_internal_##cpu( mvs, propagate_amount, lowres_costs,\
+                                               current, bipred_weight, mb_y, len );\
+\
+    unsigned stride = h->mb.i_mb_stride;\
+    unsigned width = h->mb.i_mb_width;\
+    unsigned height = h->mb.i_mb_height;\
+\
+    for( unsigned i = 0; i < len; current += 32 )\
+    {\
+        int end = X264_MIN( i+8, len );\
+        for( ; i < end; i++, current += 2 )\
+        {\
+            if( !(lowres_costs[i] & (1 << (list+LOWRES_COST_SHIFT))) )\
+                continue;\
+\
+            unsigned mbx = current[0];\
+            unsigned mby = current[1];\
+            unsigned idx0 = mbx + mby * stride;\
+            unsigned idx2 = idx0 + stride;\
+\
+            /* Shortcut for the simple/common case of zero MV */\
+            if( !M32( mvs[i] ) )\
+            {\
+                CLIP_ADD( ref_costs[idx0], current[16] );\
+                continue;\
+            }\
+\
+            if( mbx < width-1 && mby < height-1 )\
+            {\
+                CLIP_ADD2( ref_costs+idx0, current+16 );\
+                CLIP_ADD2( ref_costs+idx2, current+32 );\
+            }\
+            else\
+            {\
+                /* Note: this takes advantage of unsigned representation to\
+                 * catch negative mbx/mby. */\
+                if( mby < height )\
+                {\
+                    if( mbx < width )\
+                        CLIP_ADD( ref_costs[idx0+0], current[16] );\
+                    if( mbx+1 < width )\
+                        CLIP_ADD( ref_costs[idx0+1], current[17] );\
+                }\
+                if( mby+1 < height )\
+                {\
+                    if( mbx < width )\
+                        CLIP_ADD( ref_costs[idx2+0], current[32] );\
+                    if( mbx+1 < width )\
+                        CLIP_ADD( ref_costs[idx2+1], current[33] );\
+                }\
+            }\
+        }\
+    }\
+}
+
+PROPAGATE_LIST(ssse3)
+PROPAGATE_LIST(avx)
+#undef CLIP_ADD
+#undef CLIP_ADD2
+
 void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
 {
     if( !(cpu&X264_CPU_MMX) )
@@ -632,6 +751,8 @@
         return;
 
     pf->frame_init_lowres_core = x264_frame_init_lowres_core_ssse3;
+    pf->plane_copy_deinterleave_v210 = x264_plane_copy_deinterleave_v210_ssse3;
+    pf->mbtree_propagate_list = x264_mbtree_propagate_list_ssse3;
 
     if( !(cpu&(X264_CPU_SLOW_SHUFFLE|X264_CPU_SLOW_ATOM|X264_CPU_SLOW_PALIGNR)) )
         pf->integral_init4v = x264_integral_init4v_ssse3;
@@ -644,6 +765,7 @@
     pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_avx;
     pf->plane_copy_interleave        = x264_plane_copy_interleave_avx;
     pf->plane_copy_deinterleave      = x264_plane_copy_deinterleave_avx;
+    pf->plane_copy_deinterleave_v210 = x264_plane_copy_deinterleave_v210_avx;
     pf->store_interleave_chroma      = x264_store_interleave_chroma_avx;
     pf->copy[PIXEL_16x16]            = x264_mc_copy_w16_aligned_avx;
 
@@ -654,7 +776,10 @@
         pf->frame_init_lowres_core = x264_frame_init_lowres_core_xop;
 
     if( cpu&X264_CPU_AVX2 )
+    {
         pf->mc_luma = mc_luma_avx2;
+        pf->plane_copy_deinterleave_v210 = x264_plane_copy_deinterleave_v210_avx2;
+    }
 #else // !HIGH_BIT_DEPTH
 
 #if ARCH_X86 // all x86_64 cpus with cacheline split issues use sse2 instead
@@ -679,6 +804,7 @@
     pf->integral_init8v = x264_integral_init8v_sse2;
     pf->hpel_filter = x264_hpel_filter_sse2_amd;
     pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_sse2;
+    pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_sse2;
 
     if( !(cpu&X264_CPU_SSE2_IS_SLOW) )
     {
@@ -696,8 +822,6 @@
         pf->avg[PIXEL_8x8]  = x264_pixel_avg_8x8_sse2;
         pf->avg[PIXEL_8x4]  = x264_pixel_avg_8x4_sse2;
         pf->hpel_filter = x264_hpel_filter_sse2;
-        if( cpu&X264_CPU_SSE_MISALIGN )
-            pf->hpel_filter = x264_hpel_filter_sse2_misalign;
         pf->frame_init_lowres_core = x264_frame_init_lowres_core_sse2;
         if( !(cpu&X264_CPU_STACK_MOD4) )
             pf->mc_chroma = x264_mc_chroma_sse2;
@@ -716,12 +840,6 @@
                 pf->mc_luma = mc_luma_cache64_sse2;
                 pf->get_ref = get_ref_cache64_sse2;
             }
-            if( cpu&X264_CPU_SSE_MISALIGN )
-            {
-                pf->get_ref = get_ref_sse2_misalign;
-                if( !(cpu&X264_CPU_STACK_MOD4) )
-                    pf->mc_chroma = x264_mc_chroma_sse2_misalign;
-            }
         }
     }
 
@@ -737,6 +855,8 @@
     pf->avg[PIXEL_4x8]   = x264_pixel_avg_4x8_ssse3;
     pf->avg[PIXEL_4x4]   = x264_pixel_avg_4x4_ssse3;
     pf->avg[PIXEL_4x2]   = x264_pixel_avg_4x2_ssse3;
+    pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_ssse3;
+    pf->mbtree_propagate_list = x264_mbtree_propagate_list_ssse3;
 
     if( !(cpu&X264_CPU_SLOW_PSHUFB) )
     {
@@ -813,6 +933,7 @@
         return;
     pf->memzero_aligned = x264_memzero_aligned_avx;
     pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_avx;
+    pf->mbtree_propagate_list = x264_mbtree_propagate_list_avx;
 
     if( cpu&X264_CPU_FMA4 )
         pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_fma4;
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/mc.h -> x264-snapshot-20140321-2245.tar.bz2/common/x86/mc.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mc.h: x86 motion compensation
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/pixel-32.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/pixel-32.asm Changed

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* pixel-32.asm: x86_32 pixel metrics
 ;*****************************************************************************
-;* Copyright (C) 2003-2013 x264 project
+;* Copyright (C) 2003-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Laurent Aimar <fenrir@via.ecp.fr>
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/pixel-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/pixel-a.asm Changed

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* pixel.asm: x86 pixel metrics
 ;*****************************************************************************
-;* Copyright (C) 2003-2013 x264 project
+;* Copyright (C) 2003-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Holger Lubitz <holger@lubitz.org>
@@ -205,7 +205,7 @@
     mov    r4d, %%n
 %endif
     pxor    m0, m0
-.loop
+.loop:
     mova    m1, [r0]
     mova    m2, [r0+offset0_1]
     mova    m3, [r0+offset0_2]
@@ -1265,7 +1265,7 @@
 ; clobber: m3..m7
 ; out: %1 = satd
 %macro SATD_4x4_MMX 3
-    %xdefine %%n n%1
+    %xdefine %%n nn%1
     %assign offset %2*SIZEOF_PIXEL
     LOAD_DIFF m4, m3, none, [r0+     offset], [r2+     offset]
     LOAD_DIFF m5, m3, none, [r0+  r1+offset], [r2+  r3+offset]
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/pixel.h -> x264-snapshot-20140321-2245.tar.bz2/common/x86/pixel.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * pixel.h: x86 pixel metrics
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -47,7 +47,6 @@
 
 DECL_X1( sad, mmx2 )
 DECL_X1( sad, sse2 )
-DECL_X4( sad, sse2_misalign )
 DECL_X1( sad, sse3 )
 DECL_X1( sad, sse2_aligned )
 DECL_X1( sad, ssse3 )
@@ -57,6 +56,7 @@
 DECL_X4( sad, sse2 )
 DECL_X4( sad, sse3 )
 DECL_X4( sad, ssse3 )
+DECL_X4( sad, avx )
 DECL_X4( sad, avx2 )
 DECL_X1( ssd, mmx )
 DECL_X1( ssd, mmx2 )
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/predict-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/predict-a.asm Changed

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* predict-a.asm: x86 intra prediction
 ;*****************************************************************************
-;* Copyright (C) 2005-2013 x264 project
+;* Copyright (C) 2005-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Holger Lubitz <holger@lubitz.org>
@@ -31,7 +31,6 @@
 
 SECTION_RODATA 32
 
-pw_0to15:    dw 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
 pw_43210123: times 2 dw -3, -2, -1, 0, 1, 2, 3, 4
 pw_m3:       times 16 dw -3
 pw_m7:       times 16 dw -7
@@ -56,6 +55,7 @@
 cextern pw_16
 cextern pw_00ff
 cextern pw_pixel_max
+cextern pw_0to15
 
 %macro STORE8 1
     mova [r0+0*FDEC_STRIDEB], %1
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/predict-c.c -> x264-snapshot-20140321-2245.tar.bz2/common/x86/predict-c.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * predict-c.c: intra prediction
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/predict.h -> x264-snapshot-20140321-2245.tar.bz2/common/x86/predict.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * predict.h: x86 intra prediction
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/quant-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/quant-a.asm Changed

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* quant-a.asm: x86 quantization and level-run
 ;*****************************************************************************
-;* Copyright (C) 2005-2013 x264 project
+;* Copyright (C) 2005-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Jason Garrett-Glaser <darkshikari@gmail.com>
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/quant.h -> x264-snapshot-20140321-2245.tar.bz2/common/x86/quant.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * quant.h: x86 quantization and level-run
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/sad-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/sad-a.asm Changed

@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* sad-a.asm: x86 sad functions
 ;*****************************************************************************
-;* Copyright (C) 2003-2013 x264 project
+;* Copyright (C) 2003-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Jason Garrett-Glaser <darkshikari@gmail.com>
@@ -32,7 +32,6 @@
 SECTION_RODATA 32
 
 pb_shuf8x8c2: times 2 db 0,0,0,0,8,8,8,8,-1,-1,-1,-1,-1,-1,-1,-1
-deinterleave_sadx4: dd 0,4,2,6
 hpred_shuf: db 0,0,2,2,8,8,10,10,1,1,3,3,9,9,11,11
 
 SECTION .text
@@ -1009,62 +1008,56 @@
 ;=============================================================================
 
 %macro SAD_X3_START_1x16P_SSE2 0
-%if cpuflag(misalign)
-    mova   xmm2, [r0]
-    movu   xmm0, [r1]
-    movu   xmm1, [r2]
-    psadbw xmm0, xmm2
-    psadbw xmm1, xmm2
-    psadbw xmm2, [r3]
+    mova     m2, [r0]
+%if cpuflag(avx)
+    psadbw   m0, m2, [r1]
+    psadbw   m1, m2, [r2]
+    psadbw   m2, [r3]
 %else
-    mova   xmm3, [r0]
-    movu   xmm0, [r1]
-    movu   xmm1, [r2]
-    movu   xmm2, [r3]
-    psadbw xmm0, xmm3
-    psadbw xmm1, xmm3
-    psadbw xmm2, xmm3
+    movu     m0, [r1]
+    movu     m1, [r2]
+    movu     m3, [r3]
+    psadbw   m0, m2
+    psadbw   m1, m2
+    psadbw   m2, m3
 %endif
 %endmacro
 
 %macro SAD_X3_1x16P_SSE2 2
-%if cpuflag(misalign)
-    mova   xmm3, [r0+%1]
-    movu   xmm4, [r1+%2]
-    movu   xmm5, [r2+%2]
-    psadbw xmm4, xmm3
-    psadbw xmm5, xmm3
-    psadbw xmm3, [r3+%2]
-    paddw  xmm0, xmm4
-    paddw  xmm1, xmm5
-    paddw  xmm2, xmm3
+    mova     m3, [r0+%1]
+%if cpuflag(avx)
+    psadbw   m4, m3, [r1+%2]
+    psadbw   m5, m3, [r2+%2]
+    psadbw   m3, [r3+%2]
 %else
-    mova   xmm3, [r0+%1]
-    movu   xmm4, [r1+%2]
-    movu   xmm5, [r2+%2]
-    movu   xmm6, [r3+%2]
-    psadbw xmm4, xmm3
-    psadbw xmm5, xmm3
-    psadbw xmm6, xmm3
-    paddw  xmm0, xmm4
-    paddw  xmm1, xmm5
-    paddw  xmm2, xmm6
+    movu     m4, [r1+%2]
+    movu     m5, [r2+%2]
+    movu     m6, [r3+%2]
+    psadbw   m4, m3
+    psadbw   m5, m3
+    psadbw   m3, m6
 %endif
+    paddw    m0, m4
+    paddw    m1, m5
+    paddw    m2, m3
 %endmacro
 
+%if ARCH_X86_64
+    DECLARE_REG_TMP 6
+%else
+    DECLARE_REG_TMP 5
+%endif
+
 %macro SAD_X3_4x16P_SSE2 2
 %if %1==0
-%if UNIX64
-    mov  r6, r5
-%endif
-    lea  r5, [r4*3]
+    lea  t0, [r4*3]
     SAD_X3_START_1x16P_SSE2
 %else
     SAD_X3_1x16P_SSE2 FENC_STRIDE*(0+(%1&1)*4), r4*0
 %endif
     SAD_X3_1x16P_SSE2 FENC_STRIDE*(1+(%1&1)*4), r4*1
     SAD_X3_1x16P_SSE2 FENC_STRIDE*(2+(%1&1)*4), r4*2
-    SAD_X3_1x16P_SSE2 FENC_STRIDE*(3+(%1&1)*4), r5
+    SAD_X3_1x16P_SSE2 FENC_STRIDE*(3+(%1&1)*4), t0
 %if %1 != %2-1
 %if (%1&1) != 0
     add  r0, 8*FENC_STRIDE
@@ -1076,156 +1069,117 @@
 %endmacro
 
 %macro SAD_X3_START_2x8P_SSE2 0
-    movq    xmm7, [r0]
-    movq    xmm0, [r1]
-    movq    xmm1, [r2]
-    movq    xmm2, [r3]
-    movhps  xmm7, [r0+FENC_STRIDE]
-    movhps  xmm0, [r1+r4]
-    movhps  xmm1, [r2+r4]
-    movhps  xmm2, [r3+r4]
-    psadbw  xmm0, xmm7
-    psadbw  xmm1, xmm7
-    psadbw  xmm2, xmm7
+    movq     m3, [r0]
+    movq     m0, [r1]
+    movq     m1, [r2]
+    movq     m2, [r3]
+    movhps   m3, [r0+FENC_STRIDE]
+    movhps   m0, [r1+r4]
+    movhps   m1, [r2+r4]
+    movhps   m2, [r3+r4]
+    psadbw   m0, m3
+    psadbw   m1, m3
+    psadbw   m2, m3
 %endmacro
 
 %macro SAD_X3_2x8P_SSE2 4
-    movq    xmm7, [r0+%1]
-    movq    xmm3, [r1+%2]
-    movq    xmm4, [r2+%2]
-    movq    xmm5, [r3+%2]
-    movhps  xmm7, [r0+%3]
-    movhps  xmm3, [r1+%4]
-    movhps  xmm4, [r2+%4]
-    movhps  xmm5, [r3+%4]
-    psadbw  xmm3, xmm7
-    psadbw  xmm4, xmm7
-    psadbw  xmm5, xmm7
-    paddw   xmm0, xmm3
-    paddw   xmm1, xmm4
-    paddw   xmm2, xmm5
+    movq     m6, [r0+%1]
+    movq     m3, [r1+%2]
+    movq     m4, [r2+%2]
+    movq     m5, [r3+%2]
+    movhps   m6, [r0+%3]
+    movhps   m3, [r1+%4]
+    movhps   m4, [r2+%4]
+    movhps   m5, [r3+%4]
+    psadbw   m3, m6
+    psadbw   m4, m6
+    psadbw   m5, m6
+    paddw    m0, m3
+    paddw    m1, m4
+    paddw    m2, m5
 %endmacro
 
 %macro SAD_X4_START_2x8P_SSE2 0
-    movq    xmm7, [r0]
-    movq    xmm0, [r1]
-    movq    xmm1, [r2]
-    movq    xmm2, [r3]
-    movq    xmm3, [r4]
-    movhps  xmm7, [r0+FENC_STRIDE]
-    movhps  xmm0, [r1+r5]
-    movhps  xmm1, [r2+r5]
-    movhps  xmm2, [r3+r5]
-    movhps  xmm3, [r4+r5]
-    psadbw  xmm0, xmm7
-    psadbw  xmm1, xmm7
-    psadbw  xmm2, xmm7
-    psadbw  xmm3, xmm7
+    movq     m4, [r0]
+    movq     m0, [r1]
+    movq     m1, [r2]
+    movq     m2, [r3]
+    movq     m3, [r4]
+    movhps   m4, [r0+FENC_STRIDE]
+    movhps   m0, [r1+r5]
+    movhps   m1, [r2+r5]
+    movhps   m2, [r3+r5]
+    movhps   m3, [r4+r5]
+    psadbw   m0, m4
+    psadbw   m1, m4
+    psadbw   m2, m4
+    psadbw   m3, m4
 %endmacro
 
 %macro SAD_X4_2x8P_SSE2 4
-    movq    xmm7, [r0+%1]
-    movq    xmm4, [r1+%2]
-    movq    xmm5, [r2+%2]
-%if ARCH_X86_64
-    movq    xmm6, [r3+%2]
-    movq    xmm8, [r4+%2]
-    movhps  xmm7, [r0+%3]
-    movhps  xmm4, [r1+%4]
-    movhps  xmm5, [r2+%4]
-    movhps  xmm6, [r3+%4]
-    movhps  xmm8, [r4+%4]
-    psadbw  xmm4, xmm7
-    psadbw  xmm5, xmm7
-    psadbw  xmm6, xmm7
-    psadbw  xmm8, xmm7
-    paddw   xmm0, xmm4
-    paddw   xmm1, xmm5
-    paddw   xmm2, xmm6
-    paddw   xmm3, xmm8
-%else
-    movhps  xmm7, [r0+%3]
-    movhps  xmm4, [r1+%4]
-    movhps  xmm5, [r2+%4]
-    psadbw  xmm4, xmm7
-    psadbw  xmm5, xmm7
-    paddw   xmm0, xmm4
-    paddw   xmm1, xmm5
-    movq    xmm6, [r3+%2]
-    movq    xmm4, [r4+%2]
-    movhps  xmm6, [r3+%4]
-    movhps  xmm4, [r4+%4]
-    psadbw  xmm6, xmm7
-    psadbw  xmm4, xmm7
-    paddw   xmm2, xmm6
-    paddw   xmm3, xmm4
-%endif
+    movq     m6, [r0+%1]
+    movq     m4, [r1+%2]
+    movq     m5, [r2+%2]
+    movhps   m6, [r0+%3]
+    movhps   m4, [r1+%4]
+    movhps   m5, [r2+%4]
+    psadbw   m4, m6
+    psadbw   m5, m6
+    paddw    m0, m4
+    paddw    m1, m5
+    movq     m4, [r3+%2]
+    movq     m5, [r4+%2]
+    movhps   m4, [r3+%4]
+    movhps   m5, [r4+%4]
+    psadbw   m4, m6
+    psadbw   m5, m6
+    paddw    m2, m4
+    paddw    m3, m5
 %endmacro
 
 %macro SAD_X4_START_1x16P_SSE2 0
-%if cpuflag(misalign)
-    mova   xmm3, [r0]
-    movu   xmm0, [r1]
-    movu   xmm1, [r2]
-    movu   xmm2, [r3]
-    psadbw xmm0, xmm3
-    psadbw xmm1, xmm3
-    psadbw xmm2, xmm3
-    psadbw xmm3, [r4]
+    mova     m3, [r0]
+%if cpuflag(avx)
+    psadbw   m0, m3, [r1]
+    psadbw   m1, m3, [r2]
+    psadbw   m2, m3, [r3]
+    psadbw   m3, [r4]
 %else
-    mova   xmm7, [r0]
-    movu   xmm0, [r1]
-    movu   xmm1, [r2]
-    movu   xmm2, [r3]
-    movu   xmm3, [r4]
-    psadbw xmm0, xmm7
-    psadbw xmm1, xmm7
-    psadbw xmm2, xmm7
-    psadbw xmm3, xmm7
+    movu     m0, [r1]
+    movu     m1, [r2]
+    movu     m2, [r3]
+    movu     m4, [r4]
+    psadbw   m0, m3
+    psadbw   m1, m3
+    psadbw   m2, m3
+    psadbw   m3, m4
 %endif
 %endmacro
 
 %macro SAD_X4_1x16P_SSE2 2
-%if cpuflag(misalign)
-    mova   xmm7, [r0+%1]
-    movu   xmm4, [r1+%2]
-    movu   xmm5, [r2+%2]
-    movu   xmm6, [r3+%2]
-    psadbw xmm4, xmm7
-    psadbw xmm5, xmm7
-    psadbw xmm6, xmm7
-    psadbw xmm7, [r4+%2]
-    paddw  xmm0, xmm4
-    paddw  xmm1, xmm5
-    paddw  xmm2, xmm6
-    paddw  xmm3, xmm7
+    mova     m6, [r0+%1]
+%if cpuflag(avx)
+    psadbw   m4, m6, [r1+%2]
+    psadbw   m5, m6, [r2+%2]
 %else
-    mova   xmm7, [r0+%1]
-    movu   xmm4, [r1+%2]
-    movu   xmm5, [r2+%2]
-    movu   xmm6, [r3+%2]
-%if ARCH_X86_64
-    movu   xmm8, [r4+%2]
-    psadbw xmm4, xmm7
-    psadbw xmm5, xmm7
-    psadbw xmm6, xmm7
-    psadbw xmm8, xmm7
-    paddw  xmm0, xmm4
-    paddw  xmm1, xmm5
-    paddw  xmm2, xmm6
-    paddw  xmm3, xmm8
-%else
-    psadbw xmm4, xmm7
-    psadbw xmm5, xmm7
-    paddw  xmm0, xmm4
-    psadbw xmm6, xmm7
-    movu   xmm4, [r4+%2]
-    paddw  xmm1, xmm5
-    psadbw xmm4, xmm7
-    paddw  xmm2, xmm6
-    paddw  xmm3, xmm4
+    movu     m4, [r1+%2]
+    movu     m5, [r2+%2]
+    psadbw   m4, m6
+    psadbw   m5, m6
 %endif
+    paddw    m0, m4
+    paddw    m1, m5
+%if cpuflag(avx)
+    psadbw   m4, m6, [r3+%2]
+    psadbw   m5, m6, [r4+%2]
+%else
+    movu     m4, [r3+%2]
+    movu     m5, [r4+%2]
+    psadbw   m4, m6
+    psadbw   m5, m6
 %endif
+    paddw    m2, m4
+    paddw    m3, m5
 %endmacro
 
 %macro SAD_X4_4x16P_SSE2 2
@@ -1251,15 +1205,12 @@
 
 %macro SAD_X3_4x8P_SSE2 2
 %if %1==0
-%if UNIX64
-    mov  r6, r5
-%endif
-    lea  r5, [r4*3]
+    lea  t0, [r4*3]
     SAD_X3_START_2x8P_SSE2
 %else
     SAD_X3_2x8P_SSE2 FENC_STRIDE*(0+(%1&1)*4), r4*0, FENC_STRIDE*(1+(%1&1)*4), r4*1
 %endif
-    SAD_X3_2x8P_SSE2 FENC_STRIDE*(2+(%1&1)*4), r4*2, FENC_STRIDE*(3+(%1&1)*4), r5
+    SAD_X3_2x8P_SSE2 FENC_STRIDE*(2+(%1&1)*4), r4*2, FENC_STRIDE*(3+(%1&1)*4), t0
 %if %1 != %2-1
 %if (%1&1) != 0
     add  r0, 8*FENC_STRIDE
@@ -1290,78 +1241,86 @@
 %endmacro
 
 %macro SAD_X3_END_SSE2 0
-    movhlps xmm4, xmm0
-    movhlps xmm5, xmm1
-    movhlps xmm6, xmm2
-    paddw   xmm0, xmm4
-    paddw   xmm1, xmm5
-    paddw   xmm2, xmm6
-%if UNIX64
-    movd [r6+0], xmm0
-    movd [r6+4], xmm1
-    movd [r6+8], xmm2
+    movifnidn r5, r5mp
+%if cpuflag(ssse3)
+    packssdw m0, m1
+    packssdw m2, m2
+    phaddd   m0, m2
+    mova   [r5], m0
 %else
-    mov      r0, r5mp
-    movd [r0+0], xmm0
-    movd [r0+4], xmm1
-    movd [r0+8], xmm2
+    movhlps  m3, m0
+    movhlps  m4, m1
+    movhlps  m5, m2
+    paddw    m0, m3
+    paddw    m1, m4
+    paddw    m2, m5
+    movd [r5+0], m0
+    movd [r5+4], m1
+    movd [r5+8], m2
 %endif
     RET
 %endmacro
 
 %macro SAD_X4_END_SSE2 0
-    mov       r0, r6mp
-    psllq   xmm1, 32
-    psllq   xmm3, 32
-    paddw   xmm0, xmm1
-    paddw   xmm2, xmm3
-    movhlps xmm1, xmm0
-    movhlps xmm3, xmm2
-    paddw   xmm0, xmm1
-    paddw   xmm2, xmm3
-    movq  [r0+0], xmm0
-    movq  [r0+8], xmm2
+    mov      r0, r6mp
+%if cpuflag(ssse3)
+    packssdw m0, m1
+    packssdw m2, m3
+    phaddd   m0, m2
+    mova   [r0], m0
+%else
+    psllq    m1, 32
+    psllq    m3, 32
+    paddw    m0, m1
+    paddw    m2, m3
+    movhlps  m1, m0
+    movhlps  m3, m2
+    paddw    m0, m1
+    paddw    m2, m3
+    movq [r0+0], m0
+    movq [r0+8], m2
+%endif
     RET
 %endmacro
 
 %macro SAD_X4_START_2x8P_SSSE3 0
-    movddup xmm4, [r0]
-    movq    xmm0, [r1]
-    movq    xmm1, [r3]
-    movhps  xmm0, [r2]
-    movhps  xmm1, [r4]
-    movddup xmm5, [r0+FENC_STRIDE]
-    movq    xmm2, [r1+r5]
-    movq    xmm3, [r3+r5]
-    movhps  xmm2, [r2+r5]
-    movhps  xmm3, [r4+r5]
-    psadbw  xmm0, xmm4
-    psadbw  xmm1, xmm4
-    psadbw  xmm2, xmm5
-    psadbw  xmm3, xmm5
-    paddw   xmm0, xmm2
-    paddw   xmm1, xmm3
+    movddup  m4, [r0]
+    movq     m0, [r1]
+    movq     m1, [r3]
+    movhps   m0, [r2]
+    movhps   m1, [r4]
+    movddup  m5, [r0+FENC_STRIDE]
+    movq     m2, [r1+r5]
+    movq     m3, [r3+r5]
+    movhps   m2, [r2+r5]
+    movhps   m3, [r4+r5]
+    psadbw   m0, m4
+    psadbw   m1, m4
+    psadbw   m2, m5
+    psadbw   m3, m5
+    paddw    m0, m2
+    paddw    m1, m3
 %endmacro
 
 %macro SAD_X4_2x8P_SSSE3 4
-    movddup xmm6, [r0+%1]
-    movq    xmm2, [r1+%2]
-    movq    xmm3, [r3+%2]
-    movhps  xmm2, [r2+%2]
-    movhps  xmm3, [r4+%2]
-    movddup xmm7, [r0+%3]
-    movq    xmm4, [r1+%4]
-    movq    xmm5, [r3+%4]
-    movhps  xmm4, [r2+%4]
-    movhps  xmm5, [r4+%4]
-    psadbw  xmm2, xmm6
-    psadbw  xmm3, xmm6
-    psadbw  xmm4, xmm7
-    psadbw  xmm5, xmm7
-    paddw   xmm0, xmm2
-    paddw   xmm1, xmm3
-    paddw   xmm0, xmm4
-    paddw   xmm1, xmm5
+    movddup  m6, [r0+%1]
+    movq     m2, [r1+%2]
+    movq     m3, [r3+%2]
+    movhps   m2, [r2+%2]
+    movhps   m3, [r4+%2]
+    movddup  m7, [r0+%3]
+    movq     m4, [r1+%4]
+    movq     m5, [r3+%4]
+    movhps   m4, [r2+%4]
+    movhps   m5, [r4+%4]
+    psadbw   m2, m6
+    psadbw   m3, m6
+    psadbw   m4, m7
+    psadbw   m5, m7
+    paddw    m0, m2
+    paddw    m1, m3
+    paddw    m0, m4
+    paddw    m1, m5
 %endmacro
 
 %macro SAD_X4_4x8P_SSSE3 2
@@ -1384,9 +1343,9 @@
 %endmacro
 
 %macro SAD_X4_END_SSSE3 0
-    mov       r0, r6mp
-    packssdw xmm0, xmm1
-    movdqa  [r0], xmm0
+    mov      r0, r6mp
+    packssdw m0, m1
+    mova   [r0], m0
     RET
 %endmacro
 
@@ -1421,15 +1380,12 @@
 
 %macro SAD_X3_4x16P_AVX2 2
 %if %1==0
-%if UNIX64
-    mov  r6, r5
-%endif
-    lea  r5, [r4*3]
+    lea  t0, [r4*3]
     SAD_X3_START_2x16P_AVX2
 %else
     SAD_X3_2x16P_AVX2 FENC_STRIDE*(0+(%1&1)*4), r4*0, r4*1
 %endif
-    SAD_X3_2x16P_AVX2 FENC_STRIDE*(2+(%1&1)*4), r4*2, r5
+    SAD_X3_2x16P_AVX2 FENC_STRIDE*(2+(%1&1)*4), r4*2, t0
 %if %1 != %2-1
 %if (%1&1) != 0
     add  r0, 8*FENC_STRIDE
@@ -1444,12 +1400,12 @@
     vbroadcasti128 m4, [r0]
     vbroadcasti128 m5, [r0+FENC_STRIDE]
     movu   xm0, [r1]
-    movu   xm1, [r3]
+    movu   xm1, [r2]
     movu   xm2, [r1+r5]
-    movu   xm3, [r3+r5]
-    vinserti128 m0, m0, [r2], 1
+    movu   xm3, [r2+r5]
+    vinserti128 m0, m0, [r3], 1
     vinserti128 m1, m1, [r4], 1
-    vinserti128 m2, m2, [r2+r5], 1
+    vinserti128 m2, m2, [r3+r5], 1
     vinserti128 m3, m3, [r4+r5], 1
     psadbw  m0, m4
     psadbw  m1, m4
@@ -1463,12 +1419,12 @@
     vbroadcasti128 m6, [r0+%1]
     vbroadcasti128 m7, [r0+%3]
     movu   xm2, [r1+%2]
-    movu   xm3, [r3+%2]
+    movu   xm3, [r2+%2]
     movu   xm4, [r1+%4]
-    movu   xm5, [r3+%4]
-    vinserti128 m2, m2, [r2+%2], 1
+    movu   xm5, [r2+%4]
+    vinserti128 m2, m2, [r3+%2], 1
     vinserti128 m3, m3, [r4+%2], 1
-    vinserti128 m4, m4, [r2+%4], 1
+    vinserti128 m4, m4, [r3+%4], 1
     vinserti128 m5, m5, [r4+%4], 1
     psadbw  m2, m6
     psadbw  m3, m6
@@ -1500,41 +1456,22 @@
 %endmacro
 
 %macro SAD_X3_END_AVX2 0
-    vextracti128 xm4, m0, 1
-    vextracti128 xm5, m1, 1
-    vextracti128 xm6, m2, 1
-    paddw   xm0, xm4
-    paddw   xm1, xm5
-    paddw   xm2, xm6
-    movhlps xm4, xm0
-    movhlps xm5, xm1
-    movhlps xm6, xm2
-    paddw   xm0, xm4
-    paddw   xm1, xm5
-    paddw   xm2, xm6
-%if UNIX64
-    movd [r6+0], xm0
-    movd [r6+4], xm1
-    movd [r6+8], xm2
-%else
-    mov      r0, r5mp
-    movd [r0+0], xm0
-    movd [r0+4], xm1
-    movd [r0+8], xm2
-%endif
+    movifnidn r5, r5mp
+    packssdw  m0, m1        ; 0 0 1 1 0 0 1 1
+    packssdw  m2, m2        ; 2 2 _ _ 2 2 _ _
+    phaddd    m0, m2        ; 0 1 2 _ 0 1 2 _
+    vextracti128 xm1, m0, 1
+    paddd    xm0, xm1       ; 0 1 2 _
+    mova    [r5], xm0
     RET
 %endmacro
 
 %macro SAD_X4_END_AVX2 0
-    mov      r0, r6mp
-    punpckhqdq m2, m0, m0
-    punpckhqdq m3, m1, m1
-    paddw    m0, m2
-    paddw    m1, m3
-    packssdw m0, m1
-    mova    xm2, [deinterleave_sadx4]
-    vpermd   m0, m2, m0
-    mova   [r0], xm0
+    mov       r0, r6mp
+    packssdw  m0, m1        ; 0 0 1 1 2 2 3 3
+    vextracti128 xm1, m0, 1
+    phaddd   xm0, xm1       ; 0 1 2 3
+    mova    [r0], xm0
     RET
 %endmacro
 
@@ -1542,8 +1479,8 @@
 ; void pixel_sad_x3_16x16( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1,
 ;                          uint8_t *pix2, intptr_t i_stride, int scores[3] )
 ;-----------------------------------------------------------------------------
-%macro SAD_X_SSE2 3
-cglobal pixel_sad_x%1_%2x%3, 2+%1,3+%1,9
+%macro SAD_X_SSE2 4
+cglobal pixel_sad_x%1_%2x%3, 2+%1,3+%1,%4
 %assign x 0
 %rep %3/4
     SAD_X%1_4x%2P_SSE2 x, %3/4
@@ -1553,28 +1490,22 @@
 %endmacro
 
 INIT_XMM sse2
-SAD_X_SSE2 3, 16, 16
-SAD_X_SSE2 3, 16,  8
-SAD_X_SSE2 3,  8, 16
-SAD_X_SSE2 3,  8,  8
-SAD_X_SSE2 3,  8,  4
-SAD_X_SSE2 4, 16, 16
-SAD_X_SSE2 4, 16,  8
-SAD_X_SSE2 4,  8, 16
-SAD_X_SSE2 4,  8,  8
-SAD_X_SSE2 4,  8,  4
-
-INIT_XMM sse2, misalign
-SAD_X_SSE2 3, 16, 16
-SAD_X_SSE2 3, 16,  8
-SAD_X_SSE2 4, 16, 16
-SAD_X_SSE2 4, 16,  8
+SAD_X_SSE2 3, 16, 16, 7
+SAD_X_SSE2 3, 16,  8, 7
+SAD_X_SSE2 3,  8, 16, 7
+SAD_X_SSE2 3,  8,  8, 7
+SAD_X_SSE2 3,  8,  4, 7
+SAD_X_SSE2 4, 16, 16, 7
+SAD_X_SSE2 4, 16,  8, 7
+SAD_X_SSE2 4,  8, 16, 7
+SAD_X_SSE2 4,  8,  8, 7
+SAD_X_SSE2 4,  8,  4, 7
 
 INIT_XMM sse3
-SAD_X_SSE2 3, 16, 16
-SAD_X_SSE2 3, 16,  8
-SAD_X_SSE2 4, 16, 16
-SAD_X_SSE2 4, 16,  8
+SAD_X_SSE2 3, 16, 16, 7
+SAD_X_SSE2 3, 16,  8, 7
+SAD_X_SSE2 4, 16, 16, 7
+SAD_X_SSE2 4, 16,  8, 7
 
 %macro SAD_X_SSSE3 3
 cglobal pixel_sad_x%1_%2x%3, 2+%1,3+%1,8
@@ -1587,9 +1518,19 @@
 %endmacro
 
 INIT_XMM ssse3
-SAD_X_SSSE3 4, 8, 16
-SAD_X_SSSE3 4, 8,  8
-SAD_X_SSSE3 4, 8,  4
+SAD_X_SSE2  3, 16, 16, 7
+SAD_X_SSE2  3, 16,  8, 7
+SAD_X_SSE2  4, 16, 16, 7
+SAD_X_SSE2  4, 16,  8, 7
+SAD_X_SSSE3 4,  8, 16
+SAD_X_SSSE3 4,  8,  8
+SAD_X_SSSE3 4,  8,  4
+
+INIT_XMM avx
+SAD_X_SSE2 3, 16, 16, 6
+SAD_X_SSE2 3, 16,  8, 6
+SAD_X_SSE2 4, 16, 16, 7
+SAD_X_SSE2 4, 16,  8, 7
 
 %macro SAD_X_AVX2 4
 cglobal pixel_sad_x%1_%2x%3, 2+%1,3+%1,%4

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* sad-a.asm: x86 sad functions
 ;*****************************************************************************
-;* Copyright (C) 2003-2013 x264 project
+;* Copyright (C) 2003-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Jason Garrett-Glaser <darkshikari@gmail.com>
@@ -32,7 +32,6 @@
 SECTION_RODATA 32
 
 pb_shuf8x8c2: times 2 db 0,0,0,0,8,8,8,8,-1,-1,-1,-1,-1,-1,-1,-1
-deinterleave_sadx4: dd 0,4,2,6
 hpred_shuf: db 0,0,2,2,8,8,10,10,1,1,3,3,9,9,11,11
 
 SECTION .text
@@ -1009,62 +1008,56 @@
 ;=============================================================================
 
 %macro SAD_X3_START_1x16P_SSE2 0
-%if cpuflag(misalign)
-    mova   xmm2, [r0]
-    movu   xmm0, [r1]
-    movu   xmm1, [r2]
-    psadbw xmm0, xmm2
-    psadbw xmm1, xmm2
-    psadbw xmm2, [r3]
+    mova     m2, [r0]
+%if cpuflag(avx)
+    psadbw   m0, m2, [r1]
+    psadbw   m1, m2, [r2]
+    psadbw   m2, [r3]
 %else
-    mova   xmm3, [r0]
-    movu   xmm0, [r1]
-    movu   xmm1, [r2]
-    movu   xmm2, [r3]
-    psadbw xmm0, xmm3
-    psadbw xmm1, xmm3
-    psadbw xmm2, xmm3
+    movu     m0, [r1]
+    movu     m1, [r2]
+    movu     m3, [r3]
+    psadbw   m0, m2
+    psadbw   m1, m2
+    psadbw   m2, m3
 %endif
 %endmacro
 
 %macro SAD_X3_1x16P_SSE2 2
-%if cpuflag(misalign)
-    mova   xmm3, [r0+%1]
-    movu   xmm4, [r1+%2]
-    movu   xmm5, [r2+%2]
-    psadbw xmm4, xmm3
-    psadbw xmm5, xmm3
-    psadbw xmm3, [r3+%2]
-    paddw  xmm0, xmm4
-    paddw  xmm1, xmm5
-    paddw  xmm2, xmm3
+    mova     m3, [r0+%1]
+%if cpuflag(avx)
+    psadbw   m4, m3, [r1+%2]
+    psadbw   m5, m3, [r2+%2]
+    psadbw   m3, [r3+%2]
 %else
-    mova   xmm3, [r0+%1]
-    movu   xmm4, [r1+%2]
-    movu   xmm5, [r2+%2]
-    movu   xmm6, [r3+%2]
-    psadbw xmm4, xmm3
-    psadbw xmm5, xmm3
-    psadbw xmm6, xmm3
-    paddw  xmm0, xmm4
-    paddw  xmm1, xmm5
-    paddw  xmm2, xmm6
+    movu     m4, [r1+%2]
+    movu     m5, [r2+%2]
+    movu     m6, [r3+%2]
+    psadbw   m4, m3
+    psadbw   m5, m3
+    psadbw   m3, m6
 %endif
+    paddw    m0, m4
+    paddw    m1, m5
+    paddw    m2, m3
 %endmacro
 
+%if ARCH_X86_64
+    DECLARE_REG_TMP 6
+%else
+    DECLARE_REG_TMP 5
+%endif
+
 %macro SAD_X3_4x16P_SSE2 2
 %if %1==0
-%if UNIX64
-    mov  r6, r5
-%endif
-    lea  r5, [r4*3]
+    lea  t0, [r4*3]
     SAD_X3_START_1x16P_SSE2
 %else
     SAD_X3_1x16P_SSE2 FENC_STRIDE*(0+(%1&1)*4), r4*0
 %endif
     SAD_X3_1x16P_SSE2 FENC_STRIDE*(1+(%1&1)*4), r4*1
     SAD_X3_1x16P_SSE2 FENC_STRIDE*(2+(%1&1)*4), r4*2
-    SAD_X3_1x16P_SSE2 FENC_STRIDE*(3+(%1&1)*4), r5
+    SAD_X3_1x16P_SSE2 FENC_STRIDE*(3+(%1&1)*4), t0
 %if %1 != %2-1
 %if (%1&1) != 0
     add  r0, 8*FENC_STRIDE
@@ -1076,156 +1069,117 @@
 %endmacro
 
 %macro SAD_X3_START_2x8P_SSE2 0
-    movq    xmm7, [r0]
-    movq    xmm0, [r1]
-    movq    xmm1, [r2]
-    movq    xmm2, [r3]
-    movhps  xmm7, [r0+FENC_STRIDE]
-    movhps  xmm0, [r1+r4]
-    movhps  xmm1, [r2+r4]
-    movhps  xmm2, [r3+r4]
-    psadbw  xmm0, xmm7
-    psadbw  xmm1, xmm7
-    psadbw  xmm2, xmm7
+    movq     m3, [r0]
+    movq     m0, [r1]
+    movq     m1, [r2]
+    movq     m2, [r3]
+    movhps   m3, [r0+FENC_STRIDE]
+    movhps   m0, [r1+r4]
+    movhps   m1, [r2+r4]
+    movhps   m2, [r3+r4]
+    psadbw   m0, m3
+    psadbw   m1, m3
+    psadbw   m2, m3
 %endmacro
 
 %macro SAD_X3_2x8P_SSE2 4
-    movq    xmm7, [r0+%1]
-    movq    xmm3, [r1+%2]
-    movq    xmm4, [r2+%2]
-    movq    xmm5, [r3+%2]
-    movhps  xmm7, [r0+%3]
-    movhps  xmm3, [r1+%4]
-    movhps  xmm4, [r2+%4]
-    movhps  xmm5, [r3+%4]
-    psadbw  xmm3, xmm7
-    psadbw  xmm4, xmm7
-    psadbw  xmm5, xmm7
-    paddw   xmm0, xmm3
-    paddw   xmm1, xmm4
-    paddw   xmm2, xmm5
+    movq     m6, [r0+%1]
+    movq     m3, [r1+%2]
+    movq     m4, [r2+%2]
+    movq     m5, [r3+%2]
+    movhps   m6, [r0+%3]
+    movhps   m3, [r1+%4]
+    movhps   m4, [r2+%4]
+    movhps   m5, [r3+%4]
+    psadbw   m3, m6
+    psadbw   m4, m6
+    psadbw   m5, m6
+    paddw    m0, m3
+    paddw    m1, m4
+    paddw    m2, m5
 %endmacro
 
 %macro SAD_X4_START_2x8P_SSE2 0
-    movq    xmm7, [r0]
-    movq    xmm0, [r1]
-    movq    xmm1, [r2]
-    movq    xmm2, [r3]
-    movq    xmm3, [r4]
-    movhps  xmm7, [r0+FENC_STRIDE]
-    movhps  xmm0, [r1+r5]
-    movhps  xmm1, [r2+r5]
-    movhps  xmm2, [r3+r5]
-    movhps  xmm3, [r4+r5]
-    psadbw  xmm0, xmm7
-    psadbw  xmm1, xmm7
-    psadbw  xmm2, xmm7
-    psadbw  xmm3, xmm7
+    movq     m4, [r0]
+    movq     m0, [r1]
+    movq     m1, [r2]
+    movq     m2, [r3]
+    movq     m3, [r4]
+    movhps   m4, [r0+FENC_STRIDE]
+    movhps   m0, [r1+r5]
+    movhps   m1, [r2+r5]
+    movhps   m2, [r3+r5]
+    movhps   m3, [r4+r5]
+    psadbw   m0, m4
+    psadbw   m1, m4
+    psadbw   m2, m4
+    psadbw   m3, m4
 %endmacro
 
 %macro SAD_X4_2x8P_SSE2 4
-    movq    xmm7, [r0+%1]
-    movq    xmm4, [r1+%2]
-    movq    xmm5, [r2+%2]
-%if ARCH_X86_64
-    movq    xmm6, [r3+%2]
-    movq    xmm8, [r4+%2]
-    movhps  xmm7, [r0+%3]
-    movhps  xmm4, [r1+%4]
-    movhps  xmm5, [r2+%4]
-    movhps  xmm6, [r3+%4]
-    movhps  xmm8, [r4+%4]
-    psadbw  xmm4, xmm7
-    psadbw  xmm5, xmm7
-    psadbw  xmm6, xmm7
-    psadbw  xmm8, xmm7
-    paddw   xmm0, xmm4
-    paddw   xmm1, xmm5
-    paddw   xmm2, xmm6
-    paddw   xmm3, xmm8
-%else
-    movhps  xmm7, [r0+%3]
-    movhps  xmm4, [r1+%4]
-    movhps  xmm5, [r2+%4]
-    psadbw  xmm4, xmm7
-    psadbw  xmm5, xmm7
-    paddw   xmm0, xmm4
-    paddw   xmm1, xmm5
-    movq    xmm6, [r3+%2]
-    movq    xmm4, [r4+%2]
-    movhps  xmm6, [r3+%4]
-    movhps  xmm4, [r4+%4]
-    psadbw  xmm6, xmm7
-    psadbw  xmm4, xmm7
-    paddw   xmm2, xmm6
-    paddw   xmm3, xmm4
-%endif
+    movq     m6, [r0+%1]
+    movq     m4, [r1+%2]
+    movq     m5, [r2+%2]
+    movhps   m6, [r0+%3]
+    movhps   m4, [r1+%4]
+    movhps   m5, [r2+%4]
+    psadbw   m4, m6
+    psadbw   m5, m6
+    paddw    m0, m4
+    paddw    m1, m5
+    movq     m4, [r3+%2]
+    movq     m5, [r4+%2]
+    movhps   m4, [r3+%4]
+    movhps   m5, [r4+%4]
+    psadbw   m4, m6
+    psadbw   m5, m6
+    paddw    m2, m4
+    paddw    m3, m5
 %endmacro
 
 %macro SAD_X4_START_1x16P_SSE2 0
-%if cpuflag(misalign)
-    mova   xmm3, [r0]
-    movu   xmm0, [r1]
-    movu   xmm1, [r2]
-    movu   xmm2, [r3]
-    psadbw xmm0, xmm3
-    psadbw xmm1, xmm3
-    psadbw xmm2, xmm3
-    psadbw xmm3, [r4]
+    mova     m3, [r0]
+%if cpuflag(avx)
+    psadbw   m0, m3, [r1]
+    psadbw   m1, m3, [r2]
+    psadbw   m2, m3, [r3]
+    psadbw   m3, [r4]
 %else
-    mova   xmm7, [r0]
-    movu   xmm0, [r1]
-    movu   xmm1, [r2]
-    movu   xmm2, [r3]
-    movu   xmm3, [r4]
-    psadbw xmm0, xmm7
-    psadbw xmm1, xmm7
-    psadbw xmm2, xmm7
-    psadbw xmm3, xmm7
+    movu     m0, [r1]
+    movu     m1, [r2]
+    movu     m2, [r3]
+    movu     m4, [r4]
+    psadbw   m0, m3
+    psadbw   m1, m3
+    psadbw   m2, m3
+    psadbw   m3, m4
 %endif
 %endmacro
 
 %macro SAD_X4_1x16P_SSE2 2
-%if cpuflag(misalign)
-    mova   xmm7, [r0+%1]
-    movu   xmm4, [r1+%2]
-    movu   xmm5, [r2+%2]
-    movu   xmm6, [r3+%2]
-    psadbw xmm4, xmm7
-    psadbw xmm5, xmm7
-    psadbw xmm6, xmm7
-    psadbw xmm7, [r4+%2]
-    paddw  xmm0, xmm4
-    paddw  xmm1, xmm5
-    paddw  xmm2, xmm6
-    paddw  xmm3, xmm7
+    mova     m6, [r0+%1]
+%if cpuflag(avx)
+    psadbw   m4, m6, [r1+%2]
+    psadbw   m5, m6, [r2+%2]
 %else
-    mova   xmm7, [r0+%1]
-    movu   xmm4, [r1+%2]
-    movu   xmm5, [r2+%2]
-    movu   xmm6, [r3+%2]
-%if ARCH_X86_64
-    movu   xmm8, [r4+%2]
-    psadbw xmm4, xmm7
-    psadbw xmm5, xmm7
-    psadbw xmm6, xmm7
-    psadbw xmm8, xmm7
-    paddw  xmm0, xmm4
-    paddw  xmm1, xmm5
-    paddw  xmm2, xmm6
-    paddw  xmm3, xmm8
-%else
-    psadbw xmm4, xmm7
-    psadbw xmm5, xmm7
-    paddw  xmm0, xmm4
-    psadbw xmm6, xmm7
-    movu   xmm4, [r4+%2]
-    paddw  xmm1, xmm5
-    psadbw xmm4, xmm7
-    paddw  xmm2, xmm6
-    paddw  xmm3, xmm4
+    movu     m4, [r1+%2]
+    movu     m5, [r2+%2]
+    psadbw   m4, m6
+    psadbw   m5, m6
 %endif
+    paddw    m0, m4
+    paddw    m1, m5
+%if cpuflag(avx)
+    psadbw   m4, m6, [r3+%2]
+    psadbw   m5, m6, [r4+%2]
+%else
+    movu     m4, [r3+%2]
+    movu     m5, [r4+%2]
+    psadbw   m4, m6
+    psadbw   m5, m6
 %endif
+    paddw    m2, m4
+    paddw    m3, m5
 %endmacro
 
 %macro SAD_X4_4x16P_SSE2 2
@@ -1251,15 +1205,12 @@
 
 %macro SAD_X3_4x8P_SSE2 2
 %if %1==0
-%if UNIX64
-    mov  r6, r5
-%endif
-    lea  r5, [r4*3]
+    lea  t0, [r4*3]
     SAD_X3_START_2x8P_SSE2
 %else
     SAD_X3_2x8P_SSE2 FENC_STRIDE*(0+(%1&1)*4), r4*0, FENC_STRIDE*(1+(%1&1)*4), r4*1
 %endif
-    SAD_X3_2x8P_SSE2 FENC_STRIDE*(2+(%1&1)*4), r4*2, FENC_STRIDE*(3+(%1&1)*4), r5
+    SAD_X3_2x8P_SSE2 FENC_STRIDE*(2+(%1&1)*4), r4*2, FENC_STRIDE*(3+(%1&1)*4), t0
 %if %1 != %2-1
 %if (%1&1) != 0
     add  r0, 8*FENC_STRIDE
@@ -1290,78 +1241,86 @@
 %endmacro
 
 %macro SAD_X3_END_SSE2 0
-    movhlps xmm4, xmm0
-    movhlps xmm5, xmm1
-    movhlps xmm6, xmm2
-    paddw   xmm0, xmm4
-    paddw   xmm1, xmm5
-    paddw   xmm2, xmm6
-%if UNIX64
-    movd [r6+0], xmm0
-    movd [r6+4], xmm1
-    movd [r6+8], xmm2
+    movifnidn r5, r5mp
+%if cpuflag(ssse3)
+    packssdw m0, m1
+    packssdw m2, m2
+    phaddd   m0, m2
+    mova   [r5], m0
 %else
-    mov      r0, r5mp
-    movd [r0+0], xmm0
-    movd [r0+4], xmm1
-    movd [r0+8], xmm2
+    movhlps  m3, m0
+    movhlps  m4, m1
+    movhlps  m5, m2
+    paddw    m0, m3
+    paddw    m1, m4
+    paddw    m2, m5
+    movd [r5+0], m0
+    movd [r5+4], m1
+    movd [r5+8], m2
 %endif
     RET
 %endmacro
 
 %macro SAD_X4_END_SSE2 0
-    mov       r0, r6mp
-    psllq   xmm1, 32
-    psllq   xmm3, 32
-    paddw   xmm0, xmm1
-    paddw   xmm2, xmm3
-    movhlps xmm1, xmm0
-    movhlps xmm3, xmm2
-    paddw   xmm0, xmm1
-    paddw   xmm2, xmm3
-    movq  [r0+0], xmm0
-    movq  [r0+8], xmm2
+    mov      r0, r6mp
+%if cpuflag(ssse3)
+    packssdw m0, m1
+    packssdw m2, m3
+    phaddd   m0, m2
+    mova   [r0], m0
+%else
+    psllq    m1, 32
+    psllq    m3, 32
+    paddw    m0, m1
+    paddw    m2, m3
+    movhlps  m1, m0
+    movhlps  m3, m2
+    paddw    m0, m1
+    paddw    m2, m3
+    movq [r0+0], m0
+    movq [r0+8], m2
+%endif
     RET
 %endmacro
 
 %macro SAD_X4_START_2x8P_SSSE3 0
-    movddup xmm4, [r0]
-    movq    xmm0, [r1]
-    movq    xmm1, [r3]
-    movhps  xmm0, [r2]
-    movhps  xmm1, [r4]
-    movddup xmm5, [r0+FENC_STRIDE]
-    movq    xmm2, [r1+r5]
-    movq    xmm3, [r3+r5]
-    movhps  xmm2, [r2+r5]
-    movhps  xmm3, [r4+r5]
-    psadbw  xmm0, xmm4
-    psadbw  xmm1, xmm4
-    psadbw  xmm2, xmm5
-    psadbw  xmm3, xmm5
-    paddw   xmm0, xmm2
-    paddw   xmm1, xmm3
+    movddup  m4, [r0]
+    movq     m0, [r1]
+    movq     m1, [r3]
+    movhps   m0, [r2]
+    movhps   m1, [r4]
+    movddup  m5, [r0+FENC_STRIDE]
+    movq     m2, [r1+r5]
+    movq     m3, [r3+r5]
+    movhps   m2, [r2+r5]
+    movhps   m3, [r4+r5]
+    psadbw   m0, m4
+    psadbw   m1, m4
+    psadbw   m2, m5
+    psadbw   m3, m5
+    paddw    m0, m2
+    paddw    m1, m3
 %endmacro
 
 %macro SAD_X4_2x8P_SSSE3 4
-    movddup xmm6, [r0+%1]
-    movq    xmm2, [r1+%2]
-    movq    xmm3, [r3+%2]
-    movhps  xmm2, [r2+%2]
-    movhps  xmm3, [r4+%2]
-    movddup xmm7, [r0+%3]
-    movq    xmm4, [r1+%4]
-    movq    xmm5, [r3+%4]
-    movhps  xmm4, [r2+%4]
-    movhps  xmm5, [r4+%4]
-    psadbw  xmm2, xmm6
-    psadbw  xmm3, xmm6
-    psadbw  xmm4, xmm7
-    psadbw  xmm5, xmm7
-    paddw   xmm0, xmm2
-    paddw   xmm1, xmm3
-    paddw   xmm0, xmm4
-    paddw   xmm1, xmm5
+    movddup  m6, [r0+%1]
+    movq     m2, [r1+%2]
+    movq     m3, [r3+%2]
+    movhps   m2, [r2+%2]
+    movhps   m3, [r4+%2]
+    movddup  m7, [r0+%3]
+    movq     m4, [r1+%4]
+    movq     m5, [r3+%4]
+    movhps   m4, [r2+%4]
+    movhps   m5, [r4+%4]
+    psadbw   m2, m6
+    psadbw   m3, m6
+    psadbw   m4, m7
+    psadbw   m5, m7
+    paddw    m0, m2
+    paddw    m1, m3
+    paddw    m0, m4
+    paddw    m1, m5
 %endmacro
 
 %macro SAD_X4_4x8P_SSSE3 2
@@ -1384,9 +1343,9 @@
 %endmacro
 
 %macro SAD_X4_END_SSSE3 0
-    mov       r0, r6mp
-    packssdw xmm0, xmm1
-    movdqa  [r0], xmm0
+    mov      r0, r6mp
+    packssdw m0, m1
+    mova   [r0], m0
     RET
 %endmacro
 
@@ -1421,15 +1380,12 @@
 
 %macro SAD_X3_4x16P_AVX2 2
 %if %1==0
-%if UNIX64
-    mov  r6, r5
-%endif
-    lea  r5, [r4*3]
+    lea  t0, [r4*3]
     SAD_X3_START_2x16P_AVX2
 %else
     SAD_X3_2x16P_AVX2 FENC_STRIDE*(0+(%1&1)*4), r4*0, r4*1
 %endif
-    SAD_X3_2x16P_AVX2 FENC_STRIDE*(2+(%1&1)*4), r4*2, r5
+    SAD_X3_2x16P_AVX2 FENC_STRIDE*(2+(%1&1)*4), r4*2, t0
 %if %1 != %2-1
 %if (%1&1) != 0
     add  r0, 8*FENC_STRIDE
@@ -1444,12 +1400,12 @@
     vbroadcasti128 m4, [r0]
     vbroadcasti128 m5, [r0+FENC_STRIDE]
     movu   xm0, [r1]
-    movu   xm1, [r3]
+    movu   xm1, [r2]
     movu   xm2, [r1+r5]
-    movu   xm3, [r3+r5]
-    vinserti128 m0, m0, [r2], 1
+    movu   xm3, [r2+r5]
+    vinserti128 m0, m0, [r3], 1
     vinserti128 m1, m1, [r4], 1
-    vinserti128 m2, m2, [r2+r5], 1
+    vinserti128 m2, m2, [r3+r5], 1
     vinserti128 m3, m3, [r4+r5], 1
     psadbw  m0, m4
     psadbw  m1, m4
@@ -1463,12 +1419,12 @@
     vbroadcasti128 m6, [r0+%1]
     vbroadcasti128 m7, [r0+%3]
     movu   xm2, [r1+%2]
-    movu   xm3, [r3+%2]
+    movu   xm3, [r2+%2]
     movu   xm4, [r1+%4]
-    movu   xm5, [r3+%4]
-    vinserti128 m2, m2, [r2+%2], 1
+    movu   xm5, [r2+%4]
+    vinserti128 m2, m2, [r3+%2], 1
     vinserti128 m3, m3, [r4+%2], 1
-    vinserti128 m4, m4, [r2+%4], 1
+    vinserti128 m4, m4, [r3+%4], 1
     vinserti128 m5, m5, [r4+%4], 1
     psadbw  m2, m6
     psadbw  m3, m6
@@ -1500,41 +1456,22 @@
 %endmacro
 
 %macro SAD_X3_END_AVX2 0
-    vextracti128 xm4, m0, 1
-    vextracti128 xm5, m1, 1
-    vextracti128 xm6, m2, 1
-    paddw   xm0, xm4
-    paddw   xm1, xm5
-    paddw   xm2, xm6
-    movhlps xm4, xm0
-    movhlps xm5, xm1
-    movhlps xm6, xm2
-    paddw   xm0, xm4
-    paddw   xm1, xm5
-    paddw   xm2, xm6
-%if UNIX64
-    movd [r6+0], xm0
-    movd [r6+4], xm1
-    movd [r6+8], xm2
-%else
-    mov      r0, r5mp
-    movd [r0+0], xm0
-    movd [r0+4], xm1
-    movd [r0+8], xm2
-%endif
+    movifnidn r5, r5mp
+    packssdw  m0, m1        ; 0 0 1 1 0 0 1 1
+    packssdw  m2, m2        ; 2 2 _ _ 2 2 _ _
+    phaddd    m0, m2        ; 0 1 2 _ 0 1 2 _
+    vextracti128 xm1, m0, 1
+    paddd    xm0, xm1       ; 0 1 2 _
+    mova    [r5], xm0
     RET
 %endmacro
 
 %macro SAD_X4_END_AVX2 0
-    mov      r0, r6mp
-    punpckhqdq m2, m0, m0
-    punpckhqdq m3, m1, m1
-    paddw    m0, m2
-    paddw    m1, m3
-    packssdw m0, m1
-    mova    xm2, [deinterleave_sadx4]
-    vpermd   m0, m2, m0
-    mova   [r0], xm0
+    mov       r0, r6mp
+    packssdw  m0, m1        ; 0 0 1 1 2 2 3 3
+    vextracti128 xm1, m0, 1
+    phaddd   xm0, xm1       ; 0 1 2 3
+    mova    [r0], xm0
     RET
 %endmacro
 
@@ -1542,8 +1479,8 @@
 ; void pixel_sad_x3_16x16( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1,
 ;                          uint8_t *pix2, intptr_t i_stride, int scores[3] )
 ;-----------------------------------------------------------------------------
-%macro SAD_X_SSE2 3
-cglobal pixel_sad_x%1_%2x%3, 2+%1,3+%1,9
+%macro SAD_X_SSE2 4
+cglobal pixel_sad_x%1_%2x%3, 2+%1,3+%1,%4
 %assign x 0
 %rep %3/4
     SAD_X%1_4x%2P_SSE2 x, %3/4
@@ -1553,28 +1490,22 @@
 %endmacro
 
 INIT_XMM sse2
-SAD_X_SSE2 3, 16, 16
-SAD_X_SSE2 3, 16,  8
-SAD_X_SSE2 3,  8, 16
-SAD_X_SSE2 3,  8,  8
-SAD_X_SSE2 3,  8,  4
-SAD_X_SSE2 4, 16, 16
-SAD_X_SSE2 4, 16,  8
-SAD_X_SSE2 4,  8, 16
-SAD_X_SSE2 4,  8,  8
-SAD_X_SSE2 4,  8,  4
-
-INIT_XMM sse2, misalign
-SAD_X_SSE2 3, 16, 16
-SAD_X_SSE2 3, 16,  8
-SAD_X_SSE2 4, 16, 16
-SAD_X_SSE2 4, 16,  8
+SAD_X_SSE2 3, 16, 16, 7
+SAD_X_SSE2 3, 16,  8, 7
+SAD_X_SSE2 3,  8, 16, 7
+SAD_X_SSE2 3,  8,  8, 7
+SAD_X_SSE2 3,  8,  4, 7
+SAD_X_SSE2 4, 16, 16, 7
+SAD_X_SSE2 4, 16,  8, 7
+SAD_X_SSE2 4,  8, 16, 7
+SAD_X_SSE2 4,  8,  8, 7
+SAD_X_SSE2 4,  8,  4, 7
 
 INIT_XMM sse3
-SAD_X_SSE2 3, 16, 16
-SAD_X_SSE2 3, 16,  8
-SAD_X_SSE2 4, 16, 16
-SAD_X_SSE2 4, 16,  8
+SAD_X_SSE2 3, 16, 16, 7
+SAD_X_SSE2 3, 16,  8, 7
+SAD_X_SSE2 4, 16, 16, 7
+SAD_X_SSE2 4, 16,  8, 7
 
 %macro SAD_X_SSSE3 3
 cglobal pixel_sad_x%1_%2x%3, 2+%1,3+%1,8
@@ -1587,9 +1518,19 @@
 %endmacro
 
 INIT_XMM ssse3
-SAD_X_SSSE3 4, 8, 16
-SAD_X_SSSE3 4, 8,  8
-SAD_X_SSSE3 4, 8,  4
+SAD_X_SSE2  3, 16, 16, 7
+SAD_X_SSE2  3, 16,  8, 7
+SAD_X_SSE2  4, 16, 16, 7
+SAD_X_SSE2  4, 16,  8, 7
+SAD_X_SSSE3 4,  8, 16
+SAD_X_SSSE3 4,  8,  8
+SAD_X_SSSE3 4,  8,  4
+
+INIT_XMM avx
+SAD_X_SSE2 3, 16, 16, 6
+SAD_X_SSE2 3, 16,  8, 6
+SAD_X_SSE2 4, 16, 16, 7
+SAD_X_SSE2 4, 16,  8, 7
 
 %macro SAD_X_AVX2 4
 cglobal pixel_sad_x%1_%2x%3, 2+%1,3+%1,%4
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/sad16-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/sad16-a.asm Changed

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* sad16-a.asm: x86 high depth sad functions
 ;*****************************************************************************
-;* Copyright (C) 2010-2013 x264 project
+;* Copyright (C) 2010-2014 x264 project
 ;*
 ;* Authors: Oskar Arvidsson <oskar@irock.se>
 ;*          Henrik Gramner <henrik@gramner.com>
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/trellis-64.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/trellis-64.asm Changed

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* trellis-64.asm: x86_64 trellis quantization
 ;*****************************************************************************
-;* Copyright (C) 2012-2013 x264 project
+;* Copyright (C) 2012-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/util.h -> x264-snapshot-20140321-2245.tar.bz2/common/x86/util.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * util.h: x86 inline asm
  *****************************************************************************
- * Copyright (C) 2008-2013 x264 project
+ * Copyright (C) 2008-2014 x264 project
  *
  * Authors: Jason Garrett-Glaser <darkshikari@gmail.com>
  *          Loren Merritt <lorenm@u.washington.edu>
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/x86inc.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/x86inc.asm Changed

@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* x86inc.asm: x264asm abstraction layer
 ;*****************************************************************************
-;* Copyright (C) 2005-2013 x264 project
+;* Copyright (C) 2005-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Anton Mitrofanov <BugMaster@narod.ru>
@@ -42,6 +42,14 @@
     %define public_prefix private_prefix
 %endif
 
+%ifndef STACK_ALIGNMENT
+    %if ARCH_X86_64
+        %define STACK_ALIGNMENT 16
+    %else
+        %define STACK_ALIGNMENT 4
+    %endif
+%endif
+
 %define WIN64  0
 %define UNIX64 0
 %if ARCH_X86_64
@@ -49,6 +57,8 @@
         %define WIN64  1
     %elifidn __OUTPUT_FORMAT__,win64
         %define WIN64  1
+    %elifidn __OUTPUT_FORMAT__,x64
+        %define WIN64  1
     %else
         %define UNIX64 1
     %endif
@@ -92,8 +102,9 @@
 ; %1 = number of arguments. loads them from stack if needed.
 ; %2 = number of registers used. pushes callee-saved regs if needed.
 ; %3 = number of xmm registers used. pushes callee-saved xmm regs if needed.
-; %4 = (optional) stack size to be allocated. If not aligned (x86-32 ICC 10.x,
-;      MSVC or YMM), the stack will be manually aligned (to 16 or 32 bytes),
+; %4 = (optional) stack size to be allocated. The stack will be aligned before
+;      allocating the specified stack size. If the required stack alignment is
+;      larger than the known stack alignment the stack will be manually aligned
 ;      and an extra register will be allocated to hold the original stack
 ;      pointer (to not invalidate r0m etc.). To prevent the use of an extra
 ;      register as stack pointer, request a negative stack size.
@@ -101,8 +112,10 @@
 ; PROLOGUE can also be invoked by adding the same options to cglobal
 
 ; e.g.
-; cglobal foo, 2,3,0, dst, src, tmp
-; declares a function (foo), taking two args (dst and src) and one local variable (tmp)
+; cglobal foo, 2,3,7,0x40, dst, src, tmp
+; declares a function (foo) that automatically loads two arguments (dst and
+; src) into registers, uses one additional register (tmp) plus 7 vector
+; registers (m0-m6) and allocates 0x40 bytes of stack space.
 
 ; TODO Some functions can use some args directly from the stack. If they're the
 ; last args then you can just not declare them, but if they're in the middle
@@ -302,26 +315,28 @@
     %assign n_arg_names %0
 %endmacro
 
+%define required_stack_alignment ((mmsize + 15) & ~15)
+
 %macro ALLOC_STACK 1-2 0 ; stack_size, n_xmm_regs (for win64 only)
     %ifnum %1
         %if %1 != 0
-            %assign %%stack_alignment ((mmsize + 15) & ~15)
+            %assign %%pad 0
             %assign stack_size %1
             %if stack_size < 0
                 %assign stack_size -stack_size
             %endif
-            %assign stack_size_padded stack_size
             %if WIN64
-                %assign stack_size_padded stack_size_padded + 32 ; reserve 32 bytes for shadow space
+                %assign %%pad %%pad + 32 ; shadow space
                 %if mmsize != 8
                     %assign xmm_regs_used %2
                     %if xmm_regs_used > 8
-                        %assign stack_size_padded stack_size_padded + (xmm_regs_used-8)*16
+                        %assign %%pad %%pad + (xmm_regs_used-8)*16 ; callee-saved xmm registers
                     %endif
                 %endif
             %endif
-            %if mmsize <= 16 && HAVE_ALIGNED_STACK
-                %assign stack_size_padded stack_size_padded + %%stack_alignment - gprsize - (stack_offset & (%%stack_alignment - 1))
+            %if required_stack_alignment <= STACK_ALIGNMENT
+                ; maintain the current stack alignment
+                %assign stack_size_padded stack_size + %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1))
                 SUB rsp, stack_size_padded
             %else
                 %assign %%reg_num (regs_used - 1)
@@ -330,17 +345,17 @@
                 ; it, i.e. in [rsp+stack_size_padded], so we can restore the
                 ; stack in a single instruction (i.e. mov rsp, rstk or mov
                 ; rsp, [rsp+stack_size_padded])
-                mov  rstk, rsp
                 %if %1 < 0 ; need to store rsp on stack
-                    sub  rsp, gprsize+stack_size_padded
-                    and  rsp, ~(%%stack_alignment-1)
-                    %xdefine rstkm [rsp+stack_size_padded]
-                    mov rstkm, rstk
+                    %xdefine rstkm [rsp + stack_size + %%pad]
+                    %assign %%pad %%pad + gprsize
                 %else ; can keep rsp in rstk during whole function
-                    sub  rsp, stack_size_padded
-                    and  rsp, ~(%%stack_alignment-1)
                     %xdefine rstkm rstk
                 %endif
+                %assign stack_size_padded stack_size + ((%%pad + required_stack_alignment-1) & ~(required_stack_alignment-1))
+                mov rstk, rsp
+                and rsp, ~(required_stack_alignment-1)
+                sub rsp, stack_size_padded
+                movifnidn rstkm, rstk
             %endif
             WIN64_PUSH_XMM
         %endif
@@ -349,7 +364,7 @@
 
 %macro SETUP_STACK_POINTER 1
     %ifnum %1
-        %if %1 != 0 && (HAVE_ALIGNED_STACK == 0 || mmsize == 32)
+        %if %1 != 0 && required_stack_alignment > STACK_ALIGNMENT
             %if %1 > 0
                 %assign regs_used (regs_used + 1)
             %elif ARCH_X86_64 && regs_used == num_args && num_args <= 4 + UNIX64 * 2
@@ -423,7 +438,9 @@
     %assign xmm_regs_used %1
     ASSERT xmm_regs_used <= 16
     %if xmm_regs_used > 8
-        %assign stack_size_padded (xmm_regs_used-8)*16 + (~stack_offset&8) + 32
+        ; Allocate stack space for callee-saved xmm registers plus shadow space and align the stack.
+        %assign %%pad (xmm_regs_used-8)*16 + 32
+        %assign stack_size_padded %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1))
         SUB rsp, stack_size_padded
     %endif
     WIN64_PUSH_XMM
@@ -439,7 +456,7 @@
         %endrep
     %endif
     %if stack_size_padded > 0
-        %if stack_size > 0 && (mmsize == 32 || HAVE_ALIGNED_STACK == 0)
+        %if stack_size > 0 && required_stack_alignment > STACK_ALIGNMENT
             mov rsp, rstkm
         %else
             add %1, stack_size_padded
@@ -505,7 +522,7 @@
 
 %macro RET 0
 %if stack_size_padded > 0
-%if mmsize == 32 || HAVE_ALIGNED_STACK == 0
+%if required_stack_alignment > STACK_ALIGNMENT
     mov rsp, rstkm
 %else
     add rsp, stack_size_padded
@@ -561,7 +578,7 @@
 
 %macro RET 0
 %if stack_size_padded > 0
-%if mmsize == 32 || HAVE_ALIGNED_STACK == 0
+%if required_stack_alignment > STACK_ALIGNMENT
     mov rsp, rstkm
 %else
     add rsp, stack_size_padded
@@ -731,11 +748,10 @@
 %assign cpuflags_cache64  (1<<17)
 %assign cpuflags_slowctz  (1<<18)
 %assign cpuflags_lzcnt    (1<<19)
-%assign cpuflags_misalign (1<<20)
-%assign cpuflags_aligned  (1<<21) ; not a cpu feature, but a function variant
-%assign cpuflags_atom     (1<<22)
-%assign cpuflags_bmi1     (1<<23)|cpuflags_lzcnt
-%assign cpuflags_bmi2     (1<<24)|cpuflags_bmi1
+%assign cpuflags_aligned  (1<<20) ; not a cpu feature, but a function variant
+%assign cpuflags_atom     (1<<21)
+%assign cpuflags_bmi1     (1<<22)|cpuflags_lzcnt
+%assign cpuflags_bmi2     (1<<23)|cpuflags_bmi1
 
 %define    cpuflag(x) ((cpuflags & (cpuflags_ %+ x)) == (cpuflags_ %+ x))
 %define notcpuflag(x) ((cpuflags & (cpuflags_ %+ x)) != (cpuflags_ %+ x))
@@ -777,9 +793,9 @@
 %endmacro
 
 ; Merge mmx and sse*
-; m# is a simd regsiter of the currently selected size
-; xm# is the corresponding xmmreg (if selcted xmm or ymm size), or mmreg (if selected mmx)
-; ym# is the corresponding ymmreg (if selcted xmm or ymm size), or mmreg (if selected mmx)
+; m# is a simd register of the currently selected size
+; xm# is the corresponding xmm register if mmsize >= 16, otherwise the same as m#
+; ym# is the corresponding ymm register if mmsize >= 32, otherwise the same as m#
 ; (All 3 remain in sync through SWAP.)
 
 %macro CAT_XDEFINE 3
@@ -802,12 +818,12 @@
     %assign %%i 0
     %rep 8
     CAT_XDEFINE m, %%i, mm %+ %%i
-    CAT_XDEFINE nmm, %%i, %%i
+    CAT_XDEFINE nnmm, %%i, %%i
     %assign %%i %%i+1
     %endrep
     %rep 8
     CAT_UNDEF m, %%i
-    CAT_UNDEF nmm, %%i
+    CAT_UNDEF nnmm, %%i
     %assign %%i %%i+1
     %endrep
     INIT_CPUFLAGS %1
@@ -828,7 +844,7 @@
     %assign %%i 0
     %rep num_mmregs
     CAT_XDEFINE m, %%i, xmm %+ %%i
-    CAT_XDEFINE nxmm, %%i, %%i
+    CAT_XDEFINE nnxmm, %%i, %%i
     %assign %%i %%i+1
     %endrep
     INIT_CPUFLAGS %1
@@ -865,7 +881,7 @@
     %define xmmxmm%1 xmm%1
     %define xmmymm%1 xmm%1
     %define ymmmm%1   mm%1
-    %define ymmxmm%1 ymm%1
+    %define ymmxmm%1 xmm%1
     %define ymmymm%1 ymm%1
     %define xm%1 xmm %+ m%1
     %define ym%1 ymm %+ m%1
@@ -898,7 +914,7 @@
 %endrep
 %rep %0/2
     %xdefine m%1 %%tmp%2
-    CAT_XDEFINE n, m%1, %1
+    CAT_XDEFINE nn, m%1, %1
     %rotate 2
 %endrep
 %endmacro
@@ -916,16 +932,16 @@
         %xdefine %%tmp m%1
         %xdefine m%1 m%2
         %xdefine m%2 %%tmp
-        CAT_XDEFINE n, m%1, %1
-        CAT_XDEFINE n, m%2, %2
+        CAT_XDEFINE nn, m%1, %1
+        CAT_XDEFINE nn, m%2, %2
     %rotate 1
     %endrep
 %endmacro
 
 %macro SWAP_INTERNAL_NAME 2-*
-    %xdefine %%args n %+ %1
+    %xdefine %%args nn %+ %1
     %rep %0-1
-        %xdefine %%args %%args, n %+ %2
+        %xdefine %%args %%args, nn %+ %2
     %rotate 1
     %endrep
     SWAP_INTERNAL_NUM %%args
@@ -952,7 +968,7 @@
         %assign %%i 0
         %rep num_mmregs
             CAT_XDEFINE m, %%i, %1_m %+ %%i
-            CAT_XDEFINE n, m %+ %%i, %%i
+            CAT_XDEFINE nn, m %+ %%i, %%i
         %assign %%i %%i+1
         %endrep
     %endif
@@ -1031,25 +1047,25 @@
 ;%5+: operands
 %macro RUN_AVX_INSTR 5-8+
     %ifnum sizeof%6
-        %assign %%sizeofreg sizeof%6
+        %assign __sizeofreg sizeof%6
     %elifnum sizeof%5
-        %assign %%sizeofreg sizeof%5
+        %assign __sizeofreg sizeof%5
     %else
-        %assign %%sizeofreg mmsize
+        %assign __sizeofreg mmsize
     %endif
-    %assign %%emulate_avx 0
-    %if avx_enabled && %%sizeofreg >= 16
-        %xdefine %%instr v%1
+    %assign __emulate_avx 0
+    %if avx_enabled && __sizeofreg >= 16
+        %xdefine __instr v%1
     %else
-        %xdefine %%instr %1
+        %xdefine __instr %1
         %if %0 >= 7+%3
-            %assign %%emulate_avx 1
+            %assign __emulate_avx 1
         %endif
     %endif
 
-    %if %%emulate_avx
-        %xdefine %%src1 %6
-        %xdefine %%src2 %7
+    %if __emulate_avx
+        %xdefine __src1 %6
+        %xdefine __src2 %7
         %ifnidn %5, %6
             %if %0 >= 8
                 CHECK_AVX_INSTR_EMU {%1 %5, %6, %7, %8}, %5, %7, %8
@@ -1061,31 +1077,31 @@
                     ; 3-operand AVX instructions with a memory arg can only have it in src2,
                     ; whereas SSE emulation prefers to have it in src1 (i.e. the mov).
                     ; So, if the instruction is commutative with a memory arg, swap them.
-                    %xdefine %%src1 %7
-                    %xdefine %%src2 %6
+                    %xdefine __src1 %7
+                    %xdefine __src2 %6
                 %endif
             %endif
-            %if %%sizeofreg == 8
-                MOVQ %5, %%src1
+            %if __sizeofreg == 8
+                MOVQ %5, __src1
             %elif %2
-                MOVAPS %5, %%src1
+                MOVAPS %5, __src1
             %else
-                MOVDQA %5, %%src1
+                MOVDQA %5, __src1
             %endif
         %endif
         %if %0 >= 8
-            %1 %5, %%src2, %8
+            %1 %5, __src2, %8
         %else
-            %1 %5, %%src2
+            %1 %5, __src2
         %endif
     %elif %0 >= 8
-        %%instr %5, %6, %7, %8
+        __instr %5, %6, %7, %8
     %elif %0 == 7
-        %%instr %5, %6, %7
+        __instr %5, %6, %7
     %elif %0 == 6
-        %%instr %5, %6
+        __instr %5, %6
     %else
-        %%instr %5
+        __instr %5
     %endif
 %endmacro
 
@@ -1384,15 +1400,18 @@
     %macro %1 4-7 %1, %2, %3
         %if cpuflag(xop)
             v%5 %1, %2, %3, %4
-        %else
+        %elifnidn %1, %4
             %6 %1, %2, %3
             %7 %1, %4
+        %else
+            %error non-xop emulation of ``%5 %1, %2, %3, %4'' is not supported
         %endif
     %endmacro
 %endmacro
 
-FMA_INSTR  pmacsdd,  pmulld, paddd
 FMA_INSTR  pmacsww,  pmullw, paddw
+FMA_INSTR  pmacsdd,  pmulld, paddd ; sse4 emulation
+FMA_INSTR pmacsdql,  pmuldq, paddq ; sse4 emulation
 FMA_INSTR pmadcswd, pmaddwd, paddd
 
 ; convert FMA4 to FMA3 if possible

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* x86inc.asm: x264asm abstraction layer
 ;*****************************************************************************
-;* Copyright (C) 2005-2013 x264 project
+;* Copyright (C) 2005-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Anton Mitrofanov <BugMaster@narod.ru>
@@ -42,6 +42,14 @@
     %define public_prefix private_prefix
 %endif
 
+%ifndef STACK_ALIGNMENT
+    %if ARCH_X86_64
+        %define STACK_ALIGNMENT 16
+    %else
+        %define STACK_ALIGNMENT 4
+    %endif
+%endif
+
 %define WIN64  0
 %define UNIX64 0
 %if ARCH_X86_64
@@ -49,6 +57,8 @@
         %define WIN64  1
     %elifidn __OUTPUT_FORMAT__,win64
         %define WIN64  1
+    %elifidn __OUTPUT_FORMAT__,x64
+        %define WIN64  1
     %else
         %define UNIX64 1
     %endif
@@ -92,8 +102,9 @@
 ; %1 = number of arguments. loads them from stack if needed.
 ; %2 = number of registers used. pushes callee-saved regs if needed.
 ; %3 = number of xmm registers used. pushes callee-saved xmm regs if needed.
-; %4 = (optional) stack size to be allocated. If not aligned (x86-32 ICC 10.x,
-;      MSVC or YMM), the stack will be manually aligned (to 16 or 32 bytes),
+; %4 = (optional) stack size to be allocated. The stack will be aligned before
+;      allocating the specified stack size. If the required stack alignment is
+;      larger than the known stack alignment the stack will be manually aligned
 ;      and an extra register will be allocated to hold the original stack
 ;      pointer (to not invalidate r0m etc.). To prevent the use of an extra
 ;      register as stack pointer, request a negative stack size.
@@ -101,8 +112,10 @@
 ; PROLOGUE can also be invoked by adding the same options to cglobal
 
 ; e.g.
-; cglobal foo, 2,3,0, dst, src, tmp
-; declares a function (foo), taking two args (dst and src) and one local variable (tmp)
+; cglobal foo, 2,3,7,0x40, dst, src, tmp
+; declares a function (foo) that automatically loads two arguments (dst and
+; src) into registers, uses one additional register (tmp) plus 7 vector
+; registers (m0-m6) and allocates 0x40 bytes of stack space.
 
 ; TODO Some functions can use some args directly from the stack. If they're the
 ; last args then you can just not declare them, but if they're in the middle
@@ -302,26 +315,28 @@
     %assign n_arg_names %0
 %endmacro
 
+%define required_stack_alignment ((mmsize + 15) & ~15)
+
 %macro ALLOC_STACK 1-2 0 ; stack_size, n_xmm_regs (for win64 only)
     %ifnum %1
         %if %1 != 0
-            %assign %%stack_alignment ((mmsize + 15) & ~15)
+            %assign %%pad 0
             %assign stack_size %1
             %if stack_size < 0
                 %assign stack_size -stack_size
             %endif
-            %assign stack_size_padded stack_size
             %if WIN64
-                %assign stack_size_padded stack_size_padded + 32 ; reserve 32 bytes for shadow space
+                %assign %%pad %%pad + 32 ; shadow space
                 %if mmsize != 8
                     %assign xmm_regs_used %2
                     %if xmm_regs_used > 8
-                        %assign stack_size_padded stack_size_padded + (xmm_regs_used-8)*16
+                        %assign %%pad %%pad + (xmm_regs_used-8)*16 ; callee-saved xmm registers
                     %endif
                 %endif
             %endif
-            %if mmsize <= 16 && HAVE_ALIGNED_STACK
-                %assign stack_size_padded stack_size_padded + %%stack_alignment - gprsize - (stack_offset & (%%stack_alignment - 1))
+            %if required_stack_alignment <= STACK_ALIGNMENT
+                ; maintain the current stack alignment
+                %assign stack_size_padded stack_size + %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1))
                 SUB rsp, stack_size_padded
             %else
                 %assign %%reg_num (regs_used - 1)
@@ -330,17 +345,17 @@
                 ; it, i.e. in [rsp+stack_size_padded], so we can restore the
                 ; stack in a single instruction (i.e. mov rsp, rstk or mov
                 ; rsp, [rsp+stack_size_padded])
-                mov  rstk, rsp
                 %if %1 < 0 ; need to store rsp on stack
-                    sub  rsp, gprsize+stack_size_padded
-                    and  rsp, ~(%%stack_alignment-1)
-                    %xdefine rstkm [rsp+stack_size_padded]
-                    mov rstkm, rstk
+                    %xdefine rstkm [rsp + stack_size + %%pad]
+                    %assign %%pad %%pad + gprsize
                 %else ; can keep rsp in rstk during whole function
-                    sub  rsp, stack_size_padded
-                    and  rsp, ~(%%stack_alignment-1)
                     %xdefine rstkm rstk
                 %endif
+                %assign stack_size_padded stack_size + ((%%pad + required_stack_alignment-1) & ~(required_stack_alignment-1))
+                mov rstk, rsp
+                and rsp, ~(required_stack_alignment-1)
+                sub rsp, stack_size_padded
+                movifnidn rstkm, rstk
             %endif
             WIN64_PUSH_XMM
         %endif
@@ -349,7 +364,7 @@
 
 %macro SETUP_STACK_POINTER 1
     %ifnum %1
-        %if %1 != 0 && (HAVE_ALIGNED_STACK == 0 || mmsize == 32)
+        %if %1 != 0 && required_stack_alignment > STACK_ALIGNMENT
             %if %1 > 0
                 %assign regs_used (regs_used + 1)
             %elif ARCH_X86_64 && regs_used == num_args && num_args <= 4 + UNIX64 * 2
@@ -423,7 +438,9 @@
     %assign xmm_regs_used %1
     ASSERT xmm_regs_used <= 16
     %if xmm_regs_used > 8
-        %assign stack_size_padded (xmm_regs_used-8)*16 + (~stack_offset&8) + 32
+        ; Allocate stack space for callee-saved xmm registers plus shadow space and align the stack.
+        %assign %%pad (xmm_regs_used-8)*16 + 32
+        %assign stack_size_padded %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1))
         SUB rsp, stack_size_padded
     %endif
     WIN64_PUSH_XMM
@@ -439,7 +456,7 @@
         %endrep
     %endif
     %if stack_size_padded > 0
-        %if stack_size > 0 && (mmsize == 32 || HAVE_ALIGNED_STACK == 0)
+        %if stack_size > 0 && required_stack_alignment > STACK_ALIGNMENT
             mov rsp, rstkm
         %else
             add %1, stack_size_padded
@@ -505,7 +522,7 @@
 
 %macro RET 0
 %if stack_size_padded > 0
-%if mmsize == 32 || HAVE_ALIGNED_STACK == 0
+%if required_stack_alignment > STACK_ALIGNMENT
     mov rsp, rstkm
 %else
     add rsp, stack_size_padded
@@ -561,7 +578,7 @@
 
 %macro RET 0
 %if stack_size_padded > 0
-%if mmsize == 32 || HAVE_ALIGNED_STACK == 0
+%if required_stack_alignment > STACK_ALIGNMENT
     mov rsp, rstkm
 %else
     add rsp, stack_size_padded
@@ -731,11 +748,10 @@
 %assign cpuflags_cache64  (1<<17)
 %assign cpuflags_slowctz  (1<<18)
 %assign cpuflags_lzcnt    (1<<19)
-%assign cpuflags_misalign (1<<20)
-%assign cpuflags_aligned  (1<<21) ; not a cpu feature, but a function variant
-%assign cpuflags_atom     (1<<22)
-%assign cpuflags_bmi1     (1<<23)|cpuflags_lzcnt
-%assign cpuflags_bmi2     (1<<24)|cpuflags_bmi1
+%assign cpuflags_aligned  (1<<20) ; not a cpu feature, but a function variant
+%assign cpuflags_atom     (1<<21)
+%assign cpuflags_bmi1     (1<<22)|cpuflags_lzcnt
+%assign cpuflags_bmi2     (1<<23)|cpuflags_bmi1
 
 %define    cpuflag(x) ((cpuflags & (cpuflags_ %+ x)) == (cpuflags_ %+ x))
 %define notcpuflag(x) ((cpuflags & (cpuflags_ %+ x)) != (cpuflags_ %+ x))
@@ -777,9 +793,9 @@
 %endmacro
 
 ; Merge mmx and sse*
-; m# is a simd regsiter of the currently selected size
-; xm# is the corresponding xmmreg (if selcted xmm or ymm size), or mmreg (if selected mmx)
-; ym# is the corresponding ymmreg (if selcted xmm or ymm size), or mmreg (if selected mmx)
+; m# is a simd register of the currently selected size
+; xm# is the corresponding xmm register if mmsize >= 16, otherwise the same as m#
+; ym# is the corresponding ymm register if mmsize >= 32, otherwise the same as m#
 ; (All 3 remain in sync through SWAP.)
 
 %macro CAT_XDEFINE 3
@@ -802,12 +818,12 @@
     %assign %%i 0
     %rep 8
     CAT_XDEFINE m, %%i, mm %+ %%i
-    CAT_XDEFINE nmm, %%i, %%i
+    CAT_XDEFINE nnmm, %%i, %%i
     %assign %%i %%i+1
     %endrep
     %rep 8
     CAT_UNDEF m, %%i
-    CAT_UNDEF nmm, %%i
+    CAT_UNDEF nnmm, %%i
     %assign %%i %%i+1
     %endrep
     INIT_CPUFLAGS %1
@@ -828,7 +844,7 @@
     %assign %%i 0
     %rep num_mmregs
     CAT_XDEFINE m, %%i, xmm %+ %%i
-    CAT_XDEFINE nxmm, %%i, %%i
+    CAT_XDEFINE nnxmm, %%i, %%i
     %assign %%i %%i+1
     %endrep
     INIT_CPUFLAGS %1
@@ -865,7 +881,7 @@
     %define xmmxmm%1 xmm%1
     %define xmmymm%1 xmm%1
     %define ymmmm%1   mm%1
-    %define ymmxmm%1 ymm%1
+    %define ymmxmm%1 xmm%1
     %define ymmymm%1 ymm%1
     %define xm%1 xmm %+ m%1
     %define ym%1 ymm %+ m%1
@@ -898,7 +914,7 @@
 %endrep
 %rep %0/2
     %xdefine m%1 %%tmp%2
-    CAT_XDEFINE n, m%1, %1
+    CAT_XDEFINE nn, m%1, %1
     %rotate 2
 %endrep
 %endmacro
@@ -916,16 +932,16 @@
         %xdefine %%tmp m%1
         %xdefine m%1 m%2
         %xdefine m%2 %%tmp
-        CAT_XDEFINE n, m%1, %1
-        CAT_XDEFINE n, m%2, %2
+        CAT_XDEFINE nn, m%1, %1
+        CAT_XDEFINE nn, m%2, %2
     %rotate 1
     %endrep
 %endmacro
 
 %macro SWAP_INTERNAL_NAME 2-*
-    %xdefine %%args n %+ %1
+    %xdefine %%args nn %+ %1
     %rep %0-1
-        %xdefine %%args %%args, n %+ %2
+        %xdefine %%args %%args, nn %+ %2
     %rotate 1
     %endrep
     SWAP_INTERNAL_NUM %%args
@@ -952,7 +968,7 @@
         %assign %%i 0
         %rep num_mmregs
             CAT_XDEFINE m, %%i, %1_m %+ %%i
-            CAT_XDEFINE n, m %+ %%i, %%i
+            CAT_XDEFINE nn, m %+ %%i, %%i
         %assign %%i %%i+1
         %endrep
     %endif
@@ -1031,25 +1047,25 @@
 ;%5+: operands
 %macro RUN_AVX_INSTR 5-8+
     %ifnum sizeof%6
-        %assign %%sizeofreg sizeof%6
+        %assign __sizeofreg sizeof%6
     %elifnum sizeof%5
-        %assign %%sizeofreg sizeof%5
+        %assign __sizeofreg sizeof%5
     %else
-        %assign %%sizeofreg mmsize
+        %assign __sizeofreg mmsize
     %endif
-    %assign %%emulate_avx 0
-    %if avx_enabled && %%sizeofreg >= 16
-        %xdefine %%instr v%1
+    %assign __emulate_avx 0
+    %if avx_enabled && __sizeofreg >= 16
+        %xdefine __instr v%1
     %else
-        %xdefine %%instr %1
+        %xdefine __instr %1
         %if %0 >= 7+%3
-            %assign %%emulate_avx 1
+            %assign __emulate_avx 1
         %endif
     %endif
 
-    %if %%emulate_avx
-        %xdefine %%src1 %6
-        %xdefine %%src2 %7
+    %if __emulate_avx
+        %xdefine __src1 %6
+        %xdefine __src2 %7
         %ifnidn %5, %6
             %if %0 >= 8
                 CHECK_AVX_INSTR_EMU {%1 %5, %6, %7, %8}, %5, %7, %8
@@ -1061,31 +1077,31 @@
                     ; 3-operand AVX instructions with a memory arg can only have it in src2,
                     ; whereas SSE emulation prefers to have it in src1 (i.e. the mov).
                     ; So, if the instruction is commutative with a memory arg, swap them.
-                    %xdefine %%src1 %7
-                    %xdefine %%src2 %6
+                    %xdefine __src1 %7
+                    %xdefine __src2 %6
                 %endif
             %endif
-            %if %%sizeofreg == 8
-                MOVQ %5, %%src1
+            %if __sizeofreg == 8
+                MOVQ %5, __src1
             %elif %2
-                MOVAPS %5, %%src1
+                MOVAPS %5, __src1
             %else
-                MOVDQA %5, %%src1
+                MOVDQA %5, __src1
             %endif
         %endif
         %if %0 >= 8
-            %1 %5, %%src2, %8
+            %1 %5, __src2, %8
         %else
-            %1 %5, %%src2
+            %1 %5, __src2
         %endif
     %elif %0 >= 8
-        %%instr %5, %6, %7, %8
+        __instr %5, %6, %7, %8
     %elif %0 == 7
-        %%instr %5, %6, %7
+        __instr %5, %6, %7
     %elif %0 == 6
-        %%instr %5, %6
+        __instr %5, %6
     %else
-        %%instr %5
+        __instr %5
     %endif
 %endmacro
 
@@ -1384,15 +1400,18 @@
     %macro %1 4-7 %1, %2, %3
         %if cpuflag(xop)
             v%5 %1, %2, %3, %4
-        %else
+        %elifnidn %1, %4
             %6 %1, %2, %3
             %7 %1, %4
+        %else
+            %error non-xop emulation of ``%5 %1, %2, %3, %4'' is not supported
         %endif
     %endmacro
 %endmacro
 
-FMA_INSTR  pmacsdd,  pmulld, paddd
 FMA_INSTR  pmacsww,  pmullw, paddw
+FMA_INSTR  pmacsdd,  pmulld, paddd ; sse4 emulation
+FMA_INSTR pmacsdql,  pmuldq, paddq ; sse4 emulation
 FMA_INSTR pmadcswd, pmaddwd, paddd
 
 ; convert FMA4 to FMA3 if possible
​

x264-snapshot-20130723-2245.tar.bz2/common/x86/x86util.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/x86util.asm Changed

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* x86util.asm: x86 utility macros
 ;*****************************************************************************
-;* Copyright (C) 2008-2013 x264 project
+;* Copyright (C) 2008-2014 x264 project
 ;*
 ;* Authors: Holger Lubitz <holger@lubitz.org>
 ;*          Loren Merritt <lorenm@u.washington.edu>
​

x264-snapshot-20130723-2245.tar.bz2/configure -> x264-snapshot-20140321-2245.tar.bz2/configure Changed

@@ -30,7 +30,6 @@
   --disable-thread         disable multithreaded encoding
   --enable-win32thread     use win32threads (windows only)
   --disable-interlaced     disable interlaced encoding support
-  --enable-visualize       enable visualization (X11 only)
   --bit-depth=BIT_DEPTH    set output bit depth (8-10) [8]
   --chroma-format=FORMAT   output chroma format (420, 422, 444, all) [all]
 
@@ -52,6 +51,7 @@
   --disable-lavf           disable libavformat support
   --disable-ffms           disable ffmpegsource support
   --disable-gpac           disable gpac support
+  --disable-lsmash         disable lsmash support
 
 EOF
 exit 1
@@ -264,6 +264,8 @@
 lavf="auto"
 ffms="auto"
 gpac="auto"
+lsmash="auto"
+mp4="no"
 gpl="yes"
 thread="auto"
 swscale="auto"
@@ -273,7 +275,6 @@
 gprof="no"
 strip="no"
 pic="no"
-vis="no"
 bit_depth="8"
 chroma_format="all"
 compiler="GNU"
@@ -290,7 +291,8 @@
 EXE=""
 
 # list of all preprocessor HAVE values we can define
-CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F VISUALIZE SWSCALE LAVF FFMS GPAC GF_MALLOC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL"
+CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F SWSCALE \
+             LAVF FFMS GPAC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL THP LSMASH"
 
 # parse options
 
@@ -342,6 +344,9 @@
         --disable-gpac)
             gpac="no"
             ;;
+        --disable-lsmash)
+            lsmash="no"
+            ;;
         --disable-gpl)
             gpl="no"
             ;;
@@ -380,9 +385,6 @@
         --enable-pic)
             pic="yes"
             ;;
-        --enable-visualize)
-            vis="yes"
-            ;;
         --host=*)
             host="$optarg"
             ;;
@@ -423,6 +425,7 @@
 AR="${AR-${cross_prefix}ar}"
 RANLIB="${RANLIB-${cross_prefix}ranlib}"
 STRIP="${STRIP-${cross_prefix}strip}"
+INSTALL="${INSTALL-install}"
 
 if [ "x$host" = x ]; then
     host=`${SRCPATH}/config.guess`
@@ -503,12 +506,13 @@
             CFLAGS="$CFLAGS -mno-cygwin"
             LDFLAGS="$LDFLAGS -mno-cygwin"
         fi
-        if cpp_check "" "" "defined(__CYGWIN32__)" ; then
+        if cpp_check "" "" "defined(__CYGWIN__)" ; then
             define HAVE_MALLOC_H
             SYS="CYGWIN"
         else
             SYS="WINDOWS"
             DEVNULL="NUL"
+            LDFLAGSCLI="$LDFLAGSCLI -lshell32"
             RC="${RC-${cross_prefix}windres}"
         fi
         ;;
@@ -516,6 +520,7 @@
         SYS="WINDOWS"
         EXE=".exe"
         DEVNULL="NUL"
+        LDFLAGSCLI="$LDFLAGSCLI -lshell32"
         [ $compiler = ICL ] && RC="${RC-rc}" || RC="${RC-${cross_prefix}windres}"
         ;;
     sunos*|solaris*)
@@ -527,6 +532,15 @@
         else
             LDFLAGS="$LDFLAGS /usr/lib/values-xpg6.o"
         fi
+        if test -x /usr/ucb/install ; then
+            INSTALL=/usr/ucb/install
+        elif test -x /usr/bin/ginstall ; then
+            # OpenSolaris
+            INSTALL=/usr/bin/ginstall
+        elif test -x /usr/gnu/bin/install ; then
+            # OpenSolaris
+            INSTALL=/usr/gnu/bin/install
+        fi
         HAVE_GETOPT_LONG=0
         ;;
     *qnx*)
@@ -543,7 +557,7 @@
 
 LDFLAGS="$LDFLAGS $libm"
 
-aligned_stack=1
+stack_alignment=16
 case $host_cpu in
     i*86)
         ARCH="X86"
@@ -563,8 +577,7 @@
             if [ $SYS = LINUX ]; then
                 # < 11 is completely incapable of keeping a mod16 stack
                 if cpp_check "" "" "__INTEL_COMPILER < 1100" ; then
-                    define BROKEN_STACK_ALIGNMENT
-                    aligned_stack=0
+                    stack_alignment=4
                 # 11 <= x < 12 is capable of keeping a mod16 stack, but defaults to not doing so.
                 elif cpp_check "" "" "__INTEL_COMPILER < 1200" ; then
                     CFLAGS="$CFLAGS -falign-stack=assume-16-byte"
@@ -572,7 +585,7 @@
                 # >= 12 defaults to a mod16 stack
             fi
             # icl on windows has no mod16 stack support
-            [ $SYS = WINDOWS ] && define BROKEN_STACK_ALIGNMENT && aligned_stack=0
+            [ $SYS = WINDOWS ] && stack_alignment=4
         fi
         if [ "$SYS" = MACOSX ]; then
             ASFLAGS="$ASFLAGS -f macho -DPREFIX"
@@ -595,7 +608,7 @@
                 CFLAGS="$CFLAGS -arch x86_64"
                 LDFLAGS="$LDFLAGS -arch x86_64"
             fi
-        elif [ "$SYS" = WINDOWS ]; then
+        elif [ "$SYS" = WINDOWS -o "$SYS" = CYGWIN ]; then
             ASFLAGS="$ASFLAGS -f win32 -m amd64"
             # only the GNU toolchain is inconsistent in prefixing function names with _
             [ $compiler = GNU ] && cc_check "" "-S" && grep -q "_main:" conftest && ASFLAGS="$ASFLAGS -DPREFIX"
@@ -667,7 +680,6 @@
         ARCH="$(echo $host_cpu | tr a-z A-Z)"
         ;;
 esac
-ASFLAGS="$ASFLAGS -DHAVE_ALIGNED_STACK=${aligned_stack}"
 
 if [ $SYS = WINDOWS ]; then
     if ! rc_check "0 RCDATA {0}" ; then
@@ -719,10 +731,11 @@
         echo "If you really want to compile without asm, configure with --disable-asm."
         exit 1
     fi
+    ASFLAGS="$ASFLAGS -Worphan-labels"
     define HAVE_MMX
-    if cc_check '' -mpreferred-stack-boundary=5 ; then
+    if [ $compiler = GNU ] && cc_check '' -mpreferred-stack-boundary=5 ; then
         CFLAGS="$CFLAGS -mpreferred-stack-boundary=5"
-        define HAVE_32B_STACK_ALIGNMENT
+        stack_alignment=32
     fi
 fi
 
@@ -747,6 +760,9 @@
 define ARCH_$ARCH
 define SYS_$SYS
 
+define STACK_ALIGNMENT $stack_alignment
+ASFLAGS="$ASFLAGS -DSTACK_ALIGNMENT=$stack_alignment"
+
 # skip endianness check for Intel Compiler, as all supported platforms are little. the -ipo flag will also cause the check to fail
 if [ $compiler = GNU ]; then
     echo "int i[2] = {0x42494745,0}; double f[2] = {0x1.0656e6469616ep+102,0};" > conftest.c
@@ -792,10 +808,15 @@
             fi
             ;;
         QNX)
-            cc_check pthread.h -lc && thread="posix" && libpthread="-lc"
+            cc_check pthread.h -lc "pthread_create(0,0,0,0);" && thread="posix" && libpthread="-lc"
             ;;
         *)
-            cc_check pthread.h -lpthread && thread="posix" && libpthread="-lpthread"
+            if cc_check pthread.h -lpthread "pthread_create(0,0,0,0);" ; then
+               thread="posix"
+               libpthread="-lpthread"
+            else
+                cc_check pthread.h "" "pthread_create(0,0,0,0);" && thread="posix" && libpthread=""
+            fi
             ;;
     esac
 fi
@@ -820,16 +841,8 @@
     define HAVE_LOG2F
 fi
 
-if [ "$vis" = "yes" ] ; then
-    save_CFLAGS="$CFLAGS"
-    CFLAGS="$CFLAGS -I/usr/X11R6/include"
-    if cc_check "X11/Xlib.h" "-L/usr/X11R6/lib -lX11" "XOpenDisplay(0);" ; then
-        LDFLAGS="-L/usr/X11R6/lib -lX11 $LDFLAGS"
-        define HAVE_VISUALIZE
-    else
-        vis="no"
-        CFLAGS="$save_CFLAGS"
-   fi
+if [ "$SYS" = "LINUX" -a \( "$ARCH" = "X86" -o "$ARCH" = "X86_64" \) ] && cc_check "sys/mman.h" "" "MADV_HUGEPAGE;" ; then
+    define HAVE_THP
 fi
 
 if [ "$swscale" = "auto" ] ; then
@@ -841,10 +854,10 @@
     [ -z "$SWSCALE_LIBS" ] && SWSCALE_LIBS="-lswscale -lavutil"
 
     if cc_check "libswscale/swscale.h" "$SWSCALE_CFLAGS $SWSCALE_LIBS" "sws_init_context(0,0,0);" ; then
-        if cpp_check "libavutil/pixdesc.h" "$SWSCALE_CFLAGS $SWSCALE_LIBS" "defined(PIX_FMT_RGB)" ; then
+        if cpp_check "libavutil/pixdesc.h" "$SWSCALE_CFLAGS $SWSCALE_LIBS" "defined(AV_PIX_FMT_FLAG_RGB)" ; then
             swscale="yes"
         else
-            echo "Warning: PIX_FMT_RGB is missing from libavutil, update for swscale support"
+            echo "Warning: AV_PIX_FMT_FLAG_RGB is missing from libavutil, update for swscale support"
         fi
     fi
 fi
@@ -857,7 +870,7 @@
     fi
     if [ -z "$LAVF_LIBS" -a -z "$LAVF_CFLAGS" ]; then
         LAVF_LIBS="-lavformat"
-        for lib in -lpostproc -lavcodec -lavcore -lswscale -lavutil -lm -lz -lbz2 $libpthread -lavifil32; do
+        for lib in -lpostproc -lavcodec -lavcore -lswscale -lavutil -lm -lz -lbz2 $libpthread -lavifil32 -lws2_32; do
             cc_check "" $lib && LAVF_LIBS="$LAVF_LIBS $lib"
         done
     fi
@@ -915,11 +928,30 @@
     fi
 fi
 
-if [ "$gpac" = "auto" ] ; then
+if [ "$lsmash" = "auto" ] ; then
+    lsmash="no"
+    if ${cross_prefix}pkg-config --exists liblsmash 2>/dev/null; then
+        LSMASH_LIBS="$LSMASH_LIBS $(${cross_prefix}pkg-config --libs liblsmash)"
+        LSMASH_CFLAGS="$LSMASH_CFLAGS $(${cross_prefix}pkg-config --cflags liblsmash)"
+    fi
+    [ -z "$LSMASH_LIBS" ] && LSMASH_LIBS="-llsmash"
+
+    if cc_check lsmash.h "$LSMASH_CFLAGS $LSMASH_LIBS" ; then
+        if cpp_check lsmash.h "$LSMASH_CFLAGS" "LSMASH_VERSION_MAJOR > 0 || (LSMASH_VERSION_MAJOR == 0 && LSMASH_VERSION_MINOR >= 1)" ; then
+            lsmash="yes"
+        else
+            echo "Warning: lsmash is too old, update to rev.751 or later"
+        fi
+    fi
+fi
+
+if [ "$gpac" = "auto" -a "$lsmash" != "yes" ] ; then
     gpac="no"
-    cc_check "" -lz && GPAC_LIBS="-lgpac_static -lz" || GPAC_LIBS="-lgpac_static"
+    GPAC_LIBS="-lgpac_static"
+    cc_check "" -lz && GPAC_LIBS="$GPAC_LIBS -lz"
     if [ "$SYS" = "WINDOWS" ] ; then
-        GPAC_LIBS="$GPAC_LIBS -lwinmm"
+        cc_check "" -lws2_32 && GPAC_LIBS="$GPAC_LIBS -lws2_32"
+        cc_check "" -lwinmm && GPAC_LIBS="$GPAC_LIBS -lwinmm"
     fi
     if cc_check gpac/isomedia.h "$GPAC_LIBS" ; then
         if cc_check gpac/isomedia.h "$GPAC_LIBS" "gf_isom_set_pixel_aspect_ratio(0,0,0,0,0);" ; then
@@ -929,18 +961,22 @@
         fi
     fi
 fi
-if [ "$gpac" = "yes" ] ; then
+
+if [ "$lsmash" = "yes" ] ; then
+    mp4="lsmash"
+    LDFLAGSCLI="$LSMASH_LIBS $LDFLAGSCLI"
+    CFLAGS="$CFLAGS $LSMASH_CFLAGS"
+    define HAVE_LSMASH
+elif [ "$gpac" = "yes" ] ; then
+    mp4="gpac"
     define HAVE_GPAC
-    if cc_check gpac/isomedia.h "-Werror $GPAC_LIBS" "void *p; p = gf_malloc(1); gf_free(p);" ; then
-        define HAVE_GF_MALLOC
-    fi
     LDFLAGSCLI="$GPAC_LIBS $LDFLAGSCLI"
 fi
 
 if [ "$avs" = "auto" ] ; then
     avs="no"
     # cygwin can use avisynth if it can use LoadLibrary
-    if [ $SYS = WINDOWS ] || ([ $SYS = CYGWIN ] && cc_check windows.h "" "LoadLibrary(0);") ; then
+    if [ $SYS = WINDOWS ] || ([ $SYS = CYGWIN ] && cc_check windows.h "" "LoadLibraryW(0);") ; then
         avs="avisynth"
         define HAVE_AVS
         define USE_AVXSYNTH 0
@@ -1038,7 +1074,7 @@
     fi
     log_ok
     # cygwin can use opencl if it can use LoadLibrary
-    if [ $SYS = WINDOWS ] || ([ $SYS = CYGWIN ] && cc_check windows.h "" "LoadLibrary(0);") ; then
+    if [ $SYS = WINDOWS ] || ([ $SYS = CYGWIN ] && cc_check windows.h "" "LoadLibraryW(0);") ; then
         opencl="yes"
         define HAVE_OPENCL
     elif [ "$SYS" = "LINUX" -o "$SYS" = "MACOSX" ] ; then
@@ -1129,6 +1165,7 @@
 AR=$AR
 RANLIB=$RANLIB
 STRIP=$STRIP
+INSTALL=$INSTALL
 AS=$AS
 ASFLAGS=$ASFLAGS
 RC=$RC
@@ -1219,8 +1256,8 @@
 Name: x264
 Description: H.264 (MPEG4 AVC) encoder library
 Version: $(grep POINTVER < x264_config.h | sed -e 's/.* "//; s/".*//')
-Libs: -L$libdir -lx264
-Libs.private: $libpthread $libm $libdl
+Libs: -L$libdir -lx264 $([ "$shared" = "yes" ] || echo $libpthread $libm $libdl)
+Libs.private: $([ "$shared" = "yes" ] && echo $libpthread $libm $libdl)
 Cflags: -I$includedir
 EOF
 
@@ -1241,7 +1278,7 @@
 avs:           $avs
 lavf:          $lavf
 ffms:          $ffms
-gpac:          $gpac
+mp4:           $mp4
 gpl:           $gpl
 thread:        $thread
 opencl:        $opencl
@@ -1250,7 +1287,6 @@
 gprof:         $gprof
 strip:         $strip
 PIC:           $pic
-visualize:     $vis
 bit depth:     $bit_depth
 chroma format: $chroma_format
 EOF

 
@@ -30,7 +30,6 @@
   --disable-thread         disable multithreaded encoding
   --enable-win32thread     use win32threads (windows only)
   --disable-interlaced     disable interlaced encoding support
-  --enable-visualize       enable visualization (X11 only)
   --bit-depth=BIT_DEPTH    set output bit depth (8-10) [8]
   --chroma-format=FORMAT   output chroma format (420, 422, 444, all) [all]
 
@@ -52,6 +51,7 @@
   --disable-lavf           disable libavformat support
   --disable-ffms           disable ffmpegsource support
   --disable-gpac           disable gpac support
+  --disable-lsmash         disable lsmash support
 
 EOF
 exit 1
@@ -264,6 +264,8 @@
 lavf="auto"
 ffms="auto"
 gpac="auto"
+lsmash="auto"
+mp4="no"
 gpl="yes"
 thread="auto"
 swscale="auto"
@@ -273,7 +275,6 @@
 gprof="no"
 strip="no"
 pic="no"
-vis="no"
 bit_depth="8"
 chroma_format="all"
 compiler="GNU"
@@ -290,7 +291,8 @@
 EXE=""
 
 # list of all preprocessor HAVE values we can define
-CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F VISUALIZE SWSCALE LAVF FFMS GPAC GF_MALLOC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL"
+CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F SWSCALE \
+             LAVF FFMS GPAC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL THP LSMASH"
 
 # parse options
 
@@ -342,6 +344,9 @@
         --disable-gpac)
             gpac="no"
             ;;
+        --disable-lsmash)
+            lsmash="no"
+            ;;
         --disable-gpl)
             gpl="no"
             ;;
@@ -380,9 +385,6 @@
         --enable-pic)
             pic="yes"
             ;;
-        --enable-visualize)
-            vis="yes"
-            ;;
         --host=*)
             host="$optarg"
             ;;
@@ -423,6 +425,7 @@
 AR="${AR-${cross_prefix}ar}"
 RANLIB="${RANLIB-${cross_prefix}ranlib}"
 STRIP="${STRIP-${cross_prefix}strip}"
+INSTALL="${INSTALL-install}"
 
 if [ "x$host" = x ]; then
     host=`${SRCPATH}/config.guess`
@@ -503,12 +506,13 @@
             CFLAGS="$CFLAGS -mno-cygwin"
             LDFLAGS="$LDFLAGS -mno-cygwin"
         fi
-        if cpp_check "" "" "defined(__CYGWIN32__)" ; then
+        if cpp_check "" "" "defined(__CYGWIN__)" ; then
             define HAVE_MALLOC_H
             SYS="CYGWIN"
         else
             SYS="WINDOWS"
             DEVNULL="NUL"
+            LDFLAGSCLI="$LDFLAGSCLI -lshell32"
             RC="${RC-${cross_prefix}windres}"
         fi
         ;;
@@ -516,6 +520,7 @@
         SYS="WINDOWS"
         EXE=".exe"
         DEVNULL="NUL"
+        LDFLAGSCLI="$LDFLAGSCLI -lshell32"
         [ $compiler = ICL ] && RC="${RC-rc}" || RC="${RC-${cross_prefix}windres}"
         ;;
     sunos*|solaris*)
@@ -527,6 +532,15 @@
         else
             LDFLAGS="$LDFLAGS /usr/lib/values-xpg6.o"
         fi
+        if test -x /usr/ucb/install ; then
+            INSTALL=/usr/ucb/install
+        elif test -x /usr/bin/ginstall ; then
+            # OpenSolaris
+            INSTALL=/usr/bin/ginstall
+        elif test -x /usr/gnu/bin/install ; then
+            # OpenSolaris
+            INSTALL=/usr/gnu/bin/install
+        fi
         HAVE_GETOPT_LONG=0
         ;;
     *qnx*)
@@ -543,7 +557,7 @@
 
 LDFLAGS="$LDFLAGS $libm"
 
-aligned_stack=1
+stack_alignment=16
 case $host_cpu in
     i*86)
         ARCH="X86"
@@ -563,8 +577,7 @@
             if [ $SYS = LINUX ]; then
                 # < 11 is completely incapable of keeping a mod16 stack
                 if cpp_check "" "" "__INTEL_COMPILER < 1100" ; then
-                    define BROKEN_STACK_ALIGNMENT
-                    aligned_stack=0
+                    stack_alignment=4
                 # 11 <= x < 12 is capable of keeping a mod16 stack, but defaults to not doing so.
                 elif cpp_check "" "" "__INTEL_COMPILER < 1200" ; then
                     CFLAGS="$CFLAGS -falign-stack=assume-16-byte"
@@ -572,7 +585,7 @@
                 # >= 12 defaults to a mod16 stack
             fi
             # icl on windows has no mod16 stack support
-            [ $SYS = WINDOWS ] && define BROKEN_STACK_ALIGNMENT && aligned_stack=0
+            [ $SYS = WINDOWS ] && stack_alignment=4
         fi
         if [ "$SYS" = MACOSX ]; then
             ASFLAGS="$ASFLAGS -f macho -DPREFIX"
@@ -595,7 +608,7 @@
                 CFLAGS="$CFLAGS -arch x86_64"
                 LDFLAGS="$LDFLAGS -arch x86_64"
             fi
-        elif [ "$SYS" = WINDOWS ]; then
+        elif [ "$SYS" = WINDOWS -o "$SYS" = CYGWIN ]; then
             ASFLAGS="$ASFLAGS -f win32 -m amd64"
             # only the GNU toolchain is inconsistent in prefixing function names with _
             [ $compiler = GNU ] && cc_check "" "-S" && grep -q "_main:" conftest && ASFLAGS="$ASFLAGS -DPREFIX"
@@ -667,7 +680,6 @@
         ARCH="$(echo $host_cpu | tr a-z A-Z)"
         ;;
 esac
-ASFLAGS="$ASFLAGS -DHAVE_ALIGNED_STACK=${aligned_stack}"
 
 if [ $SYS = WINDOWS ]; then
     if ! rc_check "0 RCDATA {0}" ; then
@@ -719,10 +731,11 @@
         echo "If you really want to compile without asm, configure with --disable-asm."
         exit 1
     fi
+    ASFLAGS="$ASFLAGS -Worphan-labels"
     define HAVE_MMX
-    if cc_check '' -mpreferred-stack-boundary=5 ; then
+    if [ $compiler = GNU ] && cc_check '' -mpreferred-stack-boundary=5 ; then
         CFLAGS="$CFLAGS -mpreferred-stack-boundary=5"
-        define HAVE_32B_STACK_ALIGNMENT
+        stack_alignment=32
     fi
 fi
 
@@ -747,6 +760,9 @@
 define ARCH_$ARCH
 define SYS_$SYS
 
+define STACK_ALIGNMENT $stack_alignment
+ASFLAGS="$ASFLAGS -DSTACK_ALIGNMENT=$stack_alignment"
+
 # skip endianness check for Intel Compiler, as all supported platforms are little. the -ipo flag will also cause the check to fail
 if [ $compiler = GNU ]; then
     echo "int i[2] = {0x42494745,0}; double f[2] = {0x1.0656e6469616ep+102,0};" > conftest.c
@@ -792,10 +808,15 @@
             fi
             ;;
         QNX)
-            cc_check pthread.h -lc && thread="posix" && libpthread="-lc"
+            cc_check pthread.h -lc "pthread_create(0,0,0,0);" && thread="posix" && libpthread="-lc"
             ;;
         *)
-            cc_check pthread.h -lpthread && thread="posix" && libpthread="-lpthread"
+            if cc_check pthread.h -lpthread "pthread_create(0,0,0,0);" ; then
+               thread="posix"
+               libpthread="-lpthread"
+            else
+                cc_check pthread.h "" "pthread_create(0,0,0,0);" && thread="posix" && libpthread=""
+            fi
             ;;
     esac
 fi
@@ -820,16 +841,8 @@
     define HAVE_LOG2F
 fi
 
-if [ "$vis" = "yes" ] ; then
-    save_CFLAGS="$CFLAGS"
-    CFLAGS="$CFLAGS -I/usr/X11R6/include"
-    if cc_check "X11/Xlib.h" "-L/usr/X11R6/lib -lX11" "XOpenDisplay(0);" ; then
-        LDFLAGS="-L/usr/X11R6/lib -lX11 $LDFLAGS"
-        define HAVE_VISUALIZE
-    else
-        vis="no"
-        CFLAGS="$save_CFLAGS"
-   fi
+if [ "$SYS" = "LINUX" -a \( "$ARCH" = "X86" -o "$ARCH" = "X86_64" \) ] && cc_check "sys/mman.h" "" "MADV_HUGEPAGE;" ; then
+    define HAVE_THP
 fi
 
 if [ "$swscale" = "auto" ] ; then
@@ -841,10 +854,10 @@
     [ -z "$SWSCALE_LIBS" ] && SWSCALE_LIBS="-lswscale -lavutil"
 
     if cc_check "libswscale/swscale.h" "$SWSCALE_CFLAGS $SWSCALE_LIBS" "sws_init_context(0,0,0);" ; then
-        if cpp_check "libavutil/pixdesc.h" "$SWSCALE_CFLAGS $SWSCALE_LIBS" "defined(PIX_FMT_RGB)" ; then
+        if cpp_check "libavutil/pixdesc.h" "$SWSCALE_CFLAGS $SWSCALE_LIBS" "defined(AV_PIX_FMT_FLAG_RGB)" ; then
             swscale="yes"
         else
-            echo "Warning: PIX_FMT_RGB is missing from libavutil, update for swscale support"
+            echo "Warning: AV_PIX_FMT_FLAG_RGB is missing from libavutil, update for swscale support"
         fi
     fi
 fi
@@ -857,7 +870,7 @@
     fi
     if [ -z "$LAVF_LIBS" -a -z "$LAVF_CFLAGS" ]; then
         LAVF_LIBS="-lavformat"
-        for lib in -lpostproc -lavcodec -lavcore -lswscale -lavutil -lm -lz -lbz2 $libpthread -lavifil32; do
+        for lib in -lpostproc -lavcodec -lavcore -lswscale -lavutil -lm -lz -lbz2 $libpthread -lavifil32 -lws2_32; do
             cc_check "" $lib && LAVF_LIBS="$LAVF_LIBS $lib"
         done
     fi
@@ -915,11 +928,30 @@
     fi
 fi
 
-if [ "$gpac" = "auto" ] ; then
+if [ "$lsmash" = "auto" ] ; then
+    lsmash="no"
+    if ${cross_prefix}pkg-config --exists liblsmash 2>/dev/null; then
+        LSMASH_LIBS="$LSMASH_LIBS $(${cross_prefix}pkg-config --libs liblsmash)"
+        LSMASH_CFLAGS="$LSMASH_CFLAGS $(${cross_prefix}pkg-config --cflags liblsmash)"
+    fi
+    [ -z "$LSMASH_LIBS" ] && LSMASH_LIBS="-llsmash"
+
+    if cc_check lsmash.h "$LSMASH_CFLAGS $LSMASH_LIBS" ; then
+        if cpp_check lsmash.h "$LSMASH_CFLAGS" "LSMASH_VERSION_MAJOR > 0 || (LSMASH_VERSION_MAJOR == 0 && LSMASH_VERSION_MINOR >= 1)" ; then
+            lsmash="yes"
+        else
+            echo "Warning: lsmash is too old, update to rev.751 or later"
+        fi
+    fi
+fi
+
+if [ "$gpac" = "auto" -a "$lsmash" != "yes" ] ; then
     gpac="no"
-    cc_check "" -lz && GPAC_LIBS="-lgpac_static -lz" || GPAC_LIBS="-lgpac_static"
+    GPAC_LIBS="-lgpac_static"
+    cc_check "" -lz && GPAC_LIBS="$GPAC_LIBS -lz"
     if [ "$SYS" = "WINDOWS" ] ; then
-        GPAC_LIBS="$GPAC_LIBS -lwinmm"
+        cc_check "" -lws2_32 && GPAC_LIBS="$GPAC_LIBS -lws2_32"
+        cc_check "" -lwinmm && GPAC_LIBS="$GPAC_LIBS -lwinmm"
     fi
     if cc_check gpac/isomedia.h "$GPAC_LIBS" ; then
         if cc_check gpac/isomedia.h "$GPAC_LIBS" "gf_isom_set_pixel_aspect_ratio(0,0,0,0,0);" ; then
@@ -929,18 +961,22 @@
         fi
     fi
 fi
-if [ "$gpac" = "yes" ] ; then
+
+if [ "$lsmash" = "yes" ] ; then
+    mp4="lsmash"
+    LDFLAGSCLI="$LSMASH_LIBS $LDFLAGSCLI"
+    CFLAGS="$CFLAGS $LSMASH_CFLAGS"
+    define HAVE_LSMASH
+elif [ "$gpac" = "yes" ] ; then
+    mp4="gpac"
     define HAVE_GPAC
-    if cc_check gpac/isomedia.h "-Werror $GPAC_LIBS" "void *p; p = gf_malloc(1); gf_free(p);" ; then
-        define HAVE_GF_MALLOC
-    fi
     LDFLAGSCLI="$GPAC_LIBS $LDFLAGSCLI"
 fi
 
 if [ "$avs" = "auto" ] ; then
     avs="no"
     # cygwin can use avisynth if it can use LoadLibrary
-    if [ $SYS = WINDOWS ] || ([ $SYS = CYGWIN ] && cc_check windows.h "" "LoadLibrary(0);") ; then
+    if [ $SYS = WINDOWS ] || ([ $SYS = CYGWIN ] && cc_check windows.h "" "LoadLibraryW(0);") ; then
         avs="avisynth"
         define HAVE_AVS
         define USE_AVXSYNTH 0
@@ -1038,7 +1074,7 @@
     fi
     log_ok
     # cygwin can use opencl if it can use LoadLibrary
-    if [ $SYS = WINDOWS ] || ([ $SYS = CYGWIN ] && cc_check windows.h "" "LoadLibrary(0);") ; then
+    if [ $SYS = WINDOWS ] || ([ $SYS = CYGWIN ] && cc_check windows.h "" "LoadLibraryW(0);") ; then
         opencl="yes"
         define HAVE_OPENCL
     elif [ "$SYS" = "LINUX" -o "$SYS" = "MACOSX" ] ; then
@@ -1129,6 +1165,7 @@
 AR=$AR
 RANLIB=$RANLIB
 STRIP=$STRIP
+INSTALL=$INSTALL
 AS=$AS
 ASFLAGS=$ASFLAGS
 RC=$RC
@@ -1219,8 +1256,8 @@
 Name: x264
 Description: H.264 (MPEG4 AVC) encoder library
 Version: $(grep POINTVER < x264_config.h | sed -e 's/.* "//; s/".*//')
-Libs: -L$libdir -lx264
-Libs.private: $libpthread $libm $libdl
+Libs: -L$libdir -lx264 $([ "$shared" = "yes" ] || echo $libpthread $libm $libdl)
+Libs.private: $([ "$shared" = "yes" ] && echo $libpthread $libm $libdl)
 Cflags: -I$includedir
 EOF
 
@@ -1241,7 +1278,7 @@
 avs:           $avs
 lavf:          $lavf
 ffms:          $ffms
-gpac:          $gpac
+mp4:           $mp4
 gpl:           $gpl
 thread:        $thread
 opencl:        $opencl
@@ -1250,7 +1287,6 @@
 gprof:         $gprof
 strip:         $strip
 PIC:           $pic
-visualize:     $vis
 bit depth:     $bit_depth
 chroma format: $chroma_format
 EOF
​

x264-snapshot-20130723-2245.tar.bz2/encoder/analyse.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/analyse.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * analyse.c: macroblock analysis
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -436,7 +436,7 @@
     /* non-RD PCM decision is inaccurate (as is psy-rd), so don't do it.
      * PCM cost can overflow with high lambda2, so cap it at COST_MAX. */
     uint64_t pcm_cost = ((uint64_t)X264_PCM_COST*a->i_lambda2 + 128) >> 8;
-    a->i_satd_pcm = !h->mb.i_psy_rd && a->i_mbrd && pcm_cost < COST_MAX ? pcm_cost : COST_MAX;
+    a->i_satd_pcm = !h->param.i_avcintra_class && !h->mb.i_psy_rd && a->i_mbrd && pcm_cost < COST_MAX ? pcm_cost : COST_MAX;
 
     a->b_fast_intra = 0;
     a->b_avoid_topright = 0;
@@ -618,6 +618,24 @@
     {I_PRED_CHROMA_V, I_PRED_CHROMA_H, I_PRED_CHROMA_DC, I_PRED_CHROMA_P, -1},
 };
 
+static const int8_t i8x8_mode_available[2][5][10] =
+{
+    {
+        {I_PRED_4x4_DC_128, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {I_PRED_4x4_DC_LEFT, I_PRED_4x4_H, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1},
+        {I_PRED_4x4_DC_TOP, I_PRED_4x4_V, I_PRED_4x4_DDL, I_PRED_4x4_VL, -1, -1, -1, -1, -1, -1},
+        {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDL, I_PRED_4x4_VL, I_PRED_4x4_HU, -1, -1, -1, -1},
+        {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDL, I_PRED_4x4_DDR, I_PRED_4x4_VR, I_PRED_4x4_HD, I_PRED_4x4_VL, I_PRED_4x4_HU, -1},
+    },
+    {
+        {I_PRED_4x4_DC_128, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {I_PRED_4x4_DC_LEFT, I_PRED_4x4_H, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1},
+        {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {I_PRED_4x4_H, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1, -1},
+        {I_PRED_4x4_H, I_PRED_4x4_HD, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1},
+    }
+};
+
 static const int8_t i4x4_mode_available[2][5][10] =
 {
     {
@@ -632,7 +650,7 @@
         {I_PRED_4x4_DC_LEFT, I_PRED_4x4_H, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1},
         {I_PRED_4x4_DC_TOP, I_PRED_4x4_V, -1, -1, -1, -1, -1, -1, -1, -1},
         {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1},
-        {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDR, I_PRED_4x4_VR, I_PRED_4x4_HD, I_PRED_4x4_HU, -1},
+        {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDR, I_PRED_4x4_VR, I_PRED_4x4_HD, I_PRED_4x4_HU, -1, -1, -1},
     }
 };
 
@@ -655,7 +673,7 @@
     int avoid_topright = force_intra && (i&1);
     int idx = i_neighbour & (MB_TOP|MB_LEFT|MB_TOPLEFT);
     idx = (idx == (MB_TOP|MB_LEFT|MB_TOPLEFT)) ? 4 : idx & (MB_TOP|MB_LEFT);
-    return i4x4_mode_available[avoid_topright][idx];
+    return i8x8_mode_available[avoid_topright][idx];
 }
 
 static ALWAYS_INLINE const int8_t *predict_4x4_mode_available( int force_intra, int i_neighbour, int i )
@@ -793,58 +811,60 @@
     int lambda = a->i_lambda;
 
     /*---------------- Try all mode and calculate their score ---------------*/
+    /* Disabled i16x16 for AVC-Intra compat */
+    if( !h->param.i_avcintra_class )
+    {
+        const int8_t *predict_mode = predict_16x16_mode_available( h->mb.i_neighbour_intra );
 
-    /* 16x16 prediction selection */
-    const int8_t *predict_mode = predict_16x16_mode_available( h->mb.i_neighbour_intra );
+        /* Not heavily tuned */
+        static const uint8_t i16x16_thresh_lut[11] = { 2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4 };
+        int i16x16_thresh = a->b_fast_intra ? (i16x16_thresh_lut[h->mb.i_subpel_refine]*i_satd_inter)>>1 : COST_MAX;
 
-    /* Not heavily tuned */
-    static const uint8_t i16x16_thresh_lut[11] = { 2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4 };
-    int i16x16_thresh = a->b_fast_intra ? (i16x16_thresh_lut[h->mb.i_subpel_refine]*i_satd_inter)>>1 : COST_MAX;
-
-    if( !h->mb.b_lossless && predict_mode[3] >= 0 )
-    {
-        h->pixf.intra_mbcmp_x3_16x16( p_src, p_dst, a->i_satd_i16x16_dir );
-        a->i_satd_i16x16_dir[0] += lambda * bs_size_ue(0);
-        a->i_satd_i16x16_dir[1] += lambda * bs_size_ue(1);
-        a->i_satd_i16x16_dir[2] += lambda * bs_size_ue(2);
-        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[0], a->i_predict16x16, 0 );
-        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[1], a->i_predict16x16, 1 );
-        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[2], a->i_predict16x16, 2 );
-
-        /* Plane is expensive, so don't check it unless one of the previous modes was useful. */
-        if( a->i_satd_i16x16 <= i16x16_thresh )
-        {
-            h->predict_16x16[I_PRED_16x16_P]( p_dst );
-            a->i_satd_i16x16_dir[I_PRED_16x16_P] = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE );
-            a->i_satd_i16x16_dir[I_PRED_16x16_P] += lambda * bs_size_ue(3);
-            COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[I_PRED_16x16_P], a->i_predict16x16, 3 );
+        if( !h->mb.b_lossless && predict_mode[3] >= 0 )
+        {
+            h->pixf.intra_mbcmp_x3_16x16( p_src, p_dst, a->i_satd_i16x16_dir );
+            a->i_satd_i16x16_dir[0] += lambda * bs_size_ue(0);
+            a->i_satd_i16x16_dir[1] += lambda * bs_size_ue(1);
+            a->i_satd_i16x16_dir[2] += lambda * bs_size_ue(2);
+            COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[0], a->i_predict16x16, 0 );
+            COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[1], a->i_predict16x16, 1 );
+            COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[2], a->i_predict16x16, 2 );
+
+            /* Plane is expensive, so don't check it unless one of the previous modes was useful. */
+            if( a->i_satd_i16x16 <= i16x16_thresh )
+            {
+                h->predict_16x16[I_PRED_16x16_P]( p_dst );
+                a->i_satd_i16x16_dir[I_PRED_16x16_P] = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE );
+                a->i_satd_i16x16_dir[I_PRED_16x16_P] += lambda * bs_size_ue(3);
+                COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[I_PRED_16x16_P], a->i_predict16x16, 3 );
+            }
         }
-    }
-    else
-    {
-        for( ; *predict_mode >= 0; predict_mode++ )
+        else
         {
-            int i_satd;
-            int i_mode = *predict_mode;
+            for( ; *predict_mode >= 0; predict_mode++ )
+            {
+                int i_satd;
+                int i_mode = *predict_mode;
 
-            if( h->mb.b_lossless )
-                x264_predict_lossless_16x16( h, 0, i_mode );
-            else
-                h->predict_16x16[i_mode]( p_dst );
+                if( h->mb.b_lossless )
+                    x264_predict_lossless_16x16( h, 0, i_mode );
+                else
+                    h->predict_16x16[i_mode]( p_dst );
 
-            i_satd = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE ) +
-                     lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] );
-            COPY2_IF_LT( a->i_satd_i16x16, i_satd, a->i_predict16x16, i_mode );
-            a->i_satd_i16x16_dir[i_mode] = i_satd;
+                i_satd = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE ) +
+                         lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] );
+                COPY2_IF_LT( a->i_satd_i16x16, i_satd, a->i_predict16x16, i_mode );
+                a->i_satd_i16x16_dir[i_mode] = i_satd;
+            }
         }
-    }
 
-    if( h->sh.i_type == SLICE_TYPE_B )
-        /* cavlc mb type prefix */
-        a->i_satd_i16x16 += lambda * i_mb_b_cost_table[I_16x16];
+        if( h->sh.i_type == SLICE_TYPE_B )
+            /* cavlc mb type prefix */
+            a->i_satd_i16x16 += lambda * i_mb_b_cost_table[I_16x16];
 
-    if( a->i_satd_i16x16 > i16x16_thresh )
-        return;
+        if( a->i_satd_i16x16 > i16x16_thresh )
+            return;
+    }
 
     uint16_t *cost_i4x4_mode = (uint16_t*)ALIGN((intptr_t)x264_cost_i4x4_mode,64) + a->i_qp*32 + 8;
     /* 8x8 prediction selection */
@@ -870,7 +890,7 @@
             int i_best = COST_MAX;
             int i_pred_mode = x264_mb_predict_intra4x4_mode( h, 4*idx );
 
-            predict_mode = predict_8x8_mode_available( a->b_avoid_topright, h->mb.i_neighbour8[idx], idx );
+            const int8_t *predict_mode = predict_8x8_mode_available( a->b_avoid_topright, h->mb.i_neighbour8[idx], idx );
             h->predict_8x8_filter( p_dst_by, edge, h->mb.i_neighbour8[idx], ALL_NEIGHBORS );
 
             if( h->pixf.intra_mbcmp_x9_8x8 && predict_mode[8] >= 0 )
@@ -985,7 +1005,7 @@
             int i_best = COST_MAX;
             int i_pred_mode = x264_mb_predict_intra4x4_mode( h, idx );
 
-            predict_mode = predict_4x4_mode_available( a->b_avoid_topright, h->mb.i_neighbour4[idx], idx );
+            const int8_t *predict_mode = predict_4x4_mode_available( a->b_avoid_topright, h->mb.i_neighbour4[idx], idx );
 
             if( (h->mb.i_neighbour4[idx] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )
                 /* emulate missing topright samples */
@@ -2101,7 +2121,7 @@
         int cost00 = h->pixf.mbcmp[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE, pix0, 16 )
                    + ref_costs + l0_mv_cost + l1_mv_cost;
 
-        if( h->mb.b_chroma_me )
+        if( h->mb.b_chroma_me && cost00 < a->i_cost16x16bi )
         {
             ALIGNED_ARRAY_16( pixel, bi, [16*FENC_STRIDE] );

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * analyse.c: macroblock analysis
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -436,7 +436,7 @@
     /* non-RD PCM decision is inaccurate (as is psy-rd), so don't do it.
      * PCM cost can overflow with high lambda2, so cap it at COST_MAX. */
     uint64_t pcm_cost = ((uint64_t)X264_PCM_COST*a->i_lambda2 + 128) >> 8;
-    a->i_satd_pcm = !h->mb.i_psy_rd && a->i_mbrd && pcm_cost < COST_MAX ? pcm_cost : COST_MAX;
+    a->i_satd_pcm = !h->param.i_avcintra_class && !h->mb.i_psy_rd && a->i_mbrd && pcm_cost < COST_MAX ? pcm_cost : COST_MAX;
 
     a->b_fast_intra = 0;
     a->b_avoid_topright = 0;
@@ -618,6 +618,24 @@
     {I_PRED_CHROMA_V, I_PRED_CHROMA_H, I_PRED_CHROMA_DC, I_PRED_CHROMA_P, -1},
 };
 
+static const int8_t i8x8_mode_available[2][5][10] =
+{
+    {
+        {I_PRED_4x4_DC_128, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {I_PRED_4x4_DC_LEFT, I_PRED_4x4_H, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1},
+        {I_PRED_4x4_DC_TOP, I_PRED_4x4_V, I_PRED_4x4_DDL, I_PRED_4x4_VL, -1, -1, -1, -1, -1, -1},
+        {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDL, I_PRED_4x4_VL, I_PRED_4x4_HU, -1, -1, -1, -1},
+        {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDL, I_PRED_4x4_DDR, I_PRED_4x4_VR, I_PRED_4x4_HD, I_PRED_4x4_VL, I_PRED_4x4_HU, -1},
+    },
+    {
+        {I_PRED_4x4_DC_128, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {I_PRED_4x4_DC_LEFT, I_PRED_4x4_H, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1},
+        {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+        {I_PRED_4x4_H, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1, -1},
+        {I_PRED_4x4_H, I_PRED_4x4_HD, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1},
+    }
+};
+
 static const int8_t i4x4_mode_available[2][5][10] =
 {
     {
@@ -632,7 +650,7 @@
         {I_PRED_4x4_DC_LEFT, I_PRED_4x4_H, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1},
         {I_PRED_4x4_DC_TOP, I_PRED_4x4_V, -1, -1, -1, -1, -1, -1, -1, -1},
         {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1},
-        {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDR, I_PRED_4x4_VR, I_PRED_4x4_HD, I_PRED_4x4_HU, -1},
+        {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDR, I_PRED_4x4_VR, I_PRED_4x4_HD, I_PRED_4x4_HU, -1, -1, -1},
     }
 };
 
@@ -655,7 +673,7 @@
     int avoid_topright = force_intra && (i&1);
     int idx = i_neighbour & (MB_TOP|MB_LEFT|MB_TOPLEFT);
     idx = (idx == (MB_TOP|MB_LEFT|MB_TOPLEFT)) ? 4 : idx & (MB_TOP|MB_LEFT);
-    return i4x4_mode_available[avoid_topright][idx];
+    return i8x8_mode_available[avoid_topright][idx];
 }
 
 static ALWAYS_INLINE const int8_t *predict_4x4_mode_available( int force_intra, int i_neighbour, int i )
@@ -793,58 +811,60 @@
     int lambda = a->i_lambda;
 
     /*---------------- Try all mode and calculate their score ---------------*/
+    /* Disabled i16x16 for AVC-Intra compat */
+    if( !h->param.i_avcintra_class )
+    {
+        const int8_t *predict_mode = predict_16x16_mode_available( h->mb.i_neighbour_intra );
 
-    /* 16x16 prediction selection */
-    const int8_t *predict_mode = predict_16x16_mode_available( h->mb.i_neighbour_intra );
+        /* Not heavily tuned */
+        static const uint8_t i16x16_thresh_lut[11] = { 2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4 };
+        int i16x16_thresh = a->b_fast_intra ? (i16x16_thresh_lut[h->mb.i_subpel_refine]*i_satd_inter)>>1 : COST_MAX;
 
-    /* Not heavily tuned */
-    static const uint8_t i16x16_thresh_lut[11] = { 2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4 };
-    int i16x16_thresh = a->b_fast_intra ? (i16x16_thresh_lut[h->mb.i_subpel_refine]*i_satd_inter)>>1 : COST_MAX;
-
-    if( !h->mb.b_lossless && predict_mode[3] >= 0 )
-    {
-        h->pixf.intra_mbcmp_x3_16x16( p_src, p_dst, a->i_satd_i16x16_dir );
-        a->i_satd_i16x16_dir[0] += lambda * bs_size_ue(0);
-        a->i_satd_i16x16_dir[1] += lambda * bs_size_ue(1);
-        a->i_satd_i16x16_dir[2] += lambda * bs_size_ue(2);
-        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[0], a->i_predict16x16, 0 );
-        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[1], a->i_predict16x16, 1 );
-        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[2], a->i_predict16x16, 2 );
-
-        /* Plane is expensive, so don't check it unless one of the previous modes was useful. */
-        if( a->i_satd_i16x16 <= i16x16_thresh )
-        {
-            h->predict_16x16[I_PRED_16x16_P]( p_dst );
-            a->i_satd_i16x16_dir[I_PRED_16x16_P] = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE );
-            a->i_satd_i16x16_dir[I_PRED_16x16_P] += lambda * bs_size_ue(3);
-            COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[I_PRED_16x16_P], a->i_predict16x16, 3 );
+        if( !h->mb.b_lossless && predict_mode[3] >= 0 )
+        {
+            h->pixf.intra_mbcmp_x3_16x16( p_src, p_dst, a->i_satd_i16x16_dir );
+            a->i_satd_i16x16_dir[0] += lambda * bs_size_ue(0);
+            a->i_satd_i16x16_dir[1] += lambda * bs_size_ue(1);
+            a->i_satd_i16x16_dir[2] += lambda * bs_size_ue(2);
+            COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[0], a->i_predict16x16, 0 );
+            COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[1], a->i_predict16x16, 1 );
+            COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[2], a->i_predict16x16, 2 );
+
+            /* Plane is expensive, so don't check it unless one of the previous modes was useful. */
+            if( a->i_satd_i16x16 <= i16x16_thresh )
+            {
+                h->predict_16x16[I_PRED_16x16_P]( p_dst );
+                a->i_satd_i16x16_dir[I_PRED_16x16_P] = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE );
+                a->i_satd_i16x16_dir[I_PRED_16x16_P] += lambda * bs_size_ue(3);
+                COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[I_PRED_16x16_P], a->i_predict16x16, 3 );
+            }
         }
-    }
-    else
-    {
-        for( ; *predict_mode >= 0; predict_mode++ )
+        else
         {
-            int i_satd;
-            int i_mode = *predict_mode;
+            for( ; *predict_mode >= 0; predict_mode++ )
+            {
+                int i_satd;
+                int i_mode = *predict_mode;
 
-            if( h->mb.b_lossless )
-                x264_predict_lossless_16x16( h, 0, i_mode );
-            else
-                h->predict_16x16[i_mode]( p_dst );
+                if( h->mb.b_lossless )
+                    x264_predict_lossless_16x16( h, 0, i_mode );
+                else
+                    h->predict_16x16[i_mode]( p_dst );
 
-            i_satd = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE ) +
-                     lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] );
-            COPY2_IF_LT( a->i_satd_i16x16, i_satd, a->i_predict16x16, i_mode );
-            a->i_satd_i16x16_dir[i_mode] = i_satd;
+                i_satd = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE ) +
+                         lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] );
+                COPY2_IF_LT( a->i_satd_i16x16, i_satd, a->i_predict16x16, i_mode );
+                a->i_satd_i16x16_dir[i_mode] = i_satd;
+            }
         }
-    }
 
-    if( h->sh.i_type == SLICE_TYPE_B )
-        /* cavlc mb type prefix */
-        a->i_satd_i16x16 += lambda * i_mb_b_cost_table[I_16x16];
+        if( h->sh.i_type == SLICE_TYPE_B )
+            /* cavlc mb type prefix */
+            a->i_satd_i16x16 += lambda * i_mb_b_cost_table[I_16x16];
 
-    if( a->i_satd_i16x16 > i16x16_thresh )
-        return;
+        if( a->i_satd_i16x16 > i16x16_thresh )
+            return;
+    }
 
     uint16_t *cost_i4x4_mode = (uint16_t*)ALIGN((intptr_t)x264_cost_i4x4_mode,64) + a->i_qp*32 + 8;
     /* 8x8 prediction selection */
@@ -870,7 +890,7 @@
             int i_best = COST_MAX;
             int i_pred_mode = x264_mb_predict_intra4x4_mode( h, 4*idx );
 
-            predict_mode = predict_8x8_mode_available( a->b_avoid_topright, h->mb.i_neighbour8[idx], idx );
+            const int8_t *predict_mode = predict_8x8_mode_available( a->b_avoid_topright, h->mb.i_neighbour8[idx], idx );
             h->predict_8x8_filter( p_dst_by, edge, h->mb.i_neighbour8[idx], ALL_NEIGHBORS );
 
             if( h->pixf.intra_mbcmp_x9_8x8 && predict_mode[8] >= 0 )
@@ -985,7 +1005,7 @@
             int i_best = COST_MAX;
             int i_pred_mode = x264_mb_predict_intra4x4_mode( h, idx );
 
-            predict_mode = predict_4x4_mode_available( a->b_avoid_topright, h->mb.i_neighbour4[idx], idx );
+            const int8_t *predict_mode = predict_4x4_mode_available( a->b_avoid_topright, h->mb.i_neighbour4[idx], idx );
 
             if( (h->mb.i_neighbour4[idx] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )
                 /* emulate missing topright samples */
@@ -2101,7 +2121,7 @@
         int cost00 = h->pixf.mbcmp[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE, pix0, 16 )
                    + ref_costs + l0_mv_cost + l1_mv_cost;
 
-        if( h->mb.b_chroma_me )
+        if( h->mb.b_chroma_me && cost00 < a->i_cost16x16bi )
         {
             ALIGNED_ARRAY_16( pixel, bi, [16*FENC_STRIDE] );
 
​

x264-snapshot-20130723-2245.tar.bz2/encoder/analyse.h -> x264-snapshot-20140321-2245.tar.bz2/encoder/analyse.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * analyse.h: macroblock analysis
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
​

x264-snapshot-20130723-2245.tar.bz2/encoder/cabac.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/cabac.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * cabac.c: cabac bitstream writing
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
​

x264-snapshot-20130723-2245.tar.bz2/encoder/cavlc.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/cavlc.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * cavlc.c: cavlc bitstream writing
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -500,6 +500,9 @@
         && (!(h->mb.i_mb_y & 1) || IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride])) )
     {
         bs_write1( s, MB_INTERLACED );
+#if !RDO_SKIP_BS
+        h->mb.field_decoding_flag = MB_INTERLACED;
+#endif
     }
 
 #if !RDO_SKIP_BS
​

x264-snapshot-20130723-2245.tar.bz2/encoder/encoder.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/encoder.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * encoder.c: top-level encoder functions
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -33,10 +33,6 @@
 #include "macroblock.h"
 #include "me.h"
 
-#if HAVE_VISUALIZE
-#include "common/visualize.h"
-#endif
-
 //#define DEBUG_MB_TYPE
 
 #define bs_write_ue bs_write_ue_big
@@ -82,7 +78,7 @@
 
 static void x264_frame_dump( x264_t *h )
 {
-    FILE *f = fopen( h->param.psz_dump_yuv, "r+b" );
+    FILE *f = x264_fopen( h->param.psz_dump_yuv, "r+b" );
     if( !f )
         return;
 
@@ -403,21 +399,6 @@
 {
     if( h->param.i_sync_lookahead )
         x264_lower_thread_priority( 10 );
-
-#if HAVE_MMX
-    /* Misalign mask has to be set separately for each thread. */
-    if( h->param.cpu&X264_CPU_SSE_MISALIGN )
-        x264_cpu_mask_misalign_sse();
-#endif
-}
-
-static void x264_lookahead_thread_init( x264_t *h )
-{
-#if HAVE_MMX
-    /* Misalign mask has to be set separately for each thread. */
-    if( h->param.cpu&X264_CPU_SSE_MISALIGN )
-        x264_cpu_mask_misalign_sse();
-#endif
 }
 #endif
 
@@ -486,7 +467,7 @@
         x264_log( h, X264_LOG_ERROR, "not compiled with 4:2:0 support\n" );
         return -1;
     }
-    else if( CHROMA_FORMAT != CHROMA_422 && i_csp >= X264_CSP_I422 && i_csp <= X264_CSP_NV16 )
+    else if( CHROMA_FORMAT != CHROMA_422 && i_csp >= X264_CSP_I422 && i_csp <= X264_CSP_V210 )
     {
         x264_log( h, X264_LOG_ERROR, "not compiled with 4:2:2 support\n" );
         return -1;
@@ -532,6 +513,12 @@
         return -1;
     }
 
+    if( h->param.vui.i_sar_width <= 0 || h->param.vui.i_sar_height <= 0 )
+    {
+        h->param.vui.i_sar_width = 0;
+        h->param.vui.i_sar_height = 0;
+    }
+
     if( h->param.i_threads == X264_THREADS_AUTO )
         h->param.i_threads = x264_cpu_num_processors() * (h->param.b_sliced_threads?2:3)/2;
     int max_sliced_threads = X264_MAX( 1, (h->param.i_height+15)/16 / 4 );
@@ -583,6 +570,8 @@
     {
         h->param.b_intra_refresh = 0;
         h->param.analyse.i_weighted_pred = 0;
+        h->param.i_frame_reference = 1;
+        h->param.i_dpb_size = 1;
     }
 
     h->param.i_frame_packing = x264_clip3( h->param.i_frame_packing, -1, 5 );
@@ -616,6 +605,188 @@
         x264_log( h, X264_LOG_ERROR, "no ratecontrol method specified\n" );
         return -1;
     }
+
+    if( PARAM_INTERLACED )
+        h->param.b_pic_struct = 1;
+
+    if( h->param.i_avcintra_class )
+    {
+        if( BIT_DEPTH != 10 )
+        {
+            x264_log( h, X264_LOG_ERROR, "%2d-bit AVC-Intra is not widely compatible\n", BIT_DEPTH );
+            x264_log( h, X264_LOG_ERROR, "10-bit x264 is required to encode AVC-Intra\n" );
+            return -1;
+        }
+
+        int type = h->param.i_avcintra_class == 200 ? 2 :
+                   h->param.i_avcintra_class == 100 ? 1 :
+                   h->param.i_avcintra_class == 50 ? 0 : -1;
+        if( type < 0 )
+        {
+            x264_log( h, X264_LOG_ERROR, "Invalid AVC-Intra class\n" );
+            return -1;
+        }
+
+        /* [50/100/200][res][fps] */
+        static const struct
+        {
+            uint16_t fps_num;
+            uint16_t fps_den;
+            uint8_t interlaced;
+            uint16_t frame_size;
+            const uint8_t *cqm_4ic;
+            const uint8_t *cqm_8iy;
+        } avcintra_lut[3][2][7] =
+        {
+            {{{ 60000, 1001, 0,  912, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
+              {    50,    1, 0, 1100, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
+              { 30000, 1001, 0,  912, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
+              {    25,    1, 0, 1100, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
+              { 24000, 1001, 0,  912, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy }},
+             {{ 30000, 1001, 1, 1820, x264_cqm_avci50_4ic, x264_cqm_avci50_1080i_8iy },
+              {    25,    1, 1, 2196, x264_cqm_avci50_4ic, x264_cqm_avci50_1080i_8iy },
+              { 60000, 1001, 0, 1820, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
+              { 30000, 1001, 0, 1820, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
+              {    50,    1, 0, 2196, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
+              {    25,    1, 0, 2196, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
+              { 24000, 1001, 0, 1820, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy }}},
+            {{{ 60000, 1001, 0, 1848, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy },
+              {    50,    1, 0, 2224, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy },
+              { 30000, 1001, 0, 1848, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy },
+              {    25,    1, 0, 2224, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy },
+              { 24000, 1001, 0, 1848, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy }},
+             {{ 30000, 1001, 1, 3692, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080i_8iy },
+              {    25,    1, 1, 4444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080i_8iy },
+              { 60000, 1001, 0, 3692, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
+              { 30000, 1001, 0, 3692, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
+              {    50,    1, 0, 4444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
+              {    25,    1, 0, 4444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
+              { 24000, 1001, 0, 3692, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy }}},
+            {{{ 60000, 1001, 0, 3724, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy },
+              {    50,    1, 0, 4472, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy }},
+             {{ 30000, 1001, 1, 7444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080i_8iy },
+              {    25,    1, 1, 8940, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080i_8iy },
+              { 60000, 1001, 0, 7444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
+              { 30000, 1001, 0, 7444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
+              {    50,    1, 0, 8940, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
+              {    25,    1, 0, 8940, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
+              { 24000, 1001, 0, 7444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy }}}
+        };
+
+        int res = -1;
+        if( i_csp >= X264_CSP_I420 && i_csp < X264_CSP_I422 && !type )
+        {
+            if(      h->param.i_width == 1440 && h->param.i_height == 1080 ) res =  1;
+            else if( h->param.i_width ==  960 && h->param.i_height ==  720 ) res =  0;
+        }
+        else if( i_csp >= X264_CSP_I422 && i_csp < X264_CSP_I444 && type )
+        {
+            if(      h->param.i_width == 1920 && h->param.i_height == 1080 ) res =  1;
+            else if( h->param.i_width == 1280 && h->param.i_height ==  720 ) res =  0;
+        }
+        else
+        {
+            x264_log( h, X264_LOG_ERROR, "Invalid colorspace for AVC-Intra %d\n", h->param.i_avcintra_class );
+            return -1;
+        }
+
+        if( res < 0 )
+        {
+            x264_log( h, X264_LOG_ERROR, "Resolution %dx%d invalid for AVC-Intra %d\n",
+                      h->param.i_width, h->param.i_height, h->param.i_avcintra_class );
+            return -1;
+        }
+
+        if( h->param.nalu_process )
+        {
+            x264_log( h, X264_LOG_ERROR, "nalu_process is not supported in AVC-Intra mode\n" );
+            return -1;
+        }
+
+        if( !h->param.b_repeat_headers )
+        {
+            x264_log( h, X264_LOG_ERROR, "Separate headers not supported in AVC-Intra mode\n" );
+            return -1;
+        }
+
+        int i;
+        uint32_t fps_num = h->param.i_fps_num, fps_den = h->param.i_fps_den;
+        x264_reduce_fraction( &fps_num, &fps_den );
+        for( i = 0; i < 7; i++ )
+        {
+            if( avcintra_lut[type][res][i].fps_num == fps_num &&
+                avcintra_lut[type][res][i].fps_den == fps_den &&
+                avcintra_lut[type][res][i].interlaced == PARAM_INTERLACED )
+            {
+                break;
+            }
+        }
+        if( i == 7 )
+        {
+            x264_log( h, X264_LOG_ERROR, "FPS %d/%d%c not compatible with AVC-Intra\n",
+                      h->param.i_fps_num, h->param.i_fps_den, PARAM_INTERLACED ? 'i' : 'p' );
+            return -1;
+        }
+
+        h->param.i_keyint_max = 1;
+        h->param.b_intra_refresh = 0;
+        h->param.analyse.i_weighted_pred = 0;
+        h->param.i_frame_reference = 1;
+        h->param.i_dpb_size = 1;
+
+        h->param.b_bluray_compat = 0;
+        h->param.b_vfr_input = 0;
+        h->param.b_aud = 1;
+        h->param.vui.i_chroma_loc = 0;
+        h->param.i_nal_hrd = X264_NAL_HRD_NONE;
+        h->param.b_deblocking_filter = 0;
+        h->param.b_stitchable = 1;
+        h->param.b_pic_struct = 0;
+        h->param.analyse.b_transform_8x8 = 1;
+        h->param.analyse.intra = X264_ANALYSE_I8x8;
+        h->param.analyse.i_chroma_qp_offset = res && type ? 3 : 4;
+        h->param.b_cabac = !type;
+        h->param.rc.i_vbv_buffer_size = avcintra_lut[type][res][i].frame_size;
+        h->param.rc.i_vbv_max_bitrate =
+        h->param.rc.i_bitrate = h->param.rc.i_vbv_buffer_size * fps_num / fps_den;
+        h->param.rc.i_rc_method = X264_RC_ABR;
+        h->param.rc.f_vbv_buffer_init = 1.0;
+        h->param.rc.b_filler = 1;
+        h->param.i_cqm_preset = X264_CQM_CUSTOM;
+        memcpy( h->param.cqm_4iy, x264_cqm_jvt4i, sizeof(h->param.cqm_4iy) );
+        memcpy( h->param.cqm_4ic, avcintra_lut[type][res][i].cqm_4ic, sizeof(h->param.cqm_4ic) );
+        memcpy( h->param.cqm_8iy, avcintra_lut[type][res][i].cqm_8iy, sizeof(h->param.cqm_8iy) );
+
+        /* Need exactly 10 slices of equal MB count... why?  $deity knows... */
+        h->param.i_slice_max_mbs = ((h->param.i_width + 15) / 16) * ((h->param.i_height + 15) / 16) / 10;
+        h->param.i_slice_max_size = 0;
+        /* The slice structure only allows a maximum of 2 threads for 1080i/p
+         * and 1 or 5 threads for 720p */
+        if( h->param.b_sliced_threads )
+        {
+            if( res )
+                h->param.i_threads = X264_MIN( 2, h->param.i_threads );
+            else
+            {
+                h->param.i_threads = X264_MIN( 5, h->param.i_threads );
+                if( h->param.i_threads < 5 )
+                    h->param.i_threads = 1;
+            }
+        }
+
+        if( type )
+            h->param.vui.i_sar_width = h->param.vui.i_sar_height = 1;
+        else
+        {
+            h->param.vui.i_sar_width  = 4;
+            h->param.vui.i_sar_height = 3;
+        }
+
+        /* Official encoder doesn't appear to go under 13
+         * and Avid cannot handle negative QPs */
+        h->param.rc.i_qp_min = X264_MAX( h->param.rc.i_qp_min, QP_BD_OFFSET + 1 );
+    }
+
     h->param.rc.f_rf_constant = x264_clip3f( h->param.rc.f_rf_constant, -QP_BD_OFFSET, 51 );
     h->param.rc.f_rf_constant_max = x264_clip3f( h->param.rc.f_rf_constant_max, -QP_BD_OFFSET, 51 );
     h->param.rc.i_qp_constant = x264_clip3( h->param.rc.i_qp_constant, 0, QP_MAX );
@@ -917,10 +1088,10 @@
         h->param.analyse.i_chroma_qp_offset += 6;
     /* Psy RDO increases overall quantizers to improve the quality of luma--this indirectly hurts chroma quality */
     /* so we lower the chroma QP offset to compensate */
-    if( b_open && h->mb.i_psy_rd )
+    if( b_open && h->mb.i_psy_rd && !h->param.i_avcintra_class )
         h->param.analyse.i_chroma_qp_offset -= h->param.analyse.f_psy_rd < 0.25 ? 1 : 2;
     /* Psy trellis has a similar effect. */
-    if( b_open && h->mb.i_psy_trellis )
+    if( b_open && h->mb.i_psy_trellis && !h->param.i_avcintra_class )
         h->param.analyse.i_chroma_qp_offset -= h->param.analyse.f_psy_trellis < 0.25 ? 1 : 2;
     h->param.analyse.i_chroma_qp_offset = x264_clip3(h->param.analyse.i_chroma_qp_offset, -12, 12);
     /* MB-tree requires AQ to be on, even if the strength is zero. */
@@ -1041,9 +1212,6 @@
 
     h->param.i_sps_id &= 31;
 
-    if( PARAM_INTERLACED )
-        h->param.b_pic_struct = 1;
-
     h->param.i_nal_hrd = x264_clip3( h->param.i_nal_hrd, X264_NAL_HRD_NONE, X264_NAL_HRD_CBR );
 
     if( h->param.i_nal_hrd && !h->param.rc.i_vbv_buffer_size )
@@ -1059,6 +1227,9 @@
         h->param.i_nal_hrd = X264_NAL_HRD_VBR;
     }
 
+    if( h->param.i_nal_hrd == X264_NAL_HRD_CBR )
+        h->param.rc.b_filler = 1;
+
     /* ensure the booleans are 0 or 1 so they can be used in math */
 #define BOOLIFY(x) h->param.x = !!h->param.x
     BOOLIFY( b_cabac );
@@ -1068,7 +1239,6 @@
     BOOLIFY( b_sliced_threads );
     BOOLIFY( b_interlaced );
     BOOLIFY( b_intra_refresh );
-    BOOLIFY( b_visualize );
     BOOLIFY( b_aud );
     BOOLIFY( b_repeat_headers );
     BOOLIFY( b_annexb );
@@ -1094,6 +1264,7 @@
     BOOLIFY( rc.b_stat_write );
     BOOLIFY( rc.b_stat_read );
     BOOLIFY( rc.b_mb_tree );
+    BOOLIFY( rc.b_filler );
 #undef BOOLIFY
 
     return 0;
@@ -1187,7 +1358,6 @@
                 h->param.vui.i_sar_width = i_w;
                 h->param.vui.i_sar_height = i_h;
             }
-            x264_sps_init( h->sps, h->param.i_sps_id, &h->param );
         }
     }
 }
@@ -1241,11 +1411,11 @@
         goto fail;
     }
 
+    x264_set_aspect_ratio( h, &h->param, 1 );
+
     x264_sps_init( h->sps, h->param.i_sps_id, &h->param );
     x264_pps_init( h->pps, h->param.i_sps_id, &h->param, h->sps );
 
-    x264_set_aspect_ratio( h, &h->param, 1 );
-
     x264_validate_levels( h, 1 );
 
     h->chroma_qp_table = i_chroma_qp_table + 12 + h->pps->i_chroma_qp_index_offset;
@@ -1396,11 +1566,13 @@
     h->nal_buffer_size = h->out.i_bitstream * 3/2 + 4 + 64; /* +4 for startcode, +64 for nal_escape assembly padding */
     CHECKED_MALLOC( h->nal_buffer, h->nal_buffer_size );
 
+    CHECKED_MALLOC( h->reconfig_h, sizeof(x264_t) );
+
     if( h->param.i_threads > 1 &&
         x264_threadpool_init( &h->threadpool, h->param.i_threads, (void*)x264_encoder_thread_init, h ) )
         goto fail;
     if( h->param.i_lookahead_threads > 1 &&
-        x264_threadpool_init( &h->lookaheadpool, h->param.i_lookahead_threads, (void*)x264_lookahead_thread_init, h ) )
+        x264_threadpool_init( &h->lookaheadpool, h->param.i_lookahead_threads, NULL, NULL ) )
         goto fail;
 
 #if HAVE_OPENCL
@@ -1424,6 +1596,7 @@
             CHECKED_MALLOC( h->lookahead_thread[i], sizeof(x264_t) );
             *h->lookahead_thread[i] = *h;
         }
+    *h->reconfig_h = *h;
 
     for( int i = 0; i < h->param.i_threads; i++ )
     {
@@ -1479,7 +1652,7 @@
     if( h->param.psz_dump_yuv )
     {
         /* create or truncate the reconstructed video file */
-        FILE *f = fopen( h->param.psz_dump_yuv, "w" );
+        FILE *f = x264_fopen( h->param.psz_dump_yuv, "w" );
         if( !f )
         {
             x264_log( h, X264_LOG_ERROR, "dump_yuv: can't write to %s\n", h->param.psz_dump_yuv );
@@ -1523,18 +1696,10 @@
     return NULL;
 }
 
-/****************************************************************************
- * x264_encoder_reconfig:
- ****************************************************************************/
-int x264_encoder_reconfig( x264_t *h, x264_param_t *param )
+/****************************************************************************/
+static int x264_encoder_try_reconfig( x264_t *h, x264_param_t *param, int *rc_reconfig )
 {
-    /* If the previous frame isn't done encoding, reconfiguring is probably dangerous. */
-    if( h->param.b_sliced_threads )
-        if( x264_threadpool_wait_all( h ) < 0 )
-            return -1;
-
-    int rc_reconfig = 0;
-    h = h->thread[h->thread[0]->i_thread_phase];
+    *rc_reconfig = 0;
     x264_set_aspect_ratio( h, param, 0 );
 #define COPY(var) h->param.var = param->var
     COPY( i_frame_reference ); // but never uses more refs than initially specified
@@ -1583,22 +1748,30 @@
     if( h->param.rc.i_vbv_max_bitrate > 0 && h->param.rc.i_vbv_buffer_size > 0 &&
           param->rc.i_vbv_max_bitrate > 0 &&   param->rc.i_vbv_buffer_size > 0 )
     {
-        rc_reconfig |= h->param.rc.i_vbv_max_bitrate != param->rc.i_vbv_max_bitrate;
-        rc_reconfig |= h->param.rc.i_vbv_buffer_size != param->rc.i_vbv_buffer_size;
-        rc_reconfig |= h->param.rc.i_bitrate != param->rc.i_bitrate;
+        *rc_reconfig |= h->param.rc.i_vbv_max_bitrate != param->rc.i_vbv_max_bitrate;
+        *rc_reconfig |= h->param.rc.i_vbv_buffer_size != param->rc.i_vbv_buffer_size;
+        *rc_reconfig |= h->param.rc.i_bitrate != param->rc.i_bitrate;
         COPY( rc.i_vbv_max_bitrate );
         COPY( rc.i_vbv_buffer_size );
         COPY( rc.i_bitrate );
     }
-    rc_reconfig |= h->param.rc.f_rf_constant != param->rc.f_rf_constant;
-    rc_reconfig |= h->param.rc.f_rf_constant_max != param->rc.f_rf_constant_max;
+    *rc_reconfig |= h->param.rc.f_rf_constant != param->rc.f_rf_constant;
+    *rc_reconfig |= h->param.rc.f_rf_constant_max != param->rc.f_rf_constant_max;
     COPY( rc.f_rf_constant );
     COPY( rc.f_rf_constant_max );
 #undef COPY
 
-    mbcmp_init( h );
+    return x264_validate_parameters( h, 0 );
+}
 
-    int ret = x264_validate_parameters( h, 0 );
+int x264_encoder_reconfig_apply( x264_t *h, x264_param_t *param )
+{
+    int rc_reconfig;
+    int ret = x264_encoder_try_reconfig( h, param, &rc_reconfig );
+
+    mbcmp_init( h );
+    if( !ret )
+        x264_sps_init( h->sps, h->param.i_sps_id, &h->param );
 
     /* Supported reconfiguration options (1-pass only):
      * vbv-maxrate
@@ -1612,6 +1785,25 @@
 }
 
 /****************************************************************************
+ * x264_encoder_reconfig:
+ ****************************************************************************/
+int x264_encoder_reconfig( x264_t *h, x264_param_t *param )
+{
+    h = h->thread[h->thread[0]->i_thread_phase];
+    x264_param_t param_save = h->reconfig_h->param;
+    h->reconfig_h->param = h->param;
+
+    int rc_reconfig;
+    int ret = x264_encoder_try_reconfig( h->reconfig_h, param, &rc_reconfig );
+    if( !ret )
+        h->reconfig = 1;
+    else
+        h->reconfig_h->param = param_save;
+
+    return ret;
+}
+
+/****************************************************************************
  * x264_encoder_parameters:
  ****************************************************************************/
 void x264_encoder_parameters( x264_t *h, x264_param_t *param )
@@ -1630,6 +1822,7 @@
 
     nal->i_payload= 0;
     nal->p_payload= &h->out.p_bitstream[bs_pos( &h->out.bs ) / 8];
+    nal->i_padding= 0;
 }
 
 /* if number of allocated nals is not enough, re-allocate a larger one. */
@@ -1663,6 +1856,30 @@
     return x264_nal_check_buffer( h );
 }
 
+static int x264_check_encapsulated_buffer( x264_t *h, x264_t *h0, int start,
+                                           int previous_nal_size, int necessary_size )
+{
+    if( h0->nal_buffer_size < necessary_size )
+    {
+        necessary_size *= 2;
+        uint8_t *buf = x264_malloc( necessary_size );
+        if( !buf )
+            return -1;
+        if( previous_nal_size )
+            memcpy( buf, h0->nal_buffer, previous_nal_size );
+
+        intptr_t delta = buf - h0->nal_buffer;
+        for( int i = 0; i < start; i++ )
+            h->out.nal[i].p_payload += delta;
+
+        x264_free( h0->nal_buffer );
+        h0->nal_buffer = buf;
+        h0->nal_buffer_size = necessary_size;
+    }
+
+    return 0;
+}
+
 static int x264_encoder_encapsulate_nals( x264_t *h, int start )
 {
     x264_t *h0 = h->thread[0];
@@ -1683,31 +1900,31 @@
 
     /* Worst-case NAL unit escaping: reallocate the buffer if it's too small. */
     int necessary_size = previous_nal_size + nal_size * 3/2 + h->out.i_nal * 4 + 4 + 64;
-    if( h0->nal_buffer_size < necessary_size )
-    {
-        necessary_size *= 2;
-        uint8_t *buf = x264_malloc( necessary_size );
-        if( !buf )
-            return -1;
-        if( previous_nal_size )
-            memcpy( buf, h0->nal_buffer, previous_nal_size );
-
-        intptr_t delta = buf - h0->nal_buffer;
-        for( int i = 0; i < start; i++ )
-            h->out.nal[i].p_payload += delta;
-
-        x264_free( h0->nal_buffer );
-        h0->nal_buffer = buf;
-        h0->nal_buffer_size = necessary_size;
-    }
+    for( int i = start; i < h->out.i_nal; i++ )
+        necessary_size += h->out.nal[i].i_padding;
+    if( x264_check_encapsulated_buffer( h, h0, start, previous_nal_size, necessary_size ) )
+        return -1;
 
     uint8_t *nal_buffer = h0->nal_buffer + previous_nal_size;
 
     for( int i = start; i < h->out.i_nal; i++ )
     {
-        h->out.nal[i].b_long_startcode = !i || h->out.nal[i].i_type == NAL_SPS || h->out.nal[i].i_type == NAL_PPS;
+        int old_payload_len = h->out.nal[i].i_payload;
+        h->out.nal[i].b_long_startcode = !i || h->out.nal[i].i_type == NAL_SPS || h->out.nal[i].i_type == NAL_PPS ||
+                                         h->param.i_avcintra_class;
         x264_nal_encode( h, nal_buffer, &h->out.nal[i] );
         nal_buffer += h->out.nal[i].i_payload;
+        if( h->param.i_avcintra_class )
+        {
+            h->out.nal[i].i_padding -= h->out.nal[i].i_payload - (old_payload_len + NALU_OVERHEAD);
+            if( h->out.nal[i].i_padding > 0 )
+            {
+                memset( nal_buffer, 0, h->out.nal[i].i_padding );
+                nal_buffer += h->out.nal[i].i_padding;
+                h->out.nal[i].i_payload += h->out.nal[i].i_padding;
+            }
+            h->out.nal[i].i_padding = X264_MAX( h->out.nal[i].i_padding, 0 );
+        }
     }
 
     x264_emms();
@@ -2340,7 +2557,7 @@
     }
 }
 
-static int x264_slice_write( x264_t *h )
+static intptr_t x264_slice_write( x264_t *h )
 {
     int i_skip;
     int mb_xy, i_mb_x, i_mb_y;
@@ -2350,7 +2567,8 @@
      * other inaccuracies. */
     int overhead_guess = (NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal)) + 1 + h->param.b_cabac + 5;
     int slice_max_size = h->param.i_slice_max_size > 0 ? (h->param.i_slice_max_size-overhead_guess)*8 : 0;
-    int back_up_bitstream = slice_max_size || (!h->param.b_cabac && h->sps->i_profile_idc < PROFILE_HIGH);
+    int back_up_bitstream_cavlc = !h->param.b_cabac && h->sps->i_profile_idc < PROFILE_HIGH;
+    int back_up_bitstream = slice_max_size || back_up_bitstream_cavlc;
     int starting_bits = bs_pos(&h->out.bs);
     int b_deblock = h->sh.i_disable_deblocking_filter_idc != 1;
     int b_hpel = h->fdec->b_kept_as_ref;
@@ -2358,9 +2576,10 @@
     int thread_last_mb = h->i_threadslice_end * h->mb.i_mb_width - 1;
     uint8_t *last_emu_check;
 #define BS_BAK_SLICE_MAX_SIZE 0
-#define BS_BAK_SLICE_MIN_MBS  1
-#define BS_BAK_ROW_VBV        2
-    x264_bs_bak_t bs_bak[3];
+#define BS_BAK_CAVLC_OVERFLOW 1
+#define BS_BAK_SLICE_MIN_MBS  2
+#define BS_BAK_ROW_VBV        3
+    x264_bs_bak_t bs_bak[4];
     b_deblock &= b_hpel || h->param.b_full_recon || h->param.psz_dump_yuv;
     bs_realign( &h->out.bs );
 
@@ -2413,11 +2632,16 @@
                 x264_fdec_filter_row( h, i_mb_y, 0 );
         }
 
-        if( !(i_mb_y & SLICE_MBAFF) && back_up_bitstream )
+        if( back_up_bitstream )
         {
-            x264_bitstream_backup( h, &bs_bak[BS_BAK_SLICE_MAX_SIZE], i_skip, 0 );
-            if( slice_max_size && (thread_last_mb+1-mb_xy) == h->param.i_slice_min_mbs )
-                x264_bitstream_backup( h, &bs_bak[BS_BAK_SLICE_MIN_MBS], i_skip, 0 );
+            if( back_up_bitstream_cavlc )
+                x264_bitstream_backup( h, &bs_bak[BS_BAK_CAVLC_OVERFLOW], i_skip, 0 );
+            if( slice_max_size && !(i_mb_y & SLICE_MBAFF) )
+            {
+                x264_bitstream_backup( h, &bs_bak[BS_BAK_SLICE_MAX_SIZE], i_skip, 0 );
+                if( (thread_last_mb+1-mb_xy) == h->param.i_slice_min_mbs )
+                    x264_bitstream_backup( h, &bs_bak[BS_BAK_SLICE_MIN_MBS], i_skip, 0 );
+            }
         }
 
         if( PARAM_INTERLACED )
@@ -2481,7 +2705,7 @@
                     h->mb.i_skip_intra = 0;
                     h->mb.b_skip_mc = 0;
                     h->mb.b_overflow = 0;
-                    x264_bitstream_restore( h, &bs_bak[BS_BAK_SLICE_MAX_SIZE], &i_skip, 0 );
+                    x264_bitstream_restore( h, &bs_bak[BS_BAK_CAVLC_OVERFLOW], &i_skip, 0 );
                     goto reencode;
                 }
             }
@@ -2552,11 +2776,6 @@
 cont:
         h->mb.b_reencode_mb = 0;
 
-#if HAVE_VISUALIZE
-        if( h->param.b_visualize )
-            x264_visualize_mb( h );
-#endif
-
         /* save cache */
         x264_macroblock_cache_save( h );
 
@@ -2732,10 +2951,11 @@
     x264_frame_push_unused( src, dst->fdec );
 
     // copy everything except the per-thread pointers and the constants.
-    memcpy( &dst->i_frame, &src->i_frame, offsetof(x264_t, mb.type) - offsetof(x264_t, i_frame) );
+    memcpy( &dst->i_frame, &src->i_frame, offsetof(x264_t, mb.base) - offsetof(x264_t, i_frame) );
     dst->param = src->param;
     dst->stat = src->stat;
     dst->pixf = src->pixf;
+    dst->reconfig = src->reconfig;
 }
 
 static void x264_thread_sync_stat( x264_t *dst, x264_t *src )
@@ -2750,12 +2970,6 @@
     int i_slice_num = 0;
     int last_thread_mb = h->sh.i_last_mb;
 
-#if HAVE_VISUALIZE
-    if( h->param.b_visualize )
-        if( x264_visualize_init( h ) )
-            goto fail;
-#endif
-
     /* init stats */
     memset( &h->stat.frame, 0, sizeof(h->stat.frame) );
     h->mb.b_reencode_mb = 0;
@@ -2801,14 +3015,6 @@
             h->sh.i_first_mb -= h->mb.i_mb_stride;
     }
 
-#if HAVE_VISUALIZE
-    if( h->param.b_visualize )
-    {
-        x264_visualize_show( h );
-        x264_visualize_close( h );
-    }
-#endif
-
     return (void *)0;
 
 fail:
@@ -2949,10 +3155,6 @@
         thread_current =
         thread_oldest  = h;
     }
-#if HAVE_MMX
-    if( h->param.cpu&X264_CPU_SSE_MISALIGN )
-        x264_cpu_mask_misalign_sse();
-#endif
     h->i_cpb_delay_pir_offset = h->i_cpb_delay_pir_offset_next;
 
     /* no data out */
@@ -3058,9 +3260,14 @@
 
     if( h->i_frame == h->i_thread_frames - 1 )
         h->i_reordered_pts_delay = h->fenc->i_reordered_pts;
+    if( h->reconfig )
+    {
+        x264_encoder_reconfig_apply( h, &h->reconfig_h->param );
+        h->reconfig = 0;
+    }
     if( h->fenc->param )
     {
-        x264_encoder_reconfig( h, h->fenc->param );
+        x264_encoder_reconfig_apply( h, h->fenc->param );
         if( h->fenc->param->param_free )
         {
             h->fenc->param->param_free( h->fenc->param );
@@ -3207,7 +3414,7 @@
         bs_rbsp_trailing( &h->out.bs );
         if( x264_nal_end( h ) )
             return -1;
-        overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal-1);
+        overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD;
     }
 
     h->i_nal_type = i_nal_type;
@@ -3259,14 +3466,19 @@
             x264_sps_write( &h->out.bs, h->sps );
             if( x264_nal_end( h ) )
                 return -1;
-            overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD;
+            /* Pad AUD/SPS to 256 bytes like Panasonic */
+            if( h->param.i_avcintra_class )
+                h->out.nal[h->out.i_nal-1].i_padding = 256 - bs_pos( &h->out.bs ) / 8 - 2*NALU_OVERHEAD;
+            overhead += h->out.nal[h->out.i_nal-1].i_payload + h->out.nal[h->out.i_nal-1].i_padding + NALU_OVERHEAD;
 
             /* generate picture parameters */
             x264_nal_start( h, NAL_PPS, NAL_PRIORITY_HIGHEST );
             x264_pps_write( &h->out.bs, h->sps, h->pps );
             if( x264_nal_end( h ) )
                 return -1;
-            overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD;
+            if( h->param.i_avcintra_class )
+                h->out.nal[h->out.i_nal-1].i_padding = 256 - h->out.nal[h->out.i_nal-1].i_payload - NALU_OVERHEAD;
+            overhead += h->out.nal[h->out.i_nal-1].i_payload + h->out.nal[h->out.i_nal-1].i_padding + NALU_OVERHEAD;
         }
 
         /* when frame threading is used, buffering period sei is written in x264_encoder_frame_end */
@@ -3277,7 +3489,7 @@
             x264_sei_buffering_period_write( h, &h->out.bs );
             if( x264_nal_end( h ) )
                return -1;
-            overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD;
+            overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
         }
     }
 
@@ -3289,7 +3501,7 @@
                         h->fenc->extra_sei.payloads[i].payload_type );
         if( x264_nal_end( h ) )
             return -1;
-        overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal-1);
+        overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
         if( h->fenc->extra_sei.sei_free )
         {
             h->fenc->extra_sei.sei_free( h->fenc->extra_sei.payloads[i].payload );
@@ -3306,7 +3518,8 @@
 
     if( h->fenc->b_keyframe )
     {
-        if( h->param.b_repeat_headers && h->fenc->i_frame == 0 )
+        /* Avid's decoder strictly wants two SEIs for AVC-Intra so we can't insert the x264 SEI */
+        if( h->param.b_repeat_headers && h->fenc->i_frame == 0 && !h->param.i_avcintra_class )
         {
             /* identify ourself */
             x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
@@ -3314,7 +3527,7 @@
                 return -1;
             if( x264_nal_end( h ) )
                 return -1;
-            overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal-1);
+            overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
         }
 
         if( h->fenc->i_type != X264_TYPE_IDR )
@@ -3324,16 +3537,16 @@
             x264_sei_recovery_point_write( h, &h->out.bs, time_to_recovery );
             if( x264_nal_end( h ) )
                 return -1;
-            overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal-1);
+            overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
         }
 
-        if ( h->param.i_frame_packing >= 0 )
+        if( h->param.i_frame_packing >= 0 )
         {
             x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
             x264_sei_frame_packing_write( h, &h->out.bs );
             if( x264_nal_end( h ) )
                 return -1;
-            overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal-1);
+            overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
         }
     }
 
@@ -3344,7 +3557,7 @@
         x264_sei_pic_timing_write( h, &h->out.bs );
         if( x264_nal_end( h ) )
             return -1;
-        overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal-1);
+        overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
     }
 
     /* As required by Blu-ray. */
@@ -3355,12 +3568,54 @@
         x264_sei_dec_ref_pic_marking_write( h, &h->out.bs );
         if( x264_nal_end( h ) )
             return -1;
-        overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal-1);
+        overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
     }
 
     if( h->fenc->b_keyframe && h->param.b_intra_refresh )
         h->i_cpb_delay_pir_offset_next = h->fenc->i_cpb_delay;
 
+    /* Filler space: 10 or 18 SEIs' worth of space, depending on resolution */
+    if( h->param.i_avcintra_class )
+    {
+        /* Write an empty filler NAL to mimic the AUD in the P2 format */
+        x264_nal_start( h, NAL_FILLER, NAL_PRIORITY_DISPOSABLE );
+        x264_filler_write( h, &h->out.bs, 0 );
+        if( x264_nal_end( h ) )
+            return -1;
+        overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD;
+
+        /* All lengths are magic lengths that decoders expect to see */
+        /* "UMID" SEI */
+        x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
+        if( x264_sei_avcintra_umid_write( h, &h->out.bs ) < 0 )
+            return -1;
+        if( x264_nal_end( h ) )
+            return -1;
+        overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
+
+        int unpadded_len;
+        int total_len;
+        if( h->param.i_height == 1080 )
+        {
+            unpadded_len = 5780;
+            total_len = 17*512;
+        }
+        else
+        {
+            unpadded_len = 2900;
+            total_len = 9*512;
+        }
+        /* "VANC" SEI */
+        x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
+        if( x264_sei_avcintra_vanc_write( h, &h->out.bs, unpadded_len ) < 0 )
+            return -1;
+        if( x264_nal_end( h ) )
+            return -1;
+
+        h->out.nal[h->out.i_nal-1].i_padding = total_len - h->out.nal[h->out.i_nal-1].i_payload - SEI_OVERHEAD;
+        overhead += h->out.nal[h->out.i_nal-1].i_payload + h->out.nal[h->out.i_nal-1].i_padding + SEI_OVERHEAD;
+    }
+
     /* Init the rate control */
     /* FIXME: Include slice header bit cost. */
     x264_ratecontrol_start( h, h->fenc->i_qpplus1, overhead*8 );
@@ -3490,30 +3745,46 @@
     pic_out->hrd_timing = h->fenc->hrd_timing;
     pic_out->prop.f_crf_avg = h->fdec->f_crf_avg;
 
-    while( filler > 0 )
+    /* Filler in AVC-Intra mode is written as zero bytes to the last slice
+     * We don't know the size of the last slice until encapsulation so we add filler to the encapsulated NAL */
+    if( h->param.i_avcintra_class )
+    {
+        x264_t *h0 = h->thread[0];
+        int ret = x264_check_encapsulated_buffer( h, h0, h->out.i_nal, frame_size, frame_size + filler );
+        if( ret < 0 )
+            return -1;
+        memset( h->out.nal[0].p_payload + frame_size, 0, filler );
+        h->out.nal[h->out.i_nal-1].i_payload += filler;
+        h->out.nal[h->out.i_nal-1].i_padding = filler;
+        frame_size += filler;
+    }
+    else
     {
-        int f, overhead;
-        overhead = (FILLER_OVERHEAD - h->param.b_annexb);
-        if( h->param.i_slice_max_size && filler > h->param.i_slice_max_size )
-        {
-            int next_size = filler - h->param.i_slice_max_size;
-            int overflow = X264_MAX( overhead - next_size, 0 );
-            f = h->param.i_slice_max_size - overhead - overflow;
-        }
-        else
-            f = X264_MAX( 0, filler - overhead );
+        while( filler > 0 )
+        {
+            int f, overhead;
+            overhead = (FILLER_OVERHEAD - h->param.b_annexb);
+            if( h->param.i_slice_max_size && filler > h->param.i_slice_max_size )
+            {
+                int next_size = filler - h->param.i_slice_max_size;
+                int overflow = X264_MAX( overhead - next_size, 0 );
+                f = h->param.i_slice_max_size - overhead - overflow;
+            }
+            else
+                f = X264_MAX( 0, filler - overhead );
 
-        if( x264_bitstream_check_buffer_filler( h, f ) )
-            return -1;
-        x264_nal_start( h, NAL_FILLER, NAL_PRIORITY_DISPOSABLE );
-        x264_filler_write( h, &h->out.bs, f );
-        if( x264_nal_end( h ) )
-            return -1;
-        int total_size = x264_encoder_encapsulate_nals( h, h->out.i_nal-1 );
-        if( total_size < 0 )
-            return -1;
-        frame_size += total_size;
-        filler -= total_size;
+            if( x264_bitstream_check_buffer_filler( h, f ) )
+                return -1;
+            x264_nal_start( h, NAL_FILLER, NAL_PRIORITY_DISPOSABLE );
+            x264_filler_write( h, &h->out.bs, f );
+            if( x264_nal_end( h ) )
+                return -1;
+            int total_size = x264_encoder_encapsulate_nals( h, h->out.i_nal-1 );
+            if( total_size < 0 )
+                return -1;
+            frame_size += total_size;
+            filler -= total_size;
+        }
     }
 
     /* End bitstream, set output  */
@@ -3985,6 +4256,7 @@
 
     x264_cqm_delete( h );
     x264_free( h->nal_buffer );
+    x264_free( h->reconfig_h );
     x264_analyse_free_costs( h );
 
     if( h->i_thread_frames > 1 )

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * encoder.c: top-level encoder functions
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -33,10 +33,6 @@
 #include "macroblock.h"
 #include "me.h"
 
-#if HAVE_VISUALIZE
-#include "common/visualize.h"
-#endif
-
 //#define DEBUG_MB_TYPE
 
 #define bs_write_ue bs_write_ue_big
@@ -82,7 +78,7 @@
 
 static void x264_frame_dump( x264_t *h )
 {
-    FILE *f = fopen( h->param.psz_dump_yuv, "r+b" );
+    FILE *f = x264_fopen( h->param.psz_dump_yuv, "r+b" );
     if( !f )
         return;
 
@@ -403,21 +399,6 @@
 {
     if( h->param.i_sync_lookahead )
         x264_lower_thread_priority( 10 );
-
-#if HAVE_MMX
-    /* Misalign mask has to be set separately for each thread. */
-    if( h->param.cpu&X264_CPU_SSE_MISALIGN )
-        x264_cpu_mask_misalign_sse();
-#endif
-}
-
-static void x264_lookahead_thread_init( x264_t *h )
-{
-#if HAVE_MMX
-    /* Misalign mask has to be set separately for each thread. */
-    if( h->param.cpu&X264_CPU_SSE_MISALIGN )
-        x264_cpu_mask_misalign_sse();
-#endif
 }
 #endif
 
@@ -486,7 +467,7 @@
         x264_log( h, X264_LOG_ERROR, "not compiled with 4:2:0 support\n" );
         return -1;
     }
-    else if( CHROMA_FORMAT != CHROMA_422 && i_csp >= X264_CSP_I422 && i_csp <= X264_CSP_NV16 )
+    else if( CHROMA_FORMAT != CHROMA_422 && i_csp >= X264_CSP_I422 && i_csp <= X264_CSP_V210 )
     {
         x264_log( h, X264_LOG_ERROR, "not compiled with 4:2:2 support\n" );
         return -1;
@@ -532,6 +513,12 @@
         return -1;
     }
 
+    if( h->param.vui.i_sar_width <= 0 || h->param.vui.i_sar_height <= 0 )
+    {
+        h->param.vui.i_sar_width = 0;
+        h->param.vui.i_sar_height = 0;
+    }
+
     if( h->param.i_threads == X264_THREADS_AUTO )
         h->param.i_threads = x264_cpu_num_processors() * (h->param.b_sliced_threads?2:3)/2;
     int max_sliced_threads = X264_MAX( 1, (h->param.i_height+15)/16 / 4 );
@@ -583,6 +570,8 @@
     {
         h->param.b_intra_refresh = 0;
         h->param.analyse.i_weighted_pred = 0;
+        h->param.i_frame_reference = 1;
+        h->param.i_dpb_size = 1;
     }
 
     h->param.i_frame_packing = x264_clip3( h->param.i_frame_packing, -1, 5 );
@@ -616,6 +605,188 @@
         x264_log( h, X264_LOG_ERROR, "no ratecontrol method specified\n" );
         return -1;
     }
+
+    if( PARAM_INTERLACED )
+        h->param.b_pic_struct = 1;
+
+    if( h->param.i_avcintra_class )
+    {
+        if( BIT_DEPTH != 10 )
+        {
+            x264_log( h, X264_LOG_ERROR, "%2d-bit AVC-Intra is not widely compatible\n", BIT_DEPTH );
+            x264_log( h, X264_LOG_ERROR, "10-bit x264 is required to encode AVC-Intra\n" );
+            return -1;
+        }
+
+        int type = h->param.i_avcintra_class == 200 ? 2 :
+                   h->param.i_avcintra_class == 100 ? 1 :
+                   h->param.i_avcintra_class == 50 ? 0 : -1;
+        if( type < 0 )
+        {
+            x264_log( h, X264_LOG_ERROR, "Invalid AVC-Intra class\n" );
+            return -1;
+        }
+
+        /* [50/100/200][res][fps] */
+        static const struct
+        {
+            uint16_t fps_num;
+            uint16_t fps_den;
+            uint8_t interlaced;
+            uint16_t frame_size;
+            const uint8_t *cqm_4ic;
+            const uint8_t *cqm_8iy;
+        } avcintra_lut[3][2][7] =
+        {
+            {{{ 60000, 1001, 0,  912, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
+              {    50,    1, 0, 1100, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
+              { 30000, 1001, 0,  912, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
+              {    25,    1, 0, 1100, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
+              { 24000, 1001, 0,  912, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy }},
+             {{ 30000, 1001, 1, 1820, x264_cqm_avci50_4ic, x264_cqm_avci50_1080i_8iy },
+              {    25,    1, 1, 2196, x264_cqm_avci50_4ic, x264_cqm_avci50_1080i_8iy },
+              { 60000, 1001, 0, 1820, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
+              { 30000, 1001, 0, 1820, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
+              {    50,    1, 0, 2196, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
+              {    25,    1, 0, 2196, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
+              { 24000, 1001, 0, 1820, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy }}},
+            {{{ 60000, 1001, 0, 1848, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy },
+              {    50,    1, 0, 2224, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy },
+              { 30000, 1001, 0, 1848, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy },
+              {    25,    1, 0, 2224, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy },
+              { 24000, 1001, 0, 1848, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy }},
+             {{ 30000, 1001, 1, 3692, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080i_8iy },
+              {    25,    1, 1, 4444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080i_8iy },
+              { 60000, 1001, 0, 3692, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
+              { 30000, 1001, 0, 3692, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
+              {    50,    1, 0, 4444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
+              {    25,    1, 0, 4444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
+              { 24000, 1001, 0, 3692, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy }}},
+            {{{ 60000, 1001, 0, 3724, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy },
+              {    50,    1, 0, 4472, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy }},
+             {{ 30000, 1001, 1, 7444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080i_8iy },
+              {    25,    1, 1, 8940, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080i_8iy },
+              { 60000, 1001, 0, 7444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
+              { 30000, 1001, 0, 7444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
+              {    50,    1, 0, 8940, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
+              {    25,    1, 0, 8940, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
+              { 24000, 1001, 0, 7444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy }}}
+        };
+
+        int res = -1;
+        if( i_csp >= X264_CSP_I420 && i_csp < X264_CSP_I422 && !type )
+        {
+            if(      h->param.i_width == 1440 && h->param.i_height == 1080 ) res =  1;
+            else if( h->param.i_width ==  960 && h->param.i_height ==  720 ) res =  0;
+        }
+        else if( i_csp >= X264_CSP_I422 && i_csp < X264_CSP_I444 && type )
+        {
+            if(      h->param.i_width == 1920 && h->param.i_height == 1080 ) res =  1;
+            else if( h->param.i_width == 1280 && h->param.i_height ==  720 ) res =  0;
+        }
+        else
+        {
+            x264_log( h, X264_LOG_ERROR, "Invalid colorspace for AVC-Intra %d\n", h->param.i_avcintra_class );
+            return -1;
+        }
+
+        if( res < 0 )
+        {
+            x264_log( h, X264_LOG_ERROR, "Resolution %dx%d invalid for AVC-Intra %d\n",
+                      h->param.i_width, h->param.i_height, h->param.i_avcintra_class );
+            return -1;
+        }
+
+        if( h->param.nalu_process )
+        {
+            x264_log( h, X264_LOG_ERROR, "nalu_process is not supported in AVC-Intra mode\n" );
+            return -1;
+        }
+
+        if( !h->param.b_repeat_headers )
+        {
+            x264_log( h, X264_LOG_ERROR, "Separate headers not supported in AVC-Intra mode\n" );
+            return -1;
+        }
+
+        int i;
+        uint32_t fps_num = h->param.i_fps_num, fps_den = h->param.i_fps_den;
+        x264_reduce_fraction( &fps_num, &fps_den );
+        for( i = 0; i < 7; i++ )
+        {
+            if( avcintra_lut[type][res][i].fps_num == fps_num &&
+                avcintra_lut[type][res][i].fps_den == fps_den &&
+                avcintra_lut[type][res][i].interlaced == PARAM_INTERLACED )
+            {
+                break;
+            }
+        }
+        if( i == 7 )
+        {
+            x264_log( h, X264_LOG_ERROR, "FPS %d/%d%c not compatible with AVC-Intra\n",
+                      h->param.i_fps_num, h->param.i_fps_den, PARAM_INTERLACED ? 'i' : 'p' );
+            return -1;
+        }
+
+        h->param.i_keyint_max = 1;
+        h->param.b_intra_refresh = 0;
+        h->param.analyse.i_weighted_pred = 0;
+        h->param.i_frame_reference = 1;
+        h->param.i_dpb_size = 1;
+
+        h->param.b_bluray_compat = 0;
+        h->param.b_vfr_input = 0;
+        h->param.b_aud = 1;
+        h->param.vui.i_chroma_loc = 0;
+        h->param.i_nal_hrd = X264_NAL_HRD_NONE;
+        h->param.b_deblocking_filter = 0;
+        h->param.b_stitchable = 1;
+        h->param.b_pic_struct = 0;
+        h->param.analyse.b_transform_8x8 = 1;
+        h->param.analyse.intra = X264_ANALYSE_I8x8;
+        h->param.analyse.i_chroma_qp_offset = res && type ? 3 : 4;
+        h->param.b_cabac = !type;
+        h->param.rc.i_vbv_buffer_size = avcintra_lut[type][res][i].frame_size;
+        h->param.rc.i_vbv_max_bitrate =
+        h->param.rc.i_bitrate = h->param.rc.i_vbv_buffer_size * fps_num / fps_den;
+        h->param.rc.i_rc_method = X264_RC_ABR;
+        h->param.rc.f_vbv_buffer_init = 1.0;
+        h->param.rc.b_filler = 1;
+        h->param.i_cqm_preset = X264_CQM_CUSTOM;
+        memcpy( h->param.cqm_4iy, x264_cqm_jvt4i, sizeof(h->param.cqm_4iy) );
+        memcpy( h->param.cqm_4ic, avcintra_lut[type][res][i].cqm_4ic, sizeof(h->param.cqm_4ic) );
+        memcpy( h->param.cqm_8iy, avcintra_lut[type][res][i].cqm_8iy, sizeof(h->param.cqm_8iy) );
+
+        /* Need exactly 10 slices of equal MB count... why?  $deity knows... */
+        h->param.i_slice_max_mbs = ((h->param.i_width + 15) / 16) * ((h->param.i_height + 15) / 16) / 10;
+        h->param.i_slice_max_size = 0;
+        /* The slice structure only allows a maximum of 2 threads for 1080i/p
+         * and 1 or 5 threads for 720p */
+        if( h->param.b_sliced_threads )
+        {
+            if( res )
+                h->param.i_threads = X264_MIN( 2, h->param.i_threads );
+            else
+            {
+                h->param.i_threads = X264_MIN( 5, h->param.i_threads );
+                if( h->param.i_threads < 5 )
+                    h->param.i_threads = 1;
+            }
+        }
+
+        if( type )
+            h->param.vui.i_sar_width = h->param.vui.i_sar_height = 1;
+        else
+        {
+            h->param.vui.i_sar_width  = 4;
+            h->param.vui.i_sar_height = 3;
+        }
+
+        /* Official encoder doesn't appear to go under 13
+         * and Avid cannot handle negative QPs */
+        h->param.rc.i_qp_min = X264_MAX( h->param.rc.i_qp_min, QP_BD_OFFSET + 1 );
+    }
+
     h->param.rc.f_rf_constant = x264_clip3f( h->param.rc.f_rf_constant, -QP_BD_OFFSET, 51 );
     h->param.rc.f_rf_constant_max = x264_clip3f( h->param.rc.f_rf_constant_max, -QP_BD_OFFSET, 51 );
     h->param.rc.i_qp_constant = x264_clip3( h->param.rc.i_qp_constant, 0, QP_MAX );
@@ -917,10 +1088,10 @@
         h->param.analyse.i_chroma_qp_offset += 6;
     /* Psy RDO increases overall quantizers to improve the quality of luma--this indirectly hurts chroma quality */
     /* so we lower the chroma QP offset to compensate */
-    if( b_open && h->mb.i_psy_rd )
+    if( b_open && h->mb.i_psy_rd && !h->param.i_avcintra_class )
         h->param.analyse.i_chroma_qp_offset -= h->param.analyse.f_psy_rd < 0.25 ? 1 : 2;
     /* Psy trellis has a similar effect. */
-    if( b_open && h->mb.i_psy_trellis )
+    if( b_open && h->mb.i_psy_trellis && !h->param.i_avcintra_class )
         h->param.analyse.i_chroma_qp_offset -= h->param.analyse.f_psy_trellis < 0.25 ? 1 : 2;
     h->param.analyse.i_chroma_qp_offset = x264_clip3(h->param.analyse.i_chroma_qp_offset, -12, 12);
     /* MB-tree requires AQ to be on, even if the strength is zero. */
@@ -1041,9 +1212,6 @@
 
     h->param.i_sps_id &= 31;
 
-    if( PARAM_INTERLACED )
-        h->param.b_pic_struct = 1;
-
     h->param.i_nal_hrd = x264_clip3( h->param.i_nal_hrd, X264_NAL_HRD_NONE, X264_NAL_HRD_CBR );
 
     if( h->param.i_nal_hrd && !h->param.rc.i_vbv_buffer_size )
@@ -1059,6 +1227,9 @@
         h->param.i_nal_hrd = X264_NAL_HRD_VBR;
     }
 
+    if( h->param.i_nal_hrd == X264_NAL_HRD_CBR )
+        h->param.rc.b_filler = 1;
+
     /* ensure the booleans are 0 or 1 so they can be used in math */
 #define BOOLIFY(x) h->param.x = !!h->param.x
     BOOLIFY( b_cabac );
@@ -1068,7 +1239,6 @@
     BOOLIFY( b_sliced_threads );
     BOOLIFY( b_interlaced );
     BOOLIFY( b_intra_refresh );
-    BOOLIFY( b_visualize );
     BOOLIFY( b_aud );
     BOOLIFY( b_repeat_headers );
     BOOLIFY( b_annexb );
@@ -1094,6 +1264,7 @@
     BOOLIFY( rc.b_stat_write );
     BOOLIFY( rc.b_stat_read );
     BOOLIFY( rc.b_mb_tree );
+    BOOLIFY( rc.b_filler );
 #undef BOOLIFY
 
     return 0;
@@ -1187,7 +1358,6 @@
                 h->param.vui.i_sar_width = i_w;
                 h->param.vui.i_sar_height = i_h;
             }
-            x264_sps_init( h->sps, h->param.i_sps_id, &h->param );
         }
     }
 }
@@ -1241,11 +1411,11 @@
         goto fail;
     }
 
+    x264_set_aspect_ratio( h, &h->param, 1 );
+
     x264_sps_init( h->sps, h->param.i_sps_id, &h->param );
     x264_pps_init( h->pps, h->param.i_sps_id, &h->param, h->sps );
 
-    x264_set_aspect_ratio( h, &h->param, 1 );
-
     x264_validate_levels( h, 1 );
 
     h->chroma_qp_table = i_chroma_qp_table + 12 + h->pps->i_chroma_qp_index_offset;
@@ -1396,11 +1566,13 @@
     h->nal_buffer_size = h->out.i_bitstream * 3/2 + 4 + 64; /* +4 for startcode, +64 for nal_escape assembly padding */
     CHECKED_MALLOC( h->nal_buffer, h->nal_buffer_size );
 
+    CHECKED_MALLOC( h->reconfig_h, sizeof(x264_t) );
+
     if( h->param.i_threads > 1 &&
         x264_threadpool_init( &h->threadpool, h->param.i_threads, (void*)x264_encoder_thread_init, h ) )
         goto fail;
     if( h->param.i_lookahead_threads > 1 &&
-        x264_threadpool_init( &h->lookaheadpool, h->param.i_lookahead_threads, (void*)x264_lookahead_thread_init, h ) )
+        x264_threadpool_init( &h->lookaheadpool, h->param.i_lookahead_threads, NULL, NULL ) )
         goto fail;
 
 #if HAVE_OPENCL
@@ -1424,6 +1596,7 @@
             CHECKED_MALLOC( h->lookahead_thread[i], sizeof(x264_t) );
             *h->lookahead_thread[i] = *h;
         }
+    *h->reconfig_h = *h;
 
     for( int i = 0; i < h->param.i_threads; i++ )
     {
@@ -1479,7 +1652,7 @@
     if( h->param.psz_dump_yuv )
     {
         /* create or truncate the reconstructed video file */
-        FILE *f = fopen( h->param.psz_dump_yuv, "w" );
+        FILE *f = x264_fopen( h->param.psz_dump_yuv, "w" );
         if( !f )
         {
             x264_log( h, X264_LOG_ERROR, "dump_yuv: can't write to %s\n", h->param.psz_dump_yuv );
@@ -1523,18 +1696,10 @@
     return NULL;
 }
 
-/****************************************************************************
- * x264_encoder_reconfig:
- ****************************************************************************/
-int x264_encoder_reconfig( x264_t *h, x264_param_t *param )
+/****************************************************************************/
+static int x264_encoder_try_reconfig( x264_t *h, x264_param_t *param, int *rc_reconfig )
 {
-    /* If the previous frame isn't done encoding, reconfiguring is probably dangerous. */
-    if( h->param.b_sliced_threads )
-        if( x264_threadpool_wait_all( h ) < 0 )
-            return -1;
-
-    int rc_reconfig = 0;
-    h = h->thread[h->thread[0]->i_thread_phase];
+    *rc_reconfig = 0;
     x264_set_aspect_ratio( h, param, 0 );
 #define COPY(var) h->param.var = param->var
     COPY( i_frame_reference ); // but never uses more refs than initially specified
@@ -1583,22 +1748,30 @@
     if( h->param.rc.i_vbv_max_bitrate > 0 && h->param.rc.i_vbv_buffer_size > 0 &&
           param->rc.i_vbv_max_bitrate > 0 &&   param->rc.i_vbv_buffer_size > 0 )
     {
-        rc_reconfig |= h->param.rc.i_vbv_max_bitrate != param->rc.i_vbv_max_bitrate;
-        rc_reconfig |= h->param.rc.i_vbv_buffer_size != param->rc.i_vbv_buffer_size;
-        rc_reconfig |= h->param.rc.i_bitrate != param->rc.i_bitrate;
+        *rc_reconfig |= h->param.rc.i_vbv_max_bitrate != param->rc.i_vbv_max_bitrate;
+        *rc_reconfig |= h->param.rc.i_vbv_buffer_size != param->rc.i_vbv_buffer_size;
+        *rc_reconfig |= h->param.rc.i_bitrate != param->rc.i_bitrate;
         COPY( rc.i_vbv_max_bitrate );
         COPY( rc.i_vbv_buffer_size );
         COPY( rc.i_bitrate );
     }
-    rc_reconfig |= h->param.rc.f_rf_constant != param->rc.f_rf_constant;
-    rc_reconfig |= h->param.rc.f_rf_constant_max != param->rc.f_rf_constant_max;
+    *rc_reconfig |= h->param.rc.f_rf_constant != param->rc.f_rf_constant;
+    *rc_reconfig |= h->param.rc.f_rf_constant_max != param->rc.f_rf_constant_max;
     COPY( rc.f_rf_constant );
     COPY( rc.f_rf_constant_max );
 #undef COPY
 
-    mbcmp_init( h );
+    return x264_validate_parameters( h, 0 );
+}
 
-    int ret = x264_validate_parameters( h, 0 );
+int x264_encoder_reconfig_apply( x264_t *h, x264_param_t *param )
+{
+    int rc_reconfig;
+    int ret = x264_encoder_try_reconfig( h, param, &rc_reconfig );
+
+    mbcmp_init( h );
+    if( !ret )
+        x264_sps_init( h->sps, h->param.i_sps_id, &h->param );
 
     /* Supported reconfiguration options (1-pass only):
      * vbv-maxrate
@@ -1612,6 +1785,25 @@
 }
 
 /****************************************************************************
+ * x264_encoder_reconfig:
+ ****************************************************************************/
+int x264_encoder_reconfig( x264_t *h, x264_param_t *param ) /* try the new params on the scratch context h->reconfig_h; returns x264_encoder_try_reconfig()'s result */
+{
+    h = h->thread[h->thread[0]->i_thread_phase]; /* switch to the thread context indexed by thread[0]'s i_thread_phase */
+    x264_param_t param_save = h->reconfig_h->param; /* kept so a failed attempt can be rolled back below */
+    h->reconfig_h->param = h->param; /* start the trial from the current parameters */
+
+    int rc_reconfig; /* filled in by x264_encoder_try_reconfig; not read in this function */
+    int ret = x264_encoder_try_reconfig( h->reconfig_h, param, &rc_reconfig );
+    if( !ret )
+        h->reconfig = 1; /* mark a reconfig as pending; h->param is not assigned in this function */
+    else
+        h->reconfig_h->param = param_save; /* attempt failed: restore the scratch params saved above */
+
+    return ret;
+}
+
+/****************************************************************************
  * x264_encoder_parameters:
  ****************************************************************************/
 void x264_encoder_parameters( x264_t *h, x264_param_t *param )
@@ -1630,6 +1822,7 @@
 
     nal->i_payload= 0;
     nal->p_payload= &h->out.p_bitstream[bs_pos( &h->out.bs ) / 8];
+    nal->i_padding= 0;
 }
 
 /* if number of allocated nals is not enough, re-allocate a larger one. */
@@ -1663,6 +1856,30 @@
     return x264_nal_check_buffer( h );
 }
 
+static int x264_check_encapsulated_buffer( x264_t *h, x264_t *h0, int start, /* make sure h0->nal_buffer holds at least necessary_size bytes */
+                                           int previous_nal_size, int necessary_size ) /* returns 0 on success, -1 if x264_malloc fails */
+{
+    if( h0->nal_buffer_size < necessary_size )
+    {
+        necessary_size *= 2; /* allocate twice what was asked for */
+        uint8_t *buf = x264_malloc( necessary_size );
+        if( !buf )
+            return -1;
+        if( previous_nal_size )
+            memcpy( buf, h0->nal_buffer, previous_nal_size ); /* carry over the first previous_nal_size bytes of the old buffer */
+
+        intptr_t delta = buf - h0->nal_buffer; /* displacement from the old buffer to the new one */
+        for( int i = 0; i < start; i++ )
+            h->out.nal[i].p_payload += delta; /* shift payload pointers of NALs [0,start); presumably they point into the old buffer - TODO confirm */
+
+        x264_free( h0->nal_buffer ); /* old buffer is released only after the copy and pointer shift */
+        h0->nal_buffer = buf;
+        h0->nal_buffer_size = necessary_size; /* records the doubled size */
+    }
+
+    return 0;
+}
+
 static int x264_encoder_encapsulate_nals( x264_t *h, int start )
 {
     x264_t *h0 = h->thread[0];
@@ -1683,31 +1900,31 @@
 
     /* Worst-case NAL unit escaping: reallocate the buffer if it's too small. */
     int necessary_size = previous_nal_size + nal_size * 3/2 + h->out.i_nal * 4 + 4 + 64;
-    if( h0->nal_buffer_size < necessary_size )
-    {
-        necessary_size *= 2;
-        uint8_t *buf = x264_malloc( necessary_size );
-        if( !buf )
-            return -1;
-        if( previous_nal_size )
-            memcpy( buf, h0->nal_buffer, previous_nal_size );
-
-        intptr_t delta = buf - h0->nal_buffer;
-        for( int i = 0; i < start; i++ )
-            h->out.nal[i].p_payload += delta;
-
-        x264_free( h0->nal_buffer );
-        h0->nal_buffer = buf;
-        h0->nal_buffer_size = necessary_size;
-    }
+    for( int i = start; i < h->out.i_nal; i++ )
+        necessary_size += h->out.nal[i].i_padding;
+    if( x264_check_encapsulated_buffer( h, h0, start, previous_nal_size, necessary_size ) )
+        return -1;
 
     uint8_t *nal_buffer = h0->nal_buffer + previous_nal_size;
 
     for( int i = start; i < h->out.i_nal; i++ )
     {
-        h->out.nal[i].b_long_startcode = !i || h->out.nal[i].i_type == NAL_SPS || h->out.nal[i].i_type == NAL_PPS;
+        int old_payload_len = h->out.nal[i].i_payload;
+        h->out.nal[i].b_long_startcode = !i || h->out.nal[i].i_type == NAL_SPS || h->out.nal[i].i_type == NAL_PPS ||
+                                         h->param.i_avcintra_class;
         x264_nal_encode( h, nal_buffer, &h->out.nal[i] );
         nal_buffer += h->out.nal[i].i_payload;
+        if( h->param.i_avcintra_class )
+        {
+            h->out.nal[i].i_padding -= h->out.nal[i].i_payload - (old_payload_len + NALU_OVERHEAD);
+            if( h->out.nal[i].i_padding > 0 )
+            {
+                memset( nal_buffer, 0, h->out.nal[i].i_padding );
+                nal_buffer += h->out.nal[i].i_padding;
+                h->out.nal[i].i_payload += h->out.nal[i].i_padding;
+            }
+            h->out.nal[i].i_padding = X264_MAX( h->out.nal[i].i_padding, 0 );
+        }
     }
 
     x264_emms();
@@ -2340,7 +2557,7 @@
     }
 }
 
-static int x264_slice_write( x264_t *h )
+static intptr_t x264_slice_write( x264_t *h )
 {
     int i_skip;
     int mb_xy, i_mb_x, i_mb_y;
@@ -2350,7 +2567,8 @@
      * other inaccuracies. */
     int overhead_guess = (NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal)) + 1 + h->param.b_cabac + 5;
     int slice_max_size = h->param.i_slice_max_size > 0 ? (h->param.i_slice_max_size-overhead_guess)*8 : 0;
-    int back_up_bitstream = slice_max_size || (!h->param.b_cabac && h->sps->i_profile_idc < PROFILE_HIGH);
+    int back_up_bitstream_cavlc = !h->param.b_cabac && h->sps->i_profile_idc < PROFILE_HIGH;
+    int back_up_bitstream = slice_max_size || back_up_bitstream_cavlc;
     int starting_bits = bs_pos(&h->out.bs);
     int b_deblock = h->sh.i_disable_deblocking_filter_idc != 1;
     int b_hpel = h->fdec->b_kept_as_ref;
@@ -2358,9 +2576,10 @@
     int thread_last_mb = h->i_threadslice_end * h->mb.i_mb_width - 1;
     uint8_t *last_emu_check;
 #define BS_BAK_SLICE_MAX_SIZE 0
-#define BS_BAK_SLICE_MIN_MBS  1
-#define BS_BAK_ROW_VBV        2
-    x264_bs_bak_t bs_bak[3];
+#define BS_BAK_CAVLC_OVERFLOW 1
+#define BS_BAK_SLICE_MIN_MBS  2
+#define BS_BAK_ROW_VBV        3
+    x264_bs_bak_t bs_bak[4];
     b_deblock &= b_hpel || h->param.b_full_recon || h->param.psz_dump_yuv;
     bs_realign( &h->out.bs );
 
@@ -2413,11 +2632,16 @@
                 x264_fdec_filter_row( h, i_mb_y, 0 );
         }
 
-        if( !(i_mb_y & SLICE_MBAFF) && back_up_bitstream )
+        if( back_up_bitstream )
         {
-            x264_bitstream_backup( h, &bs_bak[BS_BAK_SLICE_MAX_SIZE], i_skip, 0 );
-            if( slice_max_size && (thread_last_mb+1-mb_xy) == h->param.i_slice_min_mbs )
-                x264_bitstream_backup( h, &bs_bak[BS_BAK_SLICE_MIN_MBS], i_skip, 0 );
+            if( back_up_bitstream_cavlc )
+                x264_bitstream_backup( h, &bs_bak[BS_BAK_CAVLC_OVERFLOW], i_skip, 0 );
+            if( slice_max_size && !(i_mb_y & SLICE_MBAFF) )
+            {
+                x264_bitstream_backup( h, &bs_bak[BS_BAK_SLICE_MAX_SIZE], i_skip, 0 );
+                if( (thread_last_mb+1-mb_xy) == h->param.i_slice_min_mbs )
+                    x264_bitstream_backup( h, &bs_bak[BS_BAK_SLICE_MIN_MBS], i_skip, 0 );
+            }
         }
 
         if( PARAM_INTERLACED )
@@ -2481,7 +2705,7 @@
                     h->mb.i_skip_intra = 0;
                     h->mb.b_skip_mc = 0;
                     h->mb.b_overflow = 0;
-                    x264_bitstream_restore( h, &bs_bak[BS_BAK_SLICE_MAX_SIZE], &i_skip, 0 );
+                    x264_bitstream_restore( h, &bs_bak[BS_BAK_CAVLC_OVERFLOW], &i_skip, 0 );
                     goto reencode;
                 }
             }
@@ -2552,11 +2776,6 @@
 cont:
         h->mb.b_reencode_mb = 0;
 
-#if HAVE_VISUALIZE
-        if( h->param.b_visualize )
-            x264_visualize_mb( h );
-#endif
-
         /* save cache */
         x264_macroblock_cache_save( h );
 
@@ -2732,10 +2951,11 @@
     x264_frame_push_unused( src, dst->fdec );
 
     // copy everything except the per-thread pointers and the constants.
-    memcpy( &dst->i_frame, &src->i_frame, offsetof(x264_t, mb.type) - offsetof(x264_t, i_frame) );
+    memcpy( &dst->i_frame, &src->i_frame, offsetof(x264_t, mb.base) - offsetof(x264_t, i_frame) );
     dst->param = src->param;
     dst->stat = src->stat;
     dst->pixf = src->pixf;
+    dst->reconfig = src->reconfig;
 }
 
 static void x264_thread_sync_stat( x264_t *dst, x264_t *src )
@@ -2750,12 +2970,6 @@
     int i_slice_num = 0;
     int last_thread_mb = h->sh.i_last_mb;
 
-#if HAVE_VISUALIZE
-    if( h->param.b_visualize )
-        if( x264_visualize_init( h ) )
-            goto fail;
-#endif
-
     /* init stats */
     memset( &h->stat.frame, 0, sizeof(h->stat.frame) );
     h->mb.b_reencode_mb = 0;
@@ -2801,14 +3015,6 @@
             h->sh.i_first_mb -= h->mb.i_mb_stride;
     }
 
-#if HAVE_VISUALIZE
-    if( h->param.b_visualize )
-    {
-        x264_visualize_show( h );
-        x264_visualize_close( h );
-    }
-#endif
-
     return (void *)0;
 
 fail:
@@ -2949,10 +3155,6 @@
         thread_current =
         thread_oldest  = h;
     }
-#if HAVE_MMX
-    if( h->param.cpu&X264_CPU_SSE_MISALIGN )
-        x264_cpu_mask_misalign_sse();
-#endif
     h->i_cpb_delay_pir_offset = h->i_cpb_delay_pir_offset_next;
 
     /* no data out */
@@ -3058,9 +3260,14 @@
 
     if( h->i_frame == h->i_thread_frames - 1 )
         h->i_reordered_pts_delay = h->fenc->i_reordered_pts;
+    if( h->reconfig )
+    {
+        x264_encoder_reconfig_apply( h, &h->reconfig_h->param );
+        h->reconfig = 0;
+    }
     if( h->fenc->param )
     {
-        x264_encoder_reconfig( h, h->fenc->param );
+        x264_encoder_reconfig_apply( h, h->fenc->param );
         if( h->fenc->param->param_free )
         {
             h->fenc->param->param_free( h->fenc->param );
@@ -3207,7 +3414,7 @@
         bs_rbsp_trailing( &h->out.bs );
         if( x264_nal_end( h ) )
             return -1;
-        overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal-1);
+        overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD;
     }
 
     h->i_nal_type = i_nal_type;
@@ -3259,14 +3466,19 @@
             x264_sps_write( &h->out.bs, h->sps );
             if( x264_nal_end( h ) )
                 return -1;
-            overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD;
+            /* Pad AUD/SPS to 256 bytes like Panasonic */
+            if( h->param.i_avcintra_class )
+                h->out.nal[h->out.i_nal-1].i_padding = 256 - bs_pos( &h->out.bs ) / 8 - 2*NALU_OVERHEAD;
+            overhead += h->out.nal[h->out.i_nal-1].i_payload + h->out.nal[h->out.i_nal-1].i_padding + NALU_OVERHEAD;
 
             /* generate picture parameters */
             x264_nal_start( h, NAL_PPS, NAL_PRIORITY_HIGHEST );
             x264_pps_write( &h->out.bs, h->sps, h->pps );
             if( x264_nal_end( h ) )
                 return -1;
-            overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD;
+            if( h->param.i_avcintra_class )
+                h->out.nal[h->out.i_nal-1].i_padding = 256 - h->out.nal[h->out.i_nal-1].i_payload - NALU_OVERHEAD;
+            overhead += h->out.nal[h->out.i_nal-1].i_payload + h->out.nal[h->out.i_nal-1].i_padding + NALU_OVERHEAD;
         }
 
         /* when frame threading is used, buffering period sei is written in x264_encoder_frame_end */
@@ -3277,7 +3489,7 @@
             x264_sei_buffering_period_write( h, &h->out.bs );
             if( x264_nal_end( h ) )
                return -1;
-            overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD;
+            overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
         }
     }
 
@@ -3289,7 +3501,7 @@
                         h->fenc->extra_sei.payloads[i].payload_type );
         if( x264_nal_end( h ) )
             return -1;
-        overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal-1);
+        overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
         if( h->fenc->extra_sei.sei_free )
         {
             h->fenc->extra_sei.sei_free( h->fenc->extra_sei.payloads[i].payload );
@@ -3306,7 +3518,8 @@
 
     if( h->fenc->b_keyframe )
     {
-        if( h->param.b_repeat_headers && h->fenc->i_frame == 0 )
+        /* Avid's decoder strictly wants two SEIs for AVC-Intra so we can't insert the x264 SEI */
+        if( h->param.b_repeat_headers && h->fenc->i_frame == 0 && !h->param.i_avcintra_class )
         {
             /* identify ourself */
             x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
@@ -3314,7 +3527,7 @@
                 return -1;
             if( x264_nal_end( h ) )
                 return -1;
-            overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal-1);
+            overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
         }
 
         if( h->fenc->i_type != X264_TYPE_IDR )
@@ -3324,16 +3537,16 @@
             x264_sei_recovery_point_write( h, &h->out.bs, time_to_recovery );
             if( x264_nal_end( h ) )
                 return -1;
-            overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal-1);
+            overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
         }
 
-        if ( h->param.i_frame_packing >= 0 )
+        if( h->param.i_frame_packing >= 0 )
         {
             x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
             x264_sei_frame_packing_write( h, &h->out.bs );
             if( x264_nal_end( h ) )
                 return -1;
-            overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal-1);
+            overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
         }
     }
 
@@ -3344,7 +3557,7 @@
         x264_sei_pic_timing_write( h, &h->out.bs );
         if( x264_nal_end( h ) )
             return -1;
-        overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal-1);
+        overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
     }
 
     /* As required by Blu-ray. */
@@ -3355,12 +3568,54 @@
         x264_sei_dec_ref_pic_marking_write( h, &h->out.bs );
         if( x264_nal_end( h ) )
             return -1;
-        overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal-1);
+        overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
     }
 
     if( h->fenc->b_keyframe && h->param.b_intra_refresh )
         h->i_cpb_delay_pir_offset_next = h->fenc->i_cpb_delay;
 
+    /* Filler space: 10 or 18 SEIs' worth of space, depending on resolution */
+    if( h->param.i_avcintra_class )
+    {
+        /* Write an empty filler NAL to mimic the AUD in the P2 format*/
+        x264_nal_start( h, NAL_FILLER, NAL_PRIORITY_DISPOSABLE );
+        x264_filler_write( h, &h->out.bs, 0 );
+        if( x264_nal_end( h ) )
+            return -1;
+        overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD;
+
+        /* All lengths are magic lengths that decoders expect to see */
+        /* "UMID" SEI */
+        x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
+        if( x264_sei_avcintra_umid_write( h, &h->out.bs ) < 0 )
+            return -1;
+        if( x264_nal_end( h ) )
+            return -1;
+        overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
+
+        int unpadded_len;
+        int total_len;
+        if( h->param.i_height == 1080 )
+        {
+            unpadded_len = 5780;
+            total_len = 17*512;
+        }
+        else
+        {
+            unpadded_len = 2900;
+            total_len = 9*512;
+        }
+        /* "VANC" SEI */
+        x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
+        if( x264_sei_avcintra_vanc_write( h, &h->out.bs, unpadded_len ) < 0 )
+            return -1;
+        if( x264_nal_end( h ) )
+            return -1;
+
+        h->out.nal[h->out.i_nal-1].i_padding = total_len - h->out.nal[h->out.i_nal-1].i_payload - SEI_OVERHEAD;
+        overhead += h->out.nal[h->out.i_nal-1].i_payload + h->out.nal[h->out.i_nal-1].i_padding + SEI_OVERHEAD;
+    }
+
     /* Init the rate control */
     /* FIXME: Include slice header bit cost. */
     x264_ratecontrol_start( h, h->fenc->i_qpplus1, overhead*8 );
@@ -3490,30 +3745,46 @@
     pic_out->hrd_timing = h->fenc->hrd_timing;
     pic_out->prop.f_crf_avg = h->fdec->f_crf_avg;
 
-    while( filler > 0 )
+    /* Filler in AVC-Intra mode is written as zero bytes to the last slice
+     * We don't know the size of the last slice until encapsulation so we add filler to the encapsulated NAL */
+    if( h->param.i_avcintra_class )
+    {
+        x264_t *h0 = h->thread[0];
+        int ret = x264_check_encapsulated_buffer( h, h0, h->out.i_nal, frame_size, frame_size + filler );
+        if( ret < 0 )
+            return -1;
+        memset( h->out.nal[0].p_payload + frame_size, 0, filler );
+        h->out.nal[h->out.i_nal-1].i_payload += filler;
+        h->out.nal[h->out.i_nal-1].i_padding = filler;
+        frame_size += filler;
+    }
+    else
     {
-        int f, overhead;
-        overhead = (FILLER_OVERHEAD - h->param.b_annexb);
-        if( h->param.i_slice_max_size && filler > h->param.i_slice_max_size )
-        {
-            int next_size = filler - h->param.i_slice_max_size;
-            int overflow = X264_MAX( overhead - next_size, 0 );
-            f = h->param.i_slice_max_size - overhead - overflow;
-        }
-        else
-            f = X264_MAX( 0, filler - overhead );
+        while( filler > 0 )
+        {
+            int f, overhead;
+            overhead = (FILLER_OVERHEAD - h->param.b_annexb);
+            if( h->param.i_slice_max_size && filler > h->param.i_slice_max_size )
+            {
+                int next_size = filler - h->param.i_slice_max_size;
+                int overflow = X264_MAX( overhead - next_size, 0 );
+                f = h->param.i_slice_max_size - overhead - overflow;
+            }
+            else
+                f = X264_MAX( 0, filler - overhead );
 
-        if( x264_bitstream_check_buffer_filler( h, f ) )
-            return -1;
-        x264_nal_start( h, NAL_FILLER, NAL_PRIORITY_DISPOSABLE );
-        x264_filler_write( h, &h->out.bs, f );
-        if( x264_nal_end( h ) )
-            return -1;
-        int total_size = x264_encoder_encapsulate_nals( h, h->out.i_nal-1 );
-        if( total_size < 0 )
-            return -1;
-        frame_size += total_size;
-        filler -= total_size;
+            if( x264_bitstream_check_buffer_filler( h, f ) )
+                return -1;
+            x264_nal_start( h, NAL_FILLER, NAL_PRIORITY_DISPOSABLE );
+            x264_filler_write( h, &h->out.bs, f );
+            if( x264_nal_end( h ) )
+                return -1;
+            int total_size = x264_encoder_encapsulate_nals( h, h->out.i_nal-1 );
+            if( total_size < 0 )
+                return -1;
+            frame_size += total_size;
+            filler -= total_size;
+        }
     }
 
     /* End bitstream, set output  */
@@ -3985,6 +4256,7 @@
 
     x264_cqm_delete( h );
     x264_free( h->nal_buffer );
+    x264_free( h->reconfig_h );
     x264_analyse_free_costs( h );
 
     if( h->i_thread_frames > 1 )
​

x264-snapshot-20130723-2245.tar.bz2/encoder/lookahead.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/lookahead.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * lookahead.c: high-level lookahead functions
  *****************************************************************************
- * Copyright (C) 2010-2013 Avail Media and x264 project
+ * Copyright (C) 2010-2014 Avail Media and x264 project
  *
  * Authors: Michael Kazmier <mkazmier@availmedia.com>
  *          Alex Giladi <agiladi@availmedia.com>
@@ -89,16 +89,11 @@
 
 static void *x264_lookahead_thread( x264_t *h )
 {
-    int shift;
-#if HAVE_MMX
-    if( h->param.cpu&X264_CPU_SSE_MISALIGN )
-        x264_cpu_mask_misalign_sse();
-#endif
     while( !h->lookahead->b_exit_thread )
     {
         x264_pthread_mutex_lock( &h->lookahead->ifbuf.mutex );
         x264_pthread_mutex_lock( &h->lookahead->next.mutex );
-        shift = X264_MIN( h->lookahead->next.i_max_size - h->lookahead->next.i_size, h->lookahead->ifbuf.i_size );
+        int shift = X264_MIN( h->lookahead->next.i_max_size - h->lookahead->next.i_size, h->lookahead->ifbuf.i_size );
         x264_lookahead_shift( &h->lookahead->next, &h->lookahead->ifbuf, shift );
         x264_pthread_mutex_unlock( &h->lookahead->next.mutex );
         if( h->lookahead->next.i_size <= h->lookahead->i_slicetype_length + h->param.b_vfr_input )

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * lookahead.c: high-level lookahead functions
  *****************************************************************************
- * Copyright (C) 2010-2013 Avail Media and x264 project
+ * Copyright (C) 2010-2014 Avail Media and x264 project
  *
  * Authors: Michael Kazmier <mkazmier@availmedia.com>
  *          Alex Giladi <agiladi@availmedia.com>
@@ -89,16 +89,11 @@
 
 static void *x264_lookahead_thread( x264_t *h )
 {
-    int shift;
-#if HAVE_MMX
-    if( h->param.cpu&X264_CPU_SSE_MISALIGN )
-        x264_cpu_mask_misalign_sse();
-#endif
     while( !h->lookahead->b_exit_thread )
     {
         x264_pthread_mutex_lock( &h->lookahead->ifbuf.mutex );
         x264_pthread_mutex_lock( &h->lookahead->next.mutex );
-        shift = X264_MIN( h->lookahead->next.i_max_size - h->lookahead->next.i_size, h->lookahead->ifbuf.i_size );
+        int shift = X264_MIN( h->lookahead->next.i_max_size - h->lookahead->next.i_size, h->lookahead->ifbuf.i_size );
         x264_lookahead_shift( &h->lookahead->next, &h->lookahead->ifbuf, shift );
         x264_pthread_mutex_unlock( &h->lookahead->next.mutex );
         if( h->lookahead->next.i_size <= h->lookahead->i_slicetype_length + h->param.b_vfr_input )
​

x264-snapshot-20130723-2245.tar.bz2/encoder/macroblock.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/macroblock.c Changed

@@ -1,12 +1,12 @@
 /*****************************************************************************
  * macroblock.c: macroblock encoding
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
- *          Henrik Gramner <hengar-6@student.ltu.se>
+ *          Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -157,10 +157,7 @@
         return;
     }
 
-    M32( &h->mb.cache.non_zero_count[x264_scan8[ 0+p*16]] ) = 0;
-    M32( &h->mb.cache.non_zero_count[x264_scan8[ 2+p*16]] ) = 0;
-    M32( &h->mb.cache.non_zero_count[x264_scan8[ 8+p*16]] ) = 0;
-    M32( &h->mb.cache.non_zero_count[x264_scan8[10+p*16]] ) = 0;
+    CLEAR_16x16_NNZ( p );
 
     h->dctf.sub16x16_dct( dct4x4, p_src, p_dst );

 
@@ -1,12 +1,12 @@
 /*****************************************************************************
  * macroblock.c: macroblock encoding
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
- *          Henrik Gramner <hengar-6@student.ltu.se>
+ *          Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -157,10 +157,7 @@
         return;
     }
 
-    M32( &h->mb.cache.non_zero_count[x264_scan8[ 0+p*16]] ) = 0;
-    M32( &h->mb.cache.non_zero_count[x264_scan8[ 2+p*16]] ) = 0;
-    M32( &h->mb.cache.non_zero_count[x264_scan8[ 8+p*16]] ) = 0;
-    M32( &h->mb.cache.non_zero_count[x264_scan8[10+p*16]] ) = 0;
+    CLEAR_16x16_NNZ( p );
 
     h->dctf.sub16x16_dct( dct4x4, p_src, p_dst );
 
​

x264-snapshot-20130723-2245.tar.bz2/encoder/macroblock.h -> x264-snapshot-20140321-2245.tar.bz2/encoder/macroblock.h Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * macroblock.h: macroblock encoding
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -98,10 +98,10 @@
 #define CLEAR_16x16_NNZ( p ) \
 do\
 {\
-    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p+ 0]] ) = 0;\
-    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p+ 2]] ) = 0;\
-    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p+ 8]] ) = 0;\
-    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p+10]] ) = 0;\
+    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p] + 0*8] ) = 0;\
+    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p] + 1*8] ) = 0;\
+    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p] + 2*8] ) = 0;\
+    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p] + 3*8] ) = 0;\
 } while(0)
 
 /* A special for loop that iterates branchlessly over each set

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * macroblock.h: macroblock encoding
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -98,10 +98,10 @@
 #define CLEAR_16x16_NNZ( p ) \
 do\
 {\
-    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p+ 0]] ) = 0;\
-    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p+ 2]] ) = 0;\
-    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p+ 8]] ) = 0;\
-    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p+10]] ) = 0;\
+    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p] + 0*8] ) = 0;\
+    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p] + 1*8] ) = 0;\
+    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p] + 2*8] ) = 0;\
+    M32( &h->mb.cache.non_zero_count[x264_scan8[16*p] + 3*8] ) = 0;\
 } while(0)
 
 /* A special for loop that iterates branchlessly over each set
​

x264-snapshot-20130723-2245.tar.bz2/encoder/me.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/me.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * me.c: motion estimation
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -365,14 +365,14 @@
 
             /* hexagon */
             COST_MV_X3_DIR( -2,0, -1, 2,  1, 2, costs   );
-            COST_MV_X3_DIR(  2,0,  1,-2, -1,-2, costs+3 );
+            COST_MV_X3_DIR(  2,0,  1,-2, -1,-2, costs+4 ); /* +4 for 16-byte alignment */
             bcost <<= 3;
             COPY1_IF_LT( bcost, (costs[0]<<3)+2 );
             COPY1_IF_LT( bcost, (costs[1]<<3)+3 );
             COPY1_IF_LT( bcost, (costs[2]<<3)+4 );
-            COPY1_IF_LT( bcost, (costs[3]<<3)+5 );
-            COPY1_IF_LT( bcost, (costs[4]<<3)+6 );
-            COPY1_IF_LT( bcost, (costs[5]<<3)+7 );
+            COPY1_IF_LT( bcost, (costs[4]<<3)+5 );
+            COPY1_IF_LT( bcost, (costs[5]<<3)+6 );
+            COPY1_IF_LT( bcost, (costs[6]<<3)+7 );
 
             if( bcost&7 )
             {
@@ -671,7 +671,7 @@
                     for( i = 0; i < xn-2; i += 3 )
                     {
                         pixel *ref = p_fref_w+min_x+my*stride;
-                        int sads[3];
+                        ALIGNED_ARRAY_16( int, sads,[4] ); /* padded to [4] for asm */
                         h->pixf.sad_x3[i_pixel]( p_fenc, ref+xs[i], ref+xs[i+1], ref+xs[i+2], stride, sads );
                         for( int j = 0; j < 3; j++ )
                         {

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * me.c: motion estimation
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -365,14 +365,14 @@
 
             /* hexagon */
             COST_MV_X3_DIR( -2,0, -1, 2,  1, 2, costs   );
-            COST_MV_X3_DIR(  2,0,  1,-2, -1,-2, costs+3 );
+            COST_MV_X3_DIR(  2,0,  1,-2, -1,-2, costs+4 ); /* +4 for 16-byte alignment */
             bcost <<= 3;
             COPY1_IF_LT( bcost, (costs[0]<<3)+2 );
             COPY1_IF_LT( bcost, (costs[1]<<3)+3 );
             COPY1_IF_LT( bcost, (costs[2]<<3)+4 );
-            COPY1_IF_LT( bcost, (costs[3]<<3)+5 );
-            COPY1_IF_LT( bcost, (costs[4]<<3)+6 );
-            COPY1_IF_LT( bcost, (costs[5]<<3)+7 );
+            COPY1_IF_LT( bcost, (costs[4]<<3)+5 );
+            COPY1_IF_LT( bcost, (costs[5]<<3)+6 );
+            COPY1_IF_LT( bcost, (costs[6]<<3)+7 );
 
             if( bcost&7 )
             {
@@ -671,7 +671,7 @@
                     for( i = 0; i < xn-2; i += 3 )
                     {
                         pixel *ref = p_fref_w+min_x+my*stride;
-                        int sads[3];
+                        ALIGNED_ARRAY_16( int, sads,[4] ); /* padded to [4] for asm */
                         h->pixf.sad_x3[i_pixel]( p_fenc, ref+xs[i], ref+xs[i+1], ref+xs[i+2], stride, sads );
                         for( int j = 0; j < 3; j++ )
                         {
​

x264-snapshot-20130723-2245.tar.bz2/encoder/me.h -> x264-snapshot-20140321-2245.tar.bz2/encoder/me.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * me.h: motion estimation
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
​

x264-snapshot-20130723-2245.tar.bz2/encoder/ratecontrol.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/ratecontrol.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * ratecontrol.c: ratecontrol
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Michael Niedermayer <michaelni@gmx.at>
@@ -101,7 +101,7 @@
     double vbv_max_rate;        /* # of bits added to buffer_fill per second */
     predictor_t *pred;          /* predict frame size from satd */
     int single_frame_vbv;
-    double rate_factor_max_increment; /* Don't allow RF above (CRF + this value). */
+    float rate_factor_max_increment; /* Don't allow RF above (CRF + this value). */
 
     /* ABR stuff */
     int    last_satd;
@@ -653,8 +653,9 @@
                       h->param.rc.i_vbv_buffer_size );
         }
 
-        int vbv_buffer_size = h->param.rc.i_vbv_buffer_size * 1000;
-        int vbv_max_bitrate = h->param.rc.i_vbv_max_bitrate * 1000;
+        int kilobit_size = h->param.i_avcintra_class ? 1024 : 1000;
+        int vbv_buffer_size = h->param.rc.i_vbv_buffer_size * kilobit_size;
+        int vbv_max_bitrate = h->param.rc.i_vbv_max_bitrate * kilobit_size;
 
         /* Init HRD */
         if( h->param.i_nal_hrd && b_init )
@@ -666,15 +667,12 @@
             #define BR_SHIFT  6
             #define CPB_SHIFT 4
 
-            int bitrate = 1000*h->param.rc.i_vbv_max_bitrate;
-            int bufsize = 1000*h->param.rc.i_vbv_buffer_size;
-
             // normalize HRD size and rate to the value / scale notation
-            h->sps->vui.hrd.i_bit_rate_scale = x264_clip3( x264_ctz( bitrate ) - BR_SHIFT, 0, 15 );
-            h->sps->vui.hrd.i_bit_rate_value = bitrate >> ( h->sps->vui.hrd.i_bit_rate_scale + BR_SHIFT );
+            h->sps->vui.hrd.i_bit_rate_scale = x264_clip3( x264_ctz( vbv_max_bitrate ) - BR_SHIFT, 0, 15 );
+            h->sps->vui.hrd.i_bit_rate_value = vbv_max_bitrate >> ( h->sps->vui.hrd.i_bit_rate_scale + BR_SHIFT );
             h->sps->vui.hrd.i_bit_rate_unscaled = h->sps->vui.hrd.i_bit_rate_value << ( h->sps->vui.hrd.i_bit_rate_scale + BR_SHIFT );
-            h->sps->vui.hrd.i_cpb_size_scale = x264_clip3( x264_ctz( bufsize ) - CPB_SHIFT, 0, 15 );
-            h->sps->vui.hrd.i_cpb_size_value = bufsize >> ( h->sps->vui.hrd.i_cpb_size_scale + CPB_SHIFT );
+            h->sps->vui.hrd.i_cpb_size_scale = x264_clip3( x264_ctz( vbv_buffer_size ) - CPB_SHIFT, 0, 15 );
+            h->sps->vui.hrd.i_cpb_size_value = vbv_buffer_size >> ( h->sps->vui.hrd.i_cpb_size_scale + CPB_SHIFT );
             h->sps->vui.hrd.i_cpb_size_unscaled = h->sps->vui.hrd.i_cpb_size_value << ( h->sps->vui.hrd.i_cpb_size_scale + CPB_SHIFT );
 
             #undef CPB_SHIFT
@@ -705,7 +703,7 @@
         h->sps->vui.hrd.i_cpb_size_unscaled = vbv_buffer_size;
 
         if( rc->b_vbv_min_rate )
-            rc->bitrate = h->param.rc.i_bitrate * 1000.;
+            rc->bitrate = (double)h->param.rc.i_bitrate * kilobit_size;
         rc->buffer_rate = vbv_max_bitrate / rc->fps;
         rc->vbv_max_rate = vbv_max_bitrate;
         rc->buffer_size = vbv_buffer_size;
@@ -761,7 +759,7 @@
     else
         rc->qcompress = h->param.rc.f_qcompress;
 
-    rc->bitrate = h->param.rc.i_bitrate * 1000.;
+    rc->bitrate = h->param.rc.i_bitrate * (h->param.i_avcintra_class ? 1024. : 1000.);
     rc->rate_tolerance = h->param.rc.f_rate_tolerance;
     rc->nmb = h->mb.i_mb_count;
     rc->last_non_b_pict_type = -1;
@@ -872,7 +870,7 @@
             char *mbtree_stats_in = x264_strcat_filename( h->param.rc.psz_stat_in, ".mbtree" );
             if( !mbtree_stats_in )
                 return -1;
-            rc->p_mbtree_stat_file_in = fopen( mbtree_stats_in, "rb" );
+            rc->p_mbtree_stat_file_in = x264_fopen( mbtree_stats_in, "rb" );
             x264_free( mbtree_stats_in );
             if( !rc->p_mbtree_stat_file_in )
             {
@@ -913,7 +911,7 @@
              * so we'll at least try to roughly approximate this effect. */
             res_factor_bits = powf( res_factor, 0.7 );
 
-            if( ( p = strstr( opts, "timebase=" ) ) && sscanf( p, "timebase=%u/%u", &k, &l ) != 2 )
+            if( !( p = strstr( opts, "timebase=" ) ) || sscanf( p, "timebase=%u/%u", &k, &l ) != 2 )
             {
                 x264_log( h, X264_LOG_ERROR, "timebase specified in stats file not valid\n" );
                 return -1;
@@ -1140,7 +1138,7 @@
         if( !rc->psz_stat_file_tmpname )
             return -1;
 
-        rc->p_stat_file_out = fopen( rc->psz_stat_file_tmpname, "wb" );
+        rc->p_stat_file_out = x264_fopen( rc->psz_stat_file_tmpname, "wb" );
         if( rc->p_stat_file_out == NULL )
         {
             x264_log( h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n" );
@@ -1158,7 +1156,7 @@
             if( !rc->psz_mbtree_stat_file_tmpname || !rc->psz_mbtree_stat_file_name )
                 return -1;
 
-            rc->p_mbtree_stat_file_out = fopen( rc->psz_mbtree_stat_file_tmpname, "wb" );
+            rc->p_mbtree_stat_file_out = x264_fopen( rc->psz_mbtree_stat_file_tmpname, "wb" );
             if( rc->p_mbtree_stat_file_out == NULL )
             {
                 x264_log( h, X264_LOG_ERROR, "ratecontrol_init: can't open mbtree stats file\n" );
@@ -1338,7 +1336,7 @@
         b_regular_file = x264_is_regular_file( rc->p_stat_file_out );
         fclose( rc->p_stat_file_out );
         if( h->i_frame >= rc->num_entries && b_regular_file )
-            if( rename( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out ) != 0 )
+            if( x264_rename( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out ) != 0 )
             {
                 x264_log( h, X264_LOG_ERROR, "failed to rename \"%s\" to \"%s\"\n",
                           rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out );
@@ -1350,7 +1348,7 @@
         b_regular_file = x264_is_regular_file( rc->p_mbtree_stat_file_out );
         fclose( rc->p_mbtree_stat_file_out );
         if( h->i_frame >= rc->num_entries && b_regular_file )
-            if( rename( rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name ) != 0 )
+            if( x264_rename( rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name ) != 0 )
             {
                 x264_log( h, X264_LOG_ERROR, "failed to rename \"%s\" to \"%s\"\n",
                           rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name );
@@ -1398,7 +1396,7 @@
     x264_emms();
 
     if( zone && (!rc->prev_zone || zone->param != rc->prev_zone->param) )
-        x264_encoder_reconfig( h, zone->param );
+        x264_encoder_reconfig_apply( h, zone->param );
     rc->prev_zone = zone;
 
     if( h->param.rc.b_stat_read )
@@ -2108,15 +2106,25 @@
     rct->buffer_fill_final -= (uint64_t)bits * h->sps->vui.i_time_scale;
 
     if( rct->buffer_fill_final < 0 )
-        x264_log( h, X264_LOG_WARNING, "VBV underflow (frame %d, %.0f bits)\n", h->i_frame, (double)rct->buffer_fill_final / h->sps->vui.i_time_scale );
+    {
+        double underflow = (double)rct->buffer_fill_final / h->sps->vui.i_time_scale;
+        if( rcc->rate_factor_max_increment && rcc->qpm >= rcc->qp_novbv + rcc->rate_factor_max_increment )
+            x264_log( h, X264_LOG_DEBUG, "VBV underflow due to CRF-max (frame %d, %.0f bits)\n", h->i_frame, underflow );
+        else
+            x264_log( h, X264_LOG_WARNING, "VBV underflow (frame %d, %.0f bits)\n", h->i_frame, underflow );
+    }
     rct->buffer_fill_final = X264_MAX( rct->buffer_fill_final, 0 );
-    rct->buffer_fill_final += (uint64_t)bitrate * h->sps->vui.i_num_units_in_tick * h->fenc->i_cpb_duration;
 
-    if( h->sps->vui.hrd.b_cbr_hrd && rct->buffer_fill_final > buffer_size )
+    if( h->param.i_avcintra_class )
+        rct->buffer_fill_final += buffer_size;
+    else
+        rct->buffer_fill_final += (uint64_t)bitrate * h->sps->vui.i_num_units_in_tick * h->fenc->i_cpb_duration;
+
+    if( h->param.rc.b_filler && rct->buffer_fill_final > buffer_size )
     {
         int64_t scale = (int64_t)h->sps->vui.i_time_scale * 8;
         filler = (rct->buffer_fill_final - buffer_size + scale - 1) / scale;
-        bits = X264_MAX( (FILLER_OVERHEAD - h->param.b_annexb), filler ) * 8;
+        bits = h->param.i_avcintra_class ? filler * 8 : X264_MAX( (FILLER_OVERHEAD - h->param.b_annexb), filler ) * 8;
         rct->buffer_fill_final -= (uint64_t)bits * h->sps->vui.i_time_scale;
     }
     else
@@ -2719,7 +2727,7 @@
      * we're adding or removing bits), and starting on the earliest frame that
      * can influence the buffer fill of that end frame. */
     x264_ratecontrol_t *rcc = h->rc;
-    const double buffer_min = (over ? .1 : .1) * rcc->buffer_size;
+    const double buffer_min = .1 * rcc->buffer_size;
     const double buffer_max = .9 * rcc->buffer_size;
     double fill = fills[*t0-1];
     double parity = over ? 1. : -1.;

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * ratecontrol.c: ratecontrol
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Michael Niedermayer <michaelni@gmx.at>
@@ -101,7 +101,7 @@
     double vbv_max_rate;        /* # of bits added to buffer_fill per second */
     predictor_t *pred;          /* predict frame size from satd */
     int single_frame_vbv;
-    double rate_factor_max_increment; /* Don't allow RF above (CRF + this value). */
+    float rate_factor_max_increment; /* Don't allow RF above (CRF + this value). */
 
     /* ABR stuff */
     int    last_satd;
@@ -653,8 +653,9 @@
                       h->param.rc.i_vbv_buffer_size );
         }
 
-        int vbv_buffer_size = h->param.rc.i_vbv_buffer_size * 1000;
-        int vbv_max_bitrate = h->param.rc.i_vbv_max_bitrate * 1000;
+        int kilobit_size = h->param.i_avcintra_class ? 1024 : 1000;
+        int vbv_buffer_size = h->param.rc.i_vbv_buffer_size * kilobit_size;
+        int vbv_max_bitrate = h->param.rc.i_vbv_max_bitrate * kilobit_size;
 
         /* Init HRD */
         if( h->param.i_nal_hrd && b_init )
@@ -666,15 +667,12 @@
             #define BR_SHIFT  6
             #define CPB_SHIFT 4
 
-            int bitrate = 1000*h->param.rc.i_vbv_max_bitrate;
-            int bufsize = 1000*h->param.rc.i_vbv_buffer_size;
-
             // normalize HRD size and rate to the value / scale notation
-            h->sps->vui.hrd.i_bit_rate_scale = x264_clip3( x264_ctz( bitrate ) - BR_SHIFT, 0, 15 );
-            h->sps->vui.hrd.i_bit_rate_value = bitrate >> ( h->sps->vui.hrd.i_bit_rate_scale + BR_SHIFT );
+            h->sps->vui.hrd.i_bit_rate_scale = x264_clip3( x264_ctz( vbv_max_bitrate ) - BR_SHIFT, 0, 15 );
+            h->sps->vui.hrd.i_bit_rate_value = vbv_max_bitrate >> ( h->sps->vui.hrd.i_bit_rate_scale + BR_SHIFT );
             h->sps->vui.hrd.i_bit_rate_unscaled = h->sps->vui.hrd.i_bit_rate_value << ( h->sps->vui.hrd.i_bit_rate_scale + BR_SHIFT );
-            h->sps->vui.hrd.i_cpb_size_scale = x264_clip3( x264_ctz( bufsize ) - CPB_SHIFT, 0, 15 );
-            h->sps->vui.hrd.i_cpb_size_value = bufsize >> ( h->sps->vui.hrd.i_cpb_size_scale + CPB_SHIFT );
+            h->sps->vui.hrd.i_cpb_size_scale = x264_clip3( x264_ctz( vbv_buffer_size ) - CPB_SHIFT, 0, 15 );
+            h->sps->vui.hrd.i_cpb_size_value = vbv_buffer_size >> ( h->sps->vui.hrd.i_cpb_size_scale + CPB_SHIFT );
             h->sps->vui.hrd.i_cpb_size_unscaled = h->sps->vui.hrd.i_cpb_size_value << ( h->sps->vui.hrd.i_cpb_size_scale + CPB_SHIFT );
 
             #undef CPB_SHIFT
@@ -705,7 +703,7 @@
         h->sps->vui.hrd.i_cpb_size_unscaled = vbv_buffer_size;
 
         if( rc->b_vbv_min_rate )
-            rc->bitrate = h->param.rc.i_bitrate * 1000.;
+            rc->bitrate = (double)h->param.rc.i_bitrate * kilobit_size;
         rc->buffer_rate = vbv_max_bitrate / rc->fps;
         rc->vbv_max_rate = vbv_max_bitrate;
         rc->buffer_size = vbv_buffer_size;
@@ -761,7 +759,7 @@
     else
         rc->qcompress = h->param.rc.f_qcompress;
 
-    rc->bitrate = h->param.rc.i_bitrate * 1000.;
+    rc->bitrate = h->param.rc.i_bitrate * (h->param.i_avcintra_class ? 1024. : 1000.);
     rc->rate_tolerance = h->param.rc.f_rate_tolerance;
     rc->nmb = h->mb.i_mb_count;
     rc->last_non_b_pict_type = -1;
@@ -872,7 +870,7 @@
             char *mbtree_stats_in = x264_strcat_filename( h->param.rc.psz_stat_in, ".mbtree" );
             if( !mbtree_stats_in )
                 return -1;
-            rc->p_mbtree_stat_file_in = fopen( mbtree_stats_in, "rb" );
+            rc->p_mbtree_stat_file_in = x264_fopen( mbtree_stats_in, "rb" );
             x264_free( mbtree_stats_in );
             if( !rc->p_mbtree_stat_file_in )
             {
@@ -913,7 +911,7 @@
              * so we'll at least try to roughly approximate this effect. */
             res_factor_bits = powf( res_factor, 0.7 );
 
-            if( ( p = strstr( opts, "timebase=" ) ) && sscanf( p, "timebase=%u/%u", &k, &l ) != 2 )
+            if( !( p = strstr( opts, "timebase=" ) ) || sscanf( p, "timebase=%u/%u", &k, &l ) != 2 )
             {
                 x264_log( h, X264_LOG_ERROR, "timebase specified in stats file not valid\n" );
                 return -1;
@@ -1140,7 +1138,7 @@
         if( !rc->psz_stat_file_tmpname )
             return -1;
 
-        rc->p_stat_file_out = fopen( rc->psz_stat_file_tmpname, "wb" );
+        rc->p_stat_file_out = x264_fopen( rc->psz_stat_file_tmpname, "wb" );
         if( rc->p_stat_file_out == NULL )
         {
             x264_log( h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n" );
@@ -1158,7 +1156,7 @@
             if( !rc->psz_mbtree_stat_file_tmpname || !rc->psz_mbtree_stat_file_name )
                 return -1;
 
-            rc->p_mbtree_stat_file_out = fopen( rc->psz_mbtree_stat_file_tmpname, "wb" );
+            rc->p_mbtree_stat_file_out = x264_fopen( rc->psz_mbtree_stat_file_tmpname, "wb" );
             if( rc->p_mbtree_stat_file_out == NULL )
             {
                 x264_log( h, X264_LOG_ERROR, "ratecontrol_init: can't open mbtree stats file\n" );
@@ -1338,7 +1336,7 @@
         b_regular_file = x264_is_regular_file( rc->p_stat_file_out );
         fclose( rc->p_stat_file_out );
         if( h->i_frame >= rc->num_entries && b_regular_file )
-            if( rename( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out ) != 0 )
+            if( x264_rename( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out ) != 0 )
             {
                 x264_log( h, X264_LOG_ERROR, "failed to rename \"%s\" to \"%s\"\n",
                           rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out );
@@ -1350,7 +1348,7 @@
         b_regular_file = x264_is_regular_file( rc->p_mbtree_stat_file_out );
         fclose( rc->p_mbtree_stat_file_out );
         if( h->i_frame >= rc->num_entries && b_regular_file )
-            if( rename( rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name ) != 0 )
+            if( x264_rename( rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name ) != 0 )
             {
                 x264_log( h, X264_LOG_ERROR, "failed to rename \"%s\" to \"%s\"\n",
                           rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name );
@@ -1398,7 +1396,7 @@
     x264_emms();
 
     if( zone && (!rc->prev_zone || zone->param != rc->prev_zone->param) )
-        x264_encoder_reconfig( h, zone->param );
+        x264_encoder_reconfig_apply( h, zone->param );
     rc->prev_zone = zone;
 
     if( h->param.rc.b_stat_read )
@@ -2108,15 +2106,25 @@
     rct->buffer_fill_final -= (uint64_t)bits * h->sps->vui.i_time_scale;
 
     if( rct->buffer_fill_final < 0 )
-        x264_log( h, X264_LOG_WARNING, "VBV underflow (frame %d, %.0f bits)\n", h->i_frame, (double)rct->buffer_fill_final / h->sps->vui.i_time_scale );
+    {
+        double underflow = (double)rct->buffer_fill_final / h->sps->vui.i_time_scale;
+        if( rcc->rate_factor_max_increment && rcc->qpm >= rcc->qp_novbv + rcc->rate_factor_max_increment )
+            x264_log( h, X264_LOG_DEBUG, "VBV underflow due to CRF-max (frame %d, %.0f bits)\n", h->i_frame, underflow );
+        else
+            x264_log( h, X264_LOG_WARNING, "VBV underflow (frame %d, %.0f bits)\n", h->i_frame, underflow );
+    }
     rct->buffer_fill_final = X264_MAX( rct->buffer_fill_final, 0 );
-    rct->buffer_fill_final += (uint64_t)bitrate * h->sps->vui.i_num_units_in_tick * h->fenc->i_cpb_duration;
 
-    if( h->sps->vui.hrd.b_cbr_hrd && rct->buffer_fill_final > buffer_size )
+    if( h->param.i_avcintra_class )
+        rct->buffer_fill_final += buffer_size;
+    else
+        rct->buffer_fill_final += (uint64_t)bitrate * h->sps->vui.i_num_units_in_tick * h->fenc->i_cpb_duration;
+
+    if( h->param.rc.b_filler && rct->buffer_fill_final > buffer_size )
     {
         int64_t scale = (int64_t)h->sps->vui.i_time_scale * 8;
         filler = (rct->buffer_fill_final - buffer_size + scale - 1) / scale;
-        bits = X264_MAX( (FILLER_OVERHEAD - h->param.b_annexb), filler ) * 8;
+        bits = h->param.i_avcintra_class ? filler * 8 : X264_MAX( (FILLER_OVERHEAD - h->param.b_annexb), filler ) * 8;
         rct->buffer_fill_final -= (uint64_t)bits * h->sps->vui.i_time_scale;
     }
     else
@@ -2719,7 +2727,7 @@
      * we're adding or removing bits), and starting on the earliest frame that
      * can influence the buffer fill of that end frame. */
     x264_ratecontrol_t *rcc = h->rc;
-    const double buffer_min = (over ? .1 : .1) * rcc->buffer_size;
+    const double buffer_min = .1 * rcc->buffer_size;
     const double buffer_max = .9 * rcc->buffer_size;
     double fill = fills[*t0-1];
     double parity = over ? 1. : -1.;
​

x264-snapshot-20130723-2245.tar.bz2/encoder/ratecontrol.h -> x264-snapshot-20140321-2245.tar.bz2/encoder/ratecontrol.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * ratecontrol.h: ratecontrol
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -43,6 +43,7 @@
 void x264_ratecontrol_delete( x264_t * );
 
 void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init );
+int x264_encoder_reconfig_apply( x264_t *h, x264_param_t *param );
 
 void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame, float *quant_offsets );
 int  x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame, float *quant_offsets );
​

x264-snapshot-20130723-2245.tar.bz2/encoder/rdo.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/rdo.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * rdo.c: rate-distortion optimization
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
​

x264-snapshot-20130723-2245.tar.bz2/encoder/set.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/set.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * set: header writing
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -31,6 +31,7 @@
 
 // Indexed by pic_struct values
 static const uint8_t num_clock_ts[10] = { 0, 1, 1, 1, 2, 2, 3, 3, 2, 3 };
+const static uint8_t avcintra_uuid[] = {0xF7, 0x49, 0x3E, 0xB3, 0xD4, 0x00, 0x47, 0x96, 0x86, 0x86, 0xC9, 0x70, 0x7B, 0x64, 0x37, 0x2A};
 
 static void transpose( uint8_t *buf, int w )
 {
@@ -91,7 +92,7 @@
     bs_write( s, 8, payload_size-i );
 
     for( i = 0; i < payload_size; i++ )
-        bs_write(s, 8, payload[i] );
+        bs_write( s, 8, payload[i] );
 
     bs_rbsp_trailing( s );
     bs_flush( s );
@@ -227,7 +228,8 @@
     }
 
     /* FIXME: not sufficient for interlaced video */
-    sps->vui.b_chroma_loc_info_present = param->vui.i_chroma_loc > 0 && param->vui.i_chroma_loc <= 5;
+    sps->vui.b_chroma_loc_info_present = param->vui.i_chroma_loc > 0 && param->vui.i_chroma_loc <= 5 &&
+                                         sps->i_chroma_format_idc == CHROMA_420;
     if( sps->vui.b_chroma_loc_info_present )
     {
         sps->vui.i_chroma_loc_top = param->vui.i_chroma_loc;
@@ -249,7 +251,7 @@
 
     // NOTE: HRD related parts of the SPS are initialised in x264_ratecontrol_init_reconfigurable
 
-    sps->vui.b_bitstream_restriction = 1;
+    sps->vui.b_bitstream_restriction = param->i_keyint_max > 1;
     if( sps->vui.b_bitstream_restriction )
     {
         sps->vui.b_motion_vectors_over_pic_boundaries = 1;
@@ -421,7 +423,7 @@
     pps->i_sps_id = sps->i_id;
     pps->b_cabac = param->b_cabac;
 
-    pps->b_pic_order = param->b_interlaced;
+    pps->b_pic_order = !param->i_avcintra_class && param->b_interlaced;
     pps->i_num_slice_groups = 1;
 
     pps->i_num_ref_idx_l0_default_active = param->i_frame_reference;
@@ -575,7 +577,7 @@
 
     memcpy( payload, uuid, 16 );
     sprintf( payload+16, "x264 - core %d%s - H.264/MPEG-4 AVC codec - "
-             "Copy%s 2003-2013 - http://www.videolan.org/x264.html - options: %s",
+             "Copy%s 2003-2014 - http://www.videolan.org/x264.html - options: %s",
              X264_BUILD, X264_VERSION, HAVE_GPL?"left":"right", opts );
     length = strlen(payload)+1;
 
@@ -725,6 +727,49 @@
     x264_sei_write( s, tmp_buf, bs_pos( &q ) / 8, SEI_DEC_REF_PIC_MARKING );
 }
 
+int x264_sei_avcintra_umid_write( x264_t *h, bs_t *s )
+{
+    uint8_t data[512];
+    const char *msg = "UMID";
+    const int len = 497;
+
+    memset( data, 0xff, len );
+    memcpy( data, avcintra_uuid, sizeof(avcintra_uuid) );
+    memcpy( data+16, msg, strlen(msg) );
+
+    data[20] = 0x13;
+    /* These bytes appear to be some sort of frame/seconds counter in certain applications,
+     * but others jump around, so leave them as zero for now */
+    data[21] = data[22] = 0;
+
+    data[28] = 0x14;
+    data[36] = 0x60;
+    data[41] = 0x22; /* Believed to be some sort of end of basic UMID identifier */
+
+    x264_sei_write( &h->out.bs, data, len, SEI_USER_DATA_UNREGISTERED );
+
+    return 0;
+}
+
+int x264_sei_avcintra_vanc_write( x264_t *h, bs_t *s, int len )
+{
+    uint8_t data[6000];
+    const char *msg = "VANC";
+    if( len > sizeof(data) )
+    {
+        x264_log( h, X264_LOG_ERROR, "AVC-Intra SEI is too large (%d)\n", len );
+        return -1;
+    }
+
+    memset( data, 0xff, len );
+    memcpy( data, avcintra_uuid, sizeof(avcintra_uuid) );
+    memcpy( data+16, msg, strlen(msg) );
+
+    x264_sei_write( &h->out.bs, data, len, SEI_USER_DATA_UNREGISTERED );
+
+    return 0;
+}
+
 const x264_level_t x264_levels[] =
 {
     { 10,    1485,    99,    396,     64,    175,  64, 64,  0, 2, 0, 0, 1 },

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * set: header writing
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -31,6 +31,7 @@
 
 // Indexed by pic_struct values
 static const uint8_t num_clock_ts[10] = { 0, 1, 1, 1, 2, 2, 3, 3, 2, 3 };
+const static uint8_t avcintra_uuid[] = {0xF7, 0x49, 0x3E, 0xB3, 0xD4, 0x00, 0x47, 0x96, 0x86, 0x86, 0xC9, 0x70, 0x7B, 0x64, 0x37, 0x2A};
 
 static void transpose( uint8_t *buf, int w )
 {
@@ -91,7 +92,7 @@
     bs_write( s, 8, payload_size-i );
 
     for( i = 0; i < payload_size; i++ )
-        bs_write(s, 8, payload[i] );
+        bs_write( s, 8, payload[i] );
 
     bs_rbsp_trailing( s );
     bs_flush( s );
@@ -227,7 +228,8 @@
     }
 
     /* FIXME: not sufficient for interlaced video */
-    sps->vui.b_chroma_loc_info_present = param->vui.i_chroma_loc > 0 && param->vui.i_chroma_loc <= 5;
+    sps->vui.b_chroma_loc_info_present = param->vui.i_chroma_loc > 0 && param->vui.i_chroma_loc <= 5 &&
+                                         sps->i_chroma_format_idc == CHROMA_420;
     if( sps->vui.b_chroma_loc_info_present )
     {
         sps->vui.i_chroma_loc_top = param->vui.i_chroma_loc;
@@ -249,7 +251,7 @@
 
     // NOTE: HRD related parts of the SPS are initialised in x264_ratecontrol_init_reconfigurable
 
-    sps->vui.b_bitstream_restriction = 1;
+    sps->vui.b_bitstream_restriction = param->i_keyint_max > 1;
     if( sps->vui.b_bitstream_restriction )
     {
         sps->vui.b_motion_vectors_over_pic_boundaries = 1;
@@ -421,7 +423,7 @@
     pps->i_sps_id = sps->i_id;
     pps->b_cabac = param->b_cabac;
 
-    pps->b_pic_order = param->b_interlaced;
+    pps->b_pic_order = !param->i_avcintra_class && param->b_interlaced;
     pps->i_num_slice_groups = 1;
 
     pps->i_num_ref_idx_l0_default_active = param->i_frame_reference;
@@ -575,7 +577,7 @@
 
     memcpy( payload, uuid, 16 );
     sprintf( payload+16, "x264 - core %d%s - H.264/MPEG-4 AVC codec - "
-             "Copy%s 2003-2013 - http://www.videolan.org/x264.html - options: %s",
+             "Copy%s 2003-2014 - http://www.videolan.org/x264.html - options: %s",
              X264_BUILD, X264_VERSION, HAVE_GPL?"left":"right", opts );
     length = strlen(payload)+1;
 
@@ -725,6 +727,49 @@
     x264_sei_write( s, tmp_buf, bs_pos( &q ) / 8, SEI_DEC_REF_PIC_MARKING );
 }
 
+int x264_sei_avcintra_umid_write( x264_t *h, bs_t *s )
+{
+    uint8_t data[512];
+    const char *msg = "UMID";
+    const int len = 497;
+
+    memset( data, 0xff, len );
+    memcpy( data, avcintra_uuid, sizeof(avcintra_uuid) );
+    memcpy( data+16, msg, strlen(msg) );
+
+    data[20] = 0x13;
+    /* These bytes appear to be some sort of frame/seconds counter in certain applications,
+     * but others jump around, so leave them as zero for now */
+    data[21] = data[22] = 0;
+
+    data[28] = 0x14;
+    data[36] = 0x60;
+    data[41] = 0x22; /* Believed to be some sort of end of basic UMID identifier */
+
+    x264_sei_write( &h->out.bs, data, len, SEI_USER_DATA_UNREGISTERED );
+
+    return 0;
+}
+
+int x264_sei_avcintra_vanc_write( x264_t *h, bs_t *s, int len )
+{
+    uint8_t data[6000];
+    const char *msg = "VANC";
+    if( len > sizeof(data) )
+    {
+        x264_log( h, X264_LOG_ERROR, "AVC-Intra SEI is too large (%d)\n", len );
+        return -1;
+    }
+
+    memset( data, 0xff, len );
+    memcpy( data, avcintra_uuid, sizeof(avcintra_uuid) );
+    memcpy( data+16, msg, strlen(msg) );
+
+    x264_sei_write( &h->out.bs, data, len, SEI_USER_DATA_UNREGISTERED );
+
+    return 0;
+}
+
 const x264_level_t x264_levels[] =
 {
     { 10,    1485,    99,    396,     64,    175,  64, 64,  0, 2, 0, 0, 1 },
​

x264-snapshot-20130723-2245.tar.bz2/encoder/set.h -> x264-snapshot-20140321-2245.tar.bz2/encoder/set.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * set.h: header writing
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -38,6 +38,8 @@
 void x264_sei_pic_timing_write( x264_t *h, bs_t *s );
 void x264_sei_dec_ref_pic_marking_write( x264_t *h, bs_t *s );
 void x264_sei_frame_packing_write( x264_t *h, bs_t *s );
+int  x264_sei_avcintra_umid_write( x264_t *h, bs_t *s );
+int  x264_sei_avcintra_vanc_write( x264_t *h, bs_t *s, int len );
 void x264_sei_write( bs_t *s, uint8_t *payload, int payload_size, int payload_type );
 void x264_filler_write( x264_t *h, bs_t *s, int filler );
 
​

x264-snapshot-20130723-2245.tar.bz2/encoder/slicetype-cl.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/slicetype-cl.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * slicetype-cl.c: OpenCL slicetype decision code (lowres lookahead)
  *****************************************************************************
- * Copyright (C) 2012-2013 x264 project
+ * Copyright (C) 2012-2014 x264 project
  *
  * Authors: Steve Borho <sborho@multicorewareinc.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/encoder/slicetype.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/slicetype.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * slicetype.c: lookahead analysis
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: Jason Garrett-Glaser <darkshikari@gmail.com>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -1022,9 +1022,12 @@
     return i_score;
 }
 
+/* Trade off precision in mbtree for increased range */
+#define MBTREE_PRECISION 0.5f
+
 static void x264_macroblock_tree_finish( x264_t *h, x264_frame_t *frame, float average_duration, int ref0_distance )
 {
-    int fps_factor = round( CLIP_DURATION(average_duration) / CLIP_DURATION(frame->f_duration) * 256 );
+    int fps_factor = round( CLIP_DURATION(average_duration) / CLIP_DURATION(frame->f_duration) * 256 / MBTREE_PRECISION );
     float weightdelta = 0.0;
     if( ref0_distance && frame->f_weighted_cost_delta[ref0_distance-1] > 0 )
         weightdelta = (1.0 - frame->f_weighted_cost_delta[ref0_distance-1]);
@@ -1051,11 +1054,12 @@
     int i_bipred_weight = h->param.analyse.b_weighted_bipred ? 64 - (dist_scale_factor>>2) : 32;
     int16_t (*mvs[2])[2] = { frames[b]->lowres_mvs[0][b-p0-1], frames[b]->lowres_mvs[1][p1-b-1] };
     int bipred_weights[2] = {i_bipred_weight, 64 - i_bipred_weight};
-    int *buf = h->scratch_buffer;
+    int16_t *buf = h->scratch_buffer;
     uint16_t *propagate_cost = frames[b]->i_propagate_cost;
+    uint16_t *lowres_costs = frames[b]->lowres_costs[b-p0][p1-b];
 
     x264_emms();
-    float fps_factor = CLIP_DURATION(frames[b]->f_duration) / CLIP_DURATION(average_duration);
+    float fps_factor = CLIP_DURATION(frames[b]->f_duration) / (CLIP_DURATION(average_duration) * 256.0f) * MBTREE_PRECISION;
 
     /* For non-reffed frames the source costs are always zero, so just memset one row and re-use it. */
     if( !referenced )
@@ -1065,72 +1069,17 @@
     {
         int mb_index = h->mb.i_mb_y*h->mb.i_mb_stride;
         h->mc.mbtree_propagate_cost( buf, propagate_cost,
-            frames[b]->i_intra_cost+mb_index, frames[b]->lowres_costs[b-p0][p1-b]+mb_index,
+            frames[b]->i_intra_cost+mb_index, lowres_costs+mb_index,
             frames[b]->i_inv_qscale_factor+mb_index, &fps_factor, h->mb.i_mb_width );
         if( referenced )
             propagate_cost += h->mb.i_mb_width;
-        for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->mb.i_mb_width; h->mb.i_mb_x++, mb_index++ )
+
+        h->mc.mbtree_propagate_list( h, ref_costs[0], &mvs[0][mb_index], buf, &lowres_costs[mb_index],
+                                     bipred_weights[0], h->mb.i_mb_y, h->mb.i_mb_width, 0 );
+        if( b != p1 )
         {
-            int propagate_amount = buf[h->mb.i_mb_x];
-            /* Don't propagate for an intra block. */
-            if( propagate_amount > 0 )
-            {
-                /* Access width-2 bitfield. */
-                int lists_used = frames[b]->lowres_costs[b-p0][p1-b][mb_index] >> LOWRES_COST_SHIFT;
-                /* Follow the MVs to the previous frame(s). */
-                for( int list = 0; list < 2; list++ )
-                    if( (lists_used >> list)&1 )
-                    {
-#define CLIP_ADD(s,x) (s) = X264_MIN((s)+(x),(1<<16)-1)
-                        int listamount = propagate_amount;
-                        /* Apply bipred weighting. */
-                        if( lists_used == 3 )
-                            listamount = (listamount * bipred_weights[list] + 32) >> 6;
-
-                        /* Early termination for simple case of mv0. */
-                        if( !M32( mvs[list][mb_index] ) )
-                        {
-                            CLIP_ADD( ref_costs[list][mb_index], listamount );
-                            continue;
-                        }
-
-                        int x = mvs[list][mb_index][0];
-                        int y = mvs[list][mb_index][1];
-                        int mbx = (x>>5)+h->mb.i_mb_x;
-                        int mby = (y>>5)+h->mb.i_mb_y;
-                        int idx0 = mbx + mby * h->mb.i_mb_stride;
-                        int idx1 = idx0 + 1;
-                        int idx2 = idx0 + h->mb.i_mb_stride;
-                        int idx3 = idx0 + h->mb.i_mb_stride + 1;
-                        x &= 31;
-                        y &= 31;
-                        int idx0weight = (32-y)*(32-x);
-                        int idx1weight = (32-y)*x;
-                        int idx2weight = y*(32-x);
-                        int idx3weight = y*x;
-
-                        /* We could just clip the MVs, but pixels that lie outside the frame probably shouldn't
-                         * be counted. */
-                        if( mbx < h->mb.i_mb_width-1 && mby < h->mb.i_mb_height-1 && mbx >= 0 && mby >= 0 )
-                        {
-                            CLIP_ADD( ref_costs[list][idx0], (listamount*idx0weight+512)>>10 );
-                            CLIP_ADD( ref_costs[list][idx1], (listamount*idx1weight+512)>>10 );
-                            CLIP_ADD( ref_costs[list][idx2], (listamount*idx2weight+512)>>10 );
-                            CLIP_ADD( ref_costs[list][idx3], (listamount*idx3weight+512)>>10 );
-                        }
-                        else /* Check offsets individually */
-                        {
-                            if( mbx < h->mb.i_mb_width && mby < h->mb.i_mb_height && mbx >= 0 && mby >= 0 )
-                                CLIP_ADD( ref_costs[list][idx0], (listamount*idx0weight+512)>>10 );
-                            if( mbx+1 < h->mb.i_mb_width && mby < h->mb.i_mb_height && mbx+1 >= 0 && mby >= 0 )
-                                CLIP_ADD( ref_costs[list][idx1], (listamount*idx1weight+512)>>10 );
-                            if( mbx < h->mb.i_mb_width && mby+1 < h->mb.i_mb_height && mbx >= 0 && mby+1 >= 0 )
-                                CLIP_ADD( ref_costs[list][idx2], (listamount*idx2weight+512)>>10 );
-                            if( mbx+1 < h->mb.i_mb_width && mby+1 < h->mb.i_mb_height && mbx+1 >= 0 && mby+1 >= 0 )
-                                CLIP_ADD( ref_costs[list][idx3], (listamount*idx3weight+512)>>10 );
-                        }
-                    }
-            }
+            h->mc.mbtree_propagate_list( h, ref_costs[1], &mvs[1][mb_index], buf, &lowres_costs[mb_index],
+                                         bipred_weights[1], h->mb.i_mb_y, h->mb.i_mb_width, 1 );
         }
     }

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * slicetype.c: lookahead analysis
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: Jason Garrett-Glaser <darkshikari@gmail.com>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -1022,9 +1022,12 @@
     return i_score;
 }
 
+/* Trade off precision in mbtree for increased range */
+#define MBTREE_PRECISION 0.5f
+
 static void x264_macroblock_tree_finish( x264_t *h, x264_frame_t *frame, float average_duration, int ref0_distance )
 {
-    int fps_factor = round( CLIP_DURATION(average_duration) / CLIP_DURATION(frame->f_duration) * 256 );
+    int fps_factor = round( CLIP_DURATION(average_duration) / CLIP_DURATION(frame->f_duration) * 256 / MBTREE_PRECISION );
     float weightdelta = 0.0;
     if( ref0_distance && frame->f_weighted_cost_delta[ref0_distance-1] > 0 )
         weightdelta = (1.0 - frame->f_weighted_cost_delta[ref0_distance-1]);
@@ -1051,11 +1054,12 @@
     int i_bipred_weight = h->param.analyse.b_weighted_bipred ? 64 - (dist_scale_factor>>2) : 32;
     int16_t (*mvs[2])[2] = { frames[b]->lowres_mvs[0][b-p0-1], frames[b]->lowres_mvs[1][p1-b-1] };
     int bipred_weights[2] = {i_bipred_weight, 64 - i_bipred_weight};
-    int *buf = h->scratch_buffer;
+    int16_t *buf = h->scratch_buffer;
     uint16_t *propagate_cost = frames[b]->i_propagate_cost;
+    uint16_t *lowres_costs = frames[b]->lowres_costs[b-p0][p1-b];
 
     x264_emms();
-    float fps_factor = CLIP_DURATION(frames[b]->f_duration) / CLIP_DURATION(average_duration);
+    float fps_factor = CLIP_DURATION(frames[b]->f_duration) / (CLIP_DURATION(average_duration) * 256.0f) * MBTREE_PRECISION;
 
     /* For non-reffed frames the source costs are always zero, so just memset one row and re-use it. */
     if( !referenced )
@@ -1065,72 +1069,17 @@
     {
         int mb_index = h->mb.i_mb_y*h->mb.i_mb_stride;
         h->mc.mbtree_propagate_cost( buf, propagate_cost,
-            frames[b]->i_intra_cost+mb_index, frames[b]->lowres_costs[b-p0][p1-b]+mb_index,
+            frames[b]->i_intra_cost+mb_index, lowres_costs+mb_index,
             frames[b]->i_inv_qscale_factor+mb_index, &fps_factor, h->mb.i_mb_width );
         if( referenced )
             propagate_cost += h->mb.i_mb_width;
-        for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->mb.i_mb_width; h->mb.i_mb_x++, mb_index++ )
+
+        h->mc.mbtree_propagate_list( h, ref_costs[0], &mvs[0][mb_index], buf, &lowres_costs[mb_index],
+                                     bipred_weights[0], h->mb.i_mb_y, h->mb.i_mb_width, 0 );
+        if( b != p1 )
         {
-            int propagate_amount = buf[h->mb.i_mb_x];
-            /* Don't propagate for an intra block. */
-            if( propagate_amount > 0 )
-            {
-                /* Access width-2 bitfield. */
-                int lists_used = frames[b]->lowres_costs[b-p0][p1-b][mb_index] >> LOWRES_COST_SHIFT;
-                /* Follow the MVs to the previous frame(s). */
-                for( int list = 0; list < 2; list++ )
-                    if( (lists_used >> list)&1 )
-                    {
-#define CLIP_ADD(s,x) (s) = X264_MIN((s)+(x),(1<<16)-1)
-                        int listamount = propagate_amount;
-                        /* Apply bipred weighting. */
-                        if( lists_used == 3 )
-                            listamount = (listamount * bipred_weights[list] + 32) >> 6;
-
-                        /* Early termination for simple case of mv0. */
-                        if( !M32( mvs[list][mb_index] ) )
-                        {
-                            CLIP_ADD( ref_costs[list][mb_index], listamount );
-                            continue;
-                        }
-
-                        int x = mvs[list][mb_index][0];
-                        int y = mvs[list][mb_index][1];
-                        int mbx = (x>>5)+h->mb.i_mb_x;
-                        int mby = (y>>5)+h->mb.i_mb_y;
-                        int idx0 = mbx + mby * h->mb.i_mb_stride;
-                        int idx1 = idx0 + 1;
-                        int idx2 = idx0 + h->mb.i_mb_stride;
-                        int idx3 = idx0 + h->mb.i_mb_stride + 1;
-                        x &= 31;
-                        y &= 31;
-                        int idx0weight = (32-y)*(32-x);
-                        int idx1weight = (32-y)*x;
-                        int idx2weight = y*(32-x);
-                        int idx3weight = y*x;
-
-                        /* We could just clip the MVs, but pixels that lie outside the frame probably shouldn't
-                         * be counted. */
-                        if( mbx < h->mb.i_mb_width-1 && mby < h->mb.i_mb_height-1 && mbx >= 0 && mby >= 0 )
-                        {
-                            CLIP_ADD( ref_costs[list][idx0], (listamount*idx0weight+512)>>10 );
-                            CLIP_ADD( ref_costs[list][idx1], (listamount*idx1weight+512)>>10 );
-                            CLIP_ADD( ref_costs[list][idx2], (listamount*idx2weight+512)>>10 );
-                            CLIP_ADD( ref_costs[list][idx3], (listamount*idx3weight+512)>>10 );
-                        }
-                        else /* Check offsets individually */
-                        {
-                            if( mbx < h->mb.i_mb_width && mby < h->mb.i_mb_height && mbx >= 0 && mby >= 0 )
-                                CLIP_ADD( ref_costs[list][idx0], (listamount*idx0weight+512)>>10 );
-                            if( mbx+1 < h->mb.i_mb_width && mby < h->mb.i_mb_height && mbx+1 >= 0 && mby >= 0 )
-                                CLIP_ADD( ref_costs[list][idx1], (listamount*idx1weight+512)>>10 );
-                            if( mbx < h->mb.i_mb_width && mby+1 < h->mb.i_mb_height && mbx >= 0 && mby+1 >= 0 )
-                                CLIP_ADD( ref_costs[list][idx2], (listamount*idx2weight+512)>>10 );
-                            if( mbx+1 < h->mb.i_mb_width && mby+1 < h->mb.i_mb_height && mbx+1 >= 0 && mby+1 >= 0 )
-                                CLIP_ADD( ref_costs[list][idx3], (listamount*idx3weight+512)>>10 );
-                        }
-                    }
-            }
+            h->mc.mbtree_propagate_list( h, ref_costs[1], &mvs[1][mb_index], buf, &lowres_costs[mb_index],
+                                         bipred_weights[1], h->mb.i_mb_y, h->mb.i_mb_width, 1 );
         }
     }
 
​

x264-snapshot-20130723-2245.tar.bz2/filters/filters.c -> x264-snapshot-20140321-2245.tar.bz2/filters/filters.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * filters.c: common filter functions
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Diogo Franco <diogomfranco@gmail.com>
  *          Steven Walters <kemuri9@gmail.com>
​

x264-snapshot-20130723-2245.tar.bz2/filters/filters.h -> x264-snapshot-20140321-2245.tar.bz2/filters/filters.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * filters.h: common filter functions
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Diogo Franco <diogomfranco@gmail.com>
  *          Steven Walters <kemuri9@gmail.com>
​

x264-snapshot-20130723-2245.tar.bz2/filters/video/cache.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/cache.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * cache.c: cache video filter
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/filters/video/crop.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/crop.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * crop.c: crop video filter
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *          James Darnley <james.darnley@gmail.com>
@@ -105,8 +105,7 @@
     for( int i = 0; i < output->img.planes; i++ )
     {
         intptr_t offset = output->img.stride[i] * h->dims[1] * h->csp->height[i];
-        offset += h->dims[0] * h->csp->width[i];
-        offset *= x264_cli_csp_depth_factor( output->img.csp );
+        offset += h->dims[0] * h->csp->width[i] * x264_cli_csp_depth_factor( output->img.csp );
         output->img.plane[i] += offset;
     }
     return 0;
​

x264-snapshot-20130723-2245.tar.bz2/filters/video/depth.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/depth.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * depth.c: bit-depth conversion video filter
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Oskar Arvidsson <oskar@irock.se>
  *
​

x264-snapshot-20130723-2245.tar.bz2/filters/video/fix_vfr_pts.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/fix_vfr_pts.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * fix_vfr_pts.c: vfr pts fixing video filter
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/filters/video/internal.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/internal.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * internal.c: video filter utilities
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/filters/video/internal.h -> x264-snapshot-20140321-2245.tar.bz2/filters/video/internal.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * internal.h: video filter utilities
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/filters/video/resize.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/resize.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * resize.c: resize video filter
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
@@ -45,8 +45,8 @@
 #include <libavutil/opt.h>
 #include <libavutil/pixdesc.h>
 
-#ifndef PIX_FMT_BGRA64
-#define PIX_FMT_BGRA64 PIX_FMT_NONE
+#ifndef AV_PIX_FMT_BGRA64
+#define AV_PIX_FMT_BGRA64 AV_PIX_FMT_NONE
 #endif
 
 typedef struct
@@ -94,9 +94,12 @@
 
     for( int i = X264_CSP_NONE+1; i < X264_CSP_CLI_MAX; i++ )
     {
-        printf( "%s", x264_cli_csps[i].name );
-        if( i+1 < X264_CSP_CLI_MAX )
-            printf( ", " );
+        if( x264_cli_csps[i].name )
+        {
+            printf( "%s", x264_cli_csps[i].name );
+            if( i+1 < X264_CSP_CLI_MAX )
+                printf( ", " );
+        }
     }
     printf( "\n"
             "               - depth: 8 or 16 bits per pixel [keep current]\n"
@@ -143,19 +146,19 @@
     switch( csp&X264_CSP_MASK )
     {
         case X264_CSP_YV12: /* specially handled via swapping chroma */
-        case X264_CSP_I420: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV420P16 : PIX_FMT_YUV420P;
+        case X264_CSP_I420: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_YUV420P16 : AV_PIX_FMT_YUV420P;
         case X264_CSP_YV16: /* specially handled via swapping chroma */
-        case X264_CSP_I422: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV422P16 : PIX_FMT_YUV422P;
+        case X264_CSP_I422: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_YUV422P16 : AV_PIX_FMT_YUV422P;
         case X264_CSP_YV24: /* specially handled via swapping chroma */
-        case X264_CSP_I444: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV444P16 : PIX_FMT_YUV444P;
-        case X264_CSP_RGB:  return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_RGB48     : PIX_FMT_RGB24;
-        case X264_CSP_BGR:  return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_BGR48     : PIX_FMT_BGR24;
-        case X264_CSP_BGRA: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_BGRA64    : PIX_FMT_BGRA;
+        case X264_CSP_I444: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_YUV444P;
+        case X264_CSP_RGB:  return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_RGB48     : AV_PIX_FMT_RGB24;
+        case X264_CSP_BGR:  return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_BGR48     : AV_PIX_FMT_BGR24;
+        case X264_CSP_BGRA: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_BGRA64    : AV_PIX_FMT_BGRA;
         /* the next csp has no equivalent 16bit depth in swscale */
-        case X264_CSP_NV12: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_NONE      : PIX_FMT_NV12;
+        case X264_CSP_NV12: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_NONE      : AV_PIX_FMT_NV12;
         /* the next csp is no supported by swscale at all */
         case X264_CSP_NV16:
-        default:            return PIX_FMT_NONE;
+        default:            return AV_PIX_FMT_NONE;
     }
 }
 
@@ -175,12 +178,12 @@
     int pix_fmt = convert_csp_to_pix_fmt( csp );
     // first determine the base csp
     int ret = X264_CSP_NONE;
-    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_descriptors+pix_fmt;
-    if( (unsigned)pix_fmt >= PIX_FMT_NB || !pix_desc->name )
+    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get( pix_fmt );
+    if( !pix_desc || !pix_desc->name )
         return ret;
 
     const char *pix_fmt_name = pix_desc->name;
-    int is_rgb = pix_desc->flags & (PIX_FMT_RGB | PIX_FMT_PAL);
+    int is_rgb = pix_desc->flags & (AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_PAL);
     int is_bgr = !!strstr( pix_fmt_name, "bgr" );
     if( is_bgr || is_rgb )
     {
@@ -243,8 +246,11 @@
         if( strlen( str_csp ) == 0 )
             csp = info->csp & X264_CSP_MASK;
         else
-            for( csp = X264_CSP_CLI_MAX-1; x264_cli_csps[csp].name && strcasecmp( x264_cli_csps[csp].name, str_csp ); )
-                csp--;
+            for( csp = X264_CSP_CLI_MAX-1; csp > X264_CSP_NONE; csp-- )
+            {
+                if( x264_cli_csps[csp].name && !strcasecmp( x264_cli_csps[csp].name, str_csp ) )
+                    break;
+            }
         FAIL_IF_ERROR( csp == X264_CSP_NONE, "unsupported colorspace `%s'\n", str_csp );
         h->dst_csp = csp;
         if( depth == 16 )
@@ -392,7 +398,7 @@
     h->scale = input_prop;
     if( !h->buffer_allocated )
     {
-        if( x264_cli_pic_alloc( &h->buffer, h->dst_csp, h->dst.width, h->dst.height ) )
+        if( x264_cli_pic_alloc_aligned( &h->buffer, h->dst_csp, h->dst.width, h->dst.height ) )
             return -1;
         h->buffer_allocated = 1;
     }
@@ -462,11 +468,11 @@
     int dst_pix_fmt_inv = convert_csp_to_pix_fmt( h->dst_csp ^ X264_CSP_HIGH_DEPTH );
 
     /* confirm swscale can support this conversion */
-    FAIL_IF_ERROR( src_pix_fmt == PIX_FMT_NONE && src_pix_fmt_inv != PIX_FMT_NONE,
+    FAIL_IF_ERROR( src_pix_fmt == AV_PIX_FMT_NONE && src_pix_fmt_inv != AV_PIX_FMT_NONE,
                    "input colorspace %s with bit depth %d is not supported\n", av_get_pix_fmt_name( src_pix_fmt_inv ),
                    info->csp & X264_CSP_HIGH_DEPTH ? 16 : 8 );
     FAIL_IF_ERROR( !sws_isSupportedInput( src_pix_fmt ), "input colorspace %s is not supported\n", av_get_pix_fmt_name( src_pix_fmt ) )
-    FAIL_IF_ERROR( h->dst.pix_fmt == PIX_FMT_NONE && dst_pix_fmt_inv != PIX_FMT_NONE,
+    FAIL_IF_ERROR( h->dst.pix_fmt == AV_PIX_FMT_NONE && dst_pix_fmt_inv != AV_PIX_FMT_NONE,
                    "input colorspace %s with bit depth %d is not supported\n", av_get_pix_fmt_name( dst_pix_fmt_inv ),
                    h->dst_csp & X264_CSP_HIGH_DEPTH ? 16 : 8 );
     FAIL_IF_ERROR( !sws_isSupportedOutput( h->dst.pix_fmt ), "output colorspace %s is not supported\n", av_get_pix_fmt_name( h->dst.pix_fmt ) )

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * resize.c: resize video filter
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
@@ -45,8 +45,8 @@
 #include <libavutil/opt.h>
 #include <libavutil/pixdesc.h>
 
-#ifndef PIX_FMT_BGRA64
-#define PIX_FMT_BGRA64 PIX_FMT_NONE
+#ifndef AV_PIX_FMT_BGRA64
+#define AV_PIX_FMT_BGRA64 AV_PIX_FMT_NONE
 #endif
 
 typedef struct
@@ -94,9 +94,12 @@
 
     for( int i = X264_CSP_NONE+1; i < X264_CSP_CLI_MAX; i++ )
     {
-        printf( "%s", x264_cli_csps[i].name );
-        if( i+1 < X264_CSP_CLI_MAX )
-            printf( ", " );
+        if( x264_cli_csps[i].name )
+        {
+            printf( "%s", x264_cli_csps[i].name );
+            if( i+1 < X264_CSP_CLI_MAX )
+                printf( ", " );
+        }
     }
     printf( "\n"
             "               - depth: 8 or 16 bits per pixel [keep current]\n"
@@ -143,19 +146,19 @@
     switch( csp&X264_CSP_MASK )
     {
         case X264_CSP_YV12: /* specially handled via swapping chroma */
-        case X264_CSP_I420: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV420P16 : PIX_FMT_YUV420P;
+        case X264_CSP_I420: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_YUV420P16 : AV_PIX_FMT_YUV420P;
         case X264_CSP_YV16: /* specially handled via swapping chroma */
-        case X264_CSP_I422: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV422P16 : PIX_FMT_YUV422P;
+        case X264_CSP_I422: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_YUV422P16 : AV_PIX_FMT_YUV422P;
         case X264_CSP_YV24: /* specially handled via swapping chroma */
-        case X264_CSP_I444: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV444P16 : PIX_FMT_YUV444P;
-        case X264_CSP_RGB:  return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_RGB48     : PIX_FMT_RGB24;
-        case X264_CSP_BGR:  return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_BGR48     : PIX_FMT_BGR24;
-        case X264_CSP_BGRA: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_BGRA64    : PIX_FMT_BGRA;
+        case X264_CSP_I444: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_YUV444P;
+        case X264_CSP_RGB:  return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_RGB48     : AV_PIX_FMT_RGB24;
+        case X264_CSP_BGR:  return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_BGR48     : AV_PIX_FMT_BGR24;
+        case X264_CSP_BGRA: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_BGRA64    : AV_PIX_FMT_BGRA;
         /* the next csp has no equivalent 16bit depth in swscale */
-        case X264_CSP_NV12: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_NONE      : PIX_FMT_NV12;
+        case X264_CSP_NV12: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_NONE      : AV_PIX_FMT_NV12;
         /* the next csp is no supported by swscale at all */
         case X264_CSP_NV16:
-        default:            return PIX_FMT_NONE;
+        default:            return AV_PIX_FMT_NONE;
     }
 }
 
@@ -175,12 +178,12 @@
     int pix_fmt = convert_csp_to_pix_fmt( csp );
     // first determine the base csp
     int ret = X264_CSP_NONE;
-    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_descriptors+pix_fmt;
-    if( (unsigned)pix_fmt >= PIX_FMT_NB || !pix_desc->name )
+    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get( pix_fmt );
+    if( !pix_desc || !pix_desc->name )
         return ret;
 
     const char *pix_fmt_name = pix_desc->name;
-    int is_rgb = pix_desc->flags & (PIX_FMT_RGB | PIX_FMT_PAL);
+    int is_rgb = pix_desc->flags & (AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_PAL);
     int is_bgr = !!strstr( pix_fmt_name, "bgr" );
     if( is_bgr || is_rgb )
     {
@@ -243,8 +246,11 @@
         if( strlen( str_csp ) == 0 )
             csp = info->csp & X264_CSP_MASK;
         else
-            for( csp = X264_CSP_CLI_MAX-1; x264_cli_csps[csp].name && strcasecmp( x264_cli_csps[csp].name, str_csp ); )
-                csp--;
+            for( csp = X264_CSP_CLI_MAX-1; csp > X264_CSP_NONE; csp-- )
+            {
+                if( x264_cli_csps[csp].name && !strcasecmp( x264_cli_csps[csp].name, str_csp ) )
+                    break;
+            }
         FAIL_IF_ERROR( csp == X264_CSP_NONE, "unsupported colorspace `%s'\n", str_csp );
         h->dst_csp = csp;
         if( depth == 16 )
@@ -392,7 +398,7 @@
     h->scale = input_prop;
     if( !h->buffer_allocated )
     {
-        if( x264_cli_pic_alloc( &h->buffer, h->dst_csp, h->dst.width, h->dst.height ) )
+        if( x264_cli_pic_alloc_aligned( &h->buffer, h->dst_csp, h->dst.width, h->dst.height ) )
             return -1;
         h->buffer_allocated = 1;
     }
@@ -462,11 +468,11 @@
     int dst_pix_fmt_inv = convert_csp_to_pix_fmt( h->dst_csp ^ X264_CSP_HIGH_DEPTH );
 
     /* confirm swscale can support this conversion */
-    FAIL_IF_ERROR( src_pix_fmt == PIX_FMT_NONE && src_pix_fmt_inv != PIX_FMT_NONE,
+    FAIL_IF_ERROR( src_pix_fmt == AV_PIX_FMT_NONE && src_pix_fmt_inv != AV_PIX_FMT_NONE,
                    "input colorspace %s with bit depth %d is not supported\n", av_get_pix_fmt_name( src_pix_fmt_inv ),
                    info->csp & X264_CSP_HIGH_DEPTH ? 16 : 8 );
     FAIL_IF_ERROR( !sws_isSupportedInput( src_pix_fmt ), "input colorspace %s is not supported\n", av_get_pix_fmt_name( src_pix_fmt ) )
-    FAIL_IF_ERROR( h->dst.pix_fmt == PIX_FMT_NONE && dst_pix_fmt_inv != PIX_FMT_NONE,
+    FAIL_IF_ERROR( h->dst.pix_fmt == AV_PIX_FMT_NONE && dst_pix_fmt_inv != AV_PIX_FMT_NONE,
                    "input colorspace %s with bit depth %d is not supported\n", av_get_pix_fmt_name( dst_pix_fmt_inv ),
                    h->dst_csp & X264_CSP_HIGH_DEPTH ? 16 : 8 );
     FAIL_IF_ERROR( !sws_isSupportedOutput( h->dst.pix_fmt ), "output colorspace %s is not supported\n", av_get_pix_fmt_name( h->dst.pix_fmt ) )
​

x264-snapshot-20130723-2245.tar.bz2/filters/video/select_every.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/select_every.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * select_every.c: select-every video filter
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/filters/video/source.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/source.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * source.c: source video filter
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/filters/video/video.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/video.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * video.c: video filters
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/filters/video/video.h -> x264-snapshot-20140321-2245.tar.bz2/filters/video/video.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * video.h: video filters
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/input/avs.c -> x264-snapshot-20140321-2245.tar.bz2/input/avs.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * avs.c: avisynth input
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
@@ -35,7 +35,7 @@
 #define avs_address dlsym
 #else
 #include <windows.h>
-#define avs_open LoadLibrary( "avisynth" )
+#define avs_open LoadLibraryW( L"avisynth" )
 #define avs_close FreeLibrary
 #define avs_address GetProcAddress
 #endif
@@ -172,7 +172,7 @@
 
 static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, cli_input_opt_t *opt )
 {
-    FILE *fh = fopen( psz_filename, "r" );
+    FILE *fh = x264_fopen( psz_filename, "r" );
     if( !fh )
         return -1;
     FAIL_IF_ERROR( !x264_is_regular_file( fh ), "AVS input is incompatible with non-regular file `%s'\n", psz_filename );
@@ -192,7 +192,16 @@
     if( avs_version <= 0 )
         return -1;
     x264_cli_log( "avs", X264_LOG_DEBUG, "using avisynth version %.2f\n", avs_version );
+
+#ifdef _WIN32
+    /* Avisynth doesn't support Unicode filenames. */
+    char ansi_filename[MAX_PATH];
+    FAIL_IF_ERROR( !x264_ansi_filename( psz_filename, ansi_filename, MAX_PATH, 0 ), "invalid ansi filename\n" );
+    AVS_Value arg = avs_new_value_string( ansi_filename );
+#else
     AVS_Value arg = avs_new_value_string( psz_filename );
+#endif
+
     AVS_Value res;
     char *filename_ext = get_filename_extension( psz_filename );
 
@@ -329,11 +338,11 @@
         info->csp = X264_CSP_I420;
 #if HAVE_SWSCALE
     else if( avs_is_yuy2( vi ) )
-        info->csp = PIX_FMT_YUYV422 | X264_CSP_OTHER;
+        info->csp = AV_PIX_FMT_YUYV422 | X264_CSP_OTHER;
     else if( avs_is_yv411( vi ) )
-        info->csp = PIX_FMT_YUV411P | X264_CSP_OTHER;
+        info->csp = AV_PIX_FMT_YUV411P | X264_CSP_OTHER;
     else if( avs_is_y8( vi ) )
-        info->csp = PIX_FMT_GRAY8 | X264_CSP_OTHER;
+        info->csp = AV_PIX_FMT_GRAY8 | X264_CSP_OTHER;
 #endif
     else
         info->csp = X264_CSP_NONE;
@@ -352,7 +361,7 @@
     if( cli_csp )
         pic->img.planes = cli_csp->planes;
 #if HAVE_SWSCALE
-    else if( csp == (PIX_FMT_YUV411P | X264_CSP_OTHER) )
+    else if( csp == (AV_PIX_FMT_YUV411P | X264_CSP_OTHER) )
         pic->img.planes = 3;
     else
         pic->img.planes = 1; //y8 and yuy2 are one plane

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * avs.c: avisynth input
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
@@ -35,7 +35,7 @@
 #define avs_address dlsym
 #else
 #include <windows.h>
-#define avs_open LoadLibrary( "avisynth" )
+#define avs_open LoadLibraryW( L"avisynth" )
 #define avs_close FreeLibrary
 #define avs_address GetProcAddress
 #endif
@@ -172,7 +172,7 @@
 
 static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, cli_input_opt_t *opt )
 {
-    FILE *fh = fopen( psz_filename, "r" );
+    FILE *fh = x264_fopen( psz_filename, "r" );
     if( !fh )
         return -1;
     FAIL_IF_ERROR( !x264_is_regular_file( fh ), "AVS input is incompatible with non-regular file `%s'\n", psz_filename );
@@ -192,7 +192,16 @@
     if( avs_version <= 0 )
         return -1;
     x264_cli_log( "avs", X264_LOG_DEBUG, "using avisynth version %.2f\n", avs_version );
+
+#ifdef _WIN32
+    /* Avisynth doesn't support Unicode filenames. */
+    char ansi_filename[MAX_PATH];
+    FAIL_IF_ERROR( !x264_ansi_filename( psz_filename, ansi_filename, MAX_PATH, 0 ), "invalid ansi filename\n" );
+    AVS_Value arg = avs_new_value_string( ansi_filename );
+#else
     AVS_Value arg = avs_new_value_string( psz_filename );
+#endif
+
     AVS_Value res;
     char *filename_ext = get_filename_extension( psz_filename );
 
@@ -329,11 +338,11 @@
         info->csp = X264_CSP_I420;
 #if HAVE_SWSCALE
     else if( avs_is_yuy2( vi ) )
-        info->csp = PIX_FMT_YUYV422 | X264_CSP_OTHER;
+        info->csp = AV_PIX_FMT_YUYV422 | X264_CSP_OTHER;
     else if( avs_is_yv411( vi ) )
-        info->csp = PIX_FMT_YUV411P | X264_CSP_OTHER;
+        info->csp = AV_PIX_FMT_YUV411P | X264_CSP_OTHER;
     else if( avs_is_y8( vi ) )
-        info->csp = PIX_FMT_GRAY8 | X264_CSP_OTHER;
+        info->csp = AV_PIX_FMT_GRAY8 | X264_CSP_OTHER;
 #endif
     else
         info->csp = X264_CSP_NONE;
@@ -352,7 +361,7 @@
     if( cli_csp )
         pic->img.planes = cli_csp->planes;
 #if HAVE_SWSCALE
-    else if( csp == (PIX_FMT_YUV411P | X264_CSP_OTHER) )
+    else if( csp == (AV_PIX_FMT_YUV411P | X264_CSP_OTHER) )
         pic->img.planes = 3;
     else
         pic->img.planes = 1; //y8 and yuy2 are one plane
​

x264-snapshot-20130723-2245.tar.bz2/input/ffms.c -> x264-snapshot-20140321-2245.tar.bz2/input/ffms.c Changed

@@ -1,10 +1,11 @@
 /*****************************************************************************
  * ffms.c: ffmpegsource input
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: Mike Gurlitz <mike.gurlitz@gmail.com>
  *          Steven Walters <kemuri9@gmail.com>
+ *          Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -34,8 +35,6 @@
 
 #ifdef _WIN32
 #include <windows.h>
-#else
-#define SetConsoleTitle(t)
 #endif
 
 typedef struct
@@ -60,7 +59,7 @@
     char buf[200];
     sprintf( buf, "ffms [info]: indexing input file [%.1f%%]", 100.0 * current / total );
     fprintf( stderr, "%s  \r", buf+5 );
-    SetConsoleTitle( buf );
+    x264_cli_set_console_title( buf );
     fflush( stderr );
     return 0;
 }
@@ -70,9 +69,9 @@
 {
     switch( csp )
     {
-        case PIX_FMT_YUVJ420P: *fullrange = 1; return PIX_FMT_YUV420P;
-        case PIX_FMT_YUVJ422P: *fullrange = 1; return PIX_FMT_YUV422P;
-        case PIX_FMT_YUVJ444P: *fullrange = 1; return PIX_FMT_YUV444P;
+        case AV_PIX_FMT_YUVJ420P: *fullrange = 1; return AV_PIX_FMT_YUV420P;
+        case AV_PIX_FMT_YUVJ422P: *fullrange = 1; return AV_PIX_FMT_YUV422P;
+        case AV_PIX_FMT_YUVJ444P: *fullrange = 1; return AV_PIX_FMT_YUV444P;
         default:                               return csp;
     }
 }
@@ -82,7 +81,21 @@
     ffms_hnd_t *h = calloc( 1, sizeof(ffms_hnd_t) );
     if( !h )
         return -1;
+
+#ifdef __MINGW32__
+    /* FFMS supports UTF-8 filenames, but it uses std::fstream internally which is broken with Unicode in MinGW. */
     FFMS_Init( 0, 0 );
+    char src_filename[MAX_PATH];
+    char idx_filename[MAX_PATH];
+    FAIL_IF_ERROR( !x264_ansi_filename( psz_filename, src_filename, MAX_PATH, 0 ), "invalid ansi filename\n" );
+    if( opt->index_file )
+        FAIL_IF_ERROR( !x264_ansi_filename( opt->index_file, idx_filename, MAX_PATH, 1 ), "invalid ansi filename\n" );
+#else
+    FFMS_Init( 0, 1 );
+    char *src_filename = psz_filename;
+    char *idx_filename = opt->index_file;
+#endif
+
     FFMS_ErrorInfo e;
     e.BufferSize = 0;
     int seekmode = opt->seek ? FFMS_SEEK_NORMAL : FFMS_SEEK_LINEAR_NO_RW;
@@ -90,29 +103,29 @@
     FFMS_Index *idx = NULL;
     if( opt->index_file )
     {
-        struct stat index_s, input_s;
-        if( !stat( opt->index_file, &index_s ) && !stat( psz_filename, &input_s ) &&
-            input_s.st_mtime < index_s.st_mtime )
-            idx = FFMS_ReadIndex( opt->index_file, &e );
+        x264_struct_stat index_s, input_s;
+        if( !x264_stat( opt->index_file, &index_s ) && !x264_stat( psz_filename, &input_s ) &&
+            input_s.st_mtime < index_s.st_mtime && index_s.st_size )
+            idx = FFMS_ReadIndex( idx_filename, &e );
     }
     if( !idx )
     {
         if( opt->progress )
         {
-            idx = FFMS_MakeIndex( psz_filename, 0, 0, NULL, NULL, 0, update_progress, &h->time, &e );
+            idx = FFMS_MakeIndex( src_filename, 0, 0, NULL, NULL, 0, update_progress, &h->time, &e );
             fprintf( stderr, "                                            \r" );
         }
         else
-            idx = FFMS_MakeIndex( psz_filename, 0, 0, NULL, NULL, 0, NULL, NULL, &e );
+            idx = FFMS_MakeIndex( src_filename, 0, 0, NULL, NULL, 0, NULL, NULL, &e );
         FAIL_IF_ERROR( !idx, "could not create index\n" )
-        if( opt->index_file && FFMS_WriteIndex( opt->index_file, idx, &e ) )
+        if( opt->index_file && FFMS_WriteIndex( idx_filename, idx, &e ) )
             x264_cli_log( "ffms", X264_LOG_WARNING, "could not write index file\n" );
     }
 
     int trackno = FFMS_GetFirstTrackOfType( idx, FFMS_TYPE_VIDEO, &e );
     FAIL_IF_ERROR( trackno < 0, "could not find video track\n" )
 
-    h->video_source = FFMS_CreateVideoSource( psz_filename, trackno, idx, 1, seekmode, &e );
+    h->video_source = FFMS_CreateVideoSource( src_filename, trackno, idx, 1, seekmode, &e );
     FAIL_IF_ERROR( !h->video_source, "could not create video source\n" )
 
     h->track = FFMS_GetTrackFromVideo( h->video_source );

 
@@ -1,10 +1,11 @@
 /*****************************************************************************
  * ffms.c: ffmpegsource input
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: Mike Gurlitz <mike.gurlitz@gmail.com>
  *          Steven Walters <kemuri9@gmail.com>
+ *          Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -34,8 +35,6 @@
 
 #ifdef _WIN32
 #include <windows.h>
-#else
-#define SetConsoleTitle(t)
 #endif
 
 typedef struct
@@ -60,7 +59,7 @@
     char buf[200];
     sprintf( buf, "ffms [info]: indexing input file [%.1f%%]", 100.0 * current / total );
     fprintf( stderr, "%s  \r", buf+5 );
-    SetConsoleTitle( buf );
+    x264_cli_set_console_title( buf );
     fflush( stderr );
     return 0;
 }
@@ -70,9 +69,9 @@
 {
     switch( csp )
     {
-        case PIX_FMT_YUVJ420P: *fullrange = 1; return PIX_FMT_YUV420P;
-        case PIX_FMT_YUVJ422P: *fullrange = 1; return PIX_FMT_YUV422P;
-        case PIX_FMT_YUVJ444P: *fullrange = 1; return PIX_FMT_YUV444P;
+        case AV_PIX_FMT_YUVJ420P: *fullrange = 1; return AV_PIX_FMT_YUV420P;
+        case AV_PIX_FMT_YUVJ422P: *fullrange = 1; return AV_PIX_FMT_YUV422P;
+        case AV_PIX_FMT_YUVJ444P: *fullrange = 1; return AV_PIX_FMT_YUV444P;
         default:                               return csp;
     }
 }
@@ -82,7 +81,21 @@
     ffms_hnd_t *h = calloc( 1, sizeof(ffms_hnd_t) );
     if( !h )
         return -1;
+
+#ifdef __MINGW32__
+    /* FFMS supports UTF-8 filenames, but it uses std::fstream internally which is broken with Unicode in MinGW. */
     FFMS_Init( 0, 0 );
+    char src_filename[MAX_PATH];
+    char idx_filename[MAX_PATH];
+    FAIL_IF_ERROR( !x264_ansi_filename( psz_filename, src_filename, MAX_PATH, 0 ), "invalid ansi filename\n" );
+    if( opt->index_file )
+        FAIL_IF_ERROR( !x264_ansi_filename( opt->index_file, idx_filename, MAX_PATH, 1 ), "invalid ansi filename\n" );
+#else
+    FFMS_Init( 0, 1 );
+    char *src_filename = psz_filename;
+    char *idx_filename = opt->index_file;
+#endif
+
     FFMS_ErrorInfo e;
     e.BufferSize = 0;
     int seekmode = opt->seek ? FFMS_SEEK_NORMAL : FFMS_SEEK_LINEAR_NO_RW;
@@ -90,29 +103,29 @@
     FFMS_Index *idx = NULL;
     if( opt->index_file )
     {
-        struct stat index_s, input_s;
-        if( !stat( opt->index_file, &index_s ) && !stat( psz_filename, &input_s ) &&
-            input_s.st_mtime < index_s.st_mtime )
-            idx = FFMS_ReadIndex( opt->index_file, &e );
+        x264_struct_stat index_s, input_s;
+        if( !x264_stat( opt->index_file, &index_s ) && !x264_stat( psz_filename, &input_s ) &&
+            input_s.st_mtime < index_s.st_mtime && index_s.st_size )
+            idx = FFMS_ReadIndex( idx_filename, &e );
     }
     if( !idx )
     {
         if( opt->progress )
         {
-            idx = FFMS_MakeIndex( psz_filename, 0, 0, NULL, NULL, 0, update_progress, &h->time, &e );
+            idx = FFMS_MakeIndex( src_filename, 0, 0, NULL, NULL, 0, update_progress, &h->time, &e );
             fprintf( stderr, "                                            \r" );
         }
         else
-            idx = FFMS_MakeIndex( psz_filename, 0, 0, NULL, NULL, 0, NULL, NULL, &e );
+            idx = FFMS_MakeIndex( src_filename, 0, 0, NULL, NULL, 0, NULL, NULL, &e );
         FAIL_IF_ERROR( !idx, "could not create index\n" )
-        if( opt->index_file && FFMS_WriteIndex( opt->index_file, idx, &e ) )
+        if( opt->index_file && FFMS_WriteIndex( idx_filename, idx, &e ) )
             x264_cli_log( "ffms", X264_LOG_WARNING, "could not write index file\n" );
     }
 
     int trackno = FFMS_GetFirstTrackOfType( idx, FFMS_TYPE_VIDEO, &e );
     FAIL_IF_ERROR( trackno < 0, "could not find video track\n" )
 
-    h->video_source = FFMS_CreateVideoSource( psz_filename, trackno, idx, 1, seekmode, &e );
+    h->video_source = FFMS_CreateVideoSource( src_filename, trackno, idx, 1, seekmode, &e );
     FAIL_IF_ERROR( !h->video_source, "could not create video source\n" )
 
     h->track = FFMS_GetTrackFromVideo( h->video_source );
​

x264-snapshot-20130723-2245.tar.bz2/input/input.c -> x264-snapshot-20140321-2245.tar.bz2/input/input.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * input.c: common input functions
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
@@ -42,7 +42,8 @@
 int x264_cli_csp_is_invalid( int csp )
 {
     int csp_mask = csp & X264_CSP_MASK;
-    return csp_mask <= X264_CSP_NONE || csp_mask >= X264_CSP_CLI_MAX || csp & X264_CSP_OTHER;
+    return csp_mask <= X264_CSP_NONE || csp_mask >= X264_CSP_CLI_MAX ||
+           csp_mask == X264_CSP_V210 || csp & X264_CSP_OTHER;
 }
 
 int x264_cli_csp_depth_factor( int csp )
@@ -74,7 +75,7 @@
     return size;
 }
 
-int x264_cli_pic_alloc( cli_pic_t *pic, int csp, int width, int height )
+static int x264_cli_pic_alloc_internal( cli_pic_t *pic, int csp, int width, int height, int align )
 {
     memset( pic, 0, sizeof(cli_pic_t) );
     int csp_mask = csp & X264_CSP_MASK;
@@ -87,15 +88,29 @@
     pic->img.height = height;
     for( int i = 0; i < pic->img.planes; i++ )
     {
-         pic->img.plane[i] = x264_malloc( x264_cli_pic_plane_size( csp, width, height, i ) );
-         if( !pic->img.plane[i] )
-             return -1;
-         pic->img.stride[i] = width * x264_cli_csps[csp_mask].width[i] * x264_cli_csp_depth_factor( csp );
+        int stride = width * x264_cli_csps[csp_mask].width[i];
+        stride *= x264_cli_csp_depth_factor( csp );
+        stride = ALIGN( stride, align );
+        uint64_t size = (uint64_t)(height * x264_cli_csps[csp_mask].height[i]) * stride;
+        pic->img.plane[i] = x264_malloc( size );
+        if( !pic->img.plane[i] )
+            return -1;
+        pic->img.stride[i] = stride;
     }
 
     return 0;
 }
 
+int x264_cli_pic_alloc( cli_pic_t *pic, int csp, int width, int height )
+{
+    return x264_cli_pic_alloc_internal( pic, csp, width, height, 1 );
+}
+
+int x264_cli_pic_alloc_aligned( cli_pic_t *pic, int csp, int width, int height )
+{
+    return x264_cli_pic_alloc_internal( pic, csp, width, height, NATIVE_ALIGN );
+}
+
 void x264_cli_pic_clean( cli_pic_t *pic )
 {
     for( int i = 0; i < pic->img.planes; i++ )

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * input.c: common input functions
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Steven Walters <kemuri9@gmail.com>
  *
@@ -42,7 +42,8 @@
 int x264_cli_csp_is_invalid( int csp )
 {
     int csp_mask = csp & X264_CSP_MASK;
-    return csp_mask <= X264_CSP_NONE || csp_mask >= X264_CSP_CLI_MAX || csp & X264_CSP_OTHER;
+    return csp_mask <= X264_CSP_NONE || csp_mask >= X264_CSP_CLI_MAX ||
+           csp_mask == X264_CSP_V210 || csp & X264_CSP_OTHER;
 }
 
 int x264_cli_csp_depth_factor( int csp )
@@ -74,7 +75,7 @@
     return size;
 }
 
-int x264_cli_pic_alloc( cli_pic_t *pic, int csp, int width, int height )
+static int x264_cli_pic_alloc_internal( cli_pic_t *pic, int csp, int width, int height, int align )
 {
     memset( pic, 0, sizeof(cli_pic_t) );
     int csp_mask = csp & X264_CSP_MASK;
@@ -87,15 +88,29 @@
     pic->img.height = height;
     for( int i = 0; i < pic->img.planes; i++ )
     {
-         pic->img.plane[i] = x264_malloc( x264_cli_pic_plane_size( csp, width, height, i ) );
-         if( !pic->img.plane[i] )
-             return -1;
-         pic->img.stride[i] = width * x264_cli_csps[csp_mask].width[i] * x264_cli_csp_depth_factor( csp );
+        int stride = width * x264_cli_csps[csp_mask].width[i];
+        stride *= x264_cli_csp_depth_factor( csp );
+        stride = ALIGN( stride, align );
+        uint64_t size = (uint64_t)(height * x264_cli_csps[csp_mask].height[i]) * stride;
+        pic->img.plane[i] = x264_malloc( size );
+        if( !pic->img.plane[i] )
+            return -1;
+        pic->img.stride[i] = stride;
     }
 
     return 0;
 }
 
+int x264_cli_pic_alloc( cli_pic_t *pic, int csp, int width, int height )
+{
+    return x264_cli_pic_alloc_internal( pic, csp, width, height, 1 );
+}
+
+int x264_cli_pic_alloc_aligned( cli_pic_t *pic, int csp, int width, int height )
+{
+    return x264_cli_pic_alloc_internal( pic, csp, width, height, NATIVE_ALIGN );
+}
+
 void x264_cli_pic_clean( cli_pic_t *pic )
 {
     for( int i = 0; i < pic->img.planes; i++ )
​

x264-snapshot-20130723-2245.tar.bz2/input/input.h -> x264-snapshot-20140321-2245.tar.bz2/input/input.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * input.h: file input
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -124,6 +124,7 @@
 int      x264_cli_csp_is_invalid( int csp );
 int      x264_cli_csp_depth_factor( int csp );
 int      x264_cli_pic_alloc( cli_pic_t *pic, int csp, int width, int height );
+int      x264_cli_pic_alloc_aligned( cli_pic_t *pic, int csp, int width, int height );
 void     x264_cli_pic_clean( cli_pic_t *pic );
 uint64_t x264_cli_pic_plane_size( int csp, int width, int height, int plane );
 uint64_t x264_cli_pic_size( int csp, int width, int height );
​

x264-snapshot-20130723-2245.tar.bz2/input/lavf.c -> x264-snapshot-20140321-2245.tar.bz2/input/lavf.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * lavf.c: libavformat input
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: Mike Gurlitz <mike.gurlitz@gmail.com>
  *          Steven Walters <kemuri9@gmail.com>
@@ -53,9 +53,9 @@
 {
     switch( csp )
     {
-        case PIX_FMT_YUVJ420P: *fullrange = 1; return PIX_FMT_YUV420P;
-        case PIX_FMT_YUVJ422P: *fullrange = 1; return PIX_FMT_YUV422P;
-        case PIX_FMT_YUVJ444P: *fullrange = 1; return PIX_FMT_YUV444P;
+        case AV_PIX_FMT_YUVJ420P: *fullrange = 1; return AV_PIX_FMT_YUV420P;
+        case AV_PIX_FMT_YUVJ422P: *fullrange = 1; return AV_PIX_FMT_YUV422P;
+        case AV_PIX_FMT_YUVJ444P: *fullrange = 1; return AV_PIX_FMT_YUV444P;
         default:                               return csp;
     }
 }
@@ -162,7 +162,7 @@
     if( opt->resolution )
     {
         av_dict_set( &options, "video_size", opt->resolution, 0 );
-        const char *csp = opt->colorspace ? opt->colorspace : av_get_pix_fmt_name( PIX_FMT_YUV420P );
+        const char *csp = opt->colorspace ? opt->colorspace : av_get_pix_fmt_name( AV_PIX_FMT_YUV420P );
         av_dict_set( &options, "pixel_format", csp, 0 );
     }
 
@@ -210,7 +210,7 @@
 
     /* avisynth stores rgb data vertically flipped. */
     if( !strcasecmp( get_filename_extension( psz_filename ), "avs" ) &&
-        (c->pix_fmt == PIX_FMT_BGRA || c->pix_fmt == PIX_FMT_BGR24) )
+        (c->pix_fmt == AV_PIX_FMT_BGRA || c->pix_fmt == AV_PIX_FMT_BGR24) )
         info->csp |= X264_CSP_VFLIP;
 
     *p_handle = h;

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * lavf.c: libavformat input
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: Mike Gurlitz <mike.gurlitz@gmail.com>
  *          Steven Walters <kemuri9@gmail.com>
@@ -53,9 +53,9 @@
 {
     switch( csp )
     {
-        case PIX_FMT_YUVJ420P: *fullrange = 1; return PIX_FMT_YUV420P;
-        case PIX_FMT_YUVJ422P: *fullrange = 1; return PIX_FMT_YUV422P;
-        case PIX_FMT_YUVJ444P: *fullrange = 1; return PIX_FMT_YUV444P;
+        case AV_PIX_FMT_YUVJ420P: *fullrange = 1; return AV_PIX_FMT_YUV420P;
+        case AV_PIX_FMT_YUVJ422P: *fullrange = 1; return AV_PIX_FMT_YUV422P;
+        case AV_PIX_FMT_YUVJ444P: *fullrange = 1; return AV_PIX_FMT_YUV444P;
         default:                               return csp;
     }
 }
@@ -162,7 +162,7 @@
     if( opt->resolution )
     {
         av_dict_set( &options, "video_size", opt->resolution, 0 );
-        const char *csp = opt->colorspace ? opt->colorspace : av_get_pix_fmt_name( PIX_FMT_YUV420P );
+        const char *csp = opt->colorspace ? opt->colorspace : av_get_pix_fmt_name( AV_PIX_FMT_YUV420P );
         av_dict_set( &options, "pixel_format", csp, 0 );
     }
 
@@ -210,7 +210,7 @@
 
     /* avisynth stores rgb data vertically flipped. */
     if( !strcasecmp( get_filename_extension( psz_filename ), "avs" ) &&
-        (c->pix_fmt == PIX_FMT_BGRA || c->pix_fmt == PIX_FMT_BGR24) )
+        (c->pix_fmt == AV_PIX_FMT_BGRA || c->pix_fmt == AV_PIX_FMT_BGR24) )
         info->csp |= X264_CSP_VFLIP;
 
     *p_handle = h;
​

x264-snapshot-20130723-2245.tar.bz2/input/raw.c -> x264-snapshot-20140321-2245.tar.bz2/input/raw.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * raw.c: raw input
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -55,8 +55,11 @@
     FAIL_IF_ERROR( !info->width || !info->height, "raw input requires a resolution.\n" )
     if( opt->colorspace )
     {
-        for( info->csp = X264_CSP_CLI_MAX-1; x264_cli_csps[info->csp].name && strcasecmp( x264_cli_csps[info->csp].name, opt->colorspace ); )
-            info->csp--;
+        for( info->csp = X264_CSP_CLI_MAX-1; info->csp > X264_CSP_NONE; info->csp-- )
+        {
+            if( x264_cli_csps[info->csp].name && !strcasecmp( x264_cli_csps[info->csp].name, opt->colorspace ) )
+                break;
+        }
         FAIL_IF_ERROR( info->csp == X264_CSP_NONE, "unsupported colorspace `%s'\n", opt->colorspace );
     }
     else /* default */
@@ -70,7 +73,7 @@
     if( !strcmp( psz_filename, "-" ) )
         h->fh = stdin;
     else
-        h->fh = fopen( psz_filename, "rb" );
+        h->fh = x264_fopen( psz_filename, "rb" );
     if( h->fh == NULL )
         return -1;
 
@@ -99,14 +102,14 @@
     return 0;
 }
 
-static int read_frame_internal( cli_pic_t *pic, raw_hnd_t *h )
+static int read_frame_internal( cli_pic_t *pic, raw_hnd_t *h, int bit_depth_uc )
 {
     int error = 0;
     int pixel_depth = x264_cli_csp_depth_factor( pic->img.csp );
     for( int i = 0; i < pic->img.planes && !error; i++ )
     {
         error |= fread( pic->img.plane[i], pixel_depth, h->plane_size[i], h->fh ) != h->plane_size[i];
-        if( h->bit_depth & 7 )
+        if( bit_depth_uc )
         {
             /* upconvert non 16bit high depth planes to 16bit using the same
              * algorithm as used in the depth filter. */
@@ -131,13 +134,13 @@
         else
             while( i_frame > h->next_frame )
             {
-                if( read_frame_internal( pic, h ) )
+                if( read_frame_internal( pic, h, 0 ) )
                     return -1;
                 h->next_frame++;
             }
     }
 
-    if( read_frame_internal( pic, h ) )
+    if( read_frame_internal( pic, h, h->bit_depth & 7 ) )
         return -1;
 
     h->next_frame = i_frame+1;

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * raw.c: raw input
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -55,8 +55,11 @@
     FAIL_IF_ERROR( !info->width || !info->height, "raw input requires a resolution.\n" )
     if( opt->colorspace )
     {
-        for( info->csp = X264_CSP_CLI_MAX-1; x264_cli_csps[info->csp].name && strcasecmp( x264_cli_csps[info->csp].name, opt->colorspace ); )
-            info->csp--;
+        for( info->csp = X264_CSP_CLI_MAX-1; info->csp > X264_CSP_NONE; info->csp-- )
+        {
+            if( x264_cli_csps[info->csp].name && !strcasecmp( x264_cli_csps[info->csp].name, opt->colorspace ) )
+                break;
+        }
         FAIL_IF_ERROR( info->csp == X264_CSP_NONE, "unsupported colorspace `%s'\n", opt->colorspace );
     }
     else /* default */
@@ -70,7 +73,7 @@
     if( !strcmp( psz_filename, "-" ) )
         h->fh = stdin;
     else
-        h->fh = fopen( psz_filename, "rb" );
+        h->fh = x264_fopen( psz_filename, "rb" );
     if( h->fh == NULL )
         return -1;
 
@@ -99,14 +102,14 @@
     return 0;
 }
 
-static int read_frame_internal( cli_pic_t *pic, raw_hnd_t *h )
+static int read_frame_internal( cli_pic_t *pic, raw_hnd_t *h, int bit_depth_uc )
 {
     int error = 0;
     int pixel_depth = x264_cli_csp_depth_factor( pic->img.csp );
     for( int i = 0; i < pic->img.planes && !error; i++ )
     {
         error |= fread( pic->img.plane[i], pixel_depth, h->plane_size[i], h->fh ) != h->plane_size[i];
-        if( h->bit_depth & 7 )
+        if( bit_depth_uc )
         {
             /* upconvert non 16bit high depth planes to 16bit using the same
              * algorithm as used in the depth filter. */
@@ -131,13 +134,13 @@
         else
             while( i_frame > h->next_frame )
             {
-                if( read_frame_internal( pic, h ) )
+                if( read_frame_internal( pic, h, 0 ) )
                     return -1;
                 h->next_frame++;
             }
     }
 
-    if( read_frame_internal( pic, h ) )
+    if( read_frame_internal( pic, h, h->bit_depth & 7 ) )
         return -1;
 
     h->next_frame = i_frame+1;
​

x264-snapshot-20130723-2245.tar.bz2/input/thread.c -> x264-snapshot-20140321-2245.tar.bz2/input/thread.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * thread.c: threaded input
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
​

x264-snapshot-20130723-2245.tar.bz2/input/timecode.c -> x264-snapshot-20140321-2245.tar.bz2/input/timecode.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * timecode.c: timecode file input
  *****************************************************************************
- * Copyright (C) 2010-2013 x264 project
+ * Copyright (C) 2010-2014 x264 project
  *
  * Authors: Yusuke Nakamura <muken.the.vfrmaniac@gmail.com>
  *
@@ -368,7 +368,7 @@
     timecode_input.picture_alloc = h->input.picture_alloc;
     timecode_input.picture_clean = h->input.picture_clean;
 
-    tcfile_in = fopen( psz_filename, "rb" );
+    tcfile_in = x264_fopen( psz_filename, "rb" );
     FAIL_IF_ERROR( !tcfile_in, "can't open `%s'\n", psz_filename )
     else if( !x264_is_regular_file( tcfile_in ) )
     {
​

x264-snapshot-20130723-2245.tar.bz2/input/y4m.c -> x264-snapshot-20140321-2245.tar.bz2/input/y4m.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * y4m.c: y4m input
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -81,7 +81,7 @@
     if( !strcmp( psz_filename, "-" ) )
         h->fh = stdin;
     else
-        h->fh = fopen(psz_filename, "rb");
+        h->fh = x264_fopen(psz_filename, "rb");
     if( h->fh == NULL )
         return -1;
 
@@ -223,7 +223,7 @@
     return 0;
 }
 
-static int read_frame_internal( cli_pic_t *pic, y4m_hnd_t *h )
+static int read_frame_internal( cli_pic_t *pic, y4m_hnd_t *h, int bit_depth_uc )
 {
     size_t slen = strlen( Y4M_FRAME_MAGIC );
     int pixel_depth = x264_cli_csp_depth_factor( pic->img.csp );
@@ -249,7 +249,7 @@
     for( i = 0; i < pic->img.planes && !error; i++ )
     {
         error |= fread( pic->img.plane[i], pixel_depth, h->plane_size[i], h->fh ) != h->plane_size[i];
-        if( h->bit_depth & 7 )
+        if( bit_depth_uc )
         {
             /* upconvert non 16bit high depth planes to 16bit using the same
              * algorithm as used in the depth filter. */
@@ -274,13 +274,13 @@
         else
             while( i_frame > h->next_frame )
             {
-                if( read_frame_internal( pic, h ) )
+                if( read_frame_internal( pic, h, 0 ) )
                     return -1;
                 h->next_frame++;
             }
     }
 
-    if( read_frame_internal( pic, h ) )
+    if( read_frame_internal( pic, h, h->bit_depth & 7 ) )
         return -1;
 
     h->next_frame = i_frame+1;

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * y4m.c: y4m input
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -81,7 +81,7 @@
     if( !strcmp( psz_filename, "-" ) )
         h->fh = stdin;
     else
-        h->fh = fopen(psz_filename, "rb");
+        h->fh = x264_fopen(psz_filename, "rb");
     if( h->fh == NULL )
         return -1;
 
@@ -223,7 +223,7 @@
     return 0;
 }
 
-static int read_frame_internal( cli_pic_t *pic, y4m_hnd_t *h )
+static int read_frame_internal( cli_pic_t *pic, y4m_hnd_t *h, int bit_depth_uc )
 {
     size_t slen = strlen( Y4M_FRAME_MAGIC );
     int pixel_depth = x264_cli_csp_depth_factor( pic->img.csp );
@@ -249,7 +249,7 @@
     for( i = 0; i < pic->img.planes && !error; i++ )
     {
         error |= fread( pic->img.plane[i], pixel_depth, h->plane_size[i], h->fh ) != h->plane_size[i];
-        if( h->bit_depth & 7 )
+        if( bit_depth_uc )
         {
             /* upconvert non 16bit high depth planes to 16bit using the same
              * algorithm as used in the depth filter. */
@@ -274,13 +274,13 @@
         else
             while( i_frame > h->next_frame )
             {
-                if( read_frame_internal( pic, h ) )
+                if( read_frame_internal( pic, h, 0 ) )
                     return -1;
                 h->next_frame++;
             }
     }
 
-    if( read_frame_internal( pic, h ) )
+    if( read_frame_internal( pic, h, h->bit_depth & 7 ) )
         return -1;
 
     h->next_frame = i_frame+1;
​

x264-snapshot-20130723-2245.tar.bz2/output/flv.c -> x264-snapshot-20140321-2245.tar.bz2/output/flv.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * flv.c: flv muxer
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: Kieran Kunhya <kieran@kunhya.com>
  *
@@ -75,11 +75,10 @@
 
 static int open_file( char *psz_filename, hnd_t *p_handle, cli_output_opt_t *opt )
 {
-    flv_hnd_t *p_flv = malloc( sizeof(*p_flv) );
     *p_handle = NULL;
+    flv_hnd_t *p_flv = calloc( 1, sizeof(flv_hnd_t) );
     if( !p_flv )
         return -1;
-    memset( p_flv, 0, sizeof(*p_flv) );
 
     p_flv->b_dts_compress = opt->use_dts_compress;
 
​

x264-snapshot-20130723-2245.tar.bz2/output/flv_bytestream.c -> x264-snapshot-20140321-2245.tar.bz2/output/flv_bytestream.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * flv_bytestream.c: flv muxer utilities
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: Kieran Kunhya <kieran@kunhya.com>
  *
@@ -87,16 +87,14 @@
 
 flv_buffer *flv_create_writer( const char *filename )
 {
-    flv_buffer *c = malloc( sizeof(*c) );
-
+    flv_buffer *c = calloc( 1, sizeof(flv_buffer) );
     if( !c )
         return NULL;
-    memset( c, 0, sizeof(*c) );
 
     if( !strcmp( filename, "-" ) )
         c->fp = stdout;
     else
-        c->fp = fopen( filename, "wb" );
+        c->fp = x264_fopen( filename, "wb" );
     if( !c->fp )
     {
         free( c );
​

x264-snapshot-20130723-2245.tar.bz2/output/flv_bytestream.h -> x264-snapshot-20140321-2245.tar.bz2/output/flv_bytestream.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * flv_bytestream.h: flv muxer utilities
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: Kieran Kunhya <kieran@kunhya.com>
  *
​

x264-snapshot-20130723-2245.tar.bz2/output/matroska.c -> x264-snapshot-20140321-2245.tar.bz2/output/matroska.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * matroska.c: matroska muxer
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: Mike Matsnev <mike@haali.su>
  *
@@ -33,6 +33,7 @@
     int width, height, d_width, d_height;
 
     int display_size_units;
+    int stereo_mode;
 
     int64_t frame_duration;
 
@@ -44,16 +45,11 @@
 
 static int open_file( char *psz_filename, hnd_t *p_handle, cli_output_opt_t *opt )
 {
-    mkv_hnd_t *p_mkv;
-
     *p_handle = NULL;
-
-    p_mkv  = malloc( sizeof(*p_mkv) );
+    mkv_hnd_t *p_mkv = calloc( 1, sizeof(mkv_hnd_t) );
     if( !p_mkv )
         return -1;
 
-    memset( p_mkv, 0, sizeof(*p_mkv) );
-
     p_mkv->w = mk_create_writer( psz_filename );
     if( !p_mkv->w )
     {
@@ -84,6 +80,7 @@
     p_mkv->width = p_mkv->d_width = p_param->i_width;
     p_mkv->height = p_mkv->d_height = p_param->i_height;
     p_mkv->display_size_units = DS_PIXELS;
+    p_mkv->stereo_mode = p_param->i_frame_packing;
 
     if( p_param->vui.i_sar_width && p_param->vui.i_sar_height
         && p_param->vui.i_sar_width != p_param->vui.i_sar_height )
@@ -152,7 +149,7 @@
     ret = mk_write_header( p_mkv->w, "x264" X264_VERSION, "V_MPEG4/ISO/AVC",
                            avcC, avcC_len, p_mkv->frame_duration, 50000,
                            p_mkv->width, p_mkv->height,
-                           p_mkv->d_width, p_mkv->d_height, p_mkv->display_size_units );
+                           p_mkv->d_width, p_mkv->d_height, p_mkv->display_size_units, p_mkv->stereo_mode );
     if( ret < 0 )
         return ret;

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * matroska.c: matroska muxer
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: Mike Matsnev <mike@haali.su>
  *
@@ -33,6 +33,7 @@
     int width, height, d_width, d_height;
 
     int display_size_units;
+    int stereo_mode;
 
     int64_t frame_duration;
 
@@ -44,16 +45,11 @@
 
 static int open_file( char *psz_filename, hnd_t *p_handle, cli_output_opt_t *opt )
 {
-    mkv_hnd_t *p_mkv;
-
     *p_handle = NULL;
-
-    p_mkv  = malloc( sizeof(*p_mkv) );
+    mkv_hnd_t *p_mkv = calloc( 1, sizeof(mkv_hnd_t) );
     if( !p_mkv )
         return -1;
 
-    memset( p_mkv, 0, sizeof(*p_mkv) );
-
     p_mkv->w = mk_create_writer( psz_filename );
     if( !p_mkv->w )
     {
@@ -84,6 +80,7 @@
     p_mkv->width = p_mkv->d_width = p_param->i_width;
     p_mkv->height = p_mkv->d_height = p_param->i_height;
     p_mkv->display_size_units = DS_PIXELS;
+    p_mkv->stereo_mode = p_param->i_frame_packing;
 
     if( p_param->vui.i_sar_width && p_param->vui.i_sar_height
         && p_param->vui.i_sar_width != p_param->vui.i_sar_height )
@@ -152,7 +149,7 @@
     ret = mk_write_header( p_mkv->w, "x264" X264_VERSION, "V_MPEG4/ISO/AVC",
                            avcC, avcC_len, p_mkv->frame_duration, 50000,
                            p_mkv->width, p_mkv->height,
-                           p_mkv->d_width, p_mkv->d_height, p_mkv->display_size_units );
+                           p_mkv->d_width, p_mkv->d_height, p_mkv->display_size_units, p_mkv->stereo_mode );
     if( ret < 0 )
         return ret;
 
​

x264-snapshot-20130723-2245.tar.bz2/output/matroska_ebml.c -> x264-snapshot-20140321-2245.tar.bz2/output/matroska_ebml.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * matroska_ebml.c: matroska muxer utilities
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: Mike Matsnev <mike@haali.su>
  *
@@ -74,10 +74,9 @@
     }
     else
     {
-        c = malloc( sizeof(*c) );
+        c = calloc( 1, sizeof(mk_context) );
         if( !c )
             return NULL;
-        memset( c, 0, sizeof(*c) );
     }
 
     c->parent = parent;
@@ -291,12 +290,10 @@
 
 mk_writer *mk_create_writer( const char *filename )
 {
-    mk_writer *w = malloc( sizeof(*w) );
+    mk_writer *w = calloc( 1, sizeof(mk_writer) );
     if( !w )
         return NULL;
 
-    memset( w, 0, sizeof(*w) );
-
     w->root = mk_create_context( w, NULL, 0 );
     if( !w->root )
     {
@@ -307,7 +304,7 @@
     if( !strcmp( filename, "-" ) )
         w->fp = stdout;
     else
-        w->fp = fopen( filename, "wb" );
+        w->fp = x264_fopen( filename, "wb" );
     if( !w->fp )
     {
         mk_destroy_contexts( w );
@@ -320,13 +317,15 @@
     return w;
 }
 
+static const uint8_t mk_stereo_modes[6] = {5,9,7,1,3,13};
+
 int mk_write_header( mk_writer *w, const char *writing_app,
                      const char *codec_id,
                      const void *codec_private, unsigned codec_private_size,
                      int64_t default_frame_duration,
                      int64_t timescale,
                      unsigned width, unsigned height,
-                     unsigned d_width, unsigned d_height, int display_size_units )
+                     unsigned d_width, unsigned d_height, int display_size_units, int stereo_mode )
 {
     mk_context  *c, *ti, *v;
 
@@ -382,6 +381,8 @@
     CHECK( mk_write_uint( v, 0x54b2, display_size_units ) );
     CHECK( mk_write_uint( v, 0x54b0, d_width ) );
     CHECK( mk_write_uint( v, 0x54ba, d_height ) );
+    if( stereo_mode >= 0 && stereo_mode <= 5 )
+        CHECK( mk_write_uint( v, 0x53b8, mk_stereo_modes[stereo_mode] ) );
     CHECK( mk_close_context( v, 0 ) );
 
     CHECK( mk_close_context( ti, 0 ) );

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * matroska_ebml.c: matroska muxer utilities
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: Mike Matsnev <mike@haali.su>
  *
@@ -74,10 +74,9 @@
     }
     else
     {
-        c = malloc( sizeof(*c) );
+        c = calloc( 1, sizeof(mk_context) );
         if( !c )
             return NULL;
-        memset( c, 0, sizeof(*c) );
     }
 
     c->parent = parent;
@@ -291,12 +290,10 @@
 
 mk_writer *mk_create_writer( const char *filename )
 {
-    mk_writer *w = malloc( sizeof(*w) );
+    mk_writer *w = calloc( 1, sizeof(mk_writer) );
     if( !w )
         return NULL;
 
-    memset( w, 0, sizeof(*w) );
-
     w->root = mk_create_context( w, NULL, 0 );
     if( !w->root )
     {
@@ -307,7 +304,7 @@
     if( !strcmp( filename, "-" ) )
         w->fp = stdout;
     else
-        w->fp = fopen( filename, "wb" );
+        w->fp = x264_fopen( filename, "wb" );
     if( !w->fp )
     {
         mk_destroy_contexts( w );
@@ -320,13 +317,15 @@
     return w;
 }
 
+static const uint8_t mk_stereo_modes[6] = {5,9,7,1,3,13};
+
 int mk_write_header( mk_writer *w, const char *writing_app,
                      const char *codec_id,
                      const void *codec_private, unsigned codec_private_size,
                      int64_t default_frame_duration,
                      int64_t timescale,
                      unsigned width, unsigned height,
-                     unsigned d_width, unsigned d_height, int display_size_units )
+                     unsigned d_width, unsigned d_height, int display_size_units, int stereo_mode )
 {
     mk_context  *c, *ti, *v;
 
@@ -382,6 +381,8 @@
     CHECK( mk_write_uint( v, 0x54b2, display_size_units ) );
     CHECK( mk_write_uint( v, 0x54b0, d_width ) );
     CHECK( mk_write_uint( v, 0x54ba, d_height ) );
+    if( stereo_mode >= 0 && stereo_mode <= 5 )
+        CHECK( mk_write_uint( v, 0x53b8, mk_stereo_modes[stereo_mode] ) );
     CHECK( mk_close_context( v, 0 ) );
 
     CHECK( mk_close_context( ti, 0 ) );
​

x264-snapshot-20130723-2245.tar.bz2/output/matroska_ebml.h -> x264-snapshot-20140321-2245.tar.bz2/output/matroska_ebml.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * matroska_ebml.h: matroska muxer utilities
  *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2005-2014 x264 project
  *
  * Authors: Mike Matsnev <mike@haali.su>
  *
@@ -42,7 +42,7 @@
                      int64_t default_frame_duration,
                      int64_t timescale,
                      unsigned width, unsigned height,
-                     unsigned d_width, unsigned d_height, int display_size_units );
+                     unsigned d_width, unsigned d_height, int display_size_units, int stereo_mode );
 
 int mk_start_frame( mk_writer *w );
 int mk_add_frame_data( mk_writer *w, const void *data, unsigned size );
​

x264-snapshot-20130723-2245.tar.bz2/output/mp4.c -> x264-snapshot-20140321-2245.tar.bz2/output/mp4.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mp4.c: mp4 muxer
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -27,13 +27,8 @@
 #include "output.h"
 #include <gpac/isomedia.h>
 
-#if HAVE_GF_MALLOC
-#undef malloc
-#undef free
-#undef realloc
-#define malloc gf_malloc
-#define free gf_free
-#define realloc gf_realloc
+#ifdef _WIN32
+#include <windows.h>
 #endif
 
 typedef struct
@@ -170,20 +165,25 @@
 
 static int open_file( char *psz_filename, hnd_t *p_handle, cli_output_opt_t *opt )
 {
-    mp4_hnd_t *p_mp4;
-
     *p_handle = NULL;
-    FILE *fh = fopen( psz_filename, "w" );
+    FILE *fh = x264_fopen( psz_filename, "w" );
     if( !fh )
         return -1;
     FAIL_IF_ERR( !x264_is_regular_file( fh ), "mp4", "MP4 output is incompatible with non-regular file `%s'\n", psz_filename )
     fclose( fh );
 
-    if( !(p_mp4 = malloc( sizeof(mp4_hnd_t) )) )
+    mp4_hnd_t *p_mp4 = calloc( 1, sizeof(mp4_hnd_t) );
+    if( !p_mp4 )
         return -1;
 
-    memset( p_mp4, 0, sizeof(mp4_hnd_t) );
+#ifdef _WIN32
+    /* GPAC doesn't support Unicode filenames. */
+    char ansi_filename[MAX_PATH];
+    FAIL_IF_ERR( !x264_ansi_filename( psz_filename, ansi_filename, MAX_PATH, 1 ), "mp4", "invalid ansi filename\n" )
+    p_mp4->p_file = gf_isom_open( ansi_filename, GF_ISOM_OPEN_WRITE, NULL );
+#else
     p_mp4->p_file = gf_isom_open( psz_filename, GF_ISOM_OPEN_WRITE, NULL );
+#endif
 
     p_mp4->b_dts_compress = opt->use_dts_compress;

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * mp4.c: mp4 muxer
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -27,13 +27,8 @@
 #include "output.h"
 #include <gpac/isomedia.h>
 
-#if HAVE_GF_MALLOC
-#undef malloc
-#undef free
-#undef realloc
-#define malloc gf_malloc
-#define free gf_free
-#define realloc gf_realloc
+#ifdef _WIN32
+#include <windows.h>
 #endif
 
 typedef struct
@@ -170,20 +165,25 @@
 
 static int open_file( char *psz_filename, hnd_t *p_handle, cli_output_opt_t *opt )
 {
-    mp4_hnd_t *p_mp4;
-
     *p_handle = NULL;
-    FILE *fh = fopen( psz_filename, "w" );
+    FILE *fh = x264_fopen( psz_filename, "w" );
     if( !fh )
         return -1;
     FAIL_IF_ERR( !x264_is_regular_file( fh ), "mp4", "MP4 output is incompatible with non-regular file `%s'\n", psz_filename )
     fclose( fh );
 
-    if( !(p_mp4 = malloc( sizeof(mp4_hnd_t) )) )
+    mp4_hnd_t *p_mp4 = calloc( 1, sizeof(mp4_hnd_t) );
+    if( !p_mp4 )
         return -1;
 
-    memset( p_mp4, 0, sizeof(mp4_hnd_t) );
+#ifdef _WIN32
+    /* GPAC doesn't support Unicode filenames. */
+    char ansi_filename[MAX_PATH];
+    FAIL_IF_ERR( !x264_ansi_filename( psz_filename, ansi_filename, MAX_PATH, 1 ), "mp4", "invalid ansi filename\n" )
+    p_mp4->p_file = gf_isom_open( ansi_filename, GF_ISOM_OPEN_WRITE, NULL );
+#else
     p_mp4->p_file = gf_isom_open( psz_filename, GF_ISOM_OPEN_WRITE, NULL );
+#endif
 
     p_mp4->b_dts_compress = opt->use_dts_compress;
 
​

x264-snapshot-20140321-2245.tar.bz2/output/mp4_lsmash.c Added

@@ -0,0 +1,419 @@
+/*****************************************************************************
+ * mp4_lsmash.c: mp4 muxer using L-SMASH
+ *****************************************************************************
+ * Copyright (C) 2003-2014 x264 project
+ *
+ * Authors: Laurent Aimar <fenrir@via.ecp.fr>
+ *          Loren Merritt <lorenm@u.washington.edu>
+ *          Yusuke Nakamura <muken.the.vfrmaniac@gmail.com>
+ *          Takashi Hirata <silverfilain@gmail.com>
+ *          golgol7777 <golgol7777@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
+ *****************************************************************************/
+
+#include "output.h"
+#include <lsmash.h>
+
+#define H264_NALU_LENGTH_SIZE 4
+
+/*******************/
+
+/* Logging shorthands that tag every message with the "mp4" module name. */
+#define MP4_LOG_ERROR( ... )                x264_cli_log( "mp4", X264_LOG_ERROR, __VA_ARGS__ )
+#define MP4_LOG_WARNING( ... )              x264_cli_log( "mp4", X264_LOG_WARNING, __VA_ARGS__ )
+#define MP4_LOG_INFO( ... )                 x264_cli_log( "mp4", X264_LOG_INFO, __VA_ARGS__ )
+#define MP4_FAIL_IF_ERR( cond, ... )        FAIL_IF_ERR( cond, "mp4", __VA_ARGS__ )
+
+/* Log an error when cond is non-zero and keep going (used by close_file() and write_frame()).
+ * Wrapped in do/while(0) so the expansion is exactly one statement and stays
+ * correct inside an unbraced if/else. */
+#define MP4_LOG_IF_ERR( cond, ... )\
+do\
+{\
+    if( cond )\
+        MP4_LOG_ERROR( __VA_ARGS__ );\
+} while( 0 )
+
+/* For open_file(): when cond is non-zero, release the local handle `p_mp4`,
+ * log the error and return -1 from the calling function.
+ * Requires a variable named p_mp4 in the caller's scope. */
+#define MP4_FAIL_IF_ERR_EX( cond, ... )\
+do\
+{\
+    if( cond )\
+    {\
+        remove_mp4_hnd( p_mp4 );\
+        MP4_LOG_ERROR( __VA_ARGS__ );\
+        return -1;\
+    }\
+} while( 0 )
+
+/*******************/
+
+/* State of one MP4 output; allocated zeroed in open_file() and released by remove_mp4_hnd(). */
+typedef struct
+{
+    /* L-SMASH root returned by lsmash_open_movie(). */
+    lsmash_root_t *p_root;
+    /* Video summary returned by lsmash_create_summary(); filled in by set_param()/write_headers(). */
+    lsmash_video_summary_t *summary;
+    /* Non-zero when the output filename is "-". */
+    int b_stdout;
+    /* Timescales read back from L-SMASH in set_param(). */
+    uint32_t i_movie_timescale;
+    uint32_t i_video_timescale;
+    /* Value returned by lsmash_create_track(); 0 until set_param() succeeds. */
+    uint32_t i_track;
+    /* Value returned by lsmash_add_sample_entry() in write_headers(). */
+    uint32_t i_sample_entry;
+    /* i_timebase_num multiplied by i_dts_compress_multiplier. */
+    uint64_t i_time_inc;
+    /* Negated DTS of the first frame. */
+    int64_t i_start_offset;
+    /* Start time used for the track's edit. */
+    uint64_t i_first_cts;
+    /* DTS of the most recently appended sample. */
+    uint64_t i_prev_dts;
+    /* SEI saved by write_headers(); copied in front of the first frame, then freed and the size reset to 0. */
+    uint32_t i_sei_size;
+    uint8_t *p_sei_buffer;
+    /* Number of frames appended so far. */
+    int i_numframe;
+    /* DTS delta computed on the second frame; only used when b_dts_compress is set. */
+    int64_t i_init_delta;
+    /* 0 without B-frames, 1 with B-frames, 2 with B-pyramid. */
+    int i_delay_frames;
+    /* Copied from cli_output_opt_t.use_dts_compress. */
+    int b_dts_compress;
+    /* b_dts_compress * i_delay_frames + 1. */
+    int i_dts_compress_multiplier;
+    /* Always set to 0 by open_file(). */
+    int b_use_recovery;
+    /* Non-zero when the output is not a regular file; selects fragmented writing. */
+    int b_fragments;
+} mp4_hnd_t;
+
+/*******************/
+
+/*
+ * Release everything owned by a muxer handle: the pending SEI buffer, the video
+ * summary, the L-SMASH root and finally the handle itself.
+ * A NULL handle is accepted and ignored.
+ */
+static void remove_mp4_hnd( hnd_t handle )
+{
+    mp4_hnd_t *p_mp4 = handle;
+    if( !p_mp4 )
+        return;
+    /* free( NULL ) is a no-op, so no guard is needed. */
+    free( p_mp4->p_sei_buffer );
+    p_mp4->p_sei_buffer = NULL;
+    /* The summary created in open_file() is owned by the handle and was never released before. */
+    if( p_mp4->summary )
+    {
+        lsmash_cleanup_summary( (lsmash_summary_t *)p_mp4->summary );
+        p_mp4->summary = NULL;
+    }
+    if( p_mp4->p_root )
+    {
+        lsmash_destroy_root( p_mp4->p_root );
+        p_mp4->p_root = NULL;
+    }
+    free( p_mp4 );
+}
+
+/*******************/
+
+/*
+ * Finalize the movie and release the muxer handle.
+ * largest_pts and second_largest_pts are used to derive the duration of the last sample.
+ * Every L-SMASH failure in here is only logged; the function always returns 0.
+ */
+static int close_file( hnd_t handle, int64_t largest_pts, int64_t second_largest_pts )
+{
+    mp4_hnd_t *p_mp4 = handle;
+
+    if( !p_mp4 )
+        return 0;
+
+    if( p_mp4->p_root )
+    {
+        double actual_duration = 0;
+        /* i_track is 0 until set_param() has created the video track. */
+        if( p_mp4->i_track )
+        {
+            /* Flush the rest of samples and add the last sample_delta. */
+            uint32_t last_delta = largest_pts - second_largest_pts;
+            /* A zero delta is replaced by 1 for the flush. */
+            MP4_LOG_IF_ERR( lsmash_flush_pooled_samples( p_mp4->p_root, p_mp4->i_track, (last_delta ? last_delta : 1) * p_mp4->i_time_inc ),
+                            "failed to flush the rest of samples.\n" );
+
+            if( p_mp4->i_movie_timescale != 0 && p_mp4->i_video_timescale != 0 )    /* avoid zero division */
+                actual_duration = ((double)((largest_pts + last_delta) * p_mp4->i_time_inc) / p_mp4->i_video_timescale) * p_mp4->i_movie_timescale;
+            else
+                MP4_LOG_ERROR( "timescale is broken.\n" );
+
+            /*
+             * Declare the explicit time-line mapping.
+             * A segment_duration is given by movie timescale, while a media_time that is the start time of this segment
+             * is given by not the movie timescale but rather the media timescale.
+             * The reason is that ISO media have two time-lines, presentation and media time-line,
+             * and an edit maps the presentation time-line to the media time-line.
+             * According to QuickTime file format specification and the actual playback in QuickTime Player,
+             * if the Edit Box doesn't exist in the track, the ratio of the summation of sample durations and track's duration becomes
+             * the track's media_rate so that the entire media can be used by the track.
+             * So, we add Edit Box here to avoid this implicit media_rate could distort track's presentation timestamps slightly.
+             * Note: Any demuxers should follow the Edit List Box if it exists.
+             */
+            lsmash_edit_t edit;
+            edit.duration   = actual_duration;
+            edit.start_time = p_mp4->i_first_cts;
+            edit.rate       = ISOM_EDIT_MODE_NORMAL;
+            if( !p_mp4->b_fragments )
+            {
+                MP4_LOG_IF_ERR( lsmash_create_explicit_timeline_map( p_mp4->p_root, p_mp4->i_track, edit ),
+                                "failed to set timeline map for video.\n" );
+            }
+            /* Fragmented output: write_frame() already created an edit with unknown duration, so it is
+             * modified instead of created (the `1` is presumably the edit number - confirm against L-SMASH).
+             * NOTE(review): skipped for stdout, presumably because that output cannot be rewritten - confirm. */
+            else if( !p_mp4->b_stdout )
+                MP4_LOG_IF_ERR( lsmash_modify_explicit_timeline_map( p_mp4->p_root, p_mp4->i_track, 1, edit ),
+                                "failed to update timeline map for video.\n" );
+        }
+
+        MP4_LOG_IF_ERR( lsmash_finish_movie( p_mp4->p_root, NULL ), "failed to finish movie.\n" );
+    }
+
+    remove_mp4_hnd( p_mp4 ); /* including lsmash_destroy_root( p_mp4->p_root ); */
+
+    return 0;
+}
+
+/*
+ * Create a muxer handle for psz_filename and store it in *p_handle.
+ * "-" and anything that is not a regular file are written in fragmented mode.
+ * Returns 0 on success; on failure logs an error, leaves *p_handle NULL and returns -1.
+ */
+static int open_file( char *psz_filename, hnd_t *p_handle, cli_output_opt_t *opt )
+{
+    *p_handle = NULL;
+
+    const int b_stdout = !strcmp( psz_filename, "-" );
+
+    /* The path is only probed when the output is not "-". */
+    int b_regular = !b_stdout && x264_is_regular_file_path( psz_filename );
+    if( b_regular )
+    {
+        /* Open the file once for writing and re-check on the open stream. */
+        FILE *fh = x264_fopen( psz_filename, "wb" );
+        MP4_FAIL_IF_ERR( !fh, "cannot open output file `%s'.\n", psz_filename );
+        b_regular = x264_is_regular_file( fh );
+        fclose( fh );
+    }
+
+    mp4_hnd_t *p_mp4 = calloc( 1, sizeof(mp4_hnd_t) );
+    MP4_FAIL_IF_ERR( !p_mp4, "failed to allocate memory for muxer information.\n" );
+
+    p_mp4->b_stdout       = b_stdout;
+    p_mp4->b_fragments    = !b_regular;
+    p_mp4->b_use_recovery = 0; // we don't really support recovery
+    p_mp4->b_dts_compress = opt->use_dts_compress;
+
+    if( p_mp4->b_fragments )
+        p_mp4->p_root = lsmash_open_movie( psz_filename, LSMASH_FILE_MODE_WRITE_FRAGMENTED );
+    else
+        p_mp4->p_root = lsmash_open_movie( psz_filename, LSMASH_FILE_MODE_WRITE );
+    MP4_FAIL_IF_ERR_EX( !p_mp4->p_root, "failed to create root.\n" );
+
+    p_mp4->summary = (lsmash_video_summary_t *)lsmash_create_summary( LSMASH_SUMMARY_TYPE_VIDEO );
+    MP4_FAIL_IF_ERR_EX( !p_mp4->summary,
+                        "failed to allocate memory for summary information of video.\n" );
+    p_mp4->summary->sample_type = ISOM_CODEC_TYPE_AVC1_VIDEO;
+
+    /* Only publish the handle once it is fully set up. */
+    *p_handle = p_mp4;
+
+    return 0;
+}
+
+/*
+ * Configure movie, track and media parameters from the encoder settings in p_param.
+ * Returns 0 on success; on failure logs an error and returns -1.
+ */
+static int set_param( hnd_t handle, x264_param_t *p_param )
+{
+    mp4_hnd_t *p_mp4 = handle;
+    uint64_t i_media_timescale;
+
+    p_mp4->i_delay_frames = p_param->i_bframe ? (p_param->i_bframe_pyramid ? 2 : 1) : 0;
+    p_mp4->i_dts_compress_multiplier = p_mp4->b_dts_compress * p_mp4->i_delay_frames + 1;
+
+    /* Both halves of the timebase are scaled by the same multiplier; the timescale must still fit in 32 bits. */
+    i_media_timescale = (uint64_t)p_param->i_timebase_den * p_mp4->i_dts_compress_multiplier;
+    p_mp4->i_time_inc = (uint64_t)p_param->i_timebase_num * p_mp4->i_dts_compress_multiplier;
+    MP4_FAIL_IF_ERR( i_media_timescale > UINT32_MAX, "MP4 media timescale %"PRIu64" exceeds maximum\n", i_media_timescale );
+
+    /* Select brands. */
+    lsmash_brand_type brands[6] = { 0 };
+    uint32_t brand_count = 0;
+    brands[brand_count++] = ISOM_BRAND_TYPE_MP42;
+    brands[brand_count++] = ISOM_BRAND_TYPE_MP41;
+    brands[brand_count++] = ISOM_BRAND_TYPE_ISOM;
+    if( p_mp4->b_use_recovery )
+    {
+        brands[brand_count++] = ISOM_BRAND_TYPE_AVC1;   /* sdtp, sgpd, sbgp and visual roll recovery grouping */
+        if( p_param->b_open_gop )
+            brands[brand_count++] = ISOM_BRAND_TYPE_ISO6;   /* cslg and visual random access grouping */
+    }
+
+    /* Set movie parameters. */
+    lsmash_movie_parameters_t movie_param;
+    lsmash_initialize_movie_parameters( &movie_param );
+    movie_param.major_brand = ISOM_BRAND_TYPE_MP42;
+    movie_param.brands = brands;
+    movie_param.number_of_brands = brand_count;
+    MP4_FAIL_IF_ERR( lsmash_set_movie_parameters( p_mp4->p_root, &movie_param ),
+                     "failed to set movie parameters.\n" );
+    p_mp4->i_movie_timescale = lsmash_get_movie_timescale( p_mp4->p_root );
+    MP4_FAIL_IF_ERR( !p_mp4->i_movie_timescale, "movie timescale is broken.\n" );
+
+    /* Create a video track. */
+    p_mp4->i_track = lsmash_create_track( p_mp4->p_root, ISOM_MEDIA_HANDLER_TYPE_VIDEO_TRACK );
+    MP4_FAIL_IF_ERR( !p_mp4->i_track, "failed to create a video track.\n" );
+
+    p_mp4->summary->width = p_param->i_width;
+    p_mp4->summary->height = p_param->i_height;
+    /* The display size is passed shifted left by 16 bits. Shift in unsigned arithmetic:
+     * shifting the signed value directly is undefined once a dimension reaches 32768 (32-bit int). */
+    uint32_t i_display_width = (uint32_t)p_param->i_width << 16;
+    uint32_t i_display_height = (uint32_t)p_param->i_height << 16;
+    if( p_param->vui.i_sar_width && p_param->vui.i_sar_height )
+    {
+        /* Stretch whichever dimension makes the display size grow for the given sample aspect ratio. */
+        double sar = (double)p_param->vui.i_sar_width / p_param->vui.i_sar_height;
+        if( sar > 1.0 )
+            i_display_width *= sar;
+        else
+            i_display_height /= sar;
+        p_mp4->summary->par_h = p_param->vui.i_sar_width;
+        p_mp4->summary->par_v = p_param->vui.i_sar_height;
+    }
+    p_mp4->summary->color.primaries_index = p_param->vui.i_colorprim;
+    p_mp4->summary->color.transfer_index  = p_param->vui.i_transfer;
+    p_mp4->summary->color.matrix_index    = p_param->vui.i_colmatrix >= 0 ? p_param->vui.i_colmatrix : ISOM_MATRIX_INDEX_UNSPECIFIED;
+    p_mp4->summary->color.full_range      = p_param->vui.b_fullrange >= 0 ? p_param->vui.b_fullrange : 0;
+
+    /* Set video track parameters. */
+    lsmash_track_parameters_t track_param;
+    lsmash_initialize_track_parameters( &track_param );
+    lsmash_track_mode track_mode = ISOM_TRACK_ENABLED | ISOM_TRACK_IN_MOVIE | ISOM_TRACK_IN_PREVIEW;
+    track_param.mode = track_mode;
+    track_param.display_width = i_display_width;
+    track_param.display_height = i_display_height;
+    MP4_FAIL_IF_ERR( lsmash_set_track_parameters( p_mp4->p_root, p_mp4->i_track, &track_param ),
+                     "failed to set track parameters for video.\n" );
+
+    /* Set video media parameters. */
+    lsmash_media_parameters_t media_param;
+    lsmash_initialize_media_parameters( &media_param );
+    media_param.timescale = i_media_timescale;
+    media_param.media_handler_name = "L-SMASH Video Media Handler";
+    if( p_mp4->b_use_recovery )
+    {
+        media_param.roll_grouping = p_param->b_intra_refresh;
+        media_param.rap_grouping = p_param->b_open_gop;
+    }
+    MP4_FAIL_IF_ERR( lsmash_set_media_parameters( p_mp4->p_root, p_mp4->i_track, &media_param ),
+                     "failed to set media parameters for video.\n" );
+    p_mp4->i_video_timescale = lsmash_get_media_timescale( p_mp4->p_root, p_mp4->i_track );
+    MP4_FAIL_IF_ERR( !p_mp4->i_video_timescale, "media timescale for video is broken.\n" );
+
+    return 0;
+}
+
+/*
+ * Register the stream headers with the muxer.
+ * p_nal[0], p_nal[1] and p_nal[2] are read as SPS, PPS and SEI respectively.
+ * SPS/PPS go into the H.264 codec specific data of the sample entry; the SEI is
+ * saved in the handle so write_frame() can put it in front of the first frame.
+ * Returns sei_size + sps_size + pps_size on success, -1 (after logging) on failure.
+ */
+static int write_headers( hnd_t handle, x264_nal_t *p_nal )
+{
+    mp4_hnd_t *p_mp4 = handle;
+
+    /* The first H264_NALU_LENGTH_SIZE bytes of SPS and PPS are skipped (presumably the length prefix). */
+    uint32_t sps_size = p_nal[0].i_payload - H264_NALU_LENGTH_SIZE;
+    uint32_t pps_size = p_nal[1].i_payload - H264_NALU_LENGTH_SIZE;
+    uint32_t sei_size = p_nal[2].i_payload;
+
+    uint8_t *sps = p_nal[0].p_payload + H264_NALU_LENGTH_SIZE;
+    uint8_t *pps = p_nal[1].p_payload + H264_NALU_LENGTH_SIZE;
+    uint8_t *sei = p_nal[2].p_payload;
+
+    lsmash_codec_specific_t *cs = lsmash_create_codec_specific_data( LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_VIDEO_H264,
+                                                                     LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED );
+    /* cs is dereferenced right below, so an allocation failure must be caught here. */
+    MP4_FAIL_IF_ERR( !cs, "failed to allocate H.264 specific info.\n" );
+
+    lsmash_h264_specific_parameters_t *param = (lsmash_h264_specific_parameters_t *)cs->data.structured;
+    param->lengthSizeMinusOne = H264_NALU_LENGTH_SIZE - 1;
+
+    /* SPS
+     * The remaining parameters are automatically set by SPS. */
+    if( lsmash_append_h264_parameter_set( param, H264_PARAMETER_SET_TYPE_SPS, sps, sps_size ) )
+    {
+        lsmash_destroy_codec_specific_data( cs );
+        MP4_LOG_ERROR( "failed to append SPS.\n" );
+        return -1;
+    }
+
+    /* PPS */
+    if( lsmash_append_h264_parameter_set( param, H264_PARAMETER_SET_TYPE_PPS, pps, pps_size ) )
+    {
+        lsmash_destroy_codec_specific_data( cs );
+        MP4_LOG_ERROR( "failed to append PPS.\n" );
+        return -1;
+    }
+
+    if( lsmash_add_codec_specific_data( (lsmash_summary_t *)p_mp4->summary, cs ) )
+    {
+        lsmash_destroy_codec_specific_data( cs );
+        MP4_LOG_ERROR( "failed to add H.264 specific info.\n" );
+        return -1;
+    }
+
+    lsmash_destroy_codec_specific_data( cs );
+
+    /* Additional extensions */
+    /* Bitrate info: best effort, a failure here is not treated as an error. */
+    cs = lsmash_create_codec_specific_data( LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_VIDEO_H264_BITRATE,
+                                            LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED );
+    if( cs )
+    {
+        lsmash_add_codec_specific_data( (lsmash_summary_t *)p_mp4->summary, cs );
+        lsmash_destroy_codec_specific_data( cs );
+    }
+
+    p_mp4->i_sample_entry = lsmash_add_sample_entry( p_mp4->p_root, p_mp4->i_track, p_mp4->summary );
+    MP4_FAIL_IF_ERR( !p_mp4->i_sample_entry,
+                     "failed to add sample entry for video.\n" );
+
+    /* SEI: kept in the handle until the first frame is written. */
+    p_mp4->p_sei_buffer = malloc( sei_size );
+    MP4_FAIL_IF_ERR( !p_mp4->p_sei_buffer,
+                     "failed to allocate sei transition buffer.\n" );
+    memcpy( p_mp4->p_sei_buffer, sei, sei_size );
+    p_mp4->i_sei_size = sei_size;
+
+    return sei_size + sps_size + pps_size;
+}
+
+/*
+ * Append one encoded frame (i_size bytes at p_nalu) as an MP4 sample.
+ * The SEI saved by write_headers() is placed in front of the first frame's data.
+ * In fragmented mode a new movie fragment is started at every keyframe after the first frame.
+ * Returns i_size on success; on failure logs an error and returns -1.
+ */
+static int write_frame( hnd_t handle, uint8_t *p_nalu, int i_size, x264_picture_t *p_picture )
+{
+    mp4_hnd_t *p_mp4 = handle;
+    uint64_t dts, cts;
+
+    if( !p_mp4->i_numframe )
+    {
+        /* First frame: remember the offset that moves its DTS to zero. */
+        p_mp4->i_start_offset = p_picture->i_dts * -1;
+        p_mp4->i_first_cts = p_mp4->b_dts_compress ? 0 : p_mp4->i_start_offset * p_mp4->i_time_inc;
+        if( p_mp4->b_fragments )
+        {
+            /* The duration is not known yet; close_file() updates this edit unless writing to stdout. */
+            lsmash_edit_t edit;
+            edit.duration   = ISOM_EDIT_DURATION_UNKNOWN32;     /* QuickTime doesn't support 64bit duration. */
+            edit.start_time = p_mp4->i_first_cts;
+            edit.rate       = ISOM_EDIT_MODE_NORMAL;
+            MP4_LOG_IF_ERR( lsmash_create_explicit_timeline_map( p_mp4->p_root, p_mp4->i_track, edit ),
+                            "failed to set timeline map for video.\n" );
+        }
+    }
+
+    lsmash_sample_t *p_sample = lsmash_create_sample( i_size + p_mp4->i_sei_size );
+    MP4_FAIL_IF_ERR( !p_sample,
+                     "failed to create a video sample data.\n" );
+
+    if( p_mp4->p_sei_buffer )
+    {
+        memcpy( p_sample->data, p_mp4->p_sei_buffer, p_mp4->i_sei_size );
+        free( p_mp4->p_sei_buffer );
+        p_mp4->p_sei_buffer = NULL;
+    }
+
+    memcpy( p_sample->data + p_mp4->i_sei_size, p_nalu, i_size );
+    p_mp4->i_sei_size = 0;
+
+    if( p_mp4->b_dts_compress )
+    {
+        if( p_mp4->i_numframe == 1 )
+            p_mp4->i_init_delta = (p_picture->i_dts + p_mp4->i_start_offset) * p_mp4->i_time_inc;
+        dts = p_mp4->i_numframe > p_mp4->i_delay_frames
+            ? p_picture->i_dts * p_mp4->i_time_inc
+            : p_mp4->i_numframe * (p_mp4->i_init_delta / p_mp4->i_dts_compress_multiplier);
+        cts = p_picture->i_pts * p_mp4->i_time_inc;
+    }
+    else
+    {
+        dts = (p_picture->i_dts + p_mp4->i_start_offset) * p_mp4->i_time_inc;
+        cts = (p_picture->i_pts + p_mp4->i_start_offset) * p_mp4->i_time_inc;
+    }
+
+    p_sample->dts = dts;
+    p_sample->cts = cts;
+    p_sample->index = p_mp4->i_sample_entry;
+    p_sample->prop.ra_flags = p_picture->b_keyframe ? ISOM_SAMPLE_RANDOM_ACCESS_FLAG_SYNC : ISOM_SAMPLE_RANDOM_ACCESS_FLAG_NONE;
+
+    if( p_mp4->b_fragments && p_mp4->i_numframe && p_sample->prop.ra_flags != ISOM_SAMPLE_RANDOM_ACCESS_FLAG_NONE )
+    {
+        /* The sample has not been handed to the muxer yet, so it must be released on these error paths. */
+        if( lsmash_flush_pooled_samples( p_mp4->p_root, p_mp4->i_track, p_sample->dts - p_mp4->i_prev_dts ) )
+        {
+            lsmash_delete_sample( p_sample );
+            MP4_LOG_ERROR( "failed to flush the rest of samples.\n" );
+            return -1;
+        }
+        if( lsmash_create_fragment_movie( p_mp4->p_root ) )
+        {
+            lsmash_delete_sample( p_sample );
+            MP4_LOG_ERROR( "failed to create a movie fragment.\n" );
+            return -1;
+        }
+    }
+
+    /* Append data per sample. */
+    /* NOTE(review): ownership of p_sample when lsmash_append_sample() fails is not visible here - confirm against L-SMASH. */
+    MP4_FAIL_IF_ERR( lsmash_append_sample( p_mp4->p_root, p_mp4->i_track, p_sample ),
+                     "failed to append a video frame.\n" );
+
+    p_mp4->i_prev_dts = dts;
+    p_mp4->i_numframe++;
+
+    return i_size;
+}
+
+/* Callback table exported by this muxer; entries are this file's open_file, set_param, write_headers, write_frame and close_file, in that order. */
+const cli_output_t mp4_output = { open_file, set_param, write_headers, write_frame, close_file };

 
@@ -0,0 +1,419 @@
+/*****************************************************************************
+ * mp4_lsmash.c: mp4 muxer using L-SMASH
+ *****************************************************************************
+ * Copyright (C) 2003-2014 x264 project
+ *
+ * Authors: Laurent Aimar <fenrir@via.ecp.fr>
+ *          Loren Merritt <lorenm@u.washington.edu>
+ *          Yusuke Nakamura <muken.the.vfrmaniac@gmail.com>
+ *          Takashi Hirata <silverfilain@gmail.com>
+ *          golgol7777 <golgol7777@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
+ *****************************************************************************/
+
+#include "output.h"
+#include <lsmash.h>
+
+#define H264_NALU_LENGTH_SIZE 4
+
+/*******************/
+
+/* Logging shorthands that tag every message with the "mp4" module name. */
+#define MP4_LOG_ERROR( ... )                x264_cli_log( "mp4", X264_LOG_ERROR, __VA_ARGS__ )
+#define MP4_LOG_WARNING( ... )              x264_cli_log( "mp4", X264_LOG_WARNING, __VA_ARGS__ )
+#define MP4_LOG_INFO( ... )                 x264_cli_log( "mp4", X264_LOG_INFO, __VA_ARGS__ )
+#define MP4_FAIL_IF_ERR( cond, ... )        FAIL_IF_ERR( cond, "mp4", __VA_ARGS__ )
+
+/* Log an error when cond is non-zero and keep going (used by close_file() and write_frame()).
+ * Wrapped in do/while(0) so the expansion is exactly one statement and stays
+ * correct inside an unbraced if/else. */
+#define MP4_LOG_IF_ERR( cond, ... )\
+do\
+{\
+    if( cond )\
+        MP4_LOG_ERROR( __VA_ARGS__ );\
+} while( 0 )
+
+/* For open_file(): when cond is non-zero, release the local handle `p_mp4`,
+ * log the error and return -1 from the calling function.
+ * Requires a variable named p_mp4 in the caller's scope. */
+#define MP4_FAIL_IF_ERR_EX( cond, ... )\
+do\
+{\
+    if( cond )\
+    {\
+        remove_mp4_hnd( p_mp4 );\
+        MP4_LOG_ERROR( __VA_ARGS__ );\
+        return -1;\
+    }\
+} while( 0 )
+
+/*******************/
+
+/* State of one MP4 output; allocated zeroed in open_file() and released by remove_mp4_hnd(). */
+typedef struct
+{
+    /* L-SMASH root returned by lsmash_open_movie(). */
+    lsmash_root_t *p_root;
+    /* Video summary returned by lsmash_create_summary(); filled in by set_param()/write_headers(). */
+    lsmash_video_summary_t *summary;
+    /* Non-zero when the output filename is "-". */
+    int b_stdout;
+    /* Timescales read back from L-SMASH in set_param(). */
+    uint32_t i_movie_timescale;
+    uint32_t i_video_timescale;
+    /* Value returned by lsmash_create_track(); 0 until set_param() succeeds. */
+    uint32_t i_track;
+    /* Value returned by lsmash_add_sample_entry() in write_headers(). */
+    uint32_t i_sample_entry;
+    /* i_timebase_num multiplied by i_dts_compress_multiplier. */
+    uint64_t i_time_inc;
+    /* Negated DTS of the first frame. */
+    int64_t i_start_offset;
+    /* Start time used for the track's edit. */
+    uint64_t i_first_cts;
+    /* DTS of the most recently appended sample. */
+    uint64_t i_prev_dts;
+    /* SEI saved by write_headers(); copied in front of the first frame, then freed and the size reset to 0. */
+    uint32_t i_sei_size;
+    uint8_t *p_sei_buffer;
+    /* Number of frames appended so far. */
+    int i_numframe;
+    /* DTS delta computed on the second frame; only used when b_dts_compress is set. */
+    int64_t i_init_delta;
+    /* 0 without B-frames, 1 with B-frames, 2 with B-pyramid. */
+    int i_delay_frames;
+    /* Copied from cli_output_opt_t.use_dts_compress. */
+    int b_dts_compress;
+    /* b_dts_compress * i_delay_frames + 1. */
+    int i_dts_compress_multiplier;
+    /* Always set to 0 by open_file(). */
+    int b_use_recovery;
+    /* Non-zero when the output is not a regular file; selects fragmented writing. */
+    int b_fragments;
+} mp4_hnd_t;
+
+/*******************/
+
+/*
+ * Release everything owned by a muxer handle: the pending SEI buffer, the video
+ * summary, the L-SMASH root and finally the handle itself.
+ * A NULL handle is accepted and ignored.
+ */
+static void remove_mp4_hnd( hnd_t handle )
+{
+    mp4_hnd_t *p_mp4 = handle;
+    if( !p_mp4 )
+        return;
+    /* free( NULL ) is a no-op, so no guard is needed. */
+    free( p_mp4->p_sei_buffer );
+    p_mp4->p_sei_buffer = NULL;
+    /* The summary created in open_file() is owned by the handle and was never released before. */
+    if( p_mp4->summary )
+    {
+        lsmash_cleanup_summary( (lsmash_summary_t *)p_mp4->summary );
+        p_mp4->summary = NULL;
+    }
+    if( p_mp4->p_root )
+    {
+        lsmash_destroy_root( p_mp4->p_root );
+        p_mp4->p_root = NULL;
+    }
+    free( p_mp4 );
+}
+
+/*******************/
+
+/*
+ * Finalize the movie and release the muxer handle.
+ * largest_pts and second_largest_pts are used to derive the duration of the last sample.
+ * Every L-SMASH failure in here is only logged; the function always returns 0.
+ */
+static int close_file( hnd_t handle, int64_t largest_pts, int64_t second_largest_pts )
+{
+    mp4_hnd_t *p_mp4 = handle;
+
+    if( !p_mp4 )
+        return 0;
+
+    if( p_mp4->p_root )
+    {
+        double actual_duration = 0;
+        /* i_track is 0 until set_param() has created the video track. */
+        if( p_mp4->i_track )
+        {
+            /* Flush the rest of samples and add the last sample_delta. */
+            uint32_t last_delta = largest_pts - second_largest_pts;
+            /* A zero delta is replaced by 1 for the flush. */
+            MP4_LOG_IF_ERR( lsmash_flush_pooled_samples( p_mp4->p_root, p_mp4->i_track, (last_delta ? last_delta : 1) * p_mp4->i_time_inc ),
+                            "failed to flush the rest of samples.\n" );
+
+            if( p_mp4->i_movie_timescale != 0 && p_mp4->i_video_timescale != 0 )    /* avoid zero division */
+                actual_duration = ((double)((largest_pts + last_delta) * p_mp4->i_time_inc) / p_mp4->i_video_timescale) * p_mp4->i_movie_timescale;
+            else
+                MP4_LOG_ERROR( "timescale is broken.\n" );
+
+            /*
+             * Declare the explicit time-line mapping.
+             * A segment_duration is given by movie timescale, while a media_time that is the start time of this segment
+             * is given by not the movie timescale but rather the media timescale.
+             * The reason is that ISO media have two time-lines, presentation and media time-line,
+             * and an edit maps the presentation time-line to the media time-line.
+             * According to QuickTime file format specification and the actual playback in QuickTime Player,
+             * if the Edit Box doesn't exist in the track, the ratio of the summation of sample durations and track's duration becomes
+             * the track's media_rate so that the entire media can be used by the track.
+             * So, we add Edit Box here to avoid this implicit media_rate could distort track's presentation timestamps slightly.
+             * Note: Any demuxers should follow the Edit List Box if it exists.
+             */
+            lsmash_edit_t edit;
+            edit.duration   = actual_duration;
+            edit.start_time = p_mp4->i_first_cts;
+            edit.rate       = ISOM_EDIT_MODE_NORMAL;
+            if( !p_mp4->b_fragments )
+            {
+                MP4_LOG_IF_ERR( lsmash_create_explicit_timeline_map( p_mp4->p_root, p_mp4->i_track, edit ),
+                                "failed to set timeline map for video.\n" );
+            }
+            /* Fragmented output: write_frame() already created an edit with unknown duration, so it is
+             * modified instead of created (the `1` is presumably the edit number - confirm against L-SMASH).
+             * NOTE(review): skipped for stdout, presumably because that output cannot be rewritten - confirm. */
+            else if( !p_mp4->b_stdout )
+                MP4_LOG_IF_ERR( lsmash_modify_explicit_timeline_map( p_mp4->p_root, p_mp4->i_track, 1, edit ),
+                                "failed to update timeline map for video.\n" );
+        }
+
+        MP4_LOG_IF_ERR( lsmash_finish_movie( p_mp4->p_root, NULL ), "failed to finish movie.\n" );
+    }
+
+    remove_mp4_hnd( p_mp4 ); /* including lsmash_destroy_root( p_mp4->p_root ); */
+
+    return 0;
+}
+
+/*
+ * Create a muxer handle for psz_filename and store it in *p_handle.
+ * "-" and anything that is not a regular file are written in fragmented mode.
+ * Returns 0 on success; on failure logs an error, leaves *p_handle NULL and returns -1.
+ */
+static int open_file( char *psz_filename, hnd_t *p_handle, cli_output_opt_t *opt )
+{
+    *p_handle = NULL;
+
+    const int b_stdout = !strcmp( psz_filename, "-" );
+
+    /* The path is only probed when the output is not "-". */
+    int b_regular = !b_stdout && x264_is_regular_file_path( psz_filename );
+    if( b_regular )
+    {
+        /* Open the file once for writing and re-check on the open stream. */
+        FILE *fh = x264_fopen( psz_filename, "wb" );
+        MP4_FAIL_IF_ERR( !fh, "cannot open output file `%s'.\n", psz_filename );
+        b_regular = x264_is_regular_file( fh );
+        fclose( fh );
+    }
+
+    mp4_hnd_t *p_mp4 = calloc( 1, sizeof(mp4_hnd_t) );
+    MP4_FAIL_IF_ERR( !p_mp4, "failed to allocate memory for muxer information.\n" );
+
+    p_mp4->b_stdout       = b_stdout;
+    p_mp4->b_fragments    = !b_regular;
+    p_mp4->b_use_recovery = 0; // we don't really support recovery
+    p_mp4->b_dts_compress = opt->use_dts_compress;
+
+    if( p_mp4->b_fragments )
+        p_mp4->p_root = lsmash_open_movie( psz_filename, LSMASH_FILE_MODE_WRITE_FRAGMENTED );
+    else
+        p_mp4->p_root = lsmash_open_movie( psz_filename, LSMASH_FILE_MODE_WRITE );
+    MP4_FAIL_IF_ERR_EX( !p_mp4->p_root, "failed to create root.\n" );
+
+    p_mp4->summary = (lsmash_video_summary_t *)lsmash_create_summary( LSMASH_SUMMARY_TYPE_VIDEO );
+    MP4_FAIL_IF_ERR_EX( !p_mp4->summary,
+                        "failed to allocate memory for summary information of video.\n" );
+    p_mp4->summary->sample_type = ISOM_CODEC_TYPE_AVC1_VIDEO;
+
+    /* Only publish the handle once it is fully set up. */
+    *p_handle = p_mp4;
+
+    return 0;
+}
+
+/*
+ * Configure movie, track and media parameters from the encoder settings in p_param.
+ * Returns 0 on success; on failure logs an error and returns -1.
+ */
+static int set_param( hnd_t handle, x264_param_t *p_param )
+{
+    mp4_hnd_t *p_mp4 = handle;
+    uint64_t i_media_timescale;
+
+    p_mp4->i_delay_frames = p_param->i_bframe ? (p_param->i_bframe_pyramid ? 2 : 1) : 0;
+    p_mp4->i_dts_compress_multiplier = p_mp4->b_dts_compress * p_mp4->i_delay_frames + 1;
+
+    /* Both halves of the timebase are scaled by the same multiplier; the timescale must still fit in 32 bits. */
+    i_media_timescale = (uint64_t)p_param->i_timebase_den * p_mp4->i_dts_compress_multiplier;
+    p_mp4->i_time_inc = (uint64_t)p_param->i_timebase_num * p_mp4->i_dts_compress_multiplier;
+    MP4_FAIL_IF_ERR( i_media_timescale > UINT32_MAX, "MP4 media timescale %"PRIu64" exceeds maximum\n", i_media_timescale );
+
+    /* Select brands. */
+    lsmash_brand_type brands[6] = { 0 };
+    uint32_t brand_count = 0;
+    brands[brand_count++] = ISOM_BRAND_TYPE_MP42;
+    brands[brand_count++] = ISOM_BRAND_TYPE_MP41;
+    brands[brand_count++] = ISOM_BRAND_TYPE_ISOM;
+    if( p_mp4->b_use_recovery )
+    {
+        brands[brand_count++] = ISOM_BRAND_TYPE_AVC1;   /* sdtp, sgpd, sbgp and visual roll recovery grouping */
+        if( p_param->b_open_gop )
+            brands[brand_count++] = ISOM_BRAND_TYPE_ISO6;   /* cslg and visual random access grouping */
+    }
+
+    /* Set movie parameters. */
+    lsmash_movie_parameters_t movie_param;
+    lsmash_initialize_movie_parameters( &movie_param );
+    movie_param.major_brand = ISOM_BRAND_TYPE_MP42;
+    movie_param.brands = brands;
+    movie_param.number_of_brands = brand_count;
+    MP4_FAIL_IF_ERR( lsmash_set_movie_parameters( p_mp4->p_root, &movie_param ),
+                     "failed to set movie parameters.\n" );
+    p_mp4->i_movie_timescale = lsmash_get_movie_timescale( p_mp4->p_root );
+    MP4_FAIL_IF_ERR( !p_mp4->i_movie_timescale, "movie timescale is broken.\n" );
+
+    /* Create a video track. */
+    p_mp4->i_track = lsmash_create_track( p_mp4->p_root, ISOM_MEDIA_HANDLER_TYPE_VIDEO_TRACK );
+    MP4_FAIL_IF_ERR( !p_mp4->i_track, "failed to create a video track.\n" );
+
+    p_mp4->summary->width = p_param->i_width;
+    p_mp4->summary->height = p_param->i_height;
+    /* The display size is passed shifted left by 16 bits. Shift in unsigned arithmetic:
+     * shifting the signed value directly is undefined once a dimension reaches 32768 (32-bit int). */
+    uint32_t i_display_width = (uint32_t)p_param->i_width << 16;
+    uint32_t i_display_height = (uint32_t)p_param->i_height << 16;
+    if( p_param->vui.i_sar_width && p_param->vui.i_sar_height )
+    {
+        /* Stretch whichever dimension makes the display size grow for the given sample aspect ratio. */
+        double sar = (double)p_param->vui.i_sar_width / p_param->vui.i_sar_height;
+        if( sar > 1.0 )
+            i_display_width *= sar;
+        else
+            i_display_height /= sar;
+        p_mp4->summary->par_h = p_param->vui.i_sar_width;
+        p_mp4->summary->par_v = p_param->vui.i_sar_height;
+    }
+    p_mp4->summary->color.primaries_index = p_param->vui.i_colorprim;
+    p_mp4->summary->color.transfer_index  = p_param->vui.i_transfer;
+    p_mp4->summary->color.matrix_index    = p_param->vui.i_colmatrix >= 0 ? p_param->vui.i_colmatrix : ISOM_MATRIX_INDEX_UNSPECIFIED;
+    p_mp4->summary->color.full_range      = p_param->vui.b_fullrange >= 0 ? p_param->vui.b_fullrange : 0;
+
+    /* Set video track parameters. */
+    lsmash_track_parameters_t track_param;
+    lsmash_initialize_track_parameters( &track_param );
+    lsmash_track_mode track_mode = ISOM_TRACK_ENABLED | ISOM_TRACK_IN_MOVIE | ISOM_TRACK_IN_PREVIEW;
+    track_param.mode = track_mode;
+    track_param.display_width = i_display_width;
+    track_param.display_height = i_display_height;
+    MP4_FAIL_IF_ERR( lsmash_set_track_parameters( p_mp4->p_root, p_mp4->i_track, &track_param ),
+                     "failed to set track parameters for video.\n" );
+
+    /* Set video media parameters. */
+    lsmash_media_parameters_t media_param;
+    lsmash_initialize_media_parameters( &media_param );
+    media_param.timescale = i_media_timescale;
+    media_param.media_handler_name = "L-SMASH Video Media Handler";
+    if( p_mp4->b_use_recovery )
+    {
+        media_param.roll_grouping = p_param->b_intra_refresh;
+        media_param.rap_grouping = p_param->b_open_gop;
+    }
+    MP4_FAIL_IF_ERR( lsmash_set_media_parameters( p_mp4->p_root, p_mp4->i_track, &media_param ),
+                     "failed to set media parameters for video.\n" );
+    p_mp4->i_video_timescale = lsmash_get_media_timescale( p_mp4->p_root, p_mp4->i_track );
+    MP4_FAIL_IF_ERR( !p_mp4->i_video_timescale, "media timescale for video is broken.\n" );
+
+    return 0;
+}
+
+/*
+ * Register the stream headers with the muxer.
+ * p_nal[0], p_nal[1] and p_nal[2] are read as SPS, PPS and SEI respectively.
+ * SPS/PPS go into the H.264 codec specific data of the sample entry; the SEI is
+ * saved in the handle so write_frame() can put it in front of the first frame.
+ * Returns sei_size + sps_size + pps_size on success, -1 (after logging) on failure.
+ */
+static int write_headers( hnd_t handle, x264_nal_t *p_nal )
+{
+    mp4_hnd_t *p_mp4 = handle;
+
+    /* The first H264_NALU_LENGTH_SIZE bytes of SPS and PPS are skipped (presumably the length prefix). */
+    uint32_t sps_size = p_nal[0].i_payload - H264_NALU_LENGTH_SIZE;
+    uint32_t pps_size = p_nal[1].i_payload - H264_NALU_LENGTH_SIZE;
+    uint32_t sei_size = p_nal[2].i_payload;
+
+    uint8_t *sps = p_nal[0].p_payload + H264_NALU_LENGTH_SIZE;
+    uint8_t *pps = p_nal[1].p_payload + H264_NALU_LENGTH_SIZE;
+    uint8_t *sei = p_nal[2].p_payload;
+
+    lsmash_codec_specific_t *cs = lsmash_create_codec_specific_data( LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_VIDEO_H264,
+                                                                     LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED );
+    /* cs is dereferenced right below, so an allocation failure must be caught here. */
+    MP4_FAIL_IF_ERR( !cs, "failed to allocate H.264 specific info.\n" );
+
+    lsmash_h264_specific_parameters_t *param = (lsmash_h264_specific_parameters_t *)cs->data.structured;
+    param->lengthSizeMinusOne = H264_NALU_LENGTH_SIZE - 1;
+
+    /* SPS
+     * The remaining parameters are automatically set by SPS. */
+    if( lsmash_append_h264_parameter_set( param, H264_PARAMETER_SET_TYPE_SPS, sps, sps_size ) )
+    {
+        lsmash_destroy_codec_specific_data( cs );
+        MP4_LOG_ERROR( "failed to append SPS.\n" );
+        return -1;
+    }
+
+    /* PPS */
+    if( lsmash_append_h264_parameter_set( param, H264_PARAMETER_SET_TYPE_PPS, pps, pps_size ) )
+    {
+        lsmash_destroy_codec_specific_data( cs );
+        MP4_LOG_ERROR( "failed to append PPS.\n" );
+        return -1;
+    }
+
+    if( lsmash_add_codec_specific_data( (lsmash_summary_t *)p_mp4->summary, cs ) )
+    {
+        lsmash_destroy_codec_specific_data( cs );
+        MP4_LOG_ERROR( "failed to add H.264 specific info.\n" );
+        return -1;
+    }
+
+    lsmash_destroy_codec_specific_data( cs );
+
+    /* Additional extensions */
+    /* Bitrate info: best effort, a failure here is not treated as an error. */
+    cs = lsmash_create_codec_specific_data( LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_VIDEO_H264_BITRATE,
+                                            LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED );
+    if( cs )
+    {
+        lsmash_add_codec_specific_data( (lsmash_summary_t *)p_mp4->summary, cs );
+        lsmash_destroy_codec_specific_data( cs );
+    }
+
+    p_mp4->i_sample_entry = lsmash_add_sample_entry( p_mp4->p_root, p_mp4->i_track, p_mp4->summary );
+    MP4_FAIL_IF_ERR( !p_mp4->i_sample_entry,
+                     "failed to add sample entry for video.\n" );
+
+    /* SEI: kept in the handle until the first frame is written. */
+    p_mp4->p_sei_buffer = malloc( sei_size );
+    MP4_FAIL_IF_ERR( !p_mp4->p_sei_buffer,
+                     "failed to allocate sei transition buffer.\n" );
+    memcpy( p_mp4->p_sei_buffer, sei, sei_size );
+    p_mp4->i_sei_size = sei_size;
+
+    return sei_size + sps_size + pps_size;
+}
+
+/* Wrap one encoded frame in an L-SMASH sample and append it to the video track.
+ *
+ * handle    - the mp4_hnd_t of this output.
+ * p_nalu    - encoded frame data, i_size bytes.
+ * p_picture - supplies i_dts, i_pts and b_keyframe for the sample.
+ *
+ * If write_headers() left a pending SEI buffer, it is copied in front of the
+ * frame data and then released. Returns i_size on success and -1 on the
+ * explicit error paths below. */
+static int write_frame( hnd_t handle, uint8_t *p_nalu, int i_size, x264_picture_t *p_picture )
+{
+    mp4_hnd_t *p_mp4 = handle;
+    uint64_t dts, cts;
+
+    if( !p_mp4->i_numframe )
+    {
+        /* First frame: remember the offset that shifts the first DTS to zero. */
+        p_mp4->i_start_offset = p_picture->i_dts * -1;
+        p_mp4->i_first_cts = p_mp4->b_dts_compress ? 0 : p_mp4->i_start_offset * p_mp4->i_time_inc;
+        if( p_mp4->b_fragments )
+        {
+            lsmash_edit_t edit;
+            edit.duration   = ISOM_EDIT_DURATION_UNKNOWN32;     /* QuickTime doesn't support 64bit duration. */
+            edit.start_time = p_mp4->i_first_cts;
+            edit.rate       = ISOM_EDIT_MODE_NORMAL;
+            MP4_LOG_IF_ERR( lsmash_create_explicit_timeline_map( p_mp4->p_root, p_mp4->i_track, edit ),
+                            "failed to set timeline map for video.\n" );
+        }
+    }
+
+    lsmash_sample_t *p_sample = lsmash_create_sample( i_size + p_mp4->i_sei_size );
+    MP4_FAIL_IF_ERR( !p_sample,
+                     "failed to create a video sample data.\n" );
+
+    if( p_mp4->p_sei_buffer )
+    {
+        memcpy( p_sample->data, p_mp4->p_sei_buffer, p_mp4->i_sei_size );
+        free( p_mp4->p_sei_buffer );
+        p_mp4->p_sei_buffer = NULL;
+    }
+
+    memcpy( p_sample->data + p_mp4->i_sei_size, p_nalu, i_size );
+    p_mp4->i_sei_size = 0;
+
+    if( p_mp4->b_dts_compress )
+    {
+        if( p_mp4->i_numframe == 1 )
+            p_mp4->i_init_delta = (p_picture->i_dts + p_mp4->i_start_offset) * p_mp4->i_time_inc;
+        dts = p_mp4->i_numframe > p_mp4->i_delay_frames
+            ? p_picture->i_dts * p_mp4->i_time_inc
+            : p_mp4->i_numframe * (p_mp4->i_init_delta / p_mp4->i_dts_compress_multiplier);
+        cts = p_picture->i_pts * p_mp4->i_time_inc;
+    }
+    else
+    {
+        dts = (p_picture->i_dts + p_mp4->i_start_offset) * p_mp4->i_time_inc;
+        cts = (p_picture->i_pts + p_mp4->i_start_offset) * p_mp4->i_time_inc;
+    }
+
+    p_sample->dts = dts;
+    p_sample->cts = cts;
+    p_sample->index = p_mp4->i_sample_entry;
+    p_sample->prop.ra_flags = p_picture->b_keyframe ? ISOM_SAMPLE_RANDOM_ACCESS_FLAG_SYNC : ISOM_SAMPLE_RANDOM_ACCESS_FLAG_NONE;
+
+    /* In fragmented mode every random access sample except the first starts a new fragment. */
+    if( p_mp4->b_fragments && p_mp4->i_numframe && p_sample->prop.ra_flags != ISOM_SAMPLE_RANDOM_ACCESS_FLAG_NONE )
+    {
+        /* p_sample has not been handed to the muxer yet, so it must be released
+         * here when either step fails. */
+        if( lsmash_flush_pooled_samples( p_mp4->p_root, p_mp4->i_track, p_sample->dts - p_mp4->i_prev_dts ) )
+        {
+            MP4_LOG_ERROR( "failed to flush the rest of samples.\n" );
+            lsmash_delete_sample( p_sample );
+            return -1;
+        }
+        if( lsmash_create_fragment_movie( p_mp4->p_root ) )
+        {
+            MP4_LOG_ERROR( "failed to create a movie fragment.\n" );
+            lsmash_delete_sample( p_sample );
+            return -1;
+        }
+    }
+
+    /* Append data per sample.
+     * NOTE(review): who owns p_sample when lsmash_append_sample() fails is not
+     * visible here; confirm against the L-SMASH API before adding a delete. */
+    MP4_FAIL_IF_ERR( lsmash_append_sample( p_mp4->p_root, p_mp4->i_track, p_sample ),
+                     "failed to append a video frame.\n" );
+
+    p_mp4->i_prev_dts = dts;
+    p_mp4->i_numframe++;
+
+    return i_size;
+}
+
+const cli_output_t mp4_output = { open_file, set_param, write_headers, write_frame, close_file }; /* callback table exposing this file's muxer functions */
​

x264-snapshot-20130723-2245.tar.bz2/output/output.h -> x264-snapshot-20140321-2245.tar.bz2/output/output.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * output.h: x264 file output modules
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
​

x264-snapshot-20130723-2245.tar.bz2/output/raw.c -> x264-snapshot-20140321-2245.tar.bz2/output/raw.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * raw.c: raw muxer
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -30,7 +30,7 @@
 {
     if( !strcmp( psz_filename, "-" ) )
         *p_handle = stdout;
-    else if( !(*p_handle = fopen( psz_filename, "w+b" )) )
+    else if( !(*p_handle = x264_fopen( psz_filename, "w+b" )) )
         return -1;
 
     return 0;
​

x264-snapshot-20130723-2245.tar.bz2/tools/checkasm-a.asm -> x264-snapshot-20140321-2245.tar.bz2/tools/checkasm-a.asm Changed

 
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* checkasm-a.asm: assembly check tool
 ;*****************************************************************************
-;* Copyright (C) 2008-2013 x264 project
+;* Copyright (C) 2008-2014 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Henrik Gramner <henrik@gramner.com>
​

x264-snapshot-20130723-2245.tar.bz2/tools/checkasm.c -> x264-snapshot-20140321-2245.tar.bz2/tools/checkasm.c Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * checkasm.c: assembly check tool
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -191,7 +191,6 @@
                     b->cpu&X264_CPU_SLOW_ATOM && b->cpu&X264_CPU_CACHELINE_64 ? "_c64_atom" :
                     b->cpu&X264_CPU_CACHELINE_64 ? "_c64" :
                     b->cpu&X264_CPU_SLOW_SHUFFLE ? "_slowshuffle" :
-                    b->cpu&X264_CPU_SSE_MISALIGN ? "_misalign" :
                     b->cpu&X264_CPU_LZCNT ? "_lzcnt" :
                     b->cpu&X264_CPU_BMI2 ? "_bmi2" :
                     b->cpu&X264_CPU_BMI1 ? "_bmi1" :
@@ -201,7 +200,7 @@
                     b->cpu&X264_CPU_FAST_NEON_MRC ? "_fast_mrc" :
 #endif
                     "",
-                    ((int64_t)10*b->cycles/b->den - nop_time)/4 );
+                    (int64_t)(10*b->cycles/b->den - nop_time)/4 );
         }
 }
 
@@ -407,7 +406,7 @@
                 } \
                 else \
                     call_a( pixel_asm.sad_x3[i], pbuf1, pix2, pix2+6, pix2+1, (intptr_t)64, res_asm ); \
-                if( memcmp(res_c, res_asm, sizeof(res_c)) ) \
+                if( memcmp(res_c, res_asm, N*sizeof(int)) ) \
                 { \
                     ok = 0; \
                     fprintf( stderr, "sad_x"#N"[%d]: %d,%d,%d,%d != %d,%d,%d,%d [FAILED]\n", \
@@ -1452,8 +1451,66 @@
                 }
         }
     }
+
+    if( mc_a.plane_copy_deinterleave_rgb != mc_ref.plane_copy_deinterleave_rgb )
+    {
+        set_func_name( "plane_copy_deinterleave_rgb" );
+        used_asm = 1;
+        for( int i = 0; i < sizeof(plane_specs)/sizeof(*plane_specs); i++ )
+        {
+            int w = (plane_specs[i].w + 2) >> 2;
+            int h = plane_specs[i].h;
+            intptr_t src_stride = plane_specs[i].src_stride;
+            intptr_t dst_stride = ALIGN( w, 16 );
+            intptr_t offv = dst_stride*h + 16;
+
+            for( int pw = 3; pw <= 4; pw++ )
+            {
+                memset( pbuf3, 0, 0x1000 );
+                memset( pbuf4, 0, 0x1000 );
+                call_c( mc_c.plane_copy_deinterleave_rgb, pbuf3, dst_stride, pbuf3+offv, dst_stride, pbuf3+2*offv, dst_stride, pbuf1, src_stride, pw, w, h );
+                call_a( mc_a.plane_copy_deinterleave_rgb, pbuf4, dst_stride, pbuf4+offv, dst_stride, pbuf4+2*offv, dst_stride, pbuf1, src_stride, pw, w, h );
+                for( int y = 0; y < h; y++ )
+                    if( memcmp( pbuf3+y*dst_stride+0*offv, pbuf4+y*dst_stride+0*offv, w ) ||
+                        memcmp( pbuf3+y*dst_stride+1*offv, pbuf4+y*dst_stride+1*offv, w ) ||
+                        memcmp( pbuf3+y*dst_stride+2*offv, pbuf4+y*dst_stride+2*offv, w ) )
+                    {
+                        ok = 0;
+                        fprintf( stderr, "plane_copy_deinterleave_rgb FAILED: w=%d h=%d stride=%d pw=%d\n", w, h, (int)src_stride, pw );
+                        break;
+                    }
+            }
+        }
+    }
     report( "plane_copy :" );
 
+    if( mc_a.plane_copy_deinterleave_v210 != mc_ref.plane_copy_deinterleave_v210 )
+    {
+        set_func_name( "plane_copy_deinterleave_v210" );
+        used_asm = 1;
+        for( int i = 0; i < sizeof(plane_specs)/sizeof(*plane_specs); i++ )
+        {
+            int w = (plane_specs[i].w + 1) >> 1;
+            int h = plane_specs[i].h;
+            intptr_t dst_stride = ALIGN( w, 16 );
+            intptr_t src_stride = (w + 47) / 48 * 128 / sizeof(uint32_t);
+            intptr_t offv = dst_stride*h + 32;
+            memset( pbuf3, 0, 0x1000 );
+            memset( pbuf4, 0, 0x1000 );
+            call_c( mc_c.plane_copy_deinterleave_v210, pbuf3, dst_stride, pbuf3+offv, dst_stride, (uint32_t *)buf1, src_stride, w, h );
+            call_a( mc_a.plane_copy_deinterleave_v210, pbuf4, dst_stride, pbuf4+offv, dst_stride, (uint32_t *)buf1, src_stride, w, h );
+            for( int y = 0; y < h; y++ )
+                if( memcmp( pbuf3+y*dst_stride,      pbuf4+y*dst_stride,      w*sizeof(uint16_t) ) ||
+                    memcmp( pbuf3+y*dst_stride+offv, pbuf4+y*dst_stride+offv, w*sizeof(uint16_t) ) )
+                {
+                    ok = 0;
+                    fprintf( stderr, "plane_copy_deinterleave_v210 FAILED: w=%d h=%d stride=%d\n", w, h, (int)src_stride );
+                    break;
+                }
+        }
+    }
+    report( "v210 :" );
+
     if( mc_a.hpel_filter != mc_ref.hpel_filter )
     {
         pixel *srchpel = pbuf1+8+2*64;
@@ -1541,16 +1598,17 @@
     INTEGRAL_INIT( integral_init8v, 9, sum, stride );
     report( "integral init :" );
 
+    ok = 1; used_asm = 0;
     if( mc_a.mbtree_propagate_cost != mc_ref.mbtree_propagate_cost )
     {
-        ok = 1; used_asm = 1;
+        used_asm = 1;
         x264_emms();
         for( int i = 0; i < 10; i++ )
         {
-            float fps_factor = (rand()&65535) / 256.;
-            set_func_name( "mbtree_propagate" );
-            int *dsta = (int*)buf3;
-            int *dstc = dsta+400;
+            float fps_factor = (rand()&65535) / 65535.0f;
+            set_func_name( "mbtree_propagate_cost" );
+            int16_t *dsta = (int16_t*)buf3;
+            int16_t *dstc = dsta+400;
             uint16_t *prop = (uint16_t*)buf1;
             uint16_t *intra = (uint16_t*)buf4;
             uint16_t *inter = intra+128;
@@ -1572,12 +1630,60 @@
             {
                 ok &= abs( dstc[j]-dsta[j] ) <= 1 || fabs( (double)dstc[j]/dsta[j]-1 ) < 1e-4;
                 if( !ok )
-                    fprintf( stderr, "mbtree_propagate FAILED: %f !~= %f\n", (double)dstc[j], (double)dsta[j] );
+                    fprintf( stderr, "mbtree_propagate_cost FAILED: %f !~= %f\n", (double)dstc[j], (double)dsta[j] );
             }
         }
-        report( "mbtree propagate :" );
     }
 
+    if( mc_a.mbtree_propagate_list != mc_ref.mbtree_propagate_list )
+    {
+        used_asm = 1;
+        for( int i = 0; i < 8; i++ )
+        {
+            set_func_name( "mbtree_propagate_list" );
+            x264_t h;
+            int height = 4;
+            int width = 128;
+            int size = width*height;
+            h.mb.i_mb_stride = width;
+            h.mb.i_mb_width = width;
+            h.mb.i_mb_height = height;
+
+            uint16_t *ref_costsc = (uint16_t*)buf3;
+            uint16_t *ref_costsa = (uint16_t*)buf4;
+            int16_t (*mvs)[2] = (int16_t(*)[2])(ref_costsc + size);
+            int16_t *propagate_amount = (int16_t*)(mvs + width);
+            uint16_t *lowres_costs = (uint16_t*)(propagate_amount + width);
+            h.scratch_buffer2 = (uint8_t*)(ref_costsa + size);
+            int bipred_weight = (rand()%63)+1;
+            int list = i&1;
+            for( int j = 0; j < size; j++ )
+                ref_costsc[j] = ref_costsa[j] = rand()&32767;
+            for( int j = 0; j < width; j++ )
+            {
+                static const uint8_t list_dist[2][8] = {{0,1,1,1,1,1,1,1},{1,1,3,3,3,3,3,2}};
+                for( int k = 0; k < 2; k++ )
+                    mvs[j][k] = (rand()&127) - 64;
+                propagate_amount[j] = rand()&32767;
+                lowres_costs[j] = list_dist[list][rand()&7] << LOWRES_COST_SHIFT;
+            }
+
+            call_c1( mc_c.mbtree_propagate_list, &h, ref_costsc, mvs, propagate_amount, lowres_costs, bipred_weight, 0, width, list );
+            call_a1( mc_a.mbtree_propagate_list, &h, ref_costsa, mvs, propagate_amount, lowres_costs, bipred_weight, 0, width, list );
+
+            for( int j = 0; j < size && ok; j++ )
+            {
+                ok &= abs(ref_costsa[j] - ref_costsc[j]) <= 1;
+                if( !ok )
+                    fprintf( stderr, "mbtree_propagate_list FAILED at %d: %d !~= %d\n", j, ref_costsc[j], ref_costsa[j] );
+            }
+
+            call_c2( mc_c.mbtree_propagate_list, &h, ref_costsc, mvs, propagate_amount, lowres_costs, bipred_weight, 0, width, list );
+            call_a2( mc_a.mbtree_propagate_list, &h, ref_costsa, mvs, propagate_amount, lowres_costs, bipred_weight, 0, width, list );
+        }
+    }
+    report( "mbtree :" );
+
     if( mc_a.memcpy_aligned != mc_ref.memcpy_aligned )
     {
         set_func_name( "memcpy_aligned" );
@@ -1779,7 +1885,7 @@
         }
 
         h->param.rc.i_qp_min = 0;
-        h->param.rc.i_qp_max = QP_MAX;
+        h->param.rc.i_qp_max = QP_MAX_SPEC;
         x264_cqm_init( h );
         x264_quant_init( h, 0, &qf_c );
         x264_quant_init( h, cpu_ref, &qf_ref );
@@ -2504,7 +2610,7 @@
 {
     *cpu_ref = *cpu_new;
     *cpu_new |= flags;
-#if BROKEN_STACK_ALIGNMENT
+#if STACK_ALIGNMENT < 16
     *cpu_new |= X264_CPU_STACK_MOD4;
 #endif
     if( *cpu_new & X264_CPU_SSE2_IS_FAST )
@@ -2549,11 +2655,6 @@
         ret |= add_flags( &cpu0, &cpu1, X264_CPU_SLOW_CTZ, "SSE2 SlowCTZ" );
         cpu1 &= ~X264_CPU_SLOW_CTZ;
     }
-    if( x264_cpu_detect() & X264_CPU_SSE_MISALIGN )
-    {
-        ret |= add_flags( &cpu0, &cpu1, X264_CPU_SSE_MISALIGN, "SSE_Misalign" );
-        cpu1 &= ~X264_CPU_SSE_MISALIGN;
-    }
     if( x264_cpu_detect() & X264_CPU_LZCNT )
     {
         ret |= add_flags( &cpu0, &cpu1, X264_CPU_LZCNT, "SSE_LZCNT" );

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * checkasm.c: assembly check tool
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
@@ -191,7 +191,6 @@
                     b->cpu&X264_CPU_SLOW_ATOM && b->cpu&X264_CPU_CACHELINE_64 ? "_c64_atom" :
                     b->cpu&X264_CPU_CACHELINE_64 ? "_c64" :
                     b->cpu&X264_CPU_SLOW_SHUFFLE ? "_slowshuffle" :
-                    b->cpu&X264_CPU_SSE_MISALIGN ? "_misalign" :
                     b->cpu&X264_CPU_LZCNT ? "_lzcnt" :
                     b->cpu&X264_CPU_BMI2 ? "_bmi2" :
                     b->cpu&X264_CPU_BMI1 ? "_bmi1" :
@@ -201,7 +200,7 @@
                     b->cpu&X264_CPU_FAST_NEON_MRC ? "_fast_mrc" :
 #endif
                     "",
-                    ((int64_t)10*b->cycles/b->den - nop_time)/4 );
+                    (int64_t)(10*b->cycles/b->den - nop_time)/4 );
         }
 }
 
@@ -407,7 +406,7 @@
                 } \
                 else \
                     call_a( pixel_asm.sad_x3[i], pbuf1, pix2, pix2+6, pix2+1, (intptr_t)64, res_asm ); \
-                if( memcmp(res_c, res_asm, sizeof(res_c)) ) \
+                if( memcmp(res_c, res_asm, N*sizeof(int)) ) \
                 { \
                     ok = 0; \
                     fprintf( stderr, "sad_x"#N"[%d]: %d,%d,%d,%d != %d,%d,%d,%d [FAILED]\n", \
@@ -1452,8 +1451,66 @@
                 }
         }
     }
+
+    if( mc_a.plane_copy_deinterleave_rgb != mc_ref.plane_copy_deinterleave_rgb )
+    {
+        set_func_name( "plane_copy_deinterleave_rgb" );
+        used_asm = 1;
+        for( int i = 0; i < sizeof(plane_specs)/sizeof(*plane_specs); i++ )
+        {
+            int w = (plane_specs[i].w + 2) >> 2;
+            int h = plane_specs[i].h;
+            intptr_t src_stride = plane_specs[i].src_stride;
+            intptr_t dst_stride = ALIGN( w, 16 );
+            intptr_t offv = dst_stride*h + 16;
+
+            for( int pw = 3; pw <= 4; pw++ )
+            {
+                memset( pbuf3, 0, 0x1000 );
+                memset( pbuf4, 0, 0x1000 );
+                call_c( mc_c.plane_copy_deinterleave_rgb, pbuf3, dst_stride, pbuf3+offv, dst_stride, pbuf3+2*offv, dst_stride, pbuf1, src_stride, pw, w, h );
+                call_a( mc_a.plane_copy_deinterleave_rgb, pbuf4, dst_stride, pbuf4+offv, dst_stride, pbuf4+2*offv, dst_stride, pbuf1, src_stride, pw, w, h );
+                for( int y = 0; y < h; y++ )
+                    if( memcmp( pbuf3+y*dst_stride+0*offv, pbuf4+y*dst_stride+0*offv, w ) ||
+                        memcmp( pbuf3+y*dst_stride+1*offv, pbuf4+y*dst_stride+1*offv, w ) ||
+                        memcmp( pbuf3+y*dst_stride+2*offv, pbuf4+y*dst_stride+2*offv, w ) )
+                    {
+                        ok = 0;
+                        fprintf( stderr, "plane_copy_deinterleave_rgb FAILED: w=%d h=%d stride=%d pw=%d\n", w, h, (int)src_stride, pw );
+                        break;
+                    }
+            }
+        }
+    }
     report( "plane_copy :" );
 
+    if( mc_a.plane_copy_deinterleave_v210 != mc_ref.plane_copy_deinterleave_v210 )
+    {
+        set_func_name( "plane_copy_deinterleave_v210" );
+        used_asm = 1;
+        for( int i = 0; i < sizeof(plane_specs)/sizeof(*plane_specs); i++ )
+        {
+            int w = (plane_specs[i].w + 1) >> 1;
+            int h = plane_specs[i].h;
+            intptr_t dst_stride = ALIGN( w, 16 );
+            intptr_t src_stride = (w + 47) / 48 * 128 / sizeof(uint32_t);
+            intptr_t offv = dst_stride*h + 32;
+            memset( pbuf3, 0, 0x1000 );
+            memset( pbuf4, 0, 0x1000 );
+            call_c( mc_c.plane_copy_deinterleave_v210, pbuf3, dst_stride, pbuf3+offv, dst_stride, (uint32_t *)buf1, src_stride, w, h );
+            call_a( mc_a.plane_copy_deinterleave_v210, pbuf4, dst_stride, pbuf4+offv, dst_stride, (uint32_t *)buf1, src_stride, w, h );
+            for( int y = 0; y < h; y++ )
+                if( memcmp( pbuf3+y*dst_stride,      pbuf4+y*dst_stride,      w*sizeof(uint16_t) ) ||
+                    memcmp( pbuf3+y*dst_stride+offv, pbuf4+y*dst_stride+offv, w*sizeof(uint16_t) ) )
+                {
+                    ok = 0;
+                    fprintf( stderr, "plane_copy_deinterleave_v210 FAILED: w=%d h=%d stride=%d\n", w, h, (int)src_stride );
+                    break;
+                }
+        }
+    }
+    report( "v210 :" );
+
     if( mc_a.hpel_filter != mc_ref.hpel_filter )
     {
         pixel *srchpel = pbuf1+8+2*64;
@@ -1541,16 +1598,17 @@
     INTEGRAL_INIT( integral_init8v, 9, sum, stride );
     report( "integral init :" );
 
+    ok = 1; used_asm = 0;
     if( mc_a.mbtree_propagate_cost != mc_ref.mbtree_propagate_cost )
     {
-        ok = 1; used_asm = 1;
+        used_asm = 1;
         x264_emms();
         for( int i = 0; i < 10; i++ )
         {
-            float fps_factor = (rand()&65535) / 256.;
-            set_func_name( "mbtree_propagate" );
-            int *dsta = (int*)buf3;
-            int *dstc = dsta+400;
+            float fps_factor = (rand()&65535) / 65535.0f;
+            set_func_name( "mbtree_propagate_cost" );
+            int16_t *dsta = (int16_t*)buf3;
+            int16_t *dstc = dsta+400;
             uint16_t *prop = (uint16_t*)buf1;
             uint16_t *intra = (uint16_t*)buf4;
             uint16_t *inter = intra+128;
@@ -1572,12 +1630,60 @@
             {
                 ok &= abs( dstc[j]-dsta[j] ) <= 1 || fabs( (double)dstc[j]/dsta[j]-1 ) < 1e-4;
                 if( !ok )
-                    fprintf( stderr, "mbtree_propagate FAILED: %f !~= %f\n", (double)dstc[j], (double)dsta[j] );
+                    fprintf( stderr, "mbtree_propagate_cost FAILED: %f !~= %f\n", (double)dstc[j], (double)dsta[j] );
             }
         }
-        report( "mbtree propagate :" );
     }
 
+    if( mc_a.mbtree_propagate_list != mc_ref.mbtree_propagate_list )
+    {
+        used_asm = 1;
+        for( int i = 0; i < 8; i++ )
+        {
+            set_func_name( "mbtree_propagate_list" );
+            x264_t h;
+            int height = 4;
+            int width = 128;
+            int size = width*height;
+            h.mb.i_mb_stride = width;
+            h.mb.i_mb_width = width;
+            h.mb.i_mb_height = height;
+
+            uint16_t *ref_costsc = (uint16_t*)buf3;
+            uint16_t *ref_costsa = (uint16_t*)buf4;
+            int16_t (*mvs)[2] = (int16_t(*)[2])(ref_costsc + size);
+            int16_t *propagate_amount = (int16_t*)(mvs + width);
+            uint16_t *lowres_costs = (uint16_t*)(propagate_amount + width);
+            h.scratch_buffer2 = (uint8_t*)(ref_costsa + size);
+            int bipred_weight = (rand()%63)+1;
+            int list = i&1;
+            for( int j = 0; j < size; j++ )
+                ref_costsc[j] = ref_costsa[j] = rand()&32767;
+            for( int j = 0; j < width; j++ )
+            {
+                static const uint8_t list_dist[2][8] = {{0,1,1,1,1,1,1,1},{1,1,3,3,3,3,3,2}};
+                for( int k = 0; k < 2; k++ )
+                    mvs[j][k] = (rand()&127) - 64;
+                propagate_amount[j] = rand()&32767;
+                lowres_costs[j] = list_dist[list][rand()&7] << LOWRES_COST_SHIFT;
+            }
+
+            call_c1( mc_c.mbtree_propagate_list, &h, ref_costsc, mvs, propagate_amount, lowres_costs, bipred_weight, 0, width, list );
+            call_a1( mc_a.mbtree_propagate_list, &h, ref_costsa, mvs, propagate_amount, lowres_costs, bipred_weight, 0, width, list );
+
+            for( int j = 0; j < size && ok; j++ )
+            {
+                ok &= abs(ref_costsa[j] - ref_costsc[j]) <= 1;
+                if( !ok )
+                    fprintf( stderr, "mbtree_propagate_list FAILED at %d: %d !~= %d\n", j, ref_costsc[j], ref_costsa[j] );
+            }
+
+            call_c2( mc_c.mbtree_propagate_list, &h, ref_costsc, mvs, propagate_amount, lowres_costs, bipred_weight, 0, width, list );
+            call_a2( mc_a.mbtree_propagate_list, &h, ref_costsa, mvs, propagate_amount, lowres_costs, bipred_weight, 0, width, list );
+        }
+    }
+    report( "mbtree :" );
+
     if( mc_a.memcpy_aligned != mc_ref.memcpy_aligned )
     {
         set_func_name( "memcpy_aligned" );
@@ -1779,7 +1885,7 @@
         }
 
         h->param.rc.i_qp_min = 0;
-        h->param.rc.i_qp_max = QP_MAX;
+        h->param.rc.i_qp_max = QP_MAX_SPEC;
         x264_cqm_init( h );
         x264_quant_init( h, 0, &qf_c );
         x264_quant_init( h, cpu_ref, &qf_ref );
@@ -2504,7 +2610,7 @@
 {
     *cpu_ref = *cpu_new;
     *cpu_new |= flags;
-#if BROKEN_STACK_ALIGNMENT
+#if STACK_ALIGNMENT < 16
     *cpu_new |= X264_CPU_STACK_MOD4;
 #endif
     if( *cpu_new & X264_CPU_SSE2_IS_FAST )
@@ -2549,11 +2655,6 @@
         ret |= add_flags( &cpu0, &cpu1, X264_CPU_SLOW_CTZ, "SSE2 SlowCTZ" );
         cpu1 &= ~X264_CPU_SLOW_CTZ;
     }
-    if( x264_cpu_detect() & X264_CPU_SSE_MISALIGN )
-    {
-        ret |= add_flags( &cpu0, &cpu1, X264_CPU_SSE_MISALIGN, "SSE_Misalign" );
-        cpu1 &= ~X264_CPU_SSE_MISALIGN;
-    }
     if( x264_cpu_detect() & X264_CPU_LZCNT )
     {
         ret |= add_flags( &cpu0, &cpu1, X264_CPU_LZCNT, "SSE_LZCNT" );
​

x264-snapshot-20130723-2245.tar.bz2/tools/cltostr.pl -> x264-snapshot-20140321-2245.tar.bz2/tools/cltostr.pl Changed

 
@@ -1,6 +1,6 @@
 # Perl script used for compiling OpenCL src into x264 binary
 #
-# Copyright (C) 2013 x264 project
+# Copyright (C) 2013-2014 x264 project
 # Authors: Steve Borho <sborho@multicorewareinc.com>
 
 use Digest::MD5 qw(md5_hex);
​

x264-snapshot-20130723-2245.tar.bz2/version.sh -> x264-snapshot-20140321-2245.tar.bz2/version.sh Changed

 
@@ -1,5 +1,8 @@
-#!/bin/bash
+#!/bin/sh
 [ -n "$1" ] && cd $1
+
+git_version() {
+trap 'rm -f config.git-hash' EXIT
 git rev-list HEAD | sort > config.git-hash
 LOCALVER=`wc -l config.git-hash | awk '{print $1}'`
 if [ $LOCALVER \> 1 ] ; then
@@ -14,11 +17,13 @@
         VER="${VER}M"
     fi
     VER="$VER $(git rev-list HEAD -n 1 | cut -c 1-7)"
-    echo "#define X264_VERSION \" r$VER\""
-else
-    echo "#define X264_VERSION \"\""
-    VER="x"
+    VERSION=" r$VER"
 fi
-rm -f config.git-hash
+}
+
+VER="x"
+VERSION=""
+[ -d .git ] && (type git >/dev/null 2>&1) && git_version
+echo "#define X264_VERSION \"$VERSION\""
 API=`grep '#define X264_BUILD' < x264.h | sed -e 's/.* \([1-9][0-9]*\).*/\1/'`
 echo "#define X264_POINTVER \"0.$API.$VER\""
​

x264-snapshot-20130723-2245.tar.bz2/x264.c -> x264-snapshot-20140321-2245.tar.bz2/x264.c Changed

@@ -1,13 +1,14 @@
 /*****************************************************************************
  * x264: top-level x264cli functions
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
  *          Steven Walters <kemuri9@gmail.com>
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
  *          Kieran Kunhya <kieran@kunhya.com>
+ *          Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -27,6 +28,15 @@
  * For more information, contact us at licensing@x264.com.
  *****************************************************************************/
 
+#ifdef _WIN32
+/* The following two defines must be located before the inclusion of any system header files. */
+#define WINVER       0x0500
+#define _WIN32_WINNT 0x0500
+#include <windows.h>
+#include <io.h>       /* _setmode() */
+#include <fcntl.h>    /* _O_BINARY */
+#endif
+
 #include <signal.h>
 #define _GNU_SOURCE
 #include <getopt.h>
@@ -38,13 +48,6 @@
 
 #define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "x264", __VA_ARGS__ )
 
-#ifdef _WIN32
-#include <windows.h>
-#else
-#define GetConsoleTitle(t,n)
-#define SetConsoleTitle(t)
-#endif
-
 #if HAVE_LAVF
 #undef DECLARE_ALIGNED
 #include <libavformat/avformat.h>
@@ -61,18 +64,89 @@
 #include <ffms.h>
 #endif
 
+#ifdef _WIN32
+#define CONSOLE_TITLE_SIZE 200
+static wchar_t org_console_title[CONSOLE_TITLE_SIZE] = L"";
+
+/* Set the console window title from a UTF-8 string.
+ * Nothing is changed when utf8_to_utf16() returns zero for the given title. */
+void x264_cli_set_console_title( const char *title )
+{
+    wchar_t wide_title[CONSOLE_TITLE_SIZE];
+
+    if( !utf8_to_utf16( title, wide_title ) )
+        return;
+    SetConsoleTitleW( wide_title );
+}
+
+/* Convert a NUL-terminated UTF-16 string to the ANSI code page (CP_ACP).
+ * Returns 1 only if the conversion succeeded and no character had to be
+ * replaced by the default character; returns 0 otherwise. */
+static int utf16_to_ansi( const wchar_t *utf16, char *ansi, int size )
+{
+    int used_default_char;
+
+    if( !WideCharToMultiByte( CP_ACP, WC_NO_BEST_FIT_CHARS, utf16, -1, ansi, size, NULL, &used_default_char ) )
+        return 0;
+    return !used_default_char;
+}
+
+/* Some external libraries don't support Unicode in filenames;
+ * as a workaround, try to obtain an equivalent ANSI filename.
+ *
+ * filename      - UTF-8 input name.
+ * ansi_filename - receives the ANSI name, at most size bytes.
+ * create_file   - if nonzero, the file is opened in append mode first so that
+ *                 it exists before the short name is looked up.
+ *
+ * Returns 1 if ansi_filename was filled in, 0 otherwise. */
+int x264_ansi_filename( const char *filename, char *ansi_filename, int size, int create_file )
+{
+    wchar_t wide_name[MAX_PATH];
+
+    if( !utf8_to_utf16( filename, wide_name ) )
+        return 0;
+
+    if( create_file )
+    {
+        /* Append mode creates a missing file under its Unicode name and leaves an existing one untouched. */
+        FILE *fh = _wfopen( wide_name, L"ab" );
+        if( fh )
+            fclose( fh );
+    }
+
+    /* The name may already be representable in the ANSI code page. */
+    if( utf16_to_ansi( wide_name, ansi_filename, size ) )
+        return 1;
+
+    /* Otherwise fall back to the legacy 8.3 short filename, converted in place. */
+    int short_length = GetShortPathNameW( wide_name, wide_name, MAX_PATH );
+    if( short_length <= 0 || short_length >= MAX_PATH )
+        return 0;
+
+    return utf16_to_ansi( wide_name, ansi_filename, size ) ? 1 : 0;
+}
+
+/* Fetch the process command line and re-encode every argument as UTF-8.
+ *
+ * On success *argc_ptr holds the argument count and *argv_ptr points to one
+ * malloc'ed block: a NULL-terminated pointer table followed by the strings it
+ * points to, so a single free() releases everything.
+ *
+ * Returns 1 on success, 0 on failure. */
+static int get_argv_utf8( int *argc_ptr, char ***argv_ptr )
+{
+    wchar_t **wide_argv = CommandLineToArgvW( GetCommandLineW(), argc_ptr );
+    if( !wide_argv )
+        return 0;
+
+    int argc = *argc_ptr;
+    int table_bytes = (argc+1) * sizeof(char*);
+    int total_bytes = table_bytes;
+
+    /* First pass: a zero-sized output buffer makes the call report the bytes needed. */
+    for( int i = 0; i < argc; i++ )
+        total_bytes += WideCharToMultiByte( CP_UTF8, 0, wide_argv[i], -1, NULL, 0, NULL, NULL );
+
+    int converted = 0;
+    char **argv = malloc( total_bytes );
+    *argv_ptr = argv;
+    if( argv )
+    {
+        /* Second pass: strings are packed back to back right after the pointer table. */
+        int cursor = table_bytes;
+        for( int i = 0; i < argc; i++ )
+        {
+            argv[i] = (char*)argv + cursor;
+            cursor += WideCharToMultiByte( CP_UTF8, 0, wide_argv[i], -1, argv[i], total_bytes-cursor, NULL, NULL );
+        }
+        argv[argc] = NULL;
+        converted = 1;
+    }
+
+    LocalFree( wide_argv );
+    return converted;
+}
+#endif
+
 /* Ctrl-C handler */
 static volatile int b_ctrl_c = 0;
-static int          b_exit_on_ctrl_c = 0;
 static void sigint_handler( int a )
 {
-    if( b_exit_on_ctrl_c )
-        exit(0);
     b_ctrl_c = 1;
 }
 
-static char UNUSED originalCTitle[200] = "";
-
 typedef struct {
     int b_progress;
     int i_seek;
@@ -114,7 +188,7 @@
     "raw",
     "mkv",
     "flv",
-#if HAVE_GPAC
+#if HAVE_GPAC || HAVE_LSMASH
     "mp4",
 #endif
     0
@@ -211,7 +285,7 @@
     fprintf( stderr, "%s [%s]: ", name, s_level );
     va_list arg;
     va_start( arg, fmt );
-    vfprintf( stderr, fmt, arg );
+    x264_vfprintf( stderr, fmt, arg );
     va_end( arg );
 }
 
@@ -221,7 +295,7 @@
         return;
     va_list arg;
     va_start( arg, fmt );
-    vfprintf( stderr, fmt, arg );
+    x264_vfprintf( stderr, fmt, arg );
     va_end( arg );
 }
 
@@ -275,18 +349,22 @@
     FAIL_IF_ERROR( x264_threading_init(), "unable to initialize threading\n" )
 
 #ifdef _WIN32
-    _setmode(_fileno(stdin), _O_BINARY);
-    _setmode(_fileno(stdout), _O_BINARY);
-#endif
+    FAIL_IF_ERROR( !get_argv_utf8( &argc, &argv ), "unable to convert command line to UTF-8\n" )
 
-    GetConsoleTitle( originalCTitle, sizeof(originalCTitle) );
+    GetConsoleTitleW( org_console_title, CONSOLE_TITLE_SIZE );
+    _setmode( _fileno( stdin ),  _O_BINARY );
+    _setmode( _fileno( stdout ), _O_BINARY );
+    _setmode( _fileno( stderr ), _O_BINARY );
+#endif
 
     /* Parse command line */
     if( parse( argc, argv, &param, &opt ) < 0 )
         ret = -1;
 
+#ifdef _WIN32
     /* Restore title; it can be changed by input modules */
-    SetConsoleTitle( originalCTitle );
+    SetConsoleTitleW( org_console_title );
+#endif
 
     /* Control-C handler */
     signal( SIGINT, sigint_handler );
@@ -306,7 +384,10 @@
     if( opt.qpfile )
         fclose( opt.qpfile );
 
-    SetConsoleTitle( originalCTitle );
+#ifdef _WIN32
+    SetConsoleTitleW( org_console_title );
+    free( argv );
+#endif
 
     return ret;
 }
@@ -339,16 +420,19 @@
     printf( INDENT );
     for( int i = X264_CSP_NONE+1; i < X264_CSP_CLI_MAX; i++ )
     {
-        printf( "%s", x264_cli_csps[i].name );
-        if( i+1 < X264_CSP_CLI_MAX )
-            printf( ", " );
+        if( x264_cli_csps[i].name )
+        {
+            printf( "%s", x264_cli_csps[i].name );
+            if( i+1 < X264_CSP_CLI_MAX )
+                printf( ", " );
+        }
     }
 #if HAVE_LAVF
     printf( "\n" );
     printf( "                              - valid csps for `lavf' demuxer:\n" );
     printf( INDENT );
     size_t line_len = strlen( INDENT );
-    for( enum PixelFormat i = PIX_FMT_NONE+1; i < PIX_FMT_NB; i++ )
+    for( enum PixelFormat i = AV_PIX_FMT_NONE+1; i < AV_PIX_FMT_NB; i++ )
     {
         const char *pfname = av_get_pix_fmt_name( i );
         if( pfname )
@@ -361,7 +445,7 @@
             }
             printf( "%s", pfname );
             line_len += name_len;
-            if( i+1 < PIX_FMT_NB )
+            if( i+1 < AV_PIX_FMT_NB )
             {
                 printf( ", " );
                 line_len += 2;
@@ -389,7 +473,7 @@
         " .264 -> Raw bytestream\n"
         " .mkv -> Matroska\n"
         " .flv -> Flash Video\n"
-        " .mp4 -> MP4 if compiled with GPAC support (%s)\n"
+        " .mp4 -> MP4 if compiled with GPAC or L-SMASH support (%s)\n"
         "Output bit depth: %d (configured at compile time)\n"
         "\n"
         "Options:\n"
@@ -415,7 +499,9 @@
         "no",
 #endif
 #if HAVE_GPAC
-        "yes",
+        "gpac",
+#elif HAVE_LSMASH
+        "lsmash",
 #else
         "no",
 #endif
@@ -764,6 +850,8 @@
 
     H2( "      --nal-hrd <string>      Signal HRD information (requires vbv-bufsize)\n"
         "                                  - none, vbr, cbr (cbr not allowed in .mp4)\n" );
+    H2( "      --filler                Force hard-CBR and generate filler (implied by\n"
+        "                              --nal-hrd cbr)\n" );
     H2( "      --pic-struct            Force pic_struct in Picture Timing SEI\n" );
     H2( "      --crop-rect <string>    Add 'left,top,right,bottom' to the bitstream-level\n"
         "                              cropping rectangle\n" );
@@ -792,6 +880,8 @@
     H0( "      --frames <integer>      Maximum number of frames to encode\n" );
     H0( "      --level <string>        Specify level (as defined by Annex A)\n" );
     H1( "      --bluray-compat         Enable compatibility hacks for Blu-ray support\n" );
+    H1( "      --avcintra-class <integer> Use compatibility hacks for AVC-Intra class\n"
+        "                                  - 50, 100, 200\n" );
     H1( "      --stitchable            Don't optimize headers based on video content\n"
         "                              Ensures ability to recombine a segmented encode\n" );
     H1( "\n" );
@@ -815,8 +905,7 @@
     H2( "      --no-asm                Disable all CPU optimizations\n" );
     H2( "      --opencl                Enable use of OpenCL\n" );
     H2( "      --opencl-clbin <string> Specify path of compiled OpenCL kernel cache\n" );
-    H2( "      --opencl-device <integer>  Specify OpenCL device ordinal\n" );
-    H2( "      --visualize             Show MB types overlayed on the encoded video\n" );
+    H2( "      --opencl-device <integer> Specify OpenCL device ordinal\n" );
     H2( "      --dump-yuv <string>     Save reconstructed frames\n" );
     H2( "      --sps-id <integer>      Set SPS and PPS id numbers [%d]\n", defaults->i_sps_id );
     H2( "      --aud                   Use access unit delimiters\n" );
@@ -848,7 +937,6 @@
     OPT_THREAD_INPUT,
     OPT_QUIET,
     OPT_NOPROGRESS,
-    OPT_VISUALIZE,
     OPT_LONGHELP,
     OPT_PROFILE,
     OPT_PRESET,
@@ -895,6 +983,7 @@
     { "b-pyramid",   required_argument, NULL, 0 },
     { "open-gop",          no_argument, NULL, 0 },
     { "bluray-compat",     no_argument, NULL, 0 },
+    { "avcintra-class", required_argument, NULL, 0 },
     { "min-keyint",  required_argument, NULL, 'i' },
     { "keyint",      required_argument, NULL, 'I' },
     { "intra-refresh",     no_argument, NULL, 0 },
@@ -997,7 +1086,6 @@
     { "verbose",           no_argument, NULL, 'v' },
     { "log-level",   required_argument, NULL, OPT_LOG_LEVEL },
     { "no-progress",       no_argument, NULL, OPT_NOPROGRESS },
-    { "visualize",         no_argument, NULL, OPT_VISUALIZE },
     { "dump-yuv",    required_argument, NULL, 0 },
     { "sps-id",      required_argument, NULL, 0 },
     { "aud",               no_argument, NULL, 0 },
@@ -1041,6 +1129,7 @@
     { "output-csp",  required_argument, NULL, OPT_OUTPUT_CSP },
     { "input-range", required_argument, NULL, OPT_INPUT_RANGE },
     { "stitchable",        no_argument, NULL, 0 },
+    { "filler",            no_argument, NULL, 0 },
     {0, 0, 0, 0}
 };
 
@@ -1052,7 +1141,7 @@
 
     if( !strcasecmp( ext, "mp4" ) )
     {
-#if HAVE_GPAC
+#if HAVE_GPAC || HAVE_LSMASH
         cli_output = mp4_output;
         param->b_annexb = 0;
         param->b_repeat_headers = 0;
@@ -1094,7 +1183,7 @@
     b_regular = b_regular && x264_is_regular_file_path( filename );
     if( b_regular )
     {
-        FILE *f = fopen( filename, "r" );
+        FILE *f = x264_fopen( filename, "r" );
         if( f )
         {
             b_regular = x264_is_regular_file( f );
@@ -1197,7 +1286,7 @@
     int csp = info->csp & X264_CSP_MASK;
     if( output_csp == X264_CSP_I420 && (csp < X264_CSP_I420 || csp > X264_CSP_NV12) )
         param->i_csp = X264_CSP_I420;
-    else if( output_csp == X264_CSP_I422 && (csp < X264_CSP_I422 || csp > X264_CSP_NV16) )
+    else if( output_csp == X264_CSP_I422 && (csp < X264_CSP_I422 || csp > X264_CSP_V210) )
         param->i_csp = X264_CSP_I422;
     else if( output_csp == X264_CSP_I444 && (csp < X264_CSP_I444 || csp > X264_CSP_YV24) )
         param->i_csp = X264_CSP_I444;
@@ -1338,7 +1427,7 @@
                 input_opt.index_file = optarg;
                 break;
             case OPT_QPFILE:
-                opt->qpfile = fopen( optarg, "rb" );
+                opt->qpfile = x264_fopen( optarg, "rb" );
                 FAIL_IF_ERROR( !opt->qpfile, "can't open qpfile `%s'\n", optarg )
                 if( !x264_is_regular_file( opt->qpfile ) )
                 {
@@ -1366,14 +1455,6 @@
             case OPT_NOPROGRESS:
                 opt->b_progress = 0;
                 break;
-            case OPT_VISUALIZE:
-#if HAVE_VISUALIZE
-                param->b_visualize = 1;
-                b_exit_on_ctrl_c = 1;
-#else
-                x264_cli_log( "x264", X264_LOG_WARNING, "not compiled with visualization support\n" );
-#endif
-                break;
             case OPT_TUNE:
             case OPT_PRESET:
                 break;
@@ -1397,7 +1478,7 @@
                 tcfile_name = optarg;
                 break;
             case OPT_TCFILE_OUT:
-                opt->tcfile_out = fopen( optarg, "wb" );
+                opt->tcfile_out = x264_fopen( optarg, "wb" );
                 FAIL_IF_ERROR( !opt->tcfile_out, "can't open `%s'\n", optarg )
                 break;
             case OPT_TIMEBASE:
@@ -1498,8 +1579,11 @@
     info.fps_den    = param->i_fps_den;
     info.fullrange  = input_opt.input_range == RANGE_PC;
     info.interlaced = param->b_interlaced;
-    info.sar_width  = param->vui.i_sar_width;
-    info.sar_height = param->vui.i_sar_height;
+    if( param->vui.i_sar_width > 0 && param->vui.i_sar_height > 0 )
+    {
+        info.sar_width  = param->vui.i_sar_width;
+        info.sar_height = param->vui.i_sar_height;
+    }
     info.tff        = param->b_tff;
     info.vfr        = param->b_vfr_input;
 
@@ -1542,7 +1626,7 @@
 #endif
 
     /* override detected values by those specified by the user */
-    if( param->vui.i_sar_width && param->vui.i_sar_height )
+    if( param->vui.i_sar_width > 0 && param->vui.i_sar_height > 0 )
     {
         info.sar_width  = param->vui.i_sar_width;
         info.sar_height = param->vui.i_sar_height;
@@ -1721,11 +1805,9 @@
                  eta/3600, (eta/60)%60, eta%60 );
     }
     else
-    {
         sprintf( buf, "x264 %d frames: %.2f fps, %.2f kb/s", i_frame, fps, bitrate );
-    }
     fprintf( stderr, "%s  \r", buf+5 );
-    SetConsoleTitle( buf );
+    x264_cli_set_console_title( buf );
     fflush( stderr ); // needed in windows
     return i_time;
 }

 
@@ -1,13 +1,14 @@
 /*****************************************************************************
  * x264: top-level x264cli functions
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Loren Merritt <lorenm@u.washington.edu>
  *          Laurent Aimar <fenrir@via.ecp.fr>
  *          Steven Walters <kemuri9@gmail.com>
  *          Jason Garrett-Glaser <darkshikari@gmail.com>
  *          Kieran Kunhya <kieran@kunhya.com>
+ *          Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -27,6 +28,15 @@
  * For more information, contact us at licensing@x264.com.
  *****************************************************************************/
 
+#ifdef _WIN32
+/* The following two defines must be located before the inclusion of any system header files. */
+#define WINVER       0x0500
+#define _WIN32_WINNT 0x0500
+#include <windows.h>
+#include <io.h>       /* _setmode() */
+#include <fcntl.h>    /* _O_BINARY */
+#endif
+
 #include <signal.h>
 #define _GNU_SOURCE
 #include <getopt.h>
@@ -38,13 +48,6 @@
 
 #define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "x264", __VA_ARGS__ )
 
-#ifdef _WIN32
-#include <windows.h>
-#else
-#define GetConsoleTitle(t,n)
-#define SetConsoleTitle(t)
-#endif
-
 #if HAVE_LAVF
 #undef DECLARE_ALIGNED
 #include <libavformat/avformat.h>
@@ -61,18 +64,89 @@
 #include <ffms.h>
 #endif
 
+#ifdef _WIN32
+#define CONSOLE_TITLE_SIZE 200
+static wchar_t org_console_title[CONSOLE_TITLE_SIZE] = L"";
+
+void x264_cli_set_console_title( const char *title )
+{
+    wchar_t title_utf16[CONSOLE_TITLE_SIZE];
+    if( utf8_to_utf16( title, title_utf16 ) )
+        SetConsoleTitleW( title_utf16 );
+}
+
+static int utf16_to_ansi( const wchar_t *utf16, char *ansi, int size )
+{
+    int invalid;
+    return WideCharToMultiByte( CP_ACP, WC_NO_BEST_FIT_CHARS, utf16, -1, ansi, size, NULL, &invalid ) && !invalid;
+}
+
+/* Some external libraries don't support Unicode in filenames;
+ * as a workaround we can try to get an ANSI filename instead. */
+int x264_ansi_filename( const char *filename, char *ansi_filename, int size, int create_file )
+{
+    wchar_t filename_utf16[MAX_PATH];
+    if( utf8_to_utf16( filename, filename_utf16 ) )
+    {
+        if( create_file )
+        {
+            /* Create the file using the Unicode filename if it doesn't already exist. */
+            FILE *fh = _wfopen( filename_utf16, L"ab" );
+            if( fh )
+                fclose( fh );
+        }
+
+        /* Check if the filename already is valid ANSI. */
+        if( utf16_to_ansi( filename_utf16, ansi_filename, size ) )
+            return 1;
+
+        /* Check for a legacy 8.3 short filename. */
+        int short_length = GetShortPathNameW( filename_utf16, filename_utf16, MAX_PATH );
+        if( short_length > 0 && short_length < MAX_PATH )
+            if( utf16_to_ansi( filename_utf16, ansi_filename, size ) )
+                return 1;
+    }
+    return 0;
+}
+
+/* Retrieve command line arguments as UTF-8. */
+static int get_argv_utf8( int *argc_ptr, char ***argv_ptr )
+{
+    int ret = 0;
+    wchar_t **argv_utf16 = CommandLineToArgvW( GetCommandLineW(), argc_ptr );
+    if( argv_utf16 )
+    {
+        int argc = *argc_ptr;
+        int offset = (argc+1) * sizeof(char*);
+        int size = offset;
+
+        for( int i = 0; i < argc; i++ )
+            size += WideCharToMultiByte( CP_UTF8, 0, argv_utf16[i], -1, NULL, 0, NULL, NULL );
+
+        char **argv = *argv_ptr = malloc( size );
+        if( argv )
+        {
+            for( int i = 0; i < argc; i++ )
+            {
+                argv[i] = (char*)argv + offset;
+                offset += WideCharToMultiByte( CP_UTF8, 0, argv_utf16[i], -1, argv[i], size-offset, NULL, NULL );
+            }
+            argv[argc] = NULL;
+            ret = 1;
+        }
+        LocalFree( argv_utf16 );
+    }
+    return ret;
+}
+#endif
+
 /* Ctrl-C handler */
 static volatile int b_ctrl_c = 0;
-static int          b_exit_on_ctrl_c = 0;
 static void sigint_handler( int a )
 {
-    if( b_exit_on_ctrl_c )
-        exit(0);
     b_ctrl_c = 1;
 }
 
-static char UNUSED originalCTitle[200] = "";
-
 typedef struct {
     int b_progress;
     int i_seek;
@@ -114,7 +188,7 @@
     "raw",
     "mkv",
     "flv",
-#if HAVE_GPAC
+#if HAVE_GPAC || HAVE_LSMASH
     "mp4",
 #endif
     0
@@ -211,7 +285,7 @@
     fprintf( stderr, "%s [%s]: ", name, s_level );
     va_list arg;
     va_start( arg, fmt );
-    vfprintf( stderr, fmt, arg );
+    x264_vfprintf( stderr, fmt, arg );
     va_end( arg );
 }
 
@@ -221,7 +295,7 @@
         return;
     va_list arg;
     va_start( arg, fmt );
-    vfprintf( stderr, fmt, arg );
+    x264_vfprintf( stderr, fmt, arg );
     va_end( arg );
 }
 
@@ -275,18 +349,22 @@
     FAIL_IF_ERROR( x264_threading_init(), "unable to initialize threading\n" )
 
 #ifdef _WIN32
-    _setmode(_fileno(stdin), _O_BINARY);
-    _setmode(_fileno(stdout), _O_BINARY);
-#endif
+    FAIL_IF_ERROR( !get_argv_utf8( &argc, &argv ), "unable to convert command line to UTF-8\n" )
 
-    GetConsoleTitle( originalCTitle, sizeof(originalCTitle) );
+    GetConsoleTitleW( org_console_title, CONSOLE_TITLE_SIZE );
+    _setmode( _fileno( stdin ),  _O_BINARY );
+    _setmode( _fileno( stdout ), _O_BINARY );
+    _setmode( _fileno( stderr ), _O_BINARY );
+#endif
 
     /* Parse command line */
     if( parse( argc, argv, &param, &opt ) < 0 )
         ret = -1;
 
+#ifdef _WIN32
     /* Restore title; it can be changed by input modules */
-    SetConsoleTitle( originalCTitle );
+    SetConsoleTitleW( org_console_title );
+#endif
 
     /* Control-C handler */
     signal( SIGINT, sigint_handler );
@@ -306,7 +384,10 @@
     if( opt.qpfile )
         fclose( opt.qpfile );
 
-    SetConsoleTitle( originalCTitle );
+#ifdef _WIN32
+    SetConsoleTitleW( org_console_title );
+    free( argv );
+#endif
 
     return ret;
 }
@@ -339,16 +420,19 @@
     printf( INDENT );
     for( int i = X264_CSP_NONE+1; i < X264_CSP_CLI_MAX; i++ )
     {
-        printf( "%s", x264_cli_csps[i].name );
-        if( i+1 < X264_CSP_CLI_MAX )
-            printf( ", " );
+        if( x264_cli_csps[i].name )
+        {
+            printf( "%s", x264_cli_csps[i].name );
+            if( i+1 < X264_CSP_CLI_MAX )
+                printf( ", " );
+        }
     }
 #if HAVE_LAVF
     printf( "\n" );
     printf( "                              - valid csps for `lavf' demuxer:\n" );
     printf( INDENT );
     size_t line_len = strlen( INDENT );
-    for( enum PixelFormat i = PIX_FMT_NONE+1; i < PIX_FMT_NB; i++ )
+    for( enum PixelFormat i = AV_PIX_FMT_NONE+1; i < AV_PIX_FMT_NB; i++ )
     {
         const char *pfname = av_get_pix_fmt_name( i );
         if( pfname )
@@ -361,7 +445,7 @@
             }
             printf( "%s", pfname );
             line_len += name_len;
-            if( i+1 < PIX_FMT_NB )
+            if( i+1 < AV_PIX_FMT_NB )
             {
                 printf( ", " );
                 line_len += 2;
@@ -389,7 +473,7 @@
         " .264 -> Raw bytestream\n"
         " .mkv -> Matroska\n"
         " .flv -> Flash Video\n"
-        " .mp4 -> MP4 if compiled with GPAC support (%s)\n"
+        " .mp4 -> MP4 if compiled with GPAC or L-SMASH support (%s)\n"
         "Output bit depth: %d (configured at compile time)\n"
         "\n"
         "Options:\n"
@@ -415,7 +499,9 @@
         "no",
 #endif
 #if HAVE_GPAC
-        "yes",
+        "gpac",
+#elif HAVE_LSMASH
+        "lsmash",
 #else
         "no",
 #endif
@@ -764,6 +850,8 @@
 
     H2( "      --nal-hrd <string>      Signal HRD information (requires vbv-bufsize)\n"
         "                                  - none, vbr, cbr (cbr not allowed in .mp4)\n" );
+    H2( "      --filler                Force hard-CBR and generate filler (implied by\n"
+        "                              --nal-hrd cbr)\n" );
     H2( "      --pic-struct            Force pic_struct in Picture Timing SEI\n" );
     H2( "      --crop-rect <string>    Add 'left,top,right,bottom' to the bitstream-level\n"
         "                              cropping rectangle\n" );
@@ -792,6 +880,8 @@
     H0( "      --frames <integer>      Maximum number of frames to encode\n" );
     H0( "      --level <string>        Specify level (as defined by Annex A)\n" );
     H1( "      --bluray-compat         Enable compatibility hacks for Blu-ray support\n" );
+    H1( "      --avcintra-class <integer> Use compatibility hacks for AVC-Intra class\n"
+        "                                  - 50, 100, 200\n" );
     H1( "      --stitchable            Don't optimize headers based on video content\n"
         "                              Ensures ability to recombine a segmented encode\n" );
     H1( "\n" );
@@ -815,8 +905,7 @@
     H2( "      --no-asm                Disable all CPU optimizations\n" );
     H2( "      --opencl                Enable use of OpenCL\n" );
     H2( "      --opencl-clbin <string> Specify path of compiled OpenCL kernel cache\n" );
-    H2( "      --opencl-device <integer>  Specify OpenCL device ordinal\n" );
-    H2( "      --visualize             Show MB types overlayed on the encoded video\n" );
+    H2( "      --opencl-device <integer> Specify OpenCL device ordinal\n" );
     H2( "      --dump-yuv <string>     Save reconstructed frames\n" );
     H2( "      --sps-id <integer>      Set SPS and PPS id numbers [%d]\n", defaults->i_sps_id );
     H2( "      --aud                   Use access unit delimiters\n" );
@@ -848,7 +937,6 @@
     OPT_THREAD_INPUT,
     OPT_QUIET,
     OPT_NOPROGRESS,
-    OPT_VISUALIZE,
     OPT_LONGHELP,
     OPT_PROFILE,
     OPT_PRESET,
@@ -895,6 +983,7 @@
     { "b-pyramid",   required_argument, NULL, 0 },
     { "open-gop",          no_argument, NULL, 0 },
     { "bluray-compat",     no_argument, NULL, 0 },
+    { "avcintra-class", required_argument, NULL, 0 },
     { "min-keyint",  required_argument, NULL, 'i' },
     { "keyint",      required_argument, NULL, 'I' },
     { "intra-refresh",     no_argument, NULL, 0 },
@@ -997,7 +1086,6 @@
     { "verbose",           no_argument, NULL, 'v' },
     { "log-level",   required_argument, NULL, OPT_LOG_LEVEL },
     { "no-progress",       no_argument, NULL, OPT_NOPROGRESS },
-    { "visualize",         no_argument, NULL, OPT_VISUALIZE },
     { "dump-yuv",    required_argument, NULL, 0 },
     { "sps-id",      required_argument, NULL, 0 },
     { "aud",               no_argument, NULL, 0 },
@@ -1041,6 +1129,7 @@
     { "output-csp",  required_argument, NULL, OPT_OUTPUT_CSP },
     { "input-range", required_argument, NULL, OPT_INPUT_RANGE },
     { "stitchable",        no_argument, NULL, 0 },
+    { "filler",            no_argument, NULL, 0 },
     {0, 0, 0, 0}
 };
 
@@ -1052,7 +1141,7 @@
 
     if( !strcasecmp( ext, "mp4" ) )
     {
-#if HAVE_GPAC
+#if HAVE_GPAC || HAVE_LSMASH
         cli_output = mp4_output;
         param->b_annexb = 0;
         param->b_repeat_headers = 0;
@@ -1094,7 +1183,7 @@
     b_regular = b_regular && x264_is_regular_file_path( filename );
     if( b_regular )
     {
-        FILE *f = fopen( filename, "r" );
+        FILE *f = x264_fopen( filename, "r" );
         if( f )
         {
             b_regular = x264_is_regular_file( f );
@@ -1197,7 +1286,7 @@
     int csp = info->csp & X264_CSP_MASK;
     if( output_csp == X264_CSP_I420 && (csp < X264_CSP_I420 || csp > X264_CSP_NV12) )
         param->i_csp = X264_CSP_I420;
-    else if( output_csp == X264_CSP_I422 && (csp < X264_CSP_I422 || csp > X264_CSP_NV16) )
+    else if( output_csp == X264_CSP_I422 && (csp < X264_CSP_I422 || csp > X264_CSP_V210) )
         param->i_csp = X264_CSP_I422;
     else if( output_csp == X264_CSP_I444 && (csp < X264_CSP_I444 || csp > X264_CSP_YV24) )
         param->i_csp = X264_CSP_I444;
@@ -1338,7 +1427,7 @@
                 input_opt.index_file = optarg;
                 break;
             case OPT_QPFILE:
-                opt->qpfile = fopen( optarg, "rb" );
+                opt->qpfile = x264_fopen( optarg, "rb" );
                 FAIL_IF_ERROR( !opt->qpfile, "can't open qpfile `%s'\n", optarg )
                 if( !x264_is_regular_file( opt->qpfile ) )
                 {
@@ -1366,14 +1455,6 @@
             case OPT_NOPROGRESS:
                 opt->b_progress = 0;
                 break;
-            case OPT_VISUALIZE:
-#if HAVE_VISUALIZE
-                param->b_visualize = 1;
-                b_exit_on_ctrl_c = 1;
-#else
-                x264_cli_log( "x264", X264_LOG_WARNING, "not compiled with visualization support\n" );
-#endif
-                break;
             case OPT_TUNE:
             case OPT_PRESET:
                 break;
@@ -1397,7 +1478,7 @@
                 tcfile_name = optarg;
                 break;
             case OPT_TCFILE_OUT:
-                opt->tcfile_out = fopen( optarg, "wb" );
+                opt->tcfile_out = x264_fopen( optarg, "wb" );
                 FAIL_IF_ERROR( !opt->tcfile_out, "can't open `%s'\n", optarg )
                 break;
             case OPT_TIMEBASE:
@@ -1498,8 +1579,11 @@
     info.fps_den    = param->i_fps_den;
     info.fullrange  = input_opt.input_range == RANGE_PC;
     info.interlaced = param->b_interlaced;
-    info.sar_width  = param->vui.i_sar_width;
-    info.sar_height = param->vui.i_sar_height;
+    if( param->vui.i_sar_width > 0 && param->vui.i_sar_height > 0 )
+    {
+        info.sar_width  = param->vui.i_sar_width;
+        info.sar_height = param->vui.i_sar_height;
+    }
     info.tff        = param->b_tff;
     info.vfr        = param->b_vfr_input;
 
@@ -1542,7 +1626,7 @@
 #endif
 
     /* override detected values by those specified by the user */
-    if( param->vui.i_sar_width && param->vui.i_sar_height )
+    if( param->vui.i_sar_width > 0 && param->vui.i_sar_height > 0 )
     {
         info.sar_width  = param->vui.i_sar_width;
         info.sar_height = param->vui.i_sar_height;
@@ -1721,11 +1805,9 @@
                  eta/3600, (eta/60)%60, eta%60 );
     }
     else
-    {
         sprintf( buf, "x264 %d frames: %.2f fps, %.2f kb/s", i_frame, fps, bitrate );
-    }
     fprintf( stderr, "%s  \r", buf+5 );
-    SetConsoleTitle( buf );
+    x264_cli_set_console_title( buf );
     fflush( stderr ); // needed in windows
     return i_time;
 }
​

x264-snapshot-20130723-2245.tar.bz2/x264.h -> x264-snapshot-20140321-2245.tar.bz2/x264.h Changed

@@ -1,7 +1,7 @@
 /*****************************************************************************
  * x264.h: x264 public header
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -41,7 +41,7 @@
 
 #include "x264_config.h"
 
-#define X264_BUILD 135
+#define X264_BUILD 142
 
 /* Application developers planning to link against a shared library version of
  * libx264 from a Microsoft Visual Studio or similar development environment
@@ -98,12 +98,15 @@
     int i_first_mb; /* If this NAL is a slice, the index of the first MB in the slice. */
     int i_last_mb;  /* If this NAL is a slice, the index of the last MB in the slice. */
 
-    /* Size of payload in bytes. */
+    /* Size of payload (including any padding) in bytes. */
     int     i_payload;
     /* If param->b_annexb is set, Annex-B bytestream with startcode.
      * Otherwise, startcode is replaced with a 4-byte size.
      * This size is the size used in mp4/similar muxing; it is equal to i_payload-4 */
     uint8_t *p_payload;
+
+    /* Size of padding in bytes. */
+    int i_padding;
 } x264_nal_t;
 
 /****************************************************************************
@@ -122,30 +125,29 @@
 #define X264_CPU_SSSE3           0x0000040
 #define X264_CPU_SSE4            0x0000080  /* SSE4.1 */
 #define X264_CPU_SSE42           0x0000100  /* SSE4.2 */
-#define X264_CPU_SSE_MISALIGN    0x0000200  /* Phenom support for misaligned SSE instruction arguments */
-#define X264_CPU_LZCNT           0x0000400  /* Phenom support for "leading zero count" instruction. */
-#define X264_CPU_AVX             0x0000800  /* AVX support: requires OS support even if YMM registers aren't used. */
-#define X264_CPU_XOP             0x0001000  /* AMD XOP */
-#define X264_CPU_FMA4            0x0002000  /* AMD FMA4 */
-#define X264_CPU_AVX2            0x0004000  /* AVX2 */
-#define X264_CPU_FMA3            0x0008000  /* Intel FMA3 */
-#define X264_CPU_BMI1            0x0010000  /* BMI1 */
-#define X264_CPU_BMI2            0x0020000  /* BMI2 */
+#define X264_CPU_LZCNT           0x0000200  /* Phenom support for "leading zero count" instruction. */
+#define X264_CPU_AVX             0x0000400  /* AVX support: requires OS support even if YMM registers aren't used. */
+#define X264_CPU_XOP             0x0000800  /* AMD XOP */
+#define X264_CPU_FMA4            0x0001000  /* AMD FMA4 */
+#define X264_CPU_AVX2            0x0002000  /* AVX2 */
+#define X264_CPU_FMA3            0x0004000  /* Intel FMA3 */
+#define X264_CPU_BMI1            0x0008000  /* BMI1 */
+#define X264_CPU_BMI2            0x0010000  /* BMI2 */
 /* x86 modifiers */
-#define X264_CPU_CACHELINE_32    0x0040000  /* avoid memory loads that span the border between two cachelines */
-#define X264_CPU_CACHELINE_64    0x0080000  /* 32/64 is the size of a cacheline in bytes */
-#define X264_CPU_SSE2_IS_SLOW    0x0100000  /* avoid most SSE2 functions on Athlon64 */
-#define X264_CPU_SSE2_IS_FAST    0x0200000  /* a few functions are only faster on Core2 and Phenom */
-#define X264_CPU_SLOW_SHUFFLE    0x0400000  /* The Conroe has a slow shuffle unit (relative to overall SSE performance) */
-#define X264_CPU_STACK_MOD4      0x0800000  /* if stack is only mod4 and not mod16 */
-#define X264_CPU_SLOW_CTZ        0x1000000  /* BSR/BSF x86 instructions are really slow on some CPUs */
-#define X264_CPU_SLOW_ATOM       0x2000000  /* The Atom is terrible: slow SSE unaligned loads, slow
+#define X264_CPU_CACHELINE_32    0x0020000  /* avoid memory loads that span the border between two cachelines */
+#define X264_CPU_CACHELINE_64    0x0040000  /* 32/64 is the size of a cacheline in bytes */
+#define X264_CPU_SSE2_IS_SLOW    0x0080000  /* avoid most SSE2 functions on Athlon64 */
+#define X264_CPU_SSE2_IS_FAST    0x0100000  /* a few functions are only faster on Core2 and Phenom */
+#define X264_CPU_SLOW_SHUFFLE    0x0200000  /* The Conroe has a slow shuffle unit (relative to overall SSE performance) */
+#define X264_CPU_STACK_MOD4      0x0400000  /* if stack is only mod4 and not mod16 */
+#define X264_CPU_SLOW_CTZ        0x0800000  /* BSR/BSF x86 instructions are really slow on some CPUs */
+#define X264_CPU_SLOW_ATOM       0x1000000  /* The Atom is terrible: slow SSE unaligned loads, slow
                                              * SIMD multiplies, slow SIMD variable shifts, slow pshufb,
                                              * cacheline split penalties -- gather everything here that
                                              * isn't shared by other CPUs to avoid making half a dozen
                                              * new SLOW flags. */
-#define X264_CPU_SLOW_PSHUFB     0x4000000  /* such as on the Intel Atom */
-#define X264_CPU_SLOW_PALIGNR    0x8000000  /* such as on the AMD Bobcat */
+#define X264_CPU_SLOW_PSHUFB     0x2000000  /* such as on the Intel Atom */
+#define X264_CPU_SLOW_PALIGNR    0x4000000  /* such as on the AMD Bobcat */
 
 /* PowerPC */
 #define X264_CPU_ALTIVEC         0x0000001
@@ -213,12 +215,13 @@
 #define X264_CSP_I422           0x0004  /* yuv 4:2:2 planar */
 #define X264_CSP_YV16           0x0005  /* yvu 4:2:2 planar */
 #define X264_CSP_NV16           0x0006  /* yuv 4:2:2, with one y plane and one packed u+v */
-#define X264_CSP_I444           0x0007  /* yuv 4:4:4 planar */
-#define X264_CSP_YV24           0x0008  /* yvu 4:4:4 planar */
-#define X264_CSP_BGR            0x0009  /* packed bgr 24bits   */
-#define X264_CSP_BGRA           0x000a  /* packed bgr 32bits   */
-#define X264_CSP_RGB            0x000b  /* packed rgb 24bits   */
-#define X264_CSP_MAX            0x000c  /* end of list */
+#define X264_CSP_V210           0x0007  /* 10-bit yuv 4:2:2 packed in 32 */
+#define X264_CSP_I444           0x0008  /* yuv 4:4:4 planar */
+#define X264_CSP_YV24           0x0009  /* yvu 4:4:4 planar */
+#define X264_CSP_BGR            0x000a  /* packed bgr 24bits   */
+#define X264_CSP_BGRA           0x000b  /* packed bgr 32bits   */
+#define X264_CSP_RGB            0x000c  /* packed rgb 24bits   */
+#define X264_CSP_MAX            0x000d  /* end of list */
 #define X264_CSP_VFLIP          0x1000  /* the csp is vertically flipped */
 #define X264_CSP_HIGH_DEPTH     0x2000  /* the csp has a depth of 16 bits per pixel component */
 
@@ -319,6 +322,7 @@
     int         i_bframe_pyramid;   /* Keep some B-frames as references: 0=off, 1=strict hierarchical, 2=normal */
     int         b_open_gop;
     int         b_bluray_compat;
+    int         i_avcintra_class;
 
     int         b_deblocking_filter;
     int         i_deblocking_filter_alphac0;    /* [-6, 6] -6 light filter, 6 strong */
@@ -331,7 +335,7 @@
     int         b_constrained_intra;
 
     int         i_cqm_preset;
-    char        *psz_cqm_file;      /* JM format */
+    char        *psz_cqm_file;      /* filename (in UTF-8) of CQM file, JM format */
     uint8_t     cqm_4iy[16];        /* used only if i_cqm_preset == X264_CQM_CUSTOM */
     uint8_t     cqm_4py[16];
     uint8_t     cqm_4ic[16];
@@ -345,9 +349,8 @@
     void        (*pf_log)( void *, int i_level, const char *psz, va_list );
     void        *p_log_private;
     int         i_log_level;
-    int         b_visualize;
     int         b_full_recon;   /* fully reconstruct frames, even when not necessary for encoding.  Implied by psz_dump_yuv */
-    char        *psz_dump_yuv;  /* filename for reconstructed frames */
+    char        *psz_dump_yuv;  /* filename (in UTF-8) for reconstructed frames */
 
     /* Encoder analyser parameters */
     struct
@@ -406,6 +409,10 @@
         float       f_ip_factor;
         float       f_pb_factor;
 
+        /* VBV filler: force CBR VBV and use filler bytes to ensure hard-CBR.
+         * Implied by NAL-HRD CBR. */
+        int         b_filler;
+
         int         i_aq_mode;      /* psy adaptive QP. (X264_AQ_*) */
         float       f_aq_strength;
         int         b_mb_tree;      /* Macroblock-tree ratecontrol. */
@@ -413,9 +420,9 @@
 
         /* 2pass */
         int         b_stat_write;   /* Enable stat writing in psz_stat_out */
-        char        *psz_stat_out;
+        char        *psz_stat_out;  /* output filename (in UTF-8) of the 2pass stats file */
         int         b_stat_read;    /* Read stat from psz_stat_in and use it */
-        char        *psz_stat_in;
+        char        *psz_stat_in;   /* input filename (in UTF-8) of the 2pass stats file */
 
         /* 2pass params (same as ffmpeg ones) */
         float       f_qcompress;    /* 0.0 => cbr, 1.0 => constant qp */
@@ -483,7 +490,7 @@
     int b_opencl;            /* use OpenCL when available */
     int i_opencl_device;     /* specify count of GPU devices to skip, for CLI users */
     void *opencl_device_id;  /* pass explicit cl_device_id as void*, for API users */
-    char *psz_clbin_file;    /* compiled OpenCL kernel cache file */
+    char *psz_clbin_file;    /* filename (in UTF-8) of the compiled OpenCL kernel cache file */
 
     /* Slicing parameters */
     int i_slice_max_size;    /* Max size per slice in bytes; includes estimated NAL overhead. */
@@ -877,13 +884,15 @@
 /* x264_encoder_headers:
  *      return the SPS and PPS that will be used for the whole stream.
  *      *pi_nal is the number of NAL units outputted in pp_nal.
+ *      returns the number of bytes in the returned NALs.
  *      returns negative on error.
  *      the payloads of all output NALs are guaranteed to be sequential in memory. */
 int     x264_encoder_headers( x264_t *, x264_nal_t **pp_nal, int *pi_nal );
 /* x264_encoder_encode:
  *      encode one picture.
  *      *pi_nal is the number of NAL units outputted in pp_nal.
- *      returns negative on error, zero if no NAL units returned.
+ *      returns the number of bytes in the returned NALs.
+ *      returns negative on error and zero if no NAL units returned.
  *      the payloads of all output NALs are guaranteed to be sequential in memory. */
 int     x264_encoder_encode( x264_t *, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t *pic_out );
 /* x264_encoder_close:

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * x264.h: x264 public header
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -41,7 +41,7 @@
 
 #include "x264_config.h"
 
-#define X264_BUILD 135
+#define X264_BUILD 142
 
 /* Application developers planning to link against a shared library version of
  * libx264 from a Microsoft Visual Studio or similar development environment
@@ -98,12 +98,15 @@
     int i_first_mb; /* If this NAL is a slice, the index of the first MB in the slice. */
     int i_last_mb;  /* If this NAL is a slice, the index of the last MB in the slice. */
 
-    /* Size of payload in bytes. */
+    /* Size of payload (including any padding) in bytes. */
     int     i_payload;
     /* If param->b_annexb is set, Annex-B bytestream with startcode.
      * Otherwise, startcode is replaced with a 4-byte size.
      * This size is the size used in mp4/similar muxing; it is equal to i_payload-4 */
     uint8_t *p_payload;
+
+    /* Size of padding in bytes. */
+    int i_padding;
 } x264_nal_t;
 
 /****************************************************************************
@@ -122,30 +125,29 @@
 #define X264_CPU_SSSE3           0x0000040
 #define X264_CPU_SSE4            0x0000080  /* SSE4.1 */
 #define X264_CPU_SSE42           0x0000100  /* SSE4.2 */
-#define X264_CPU_SSE_MISALIGN    0x0000200  /* Phenom support for misaligned SSE instruction arguments */
-#define X264_CPU_LZCNT           0x0000400  /* Phenom support for "leading zero count" instruction. */
-#define X264_CPU_AVX             0x0000800  /* AVX support: requires OS support even if YMM registers aren't used. */
-#define X264_CPU_XOP             0x0001000  /* AMD XOP */
-#define X264_CPU_FMA4            0x0002000  /* AMD FMA4 */
-#define X264_CPU_AVX2            0x0004000  /* AVX2 */
-#define X264_CPU_FMA3            0x0008000  /* Intel FMA3 */
-#define X264_CPU_BMI1            0x0010000  /* BMI1 */
-#define X264_CPU_BMI2            0x0020000  /* BMI2 */
+#define X264_CPU_LZCNT           0x0000200  /* Phenom support for "leading zero count" instruction. */
+#define X264_CPU_AVX             0x0000400  /* AVX support: requires OS support even if YMM registers aren't used. */
+#define X264_CPU_XOP             0x0000800  /* AMD XOP */
+#define X264_CPU_FMA4            0x0001000  /* AMD FMA4 */
+#define X264_CPU_AVX2            0x0002000  /* AVX2 */
+#define X264_CPU_FMA3            0x0004000  /* Intel FMA3 */
+#define X264_CPU_BMI1            0x0008000  /* BMI1 */
+#define X264_CPU_BMI2            0x0010000  /* BMI2 */
 /* x86 modifiers */
-#define X264_CPU_CACHELINE_32    0x0040000  /* avoid memory loads that span the border between two cachelines */
-#define X264_CPU_CACHELINE_64    0x0080000  /* 32/64 is the size of a cacheline in bytes */
-#define X264_CPU_SSE2_IS_SLOW    0x0100000  /* avoid most SSE2 functions on Athlon64 */
-#define X264_CPU_SSE2_IS_FAST    0x0200000  /* a few functions are only faster on Core2 and Phenom */
-#define X264_CPU_SLOW_SHUFFLE    0x0400000  /* The Conroe has a slow shuffle unit (relative to overall SSE performance) */
-#define X264_CPU_STACK_MOD4      0x0800000  /* if stack is only mod4 and not mod16 */
-#define X264_CPU_SLOW_CTZ        0x1000000  /* BSR/BSF x86 instructions are really slow on some CPUs */
-#define X264_CPU_SLOW_ATOM       0x2000000  /* The Atom is terrible: slow SSE unaligned loads, slow
+#define X264_CPU_CACHELINE_32    0x0020000  /* avoid memory loads that span the border between two cachelines */
+#define X264_CPU_CACHELINE_64    0x0040000  /* 32/64 is the size of a cacheline in bytes */
+#define X264_CPU_SSE2_IS_SLOW    0x0080000  /* avoid most SSE2 functions on Athlon64 */
+#define X264_CPU_SSE2_IS_FAST    0x0100000  /* a few functions are only faster on Core2 and Phenom */
+#define X264_CPU_SLOW_SHUFFLE    0x0200000  /* The Conroe has a slow shuffle unit (relative to overall SSE performance) */
+#define X264_CPU_STACK_MOD4      0x0400000  /* if stack is only mod4 and not mod16 */
+#define X264_CPU_SLOW_CTZ        0x0800000  /* BSR/BSF x86 instructions are really slow on some CPUs */
+#define X264_CPU_SLOW_ATOM       0x1000000  /* The Atom is terrible: slow SSE unaligned loads, slow
                                              * SIMD multiplies, slow SIMD variable shifts, slow pshufb,
                                              * cacheline split penalties -- gather everything here that
                                              * isn't shared by other CPUs to avoid making half a dozen
                                              * new SLOW flags. */
-#define X264_CPU_SLOW_PSHUFB     0x4000000  /* such as on the Intel Atom */
-#define X264_CPU_SLOW_PALIGNR    0x8000000  /* such as on the AMD Bobcat */
+#define X264_CPU_SLOW_PSHUFB     0x2000000  /* such as on the Intel Atom */
+#define X264_CPU_SLOW_PALIGNR    0x4000000  /* such as on the AMD Bobcat */
 
 /* PowerPC */
 #define X264_CPU_ALTIVEC         0x0000001
@@ -213,12 +215,13 @@
 #define X264_CSP_I422           0x0004  /* yuv 4:2:2 planar */
 #define X264_CSP_YV16           0x0005  /* yvu 4:2:2 planar */
 #define X264_CSP_NV16           0x0006  /* yuv 4:2:2, with one y plane and one packed u+v */
-#define X264_CSP_I444           0x0007  /* yuv 4:4:4 planar */
-#define X264_CSP_YV24           0x0008  /* yvu 4:4:4 planar */
-#define X264_CSP_BGR            0x0009  /* packed bgr 24bits   */
-#define X264_CSP_BGRA           0x000a  /* packed bgr 32bits   */
-#define X264_CSP_RGB            0x000b  /* packed rgb 24bits   */
-#define X264_CSP_MAX            0x000c  /* end of list */
+#define X264_CSP_V210           0x0007  /* 10-bit yuv 4:2:2 packed in 32 */
+#define X264_CSP_I444           0x0008  /* yuv 4:4:4 planar */
+#define X264_CSP_YV24           0x0009  /* yvu 4:4:4 planar */
+#define X264_CSP_BGR            0x000a  /* packed bgr 24bits   */
+#define X264_CSP_BGRA           0x000b  /* packed bgr 32bits   */
+#define X264_CSP_RGB            0x000c  /* packed rgb 24bits   */
+#define X264_CSP_MAX            0x000d  /* end of list */
 #define X264_CSP_VFLIP          0x1000  /* the csp is vertically flipped */
 #define X264_CSP_HIGH_DEPTH     0x2000  /* the csp has a depth of 16 bits per pixel component */
 
@@ -319,6 +322,7 @@
     int         i_bframe_pyramid;   /* Keep some B-frames as references: 0=off, 1=strict hierarchical, 2=normal */
     int         b_open_gop;
     int         b_bluray_compat;
+    int         i_avcintra_class;
 
     int         b_deblocking_filter;
     int         i_deblocking_filter_alphac0;    /* [-6, 6] -6 light filter, 6 strong */
@@ -331,7 +335,7 @@
     int         b_constrained_intra;
 
     int         i_cqm_preset;
-    char        *psz_cqm_file;      /* JM format */
+    char        *psz_cqm_file;      /* filename (in UTF-8) of CQM file, JM format */
     uint8_t     cqm_4iy[16];        /* used only if i_cqm_preset == X264_CQM_CUSTOM */
     uint8_t     cqm_4py[16];
     uint8_t     cqm_4ic[16];
@@ -345,9 +349,8 @@
     void        (*pf_log)( void *, int i_level, const char *psz, va_list );
     void        *p_log_private;
     int         i_log_level;
-    int         b_visualize;
     int         b_full_recon;   /* fully reconstruct frames, even when not necessary for encoding.  Implied by psz_dump_yuv */
-    char        *psz_dump_yuv;  /* filename for reconstructed frames */
+    char        *psz_dump_yuv;  /* filename (in UTF-8) for reconstructed frames */
 
     /* Encoder analyser parameters */
     struct
@@ -406,6 +409,10 @@
         float       f_ip_factor;
         float       f_pb_factor;
 
+        /* VBV filler: force CBR VBV and use filler bytes to ensure hard-CBR.
+         * Implied by NAL-HRD CBR. */
+        int         b_filler;
+
         int         i_aq_mode;      /* psy adaptive QP. (X264_AQ_*) */
         float       f_aq_strength;
         int         b_mb_tree;      /* Macroblock-tree ratecontrol. */
@@ -413,9 +420,9 @@
 
         /* 2pass */
         int         b_stat_write;   /* Enable stat writing in psz_stat_out */
-        char        *psz_stat_out;
+        char        *psz_stat_out;  /* output filename (in UTF-8) of the 2pass stats file */
         int         b_stat_read;    /* Read stat from psz_stat_in and use it */
-        char        *psz_stat_in;
+        char        *psz_stat_in;   /* input filename (in UTF-8) of the 2pass stats file */
 
         /* 2pass params (same as ffmpeg ones) */
         float       f_qcompress;    /* 0.0 => cbr, 1.0 => constant qp */
@@ -483,7 +490,7 @@
     int b_opencl;            /* use OpenCL when available */
     int i_opencl_device;     /* specify count of GPU devices to skip, for CLI users */
     void *opencl_device_id;  /* pass explicit cl_device_id as void*, for API users */
-    char *psz_clbin_file;    /* compiled OpenCL kernel cache file */
+    char *psz_clbin_file;    /* filename (in UTF-8) of the compiled OpenCL kernel cache file */
 
     /* Slicing parameters */
     int i_slice_max_size;    /* Max size per slice in bytes; includes estimated NAL overhead. */
@@ -877,13 +884,15 @@
 /* x264_encoder_headers:
  *      return the SPS and PPS that will be used for the whole stream.
  *      *pi_nal is the number of NAL units outputted in pp_nal.
+ *      returns the number of bytes in the returned NALs.
  *      returns negative on error.
  *      the payloads of all output NALs are guaranteed to be sequential in memory. */
 int     x264_encoder_headers( x264_t *, x264_nal_t **pp_nal, int *pi_nal );
 /* x264_encoder_encode:
  *      encode one picture.
  *      *pi_nal is the number of NAL units outputted in pp_nal.
- *      returns negative on error, zero if no NAL units returned.
+ *      returns the number of bytes in the returned NALs.
+ *      returns negative on error and zero if no NAL units returned.
  *      the payloads of all output NALs are guaranteed to be sequential in memory. */
 int     x264_encoder_encode( x264_t *, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t *pic_out );
 /* x264_encoder_close:
​

x264-snapshot-20130723-2245.tar.bz2/x264cli.h -> x264-snapshot-20140321-2245.tar.bz2/x264cli.h Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * x264cli.h: x264cli common
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2014 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Loren Merritt <lorenm@u.washington.edu>
@@ -63,6 +63,13 @@
 void x264_cli_log( const char *name, int i_level, const char *fmt, ... );
 void x264_cli_printf( int i_level, const char *fmt, ... );
 
+#ifdef _WIN32
+void x264_cli_set_console_title( const char *title );
+int x264_ansi_filename( const char *filename, char *ansi_filename, int size, int create_file );
+#else
+#define x264_cli_set_console_title( title )
+#endif
+
 #define RETURN_IF_ERR( cond, name, ret, ... )\
 if( cond )\
 {\
​

x264-snapshot-20130723-2245.tar.bz2/x264dll.c -> x264-snapshot-20140321-2245.tar.bz2/x264dll.c Changed

 
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * x264dll: x264 DLLMain for win32
  *****************************************************************************
- * Copyright (C) 2009-2013 x264 project
+ * Copyright (C) 2009-2014 x264 project
  *
  * Authors: Anton Mitrofanov <BugMaster@narod.ru>
  *
@@ -27,7 +27,7 @@
 #include <windows.h>
 
 /* Callback for our DLL so we can initialize pthread */
-BOOL WINAPI DllMain( HANDLE hinstDLL, DWORD fdwReason, LPVOID lpvReserved )
+BOOL WINAPI DllMain( HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved )
 {
 #if PTW32_STATIC_LIB
     switch( fdwReason )
​

x264-snapshot-20130723-2245.tar.bz2/x264res.rc -> x264-snapshot-20140321-2245.tar.bz2/x264res.rc Changed

@@ -1,9 +1,9 @@
 /*****************************************************************************
  * x264res.rc: windows resource file
  *****************************************************************************
- * Copyright (C) 2012-2013 x264 project
+ * Copyright (C) 2012-2014 x264 project
  *
- * Authors: Henrik Gramner <hengar-6@student.ltu.se>
+ * Authors: Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -50,7 +50,7 @@
 BEGIN
     BLOCK "StringFileInfo"
     BEGIN
-        BLOCK "040904E4"
+        BLOCK "040904B0"
         BEGIN
             VALUE "CompanyName",      "x264 project"
 #ifdef DLL
@@ -60,7 +60,7 @@
 #endif
             VALUE "FileVersion",      X264_POINTVER
             VALUE "InternalName",     "x264"
-            VALUE "LegalCopyright",   "Copyright (C) 2003-2013 x264 project"
+            VALUE "LegalCopyright",   "Copyright (C) 2003-2014 x264 project"
 #ifdef DLL
             VALUE "OriginalFilename", "libx264-" xstr(X264_BUILD) ".dll"
 #else
@@ -73,6 +73,6 @@
 
     BLOCK "VarFileInfo"
     BEGIN
-        VALUE "Translation", 0x0409, 0x04E4
+        VALUE "Translation", 0x0409, 0x04B0 /* U.S. English (Unicode) */
     END
 END

 
@@ -1,9 +1,9 @@
 /*****************************************************************************
  * x264res.rc: windows resource file
  *****************************************************************************
- * Copyright (C) 2012-2013 x264 project
+ * Copyright (C) 2012-2014 x264 project
  *
- * Authors: Henrik Gramner <hengar-6@student.ltu.se>
+ * Authors: Henrik Gramner <henrik@gramner.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -50,7 +50,7 @@
 BEGIN
     BLOCK "StringFileInfo"
     BEGIN
-        BLOCK "040904E4"
+        BLOCK "040904B0"
         BEGIN
             VALUE "CompanyName",      "x264 project"
 #ifdef DLL
@@ -60,7 +60,7 @@
 #endif
             VALUE "FileVersion",      X264_POINTVER
             VALUE "InternalName",     "x264"
-            VALUE "LegalCopyright",   "Copyright (C) 2003-2013 x264 project"
+            VALUE "LegalCopyright",   "Copyright (C) 2003-2014 x264 project"
 #ifdef DLL
             VALUE "OriginalFilename", "libx264-" xstr(X264_BUILD) ".dll"
 #else
@@ -73,6 +73,6 @@
 
     BLOCK "VarFileInfo"
     BEGIN
-        VALUE "Translation", 0x0409, 0x04E4
+        VALUE "Translation", 0x0409, 0x04B0 /* U.S. English (Unicode) */
     END
 END
​