Projects
Essentials
libx264
Sign Up
Log In
Username
Password
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
Expand all
Collapse all
Changes of Revision 6
View file
libx264.changes
Changed
@@ -1,4 +1,9 @@ ------------------------------------------------------------------- +Sat Mar 22 17:10:14 UTC 2014 - i@margueirte.su + +- update version 20140321. + +------------------------------------------------------------------- Tue Nov 19 07:53:08 UTC 2013 - obs@botter.cc - add -fno-aggressive-loop-optimizations to extra-cflags in
View file
libx264.spec
Changed
@@ -14,8 +14,8 @@ # Please submit bugfixes or comments via http://bugs.links2linux.org/ Name: libx264 -%define soname 135 -%define svn 20130723 +%define soname 142 +%define svn 20140321 Version: 0.%{soname}svn%{svn} Release: 1 License: GPL-2.0+
View file
x264-snapshot-20130723-2245.tar.bz2/common/display-x11.c
Deleted
@@ -1,218 +0,0 @@ -/***************************************************************************** - * display-x11.c: x11 interface - ***************************************************************************** - * Copyright (C) 2005-2013 x264 project - * - * Authors: Tuukka Toivonen <tuukkat@ee.oulu.fi> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. - * - * This program is also available under a commercial proprietary license. - * For more information, contact us at licensing@x264.com. - *****************************************************************************/ - -#include <X11/Xlib.h> -#include <X11/Xutil.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include "common.h" -#include "display.h" - -static long event_mask = ConfigureNotify|ExposureMask|KeyPressMask|ButtonPressMask|StructureNotifyMask|ResizeRedirectMask; - -static Display *disp_display = NULL; -static struct disp_window -{ - int init; - Window window; -} disp_window[10]; - -static inline void disp_chkerror( int cond, char *e ) -{ - if( !cond ) - return; - fprintf( stderr, "error: %s\n", e ? e : "?" 
); - abort(); -} - -static void disp_init_display( void ) -{ - Visual *visual; - int dpy_class; - int screen; - int dpy_depth; - - if( disp_display ) - return; - memset( &disp_window, 0, sizeof(disp_window) ); - disp_display = XOpenDisplay( "" ); - disp_chkerror( !disp_display, "no display" ); - screen = DefaultScreen( disp_display ); - visual = DefaultVisual( disp_display, screen ); - dpy_class = visual->class; - dpy_depth = DefaultDepth( disp_display, screen ); - disp_chkerror( !((dpy_class == TrueColor && dpy_depth == 32) - || (dpy_class == TrueColor && dpy_depth == 24) - || (dpy_class == TrueColor && dpy_depth == 16) - || (dpy_class == PseudoColor && dpy_depth == 8)), - "requires 8 bit PseudoColor or 16/24/32 bit TrueColor display" ); -} - -static void disp_init_window( int num, int width, int height, const unsigned char *title ) -{ - XSetWindowAttributes xswa; - XEvent xev; - int screen = DefaultScreen(disp_display); - Visual *visual = DefaultVisual (disp_display, screen); - char buf[200]; - Window window; - - if( title ) - snprintf( buf, 200, "%s: %i/disp", title, num ); - else - snprintf( buf, 200, "%i/disp", num ); - - XSizeHints *shint = XAllocSizeHints(); - disp_chkerror( !shint, "memerror" ); - shint->min_width = shint->max_width = shint->width = width; - shint->min_height = shint->max_height = shint->height = height; - shint->flags = PSize | PMinSize | PMaxSize; - disp_chkerror( num < 0 || num >= 10, "bad win num" ); - if( !disp_window[num].init ) - { - unsigned int mask = 0; - disp_window[num].init = 1; - unsigned int bg = WhitePixel( disp_display, screen ); - unsigned int fg = BlackPixel( disp_display, screen ); - int dpy_depth = DefaultDepth( disp_display, screen ); - if( dpy_depth==32 || dpy_depth==24 || dpy_depth==16 ) - { - mask |= CWColormap; - xswa.colormap = XCreateColormap( disp_display, DefaultRootWindow( disp_display ), visual, AllocNone ); - } - xswa.background_pixel = bg; - xswa.border_pixel = fg; - xswa.backing_store = Always; - 
xswa.backing_planes = -1; - xswa.bit_gravity = NorthWestGravity; - mask = CWBackPixel | CWBorderPixel | CWBackingStore | CWBackingPlanes | CWBitGravity; - window = XCreateWindow( disp_display, DefaultRootWindow( disp_display ), - shint->x, shint->y, shint->width, shint->height, - 1, dpy_depth, InputOutput, visual, mask, &xswa ); - disp_window[num].window = window; - - XSelectInput( disp_display, window, event_mask ); - XSetStandardProperties( disp_display, window, buf, buf, None, NULL, 0, shint ); - XMapWindow( disp_display, window ); - - do { - XNextEvent( disp_display, &xev ); - } while( xev.type != MapNotify || xev.xmap.event != window ); - } - window = disp_window[num].window; - XSetStandardProperties( disp_display, window, buf, buf, None, NULL, 0, shint ); - XResizeWindow( disp_display, window, width, height ); - XSync( disp_display, 1 ); - XFree( shint ); -} - -void disp_sync( void ) -{ - XSync( disp_display, 1 ); -} - -void disp_setcolor( unsigned char *name ) -{ - XColor c_exact, c_nearest; - - int screen = DefaultScreen( disp_display ); - GC gc = DefaultGC( disp_display, screen ); - Colormap cm = DefaultColormap( disp_display, screen ); - Status st = XAllocNamedColor( disp_display, cm, name, &c_nearest, &c_exact ); - disp_chkerror( st != 1, "XAllocNamedColor error" ); - XSetForeground( disp_display, gc, c_nearest.pixel ); -} - -void disp_gray( int num, char *data, int width, int height, int stride, const unsigned char *title ) -{ - char dummy; - - disp_init_display(); - disp_init_window( num, width, height, title ); - int screen = DefaultScreen( disp_display ); - Visual *visual = DefaultVisual( disp_display, screen ); - int dpy_depth = DefaultDepth( disp_display, screen ); - XImage *ximage = XCreateImage( disp_display, visual, dpy_depth, ZPixmap, 0, &dummy, width, height, 8, 0 ); - disp_chkerror( !ximage, "no ximage" ); -#if WORDS_BIGENDIAN - ximage->byte_order = MSBFirst; - ximage->bitmap_bit_order = MSBFirst; -#else - ximage->byte_order = LSBFirst; - 
ximage->bitmap_bit_order = LSBFirst; -#endif - - int pixelsize = dpy_depth>8 ? sizeof(int) : sizeof(unsigned char); - uint8_t *image = malloc( width * height * pixelsize ); - disp_chkerror( !image, "malloc failed" ); - for( int y = 0; y < height; y++ ) - for( int x = 0; x < width; x++ ) - memset( &image[(width*y + x)*pixelsize], data[y*stride+x], pixelsize ); - ximage->data = image; - GC gc = DefaultGC( disp_display, screen ); - - XPutImage( disp_display, disp_window[num].window, gc, ximage, 0, 0, 0, 0, width, height ); - XPutImage( disp_display, disp_window[num].window, gc, ximage, 0, 0, 0, 0, width, height ); - - XDestroyImage( ximage ); - XSync( disp_display, 1 ); - -} - -void disp_gray_zoom(int num, char *data, int width, int height, int stride, const unsigned char *title, int zoom) -{ - unsigned char *dataz = malloc( width*zoom * height*zoom ); - disp_chkerror( !dataz, "malloc" ); - for( int y = 0; y < height; y++ ) - for( int x = 0; x < width; x++ ) - for( int y0 = 0; y0 < zoom; y0++ ) - for( int x0 = 0; x0 < zoom; x0++ ) - dataz[(y*zoom + y0)*width*zoom + x*zoom + x0] = data[y*stride+x]; - disp_gray( num, dataz, width*zoom, height*zoom, width*zoom, title ); - free( dataz ); -} - -void disp_point( int num, int x1, int y1 ) -{ - int screen = DefaultScreen( disp_display ); - GC gc = DefaultGC( disp_display, screen ); - XDrawPoint( disp_display, disp_window[num].window, gc, x1, y1 ); -} - -void disp_line( int num, int x1, int y1, int x2, int y2 ) -{ - int screen = DefaultScreen( disp_display ); - GC gc = DefaultGC( disp_display, screen ); - XDrawLine( disp_display, disp_window[num].window, gc, x1, y1, x2, y2 ); -} - -void disp_rect( int num, int x1, int y1, int x2, int y2 ) -{ - int screen = DefaultScreen( disp_display ); - GC gc = DefaultGC( disp_display, screen ); - XDrawRectangle( disp_display, disp_window[num].window, gc, x1, y1, x2-x1, y2-y1 ); -}
View file
x264-snapshot-20130723-2245.tar.bz2/common/display.h
Deleted
@@ -1,41 +0,0 @@ -/***************************************************************************** - * display.h: x11 visualization interface - ***************************************************************************** - * Copyright (C) 2005-2013 x264 project - * - * Authors: Tuukka Toivonen <tuukkat@ee.oulu.fi> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. - * - * This program is also available under a commercial proprietary license. - * For more information, contact us at licensing@x264.com. - *****************************************************************************/ - -#ifndef X264_DISPLAY_H -#define X264_DISPLAY_H - -void disp_sync(void); -void disp_setcolor(unsigned char *name); -/* Display a region of byte wide memory as a grayscale image. - * num is the window to use for displaying. */ -void disp_gray(int num, char *data, int width, int height, - int stride, const unsigned char *title); -void disp_gray_zoom(int num, char *data, int width, int height, - int stride, const unsigned char *title, int zoom); -void disp_point(int num, int x1, int y1); -void disp_line(int num, int x1, int y1, int x2, int y2); -void disp_rect(int num, int x1, int y1, int x2, int y2); - -#endif
View file
x264-snapshot-20130723-2245.tar.bz2/common/visualize.c
Deleted
@@ -1,341 +0,0 @@ -/***************************************************************************** - * visualize.c: visualization - ***************************************************************************** - * Copyright (C) 2005-2013 x264 project - * - * Authors: Tuukka Toivonen <tuukkat@ee.oulu.fi> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. - * - * This program is also available under a commercial proprietary license. - * For more information, contact us at licensing@x264.com. - *****************************************************************************/ - -/* - * Some explanation of the symbols used: - * Red/pink: intra block - * Blue: inter block - * Green: skip block - * Yellow: B-block (not visualized properly yet) - * - * Motion vectors have black dot at their target (ie. at the MB center), - * instead of arrowhead. The black dot is enclosed in filled diamond with radius - * depending on reference frame number (one frame back = zero width, normal case). - * - * The intra blocks have generally lines drawn perpendicular - * to the prediction direction, so for example, if there is a pink block - * with horizontal line at the top of it, it is interpolated by assuming - * luma to be vertically constant. 
- * DC predicted blocks have both horizontal and vertical lines, - * pink blocks with a diagonal line are predicted using the planar function. - */ - -#include "common.h" -#include "visualize.h" -#include "display.h" - -typedef struct -{ - int i_type; - int i_partition; - int i_sub_partition[4]; - int i_intra16x16_pred_mode; - int intra4x4_pred_mode[4][4]; - int8_t ref[2][4][4]; /* [list][y][x] */ - int16_t mv[2][4][4][2]; /* [list][y][x][mvxy] */ -} visualize_t; - -/* Return string from stringlist corresponding to the given code */ -#define GET_STRING(sl, code) get_string((sl), sizeof(sl)/sizeof(*(sl)), code) - -typedef struct -{ - int code; - char *string; -} stringlist_t; - -static char *get_string( const stringlist_t *sl, int entries, int code ) -{ - for( int i = 0; i < entries; i++ ) - if( sl[i].code == code ) - return sl[i].string; - return "?"; -} - -/* Plot motion vector */ -static void mv( int x0, int y0, int16_t dmv[2], int ref, int zoom, char *col ) -{ - int dx = dmv[0]; - int dy = dmv[1]; - - dx = (dx * zoom + 2) >> 2; - dy = (dy * zoom + 2) >> 2; - disp_line( 0, x0, y0, x0+dx, y0+dy ); - for( int i = 1; i < ref; i++ ) - { - disp_line( 0, x0 , y0-i, x0+i, y0 ); - disp_line( 0, x0+i, y0 , x0 , y0+i ); - disp_line( 0, x0 , y0+i, x0-i, y0 ); - disp_line( 0, x0-i, y0 , x0 , y0-i ); - } - disp_setcolor( "black" ); - disp_point( 0, x0, y0 ); - disp_setcolor( col ); -} - -int x264_visualize_init( x264_t *h ) -{ - CHECKED_MALLOC( h->visualize, h->mb.i_mb_width * h->mb.i_mb_height * sizeof(visualize_t) ); - return 0; -fail: - return -1; -} - -void x264_visualize_mb( x264_t *h ) -{ - visualize_t *v = (visualize_t*)h->visualize + h->mb.i_mb_xy; - - /* Save all data for the MB that we need for drawing the visualization */ - v->i_type = h->mb.i_type; - v->i_partition = h->mb.i_partition; - for( int i = 0; i < 4; i++ ) - v->i_sub_partition[i] = h->mb.i_sub_partition[i]; - for( int y = 0; y < 4; y++ ) - for( int x = 0; x < 4; x++ ) - v->intra4x4_pred_mode[y][x] = 
h->mb.cache.intra4x4_pred_mode[X264_SCAN8_0+y*8+x]; - for( int l = 0; l < 2; l++ ) - for( int y = 0; y < 4; y++ ) - for( int x = 0; x < 4; x++ ) - { - for( int i = 0; i < 2; i++ ) - v->mv[l][y][x][i] = h->mb.cache.mv[l][X264_SCAN8_0+y*8+x][i]; - v->ref[l][y][x] = h->mb.cache.ref[l][X264_SCAN8_0+y*8+x]; - } - v->i_intra16x16_pred_mode = h->mb.i_intra16x16_pred_mode; -} - -void x264_visualize_close( x264_t *h ) -{ - x264_free(h->visualize); -} - -/* Display visualization (block types, MVs) of the encoded frame */ -/* FIXME: B-type MBs not handled yet properly */ -void x264_visualize_show( x264_t *h ) -{ - static const stringlist_t mb_types[] = - { - /* Block types marked as NULL will not be drawn */ - { I_4x4 , "red" }, - { I_8x8 , "#ff5640" }, - { I_16x16 , "#ff8060" }, - { I_PCM , "violet" }, - { P_L0 , "SlateBlue" }, - { P_8x8 , "blue" }, - { P_SKIP , "green" }, - { B_DIRECT, "yellow" }, - { B_L0_L0 , "yellow" }, - { B_L0_L1 , "yellow" }, - { B_L0_BI , "yellow" }, - { B_L1_L0 , "yellow" }, - { B_L1_L1 , "yellow" }, - { B_L1_BI , "yellow" }, - { B_BI_L0 , "yellow" }, - { B_BI_L1 , "yellow" }, - { B_BI_BI , "yellow" }, - { B_8x8 , "yellow" }, - { B_SKIP , "yellow" }, - }; - - static const int waitkey = 1; /* Wait for enter after each frame */ - static const int drawbox = 1; /* Draw box around each block */ - static const int borders = 0; /* Display extrapolated borders outside frame */ - static const int zoom = 2; /* Zoom factor */ - - static const int pad = 32; - pixel *const frame = h->fdec->plane[0]; - const int width = h->param.i_width; - const int height = h->param.i_height; - const int stride = h->fdec->i_stride[0]; - - if( borders ) - disp_gray_zoom( 0, frame - pad*stride - pad, width+2*pad, height+2*pad, stride, "fdec", zoom ); - else - disp_gray_zoom( 0, frame, width, height, stride, "fdec", zoom ); - - for( int mb_xy = 0; mb_xy < h->mb.i_mb_width * h->mb.i_mb_height; mb_xy++ ) - { - visualize_t *const v = (visualize_t*)h->visualize + mb_xy; - const int 
mb_y = mb_xy / h->mb.i_mb_width; - const int mb_x = mb_xy % h->mb.i_mb_width; - char *const col = GET_STRING( mb_types, v->i_type ); - int x = mb_x*16*zoom; - int y = mb_y*16*zoom; - int l = 0; - - if( !col ) - continue; - - if( borders ) - { - x += pad*zoom; - y += pad*zoom; - } - - disp_setcolor( col ); - if( drawbox ) disp_rect( 0, x, y, x+16*zoom-1, y+16*zoom-1 ); - - if( v->i_type==P_L0 || v->i_type==P_8x8 || v->i_type==P_SKIP ) - { - /* Predicted (inter) mode, with motion vector */ - if( v->i_partition == D_16x16 || v->i_type == P_SKIP ) - mv( x+8*zoom, y+8*zoom, v->mv[l][0][0], v->ref[l][0][0], zoom, col ); - else if (v->i_partition == D_16x8) - { - if( drawbox ) disp_rect( 0, x, y, x+16*zoom, y+8*zoom ); - mv( x+8*zoom, y+4*zoom, v->mv[l][0][0], v->ref[l][0][0], zoom, col ); - if( drawbox ) disp_rect( 0, x, y+8*zoom, x+16*zoom, y+16*zoom ); - mv( x+8*zoom, y+12*zoom, v->mv[l][2][0], v->ref[l][2][0], zoom, col ); - } - else if( v->i_partition==D_8x16 ) - { - if( drawbox ) disp_rect( 0, x, y, x+8*zoom, y+16*zoom ); - mv( x+4*zoom, y+8*zoom, v->mv[l][0][0], v->ref[l][0][0], zoom, col ); - if( drawbox ) disp_rect( 0, x+8*zoom, y, x+16*zoom, y+16*zoom ); - mv( x+12*zoom, y+8*zoom, v->mv[l][0][2], v->ref[l][0][2], zoom, col ); - } - else if( v->i_partition==D_8x8 ) - { - for( int i = 0; i < 2; i++ ) - for( int j = 0; j < 2; j++ ) - { - int sp = v->i_sub_partition[i*2+j]; - const int x0 = x + j*8*zoom; - const int y0 = y + i*8*zoom; - l = x264_mb_partition_listX_table[0][sp] ? 
0 : 1; /* FIXME: not tested if this works */ - if( IS_SUB8x8(sp) ) - { - if( drawbox ) disp_rect( 0, x0, y0, x0+8*zoom, y0+8*zoom ); - mv( x0+4*zoom, y0+4*zoom, v->mv[l][2*i][2*j], v->ref[l][2*i][2*j], zoom, col ); - } - else if( IS_SUB8x4(sp) ) - { - if( drawbox ) disp_rect( 0, x0, y0, x0+8*zoom, y0+4*zoom ); - if( drawbox ) disp_rect( 0, x0, y0+4*zoom, x0+8*zoom, y0+8*zoom ); - mv( x0+4*zoom, y0+2*zoom, v->mv[l][2*i][2*j], v->ref[l][2*i][2*j], zoom, col ); - mv( x0+4*zoom, y0+6*zoom, v->mv[l][2*i+1][2*j], v->ref[l][2*i+1][2*j], zoom, col ); - } - else if( IS_SUB4x8(sp) ) - { - if( drawbox ) disp_rect( 0, x0, y0, x0+4*zoom, y0+8*zoom ); - if( drawbox ) disp_rect( 0, x0+4*zoom, y0, x0+8*zoom, y0+8*zoom ); - mv( x0+2*zoom, y0+4*zoom, v->mv[l][2*i][2*j], v->ref[l][2*i][2*j], zoom, col ); - mv( x0+6*zoom, y0+4*zoom, v->mv[l][2*i][2*j+1], v->ref[l][2*i][2*j+1], zoom, col ); - } - else if( IS_SUB4x4(sp) ) - { - if( drawbox ) disp_rect( 0, x0, y0, x0+4*zoom, y0+4*zoom ); - if( drawbox ) disp_rect( 0, x0+4*zoom, y0, x0+8*zoom, y0+4*zoom ); - if( drawbox ) disp_rect( 0, x0, y0+4*zoom, x0+4*zoom, y0+8*zoom ); - if( drawbox ) disp_rect( 0, x0+4*zoom, y0+4*zoom, x0+8*zoom, y0+8*zoom ); - mv( x0+2*zoom, y0+2*zoom, v->mv[l][2*i][2*j], v->ref[l][2*i][2*j], zoom, col ); - mv( x0+6*zoom, y0+2*zoom, v->mv[l][2*i][2*j+1], v->ref[l][2*i][2*j+1], zoom, col ); - mv( x0+2*zoom, y0+6*zoom, v->mv[l][2*i+1][2*j], v->ref[l][2*i+1][2*j], zoom, col ); - mv( x0+6*zoom, y0+6*zoom, v->mv[l][2*i+1][2*j+1], v->ref[l][2*i+1][2*j+1], zoom, col ); - } - } - } - } - - if( IS_INTRA(v->i_type) || v->i_type == I_PCM ) - { - /* Intra coded */ - if( v->i_type == I_16x16 ) - { - switch (v->i_intra16x16_pred_mode) { - case I_PRED_16x16_V: - disp_line( 0, x+2*zoom, y+2*zoom, x+14*zoom, y+2*zoom ); - break; - case I_PRED_16x16_H: - disp_line( 0, x+2*zoom, y+2*zoom, x+2*zoom, y+14*zoom ); - break; - case I_PRED_16x16_DC: - case I_PRED_16x16_DC_LEFT: - case I_PRED_16x16_DC_TOP: - case I_PRED_16x16_DC_128: - 
disp_line( 0, x+2*zoom, y+2*zoom, x+14*zoom, y+2*zoom ); - disp_line( 0, x+2*zoom, y+2*zoom, x+2*zoom, y+14*zoom ); - break; - case I_PRED_16x16_P: - disp_line( 0, x+2*zoom, y+2*zoom, x+8*zoom, y+8*zoom ); - break; - } - } - if( v->i_type==I_4x4 || v->i_type==I_8x8 ) - { - const int di = v->i_type == I_8x8 ? 2 : 1; - const int zoom2 = zoom * di; - for( int i = 0; i < 4; i += di ) - for( int j = 0; j < 4; j += di ) - { - const int x0 = x + j*4*zoom; - const int y0 = y + i*4*zoom; - if( drawbox ) disp_rect( 0, x0, y0, x0+4*zoom2, y0+4*zoom2 ); - switch( v->intra4x4_pred_mode[i][j] ) - { - case I_PRED_4x4_V: /* Vertical */ - disp_line( 0, x0+0*zoom2, y0+1*zoom2, x0+4*zoom2, y0+1*zoom2 ); - break; - case I_PRED_4x4_H: /* Horizontal */ - disp_line( 0, x0+1*zoom2, y0+0*zoom2, x0+1*zoom2, y0+4*zoom2 ); - break; - case I_PRED_4x4_DC: /* DC, average from top and left sides */ - case I_PRED_4x4_DC_LEFT: - case I_PRED_4x4_DC_TOP: - case I_PRED_4x4_DC_128: - disp_line( 0, x0+1*zoom2, y0+1*zoom2, x0+4*zoom2, y0+1*zoom2 ); - disp_line( 0, x0+1*zoom2, y0+1*zoom2, x0+1*zoom2, y0+4*zoom2 ); - break; - case I_PRED_4x4_DDL: /* Topright-bottomleft */ - disp_line( 0, x0+0*zoom2, y0+0*zoom2, x0+4*zoom2, y0+4*zoom2 ); - break; - case I_PRED_4x4_DDR: /* Topleft-bottomright */ - disp_line( 0, x0+0*zoom2, y0+4*zoom2, x0+4*zoom2, y0+0*zoom2 ); - break; - case I_PRED_4x4_VR: /* Mix of topleft-bottomright and vertical */ - disp_line( 0, x0+0*zoom2, y0+2*zoom2, x0+4*zoom2, y0+1*zoom2 ); - break; - case I_PRED_4x4_HD: /* Mix of topleft-bottomright and horizontal */ - disp_line( 0, x0+2*zoom2, y0+0*zoom2, x0+1*zoom2, y0+4*zoom2 ); - break; - case I_PRED_4x4_VL: /* Mix of topright-bottomleft and vertical */ - disp_line( 0, x0+0*zoom2, y0+1*zoom2, x0+4*zoom2, y0+2*zoom2 ); - break; - case I_PRED_4x4_HU: /* Mix of topright-bottomleft and horizontal */ - disp_line( 0, x0+1*zoom2, y0+0*zoom2, x0+2*zoom2, y0+4*zoom2 ); - break; - } - } - } - } - } - - disp_sync(); - if( waitkey ) - getchar(); -} -/* 
}}} */ - -//EOF
View file
x264-snapshot-20130723-2245.tar.bz2/common/visualize.h
Deleted
@@ -1,36 +0,0 @@ -/***************************************************************************** - * visualize.h: visualization - ***************************************************************************** - * Copyright (C) 2005-2013 x264 project - * - * Authors: Tuukka Toivonen <tuukkat@ee.oulu.fi> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. - * - * This program is also available under a commercial proprietary license. - * For more information, contact us at licensing@x264.com. - *****************************************************************************/ - -#ifndef X264_VISUALIZE_H -#define X264_VISUALIZE_H - -#include "common/common.h" - -int x264_visualize_init( x264_t *h ); -void x264_visualize_mb( x264_t *h ); -void x264_visualize_show( x264_t *h ); -void x264_visualize_close( x264_t *h ); - -#endif
View file
x264-snapshot-20130723-2245.tar.bz2/tools/xyuv.c
Deleted
@@ -1,792 +0,0 @@ -/***************************************************************************** - * xyuv.c: a SDL yuv 420 planer viewer. - ***************************************************************************** - * Copyright (C) 2004 Laurent Aimar <fenrir@via.ecp.fr> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. 
- *****************************************************************************/ - -#include <stdlib.h> -#include <stdio.h> -#include <string.h> -#include <stdint.h> - -#include <SDL/SDL.h> - -#define YUV_MAX 20 -#define SDL_TITLE "xyuv: %s - %d/%d - %.2ffps" -typedef struct -{ - /* globals */ - int i_width; - int i_height; - int i_frame_size; - int i_frame; - int i_frames; - float f_fps; - - float f_y; - - int b_pause; - int b_grid; - int b_split; - int b_diff; - int i_join; - - /* Constructed picture */ - int i_wall_width; /* in picture count */ - - /* YUV files */ - int i_yuv; - struct - { - char *name; - FILE *f; /* handles */ - int i_frames; /* frames count */ - - /* Position in the whole picture */ - int x, y; - } yuv[YUV_MAX]; - - /* SDL */ - int i_sdl_width; - int i_sdl_height; - - int i_display_width; - int i_display_height; - char *title; - - SDL_Surface *screen; - SDL_Overlay *overlay; - - /* */ - uint8_t *pic; - -} xyuv_t; - -xyuv_t xyuv = { - .i_width = 0, - .i_height = 0, - .i_frame = 1, - .i_frames = 0, - .f_fps = 25.0, - .f_y = 0.0, - .i_wall_width = 0, - - .i_yuv = 0, - - .b_pause = 0, - .b_split = 0, - .b_diff = 0, - .i_join = -1, - - .title = NULL, - .pic = NULL, -}; - -static void help( void ) -{ - fprintf( stderr, - "Syntax: xyuv [options] file [file2 ...]\n" - "\n" - " --help Print this help\n" - "\n" - " -s, --size <WIDTHxHEIGHT> Set input size\n" - " -w, --width <integer> Set width\n" - " -h, --height <integer> Set height\n" - "\n" - " -S, --split Show splited Y/U/V planes\n" - " -d, --diff Show difference (only 2 files) in split mode\n" - " -j, --joint <integer>\n" - "\n" - " -y <float> Set Y factor\n" - "\n" - " -g, --grid Show a grid (macroblock 16x16)\n" - " -W <integer> Set wall width (in picture count)\n" - " -f, --fps <float> Set fps\n" - "\n" ); -} - -static void xyuv_count_frames( xyuv_t *xyuv ); -static void xyuv_detect( int *pi_width, int *pi_height ); -static void xyuv_display( xyuv_t *xyuv, int i_frame ); - -int main( int argc, 
char **argv ) -{ - int i; - - /* Parse commande line */ - for( i = 1; i < argc; i++ ) { - if( !strcasecmp( argv[i], "--help" ) ) { - help(); - return 0; - } - if( !strcmp( argv[i], "-d" ) || !strcasecmp( argv[i], "--diff" ) ) { - xyuv.b_diff = 1; - } else if( !strcmp( argv[i], "-S" ) || !strcasecmp( argv[i], "--split" ) ) { - xyuv.b_split = 1; - } else if( !strcmp( argv[i], "-f" ) || !strcasecmp( argv[i], "--fps" ) ) { - if( i >= argc -1 ) goto err_missing_arg; - xyuv.f_fps = atof( argv[++i] ); - } else if( !strcmp( argv[i], "-h" ) || !strcasecmp( argv[i], "--height" ) ) { - if( i >= argc -1 ) goto err_missing_arg; - xyuv.i_height = atoi( argv[++i] ); - } else if( !strcmp( argv[i], "-w" ) || !strcasecmp( argv[i], "--width" ) ) { - if( i >= argc -1 ) goto err_missing_arg; - xyuv.i_width = atoi( argv[++i] ); - } else if( !strcmp( argv[i], "-s" ) || !strcasecmp( argv[i], "--size" ) ) { - char *p; - - if( i >= argc -1 ) goto err_missing_arg; - - xyuv.i_width = strtol( argv[++i], &p, 0 ); - p++; - xyuv.i_height = atoi( p ); - } else if( !strcmp( argv[i], "-W" ) ) { - if( i >= argc -1 ) goto err_missing_arg; - xyuv.i_wall_width = atoi( argv[++i] ); - } else if( !strcmp( argv[i], "-y" ) ) { - if( i >= argc -1 ) goto err_missing_arg; - xyuv.f_y = atof( argv[++i] ); - } else if( !strcmp( argv[i], "-j" ) || !strcasecmp( argv[i], "--join" ) ) { - if( i >= argc -1 ) goto err_missing_arg; - xyuv.i_join = atoi( argv[++i] ); - } else if( !strcmp( argv[i], "-g" ) || !strcasecmp( argv[i], "--grid" ) ) { - xyuv.b_grid = 1; - } else { - FILE *f = fopen( argv[i], "rb" ); - if( !f ) { - fprintf( stderr, "cannot open YUV %s\n", argv[i] ); - } else { - xyuv.yuv[xyuv.i_yuv].name = strdup( argv[i] ); - xyuv.yuv[xyuv.i_yuv].f = f; - xyuv.yuv[xyuv.i_yuv].i_frames = 0; - - xyuv.i_yuv++; - } - } - } - - if( xyuv.i_yuv == 0 ) { - fprintf( stderr, "no file to display\n" ); - return -1; - } - if( xyuv.i_width == 0 || xyuv.i_height == 0 ) { - char *psz = xyuv.yuv[0].name; - char *num; - char *x; - 
/* See if we find widthxheight in the file name */ - for( ;; ) { - if( !( x = strchr( psz+1, 'x' ) ) ) { - break; - } - num = x; - while( num > psz && num[-1] >= '0' && num[-1] <= '9' ) - num--; - - if( num != x && x[1] >= '0' && x[1] <= '9' ) { - xyuv.i_width = atoi( num ); - xyuv.i_height = atoi( x+1 ); - break; - } - psz = x; - } - fprintf( stderr, "file name gives %dx%d\n", xyuv.i_width, xyuv.i_height ); - } - - if( xyuv.i_width == 0 || xyuv.i_height == 0 ) { - xyuv_detect( &xyuv.i_width, &xyuv.i_height ); - } - - if( xyuv.i_width == 0 || xyuv.i_height == 0 ) { - fprintf( stderr, "invalid or missing frames size\n" ); - return -1; - } - if( xyuv.b_diff && xyuv.i_yuv != 2 ) { - fprintf( stderr, "--diff works only with 2 files\n" ); - return -1; - } - if( (xyuv.i_join == 0 || xyuv.i_join >= xyuv.i_width) && xyuv.i_yuv != 2 ) { - fprintf( stderr, "--join woeks only with two files and range is [1, width-1]\n" ); - return -1; - } - if( xyuv.i_join % 2 != 0 ) { - if( xyuv.i_join + 1 < xyuv.i_width ) - xyuv.i_join++; - else - xyuv.i_join--; - } - - /* Now check frames */ - fprintf( stderr, "displaying :\n" ); - xyuv.i_frame_size = 3 * xyuv.i_width * xyuv.i_height / 2; - xyuv_count_frames( &xyuv ); - for( i = 0; i < xyuv.i_yuv; i++ ) { - fprintf( stderr, " - '%s' : %d frames\n", xyuv.yuv[i].name, xyuv.yuv[i].i_frames ); - } - - if( xyuv.i_frames == 0 ) { - fprintf( stderr, "no frames to display\n" ); - } - - xyuv.pic = malloc( xyuv.i_frame_size ); - - /* calculate SDL view */ - if( xyuv.i_wall_width > xyuv.i_yuv ) { - xyuv.i_wall_width = xyuv.i_yuv; - } - if( xyuv.i_wall_width == 0 ) { - while( xyuv.i_wall_width < xyuv.i_yuv && xyuv.i_wall_width * xyuv.i_wall_width < xyuv.i_yuv ) { - xyuv.i_wall_width++; - } - } - - for( i = 0; i < xyuv.i_yuv; i++ ) { - if( xyuv.b_diff || xyuv.i_join > 0 ) { - xyuv.yuv[i].x = 0; - xyuv.yuv[i].y = 0; - } else if( xyuv.b_split ) { - xyuv.yuv[i].x = (i%xyuv.i_wall_width) * 3 * xyuv.i_width / 2; - xyuv.yuv[i].y = (i/xyuv.i_wall_width) * 
xyuv.i_height; - } else { - xyuv.yuv[i].x = (i%xyuv.i_wall_width) * xyuv.i_width; - xyuv.yuv[i].y = (i/xyuv.i_wall_width) * xyuv.i_height; - } - } - if( xyuv.b_diff ) { - xyuv.i_sdl_width = 3 * xyuv.i_width / 2; - xyuv.i_sdl_height= xyuv.i_height; - } else if( xyuv.i_join > 0 ) { - xyuv.i_sdl_width = xyuv.i_width; - xyuv.i_sdl_height= xyuv.i_height; - } else if( xyuv.b_split ) { - xyuv.i_sdl_width = xyuv.i_wall_width * 3 * xyuv.i_width / 2; - xyuv.i_sdl_height= xyuv.i_height * ( ( xyuv.i_yuv + xyuv.i_wall_width - 1 ) / xyuv.i_wall_width ); - } else { - xyuv.i_sdl_width = xyuv.i_wall_width * xyuv.i_width; - xyuv.i_sdl_height= xyuv.i_height * ( ( xyuv.i_yuv + xyuv.i_wall_width - 1 ) / xyuv.i_wall_width ); - } - xyuv.i_display_width = xyuv.i_sdl_width; - xyuv.i_display_height = xyuv.i_sdl_height; - - /* Open SDL */ - if( SDL_Init( SDL_INIT_EVENTTHREAD|SDL_INIT_NOPARACHUTE|SDL_INIT_VIDEO) ) { - fprintf( stderr, "cannot init SDL\n" ); - return -1; - } - - SDL_EnableKeyRepeat(SDL_DEFAULT_REPEAT_DELAY, 100 ); - SDL_EventState( SDL_KEYUP, SDL_IGNORE ); - - xyuv.screen = SDL_SetVideoMode( xyuv.i_sdl_width, xyuv.i_sdl_height, 0, - SDL_HWSURFACE|SDL_RESIZABLE| - SDL_ASYNCBLIT|SDL_HWACCEL ); - if( xyuv.screen == NULL ) { - fprintf( stderr, "SDL_SetVideoMode failed\n" ); - return -1; - } - - SDL_LockSurface( xyuv.screen ); - xyuv.overlay = SDL_CreateYUVOverlay( xyuv.i_sdl_width, xyuv.i_sdl_height, - SDL_YV12_OVERLAY, - xyuv.screen ); - /* reset with black */ - memset( xyuv.overlay->pixels[0], 0, xyuv.overlay->pitches[0] * xyuv.i_sdl_height ); - memset( xyuv.overlay->pixels[1], 128, xyuv.overlay->pitches[1] * xyuv.i_sdl_height / 2); - memset( xyuv.overlay->pixels[2], 128, xyuv.overlay->pitches[2] * xyuv.i_sdl_height / 2); - SDL_UnlockSurface( xyuv.screen ); - - if( xyuv.overlay == NULL ) { - fprintf( stderr, "recon: SDL_CreateYUVOverlay failed\n" ); - return -1; - } - - for( ;; ) { - SDL_Event event; - static int b_fullscreen = 0; - int64_t i_start = SDL_GetTicks(); - int 
i_wait; - - if( !xyuv.b_pause ) { - xyuv_display( &xyuv, xyuv.i_frame ); - } - - for( ;; ) { - int b_refresh = 0; - while( SDL_PollEvent( &event ) ) { - switch( event.type ) - { - case SDL_QUIT: - if( b_fullscreen ) - SDL_WM_ToggleFullScreen( xyuv.screen ); - exit( 1 ); - - case SDL_KEYDOWN: - switch( event.key.keysym.sym ) - { - case SDLK_q: - case SDLK_ESCAPE: - if( b_fullscreen ) - SDL_WM_ToggleFullScreen( xyuv.screen ); - exit(1); - - case SDLK_f: - if( SDL_WM_ToggleFullScreen( xyuv.screen ) ) - b_fullscreen = 1 - b_fullscreen; - break; - - case SDLK_g: - if( xyuv.b_grid ) - xyuv.b_grid = 0; - else - xyuv.b_grid = 1; - if( xyuv.b_pause ) - b_refresh = 1; - break; - - case SDLK_SPACE: - if( xyuv.b_pause ) - xyuv.b_pause = 0; - else - xyuv.b_pause = 1; - break; - case SDLK_LEFT: - if( xyuv.i_frame > 1 ) xyuv.i_frame--; - b_refresh = 1; - break; - - case SDLK_RIGHT: - if( xyuv.i_frame >= xyuv.i_frames ) - xyuv_count_frames( &xyuv ); - if( xyuv.i_frame < xyuv.i_frames ) xyuv.i_frame++; - b_refresh = 1; - break; - - case SDLK_HOME: - xyuv.i_frame = 1; - if( xyuv.b_pause ) - b_refresh = 1; - break; - - case SDLK_END: - xyuv_count_frames( &xyuv ); - xyuv.i_frame = xyuv.i_frames; - b_refresh = 1; - break; - - case SDLK_UP: - xyuv.i_frame += xyuv.i_frames / 20; - - if( xyuv.i_frame >= xyuv.i_frames ) - xyuv_count_frames( &xyuv ); - - if( xyuv.i_frame > xyuv.i_frames ) - xyuv.i_frame = xyuv.i_frames; - b_refresh = 1; - break; - - case SDLK_DOWN: - xyuv.i_frame -= xyuv.i_frames / 20; - if( xyuv.i_frame < 1 ) - xyuv.i_frame = 1; - b_refresh = 1; - break; - - case SDLK_PAGEUP: - xyuv.i_frame += xyuv.i_frames / 10; - - if( xyuv.i_frame >= xyuv.i_frames ) - xyuv_count_frames( &xyuv ); - - if( xyuv.i_frame > xyuv.i_frames ) - xyuv.i_frame = xyuv.i_frames; - b_refresh = 1; - break; - - case SDLK_PAGEDOWN: - xyuv.i_frame -= xyuv.i_frames / 10; - if( xyuv.i_frame < 1 ) - xyuv.i_frame = 1; - b_refresh = 1; - break; - - default: - break; - } - break; - case SDL_VIDEORESIZE: - 
xyuv.i_display_width = event.resize.w; - xyuv.i_display_height = event.resize.h; - xyuv.screen = SDL_SetVideoMode( xyuv.i_display_width, xyuv.i_display_height, 0, - SDL_HWSURFACE|SDL_RESIZABLE| - SDL_ASYNCBLIT|SDL_HWACCEL ); - xyuv_display( &xyuv, xyuv.i_frame ); - break; - - default: - break; - } - } - if( b_refresh ) { - xyuv.b_pause = 1; - xyuv_display( &xyuv, xyuv.i_frame ); - } - /* wait */ - i_wait = 1000 / xyuv.f_fps - ( SDL_GetTicks() - i_start); - if( i_wait < 0 ) - break; - else if( i_wait > 200 ) - SDL_Delay( 200 ); - else { - SDL_Delay( i_wait ); - break; - } - } - if( !xyuv.b_pause ) { - /* next frame */ - if( xyuv.i_frame == xyuv.i_frames ) - xyuv.b_pause = 1; - else if( xyuv.i_frame < xyuv.i_frames ) - xyuv.i_frame++; - } - } - - - return 0; - -err_missing_arg: - fprintf( stderr, "missing arg for option=%s\n", argv[i] ); - return -1; -} - - -static void xyuv_display( xyuv_t *xyuv, int i_frame ) -{ - SDL_Rect rect; - int i_picture = 0; - int i; - - if( i_frame > xyuv->i_frames ) - return; - - xyuv->i_frame = i_frame; - - /* Load and copy pictue data */ - for( i = 0; i < xyuv->i_yuv; i++ ) { - int i_plane; - - fprintf( stderr, "yuv[%d] %d/%d\n", i, i_frame, xyuv->yuv[i].i_frames ); - if( i_frame - 1 >= xyuv->yuv[i].i_frames ) { - xyuv_count_frames( xyuv ); - if( i_frame - 1 >= xyuv->yuv[i].i_frames ) - continue; - } - i_picture++; - - fseek( xyuv->yuv[i].f, (xyuv->i_frame-1) * xyuv->i_frame_size, SEEK_SET ); - fread( xyuv->pic, xyuv->i_frame_size, 1, xyuv->yuv[i].f ); - - SDL_LockYUVOverlay( xyuv->overlay ); - - if( xyuv->b_diff || xyuv->b_split ) { - /* Reset UV */ - for( i_plane = 1; i_plane < 3; i_plane++ ) { - memset( xyuv->overlay->pixels[i_plane], 128, xyuv->overlay->pitches[i_plane] * xyuv->overlay->h / 2 ); - } - /* Show diff in Y plane of overlay */ - - for( i_plane = 0; i_plane < 3; i_plane++ ) { - int div = i_plane == 0 ? 
1 : 2; - uint8_t *src = xyuv->pic; - uint8_t *dst = xyuv->overlay->pixels[0] + - (xyuv->yuv[i].x + xyuv->yuv[i].y * xyuv->overlay->pitches[0] ); - int j; - if( i_plane == 1 ) { - src += 5*xyuv->i_width * xyuv->i_height/4; - dst += xyuv->i_width; - } else if( i_plane == 2 ) { - src += xyuv->i_width * xyuv->i_height; - dst += xyuv->i_width + xyuv->i_height / 2 * xyuv->overlay->pitches[0]; - } - - for( j = 0; j < xyuv->i_height / div; j++ ) { - if( i_picture == 1 || xyuv->b_split ) { - memcpy( dst, src, xyuv->i_width / div ); - } else { - int k; - for( k = 0; k < xyuv->i_width / div; k++ ) { - dst[k] = abs( dst[k] - src[k]); - } - } - src += xyuv->i_width / div; - dst += xyuv->overlay->pitches[0]; - } - } - } else { - for( i_plane = 0; i_plane < 3; i_plane++ ) { - int div = i_plane == 0 ? 1 : 2; - uint8_t *src = xyuv->pic; - uint8_t *dst = xyuv->overlay->pixels[i_plane] + - ((xyuv->yuv[i].x + xyuv->yuv[i].y * xyuv->overlay->pitches[i_plane] ) / div ); - int w = xyuv->i_width / div; - int j; - - if( i_plane == 1 ) { - src += 5*xyuv->i_width * xyuv->i_height/4; - } else if( i_plane == 2 ) { - src += xyuv->i_width * xyuv->i_height; - } - if( xyuv->i_join > 0 ) { - if( i_picture > 1 ) { - src += xyuv->i_join / div; - dst += xyuv->i_join / div; - w = (xyuv->i_width - xyuv->i_join) /div; - } else { - w = xyuv->i_join / div; - } - } - - for( j = 0; j < xyuv->i_height / div; j++ ) { - memcpy( dst, src, w ); - src += xyuv->i_width / div; - dst += xyuv->overlay->pitches[i_plane]; - } - } - } - - SDL_UnlockYUVOverlay( xyuv->overlay ); - } - - if( xyuv->f_y != 0.0 ) { - uint8_t *pix = xyuv->overlay->pixels[0]; - int j; - - for( j = 0; j < xyuv->i_sdl_height; j++ ) { - int k; - for( k = 0; k < xyuv->i_sdl_width; k++ ) { - int v= pix[k] * xyuv->f_y; - if( v > 255 ) - pix[k] = 255; - else if( v < 0 ) - pix[k] = 0; - else - pix[k] = v; - } - pix += xyuv->overlay->pitches[0]; - } - } - if( xyuv->b_grid ) { - int x, y; - - for( y = 0; y < xyuv->i_sdl_height; y += 4 ) { - uint8_t *p = 
xyuv->overlay->pixels[0] + y * xyuv->overlay->pitches[0]; - for( x = 0; x < xyuv->i_sdl_width; x += 4 ) { - if( x%16== 0 || y%16 == 0 ) - p[x] = 0; - } - } - } - - /* Update display */ - rect.x = 0; - rect.y = 0; - rect.w = xyuv->i_display_width; - rect.h = xyuv->i_display_height; - SDL_DisplayYUVOverlay( xyuv->overlay, &rect ); - - /* Display title */ - if( xyuv->title ) - free( xyuv->title ); - asprintf( &xyuv->title, SDL_TITLE, xyuv->yuv[0].name, xyuv->i_frame, xyuv->i_frames, xyuv->f_fps ); - SDL_WM_SetCaption( xyuv->title, "" ); -} - -static void xyuv_count_frames( xyuv_t *xyuv ) -{ - int i; - - xyuv->i_frames = 0; - if( xyuv->i_frame_size <= 0 ) - return; - - for( i = 0; i < xyuv->i_yuv; i++ ) { - /* Beurk but avoid using fstat */ - fseek( xyuv->yuv[i].f, 0, SEEK_END ); - - xyuv->yuv[i].i_frames = ftell( xyuv->yuv[i].f ) / xyuv->i_frame_size; - fprintf( stderr, "count (%d) -> %d\n", i, xyuv->yuv[i].i_frames ); - - fseek( xyuv->yuv[i].f, 0, SEEK_SET ); - - if( xyuv->i_frames < xyuv->yuv[i].i_frames ) - xyuv->i_frames = xyuv->yuv[i].i_frames; - } -} - -static inline int ssd( int a ) { return a*a; } - -static void xyuv_detect( int *pi_width, int *pi_height ) -{ - static const int pi_size[][2] = { - {128, 96}, - {160,120}, - {320,244}, - {320,288}, - - /* PAL */ - {176,144}, // QCIF - {352,288}, // CIF - {352,576}, // 1/2 D1 - {480,576}, // 2/3 D1 - {544,576}, - {640,576}, // VGA - {704,576}, // D1 - {720,576}, // D1 - - /* NTSC */ - {176,112}, // QCIF - {320,240}, // MPEG I - {352,240}, // CIF - {352,480}, // 1/2 D1 - {480,480}, // 2/3 D1 - {544,480}, - {640,480}, // VGA - {704,480}, // D1 - {720,480}, // D1 - - /* */ - {0,0}, - }; - int i_max; - int i_size_max; - uint8_t *pic; - int i; - - *pi_width = 0; - *pi_height = 0; - - /* Compute size max */ - for( i_max = 0, i_size_max = 0; - pi_size[i_max][0] != 0 && pi_size[i_max][1] != 0; i_max++ ) { - int s = pi_size[i_max][0] * pi_size[i_max][1] * 3 / 2; - - if( i_size_max < s ) - i_size_max = s; - } - - /* 
Temporary buffer */ - i_size_max *= 3; - pic = malloc( i_size_max ); - - fprintf( stderr, "guessing size for:\n" ); - for( i = 0; i < xyuv.i_yuv; i++ ) { - int j; - int i_read; - double dbest = 255*255; - int i_best = i_max; - int64_t t; - - fprintf( stderr, " - %s\n", xyuv.yuv[i].name ); - - i_read = fread( pic, 1, i_size_max, xyuv.yuv[i].f ); - if( i_read < 0 ) - continue; - - /* Check if file size is at least compatible with one format - * (if not, ignore file size)*/ - fseek( xyuv.yuv[i].f, 0, SEEK_END ); - t = ftell( xyuv.yuv[i].f ); - fseek( xyuv.yuv[i].f, 0, SEEK_SET ); - for( j = 0; j < i_max; j++ ) { - const int w = pi_size[j][0]; - const int h = pi_size[j][1]; - const int s = w * h * 3 / 2; - - if( t % s == 0 ) - break; - } - if( j == i_max ) - t = 0; - - - /* Try all size */ - for( j = 0; j < i_max; j++ ) { - const int w = pi_size[j][0]; - const int h = pi_size[j][1]; - const int s = w * h * 3 / 2; - double dd; - - int x, y, n; - int64_t d; - - /* To small */ - if( i_read < 3*s ) - continue; - /* Check file size */ - if( ( t > 0 && (t % s) != 0 ) ) { - fprintf( stderr, " * %dx%d ignored (incompatible file size)\n", w, h ); - continue; - } - - - /* We do a simple ssd between 2 consecutives lines */ - d = 0; - for( n = 0; n < 3; n++ ) { - uint8_t *p; - - /* Y */ - p = &pic[n*s]; - for( y = 0; y < h-1; y++ ) { - for( x = 0; x < w; x++ ) - d += ssd( p[x] - p[w+x] ); - p += w; - } - - /* U */ - p = &pic[n*s+w*h]; - for( y = 0; y < h/2-1; y++ ) { - for( x = 0; x < w/2; x++ ) - d += ssd( p[x] - p[(w/2)+x] ); - p += w/2; - } - - /* V */ - p = &pic[n*s+5*w*h/4]; - for( y = 0; y < h/2-1; y++ ) { - for( x = 0; x < w/2; x++ ) - d += ssd( p[x] - p[(w/2)+x] ); - p += w/2; - } - } - dd = (double)d / (3*w*h*3/2); - fprintf( stderr, " * %dx%d d=%f\n", w, h, dd ); - - if( dd < dbest ) { - i_best = j; - dbest = dd; - } - } - - fseek( xyuv.yuv[i].f, 0, SEEK_SET ); - - if( i_best < i_max ) { - fprintf( stderr, " -> %dx%d\n", pi_size[i_best][0], pi_size[i_best][1] ); - 
*pi_width = pi_size[i_best][0]; - *pi_height = pi_size[i_best][1]; - } - } - - free( pic ); -}
View file
x264-snapshot-20130723-2245.tar.bz2/AUTHORS -> x264-snapshot-20140321-2245.tar.bz2/AUTHORS
Changed
@@ -43,8 +43,8 @@ S: Brittany, France N: Henrik Gramner -E: hengar-6 AT student DOT ltu DOT se -D: 4:2:2 chroma subsampling, x86 asm +E: henrik AT gramner DOT com +D: 4:2:2 chroma subsampling, x86 asm, Windows improvements, bugfixes S: Sweden N: Jason Garrett-Glaser @@ -99,7 +99,3 @@ E: radoslaw AT syskin DOT cjb DOT net D: Cached motion compensation -N: Tuukka Toivonen -E: tuukkat AT ee DOT oulu DOT fi -D: Visualization -
View file
x264-snapshot-20130723-2245.tar.bz2/Makefile -> x264-snapshot-20140321-2245.tar.bz2/Makefile
Changed
@@ -69,9 +69,8 @@ SRCCLI += output/mp4.c endif -# Visualization sources -ifneq ($(findstring HAVE_VISUALIZE 1, $(CONFIG)),) -SRCS += common/visualize.c common/display-x11.c +ifneq ($(findstring HAVE_LSMASH 1, $(CONFIG)),) +SRCCLI += output/mp4_lsmash.c endif # MMX/SSE optims @@ -247,29 +246,29 @@ rm -f config.mak x264_config.h config.h config.log x264.pc x264.def install-cli: cli - install -d $(DESTDIR)$(bindir) - install x264$(EXE) $(DESTDIR)$(bindir) + $(INSTALL) -d $(DESTDIR)$(bindir) + $(INSTALL) x264$(EXE) $(DESTDIR)$(bindir) install-lib-dev: - install -d $(DESTDIR)$(includedir) - install -d $(DESTDIR)$(libdir) - install -d $(DESTDIR)$(libdir)/pkgconfig - install -m 644 $(SRCPATH)/x264.h $(DESTDIR)$(includedir) - install -m 644 x264_config.h $(DESTDIR)$(includedir) - install -m 644 x264.pc $(DESTDIR)$(libdir)/pkgconfig + $(INSTALL) -d $(DESTDIR)$(includedir) + $(INSTALL) -d $(DESTDIR)$(libdir) + $(INSTALL) -d $(DESTDIR)$(libdir)/pkgconfig + $(INSTALL) -m 644 $(SRCPATH)/x264.h $(DESTDIR)$(includedir) + $(INSTALL) -m 644 x264_config.h $(DESTDIR)$(includedir) + $(INSTALL) -m 644 x264.pc $(DESTDIR)$(libdir)/pkgconfig install-lib-static: lib-static install-lib-dev - install -m 644 $(LIBX264) $(DESTDIR)$(libdir) + $(INSTALL) -m 644 $(LIBX264) $(DESTDIR)$(libdir) $(if $(RANLIB), $(RANLIB) $(DESTDIR)$(libdir)/$(LIBX264)) install-lib-shared: lib-shared install-lib-dev ifneq ($(IMPLIBNAME),) - install -d $(DESTDIR)$(bindir) - install -m 755 $(SONAME) $(DESTDIR)$(bindir) - install -m 644 $(IMPLIBNAME) $(DESTDIR)$(libdir) + $(INSTALL) -d $(DESTDIR)$(bindir) + $(INSTALL) -m 755 $(SONAME) $(DESTDIR)$(bindir) + $(INSTALL) -m 644 $(IMPLIBNAME) $(DESTDIR)$(libdir) else ifneq ($(SONAME),) ln -f -s $(SONAME) $(DESTDIR)$(libdir)/libx264.$(SOSUFFIX) - install -m 755 $(SONAME) $(DESTDIR)$(libdir) + $(INSTALL) -m 755 $(SONAME) $(DESTDIR)$(libdir) endif uninstall:
View file
x264-snapshot-20130723-2245.tar.bz2/common/arm/asm.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/asm.S
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * asm.S: arm utility macros ***************************************************************************** - * Copyright (C) 2008-2013 x264 project + * Copyright (C) 2008-2014 x264 project * * Authors: Mans Rullgard <mans@mansr.com> * David Conrad <lessen42@gmail.com> @@ -26,6 +26,8 @@ #include "config.h" +.syntax unified + #ifdef PREFIX # define EXTERN_ASM _ #else
View file
x264-snapshot-20130723-2245.tar.bz2/common/arm/cpu-a.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/cpu-a.S
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * cpu-a.S: arm cpu detection ***************************************************************************** - * Copyright (C) 2009-2013 x264 project + * Copyright (C) 2009-2014 x264 project * * Authors: David Conrad <lessen42@gmail.com> * @@ -26,7 +26,7 @@ #include "asm.S" .fpu neon -.align +.align 2 // done in gas because .fpu neon overrides the refusal to assemble // instructions the selected -march/-mcpu doesn't support @@ -95,7 +95,7 @@ sub r2, r2, r1 cmpgt r2, #30 << 3 // assume context switch if it took over 30 cycles addle r3, r3, r2 - subles ip, ip, #1 + subsle ip, ip, #1 bgt average_loop // disable counters if we enabled them
View file
x264-snapshot-20130723-2245.tar.bz2/common/arm/dct-a.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/dct-a.S
Changed
@@ -1,7 +1,7 @@ /**************************************************************************** * dct-a.S: arm transform and zigzag ***************************************************************************** - * Copyright (C) 2009-2013 x264 project + * Copyright (C) 2009-2014 x264 project * * Authors: David Conrad <lessen42@gmail.com> *
View file
x264-snapshot-20130723-2245.tar.bz2/common/arm/dct.h -> x264-snapshot-20140321-2245.tar.bz2/common/arm/dct.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * dct.h: arm transform and zigzag ***************************************************************************** - * Copyright (C) 2009-2013 x264 project + * Copyright (C) 2009-2014 x264 project * * Authors: David Conrad <lessen42@gmail.com> *
View file
x264-snapshot-20130723-2245.tar.bz2/common/arm/deblock-a.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/deblock-a.S
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * deblock.S: arm deblocking ***************************************************************************** - * Copyright (C) 2009-2013 x264 project + * Copyright (C) 2009-2014 x264 project * * Authors: Mans Rullgard <mans@mansr.com> *
View file
x264-snapshot-20130723-2245.tar.bz2/common/arm/mc-a.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/mc-a.S
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * mc.S: arm motion compensation ***************************************************************************** - * Copyright (C) 2009-2013 x264 project + * Copyright (C) 2009-2014 x264 project * * Authors: David Conrad <lessen42@gmail.com> * Mans Rullgard <mans@mansr.com> @@ -167,7 +167,7 @@ ldr ip, [sp, #8] push {r4-r6,lr} cmp ip, #32 - ldrd r4, [sp, #16] + ldrd r4, r5, [sp, #16] mov lr, #\h beq x264_pixel_avg_w\w\()_neon rsbs r6, ip, #64 @@ -447,7 +447,7 @@ .ifc \type, full ldr lr, [r4, #32] // denom .endif - ldrd r4, [r4, #32+4] // scale, offset + ldrd r4, r5, [r4, #32+4] // scale, offset vdup.16 q0, r4 vdup.16 q1, r5 .ifc \type, full @@ -818,8 +818,8 @@ function x264_mc_chroma_neon push {r4-r8, lr} vpush {d8-d11} - ldrd r4, [sp, #56] - ldrd r6, [sp, #64] + ldrd r4, r5, [sp, #56] + ldrd r6, r7, [sp, #64] asr lr, r6, #3 mul lr, r4, lr @@ -1380,8 +1380,8 @@ function x264_frame_init_lowres_core_neon push {r4-r10,lr} vpush {d8-d15} - ldrd r4, [sp, #96] - ldrd r6, [sp, #104] + ldrd r4, r5, [sp, #96] + ldrd r6, r7, [sp, #104] ldr lr, [sp, #112] sub r10, r6, r7 // dst_stride - width and r10, r10, #~15
View file
x264-snapshot-20130723-2245.tar.bz2/common/arm/mc-c.c -> x264-snapshot-20140321-2245.tar.bz2/common/arm/mc-c.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * mc-c.c: arm motion compensation ***************************************************************************** - * Copyright (C) 2009-2013 x264 project + * Copyright (C) 2009-2014 x264 project * * Authors: David Conrad <lessen42@gmail.com> *
View file
x264-snapshot-20130723-2245.tar.bz2/common/arm/mc.h -> x264-snapshot-20140321-2245.tar.bz2/common/arm/mc.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * mc.h: arm motion compensation ***************************************************************************** - * Copyright (C) 2009-2013 x264 project + * Copyright (C) 2009-2014 x264 project * * Authors: David Conrad <lessen42@gmail.com> *
View file
x264-snapshot-20130723-2245.tar.bz2/common/arm/pixel-a.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/pixel-a.S
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * pixel.S: arm pixel metrics ***************************************************************************** - * Copyright (C) 2009-2013 x264 project + * Copyright (C) 2009-2014 x264 project * * Authors: David Conrad <lessen42@gmail.com> * @@ -328,9 +328,9 @@ function x264_pixel_sad_x\x\()_\w\()x\h\()_neon push {r6-r7,lr} .if \x == 3 - ldrd r6, [sp, #12] + ldrd r6, r7, [sp, #12] .else - ldrd r6, [sp, #16] + ldrd r6, r7, [sp, #16] ldr r12, [sp, #12] .endif mov lr, #FENC_STRIDE @@ -519,6 +519,38 @@ b x264_var_end .endfunc +function x264_pixel_var_8x16_neon + vld1.64 {d16}, [r0,:64], r1 + vld1.64 {d18}, [r0,:64], r1 + vmull.u8 q1, d16, d16 + vmovl.u8 q0, d16 + vld1.64 {d20}, [r0,:64], r1 + vmull.u8 q2, d18, d18 + vaddw.u8 q0, q0, d18 + + mov ip, #12 + + vld1.64 {d22}, [r0,:64], r1 + VAR_SQR_SUM q1, q1, q14, d20, vpaddl.u16 + vld1.64 {d16}, [r0,:64], r1 + VAR_SQR_SUM q2, q2, q15, d22, vpaddl.u16 + +1: subs ip, ip, #4 + vld1.64 {d18}, [r0,:64], r1 + VAR_SQR_SUM q1, q14, q12, d16 + vld1.64 {d20}, [r0,:64], r1 + VAR_SQR_SUM q2, q15, q13, d18 + vld1.64 {d22}, [r0,:64], r1 + VAR_SQR_SUM q1, q12, q14, d20 + beq 2f + vld1.64 {d16}, [r0,:64], r1 + VAR_SQR_SUM q2, q13, q15, d22 + b 1b +2: + VAR_SQR_SUM q2, q13, q15, d22 + b x264_var_end +.endfunc + function x264_pixel_var_16x16_neon vld1.64 {d16-d17}, [r0,:128], r1 vmull.u8 q12, d16, d16 @@ -596,13 +628,56 @@ vadd.s32 d1, d2, d3 vpadd.s32 d0, d0, d1 - vmov.32 r0, r1, d0 + vmov r0, r1, d0 vst1.32 {d0[1]}, [ip,:32] mul r0, r0, r0 sub r0, r1, r0, lsr #6 bx lr .endfunc +function x264_pixel_var2_8x16_neon + vld1.64 {d16}, [r0,:64], r1 + vld1.64 {d17}, [r2,:64], r3 + vld1.64 {d18}, [r0,:64], r1 + vld1.64 {d19}, [r2,:64], r3 + vsubl.u8 q10, d16, d17 + vsubl.u8 q11, d18, d19 + SQR_ACC q1, d20, d21, vmull.s16 + vld1.64 {d16}, [r0,:64], r1 + vadd.s16 q0, q10, q11 + vld1.64 {d17}, [r2,:64], r3 + SQR_ACC q2, d22, d23, vmull.s16 + mov ip, #14 +1: 
subs ip, ip, #2 + vld1.64 {d18}, [r0,:64], r1 + vsubl.u8 q10, d16, d17 + vld1.64 {d19}, [r2,:64], r3 + vadd.s16 q0, q0, q10 + SQR_ACC q1, d20, d21 + vsubl.u8 q11, d18, d19 + beq 2f + vld1.64 {d16}, [r0,:64], r1 + vadd.s16 q0, q0, q11 + vld1.64 {d17}, [r2,:64], r3 + SQR_ACC q2, d22, d23 + b 1b +2: + vadd.s16 q0, q0, q11 + SQR_ACC q2, d22, d23 + + ldr ip, [sp] + vadd.s16 d0, d0, d1 + vadd.s32 q1, q1, q2 + vpaddl.s16 d0, d0 + vadd.s32 d1, d2, d3 + vpadd.s32 d0, d0, d1 + + vmov r0, r1, d0 + vst1.32 {d0[1]}, [ip,:32] + mul r0, r0, r0 + sub r0, r1, r0, lsr #7 + bx lr +.endfunc .macro LOAD_DIFF_8x4 q0 q1 q2 q3 vld1.32 {d1}, [r2], r3
View file
x264-snapshot-20130723-2245.tar.bz2/common/arm/pixel.h -> x264-snapshot-20140321-2245.tar.bz2/common/arm/pixel.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * pixel.h: arm pixel metrics ***************************************************************************** - * Copyright (C) 2009-2013 x264 project + * Copyright (C) 2009-2014 x264 project * * Authors: David Conrad <lessen42@gmail.com> * @@ -56,8 +56,10 @@ int x264_pixel_sa8d_16x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t ); uint64_t x264_pixel_var_8x8_neon ( uint8_t *, intptr_t ); +uint64_t x264_pixel_var_8x16_neon ( uint8_t *, intptr_t ); uint64_t x264_pixel_var_16x16_neon( uint8_t *, intptr_t ); -int x264_pixel_var2_8x8_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int * ); +int x264_pixel_var2_8x8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, int * ); +int x264_pixel_var2_8x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int * ); uint64_t x264_pixel_hadamard_ac_8x8_neon ( uint8_t *, intptr_t ); uint64_t x264_pixel_hadamard_ac_8x16_neon ( uint8_t *, intptr_t );
View file
x264-snapshot-20130723-2245.tar.bz2/common/arm/predict-a.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/predict-a.S
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * predict.S: arm intra prediction ***************************************************************************** - * Copyright (C) 2009-2013 x264 project + * Copyright (C) 2009-2014 x264 project * * Authors: David Conrad <lessen42@gmail.com> * Mans Rullgard <mans@mansr.com> @@ -181,9 +181,9 @@ function x264_predict_8x8_dc_neon mov ip, #0 - ldrd r2, [r1, #8] + ldrd r2, r3, [r1, #8] push {r4-r5,lr} - ldrd r4, [r1, #16] + ldrd r4, r5, [r1, #16] lsl r3, r3, #8 ldrb lr, [r1, #7] usad8 r2, r2, ip
View file
x264-snapshot-20130723-2245.tar.bz2/common/arm/predict-c.c -> x264-snapshot-20140321-2245.tar.bz2/common/arm/predict-c.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * predict.c: arm intra prediction ***************************************************************************** - * Copyright (C) 2009-2013 x264 project + * Copyright (C) 2009-2014 x264 project * * Authors: David Conrad <lessen42@gmail.com> *
View file
x264-snapshot-20130723-2245.tar.bz2/common/arm/predict.h -> x264-snapshot-20140321-2245.tar.bz2/common/arm/predict.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * predict.h: arm intra prediction ***************************************************************************** - * Copyright (C) 2009-2013 x264 project + * Copyright (C) 2009-2014 x264 project * * Authors: David Conrad <lessen42@gmail.com> * @@ -26,6 +26,16 @@ #ifndef X264_ARM_PREDICT_H #define X264_ARM_PREDICT_H +void x264_predict_8x8_v_neon( pixel *src, pixel edge[36] ); +void x264_predict_8x8_h_neon( pixel *src, pixel edge[36] ); +void x264_predict_8x8_dc_neon( pixel *src, pixel edge[36] ); +void x264_predict_8x8c_dc_neon( pixel *src ); +void x264_predict_8x8c_h_neon( pixel *src ); +void x264_predict_8x8c_v_neon( pixel *src ); +void x264_predict_16x16_v_neon( pixel *src ); +void x264_predict_16x16_h_neon( pixel *src ); +void x264_predict_16x16_dc_neon( pixel *src ); + void x264_predict_4x4_init_arm( int cpu, x264_predict_t pf[12] ); void x264_predict_8x8_init_arm( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter ); void x264_predict_8x8c_init_arm( int cpu, x264_predict_t pf[7] );
View file
x264-snapshot-20130723-2245.tar.bz2/common/arm/quant-a.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/quant-a.S
Changed
@@ -1,7 +1,7 @@ /**************************************************************************** * quant.S: arm quantization and level-run ***************************************************************************** - * Copyright (C) 2009-2013 x264 project + * Copyright (C) 2009-2014 x264 project * * Authors: David Conrad <lessen42@gmail.com> * @@ -312,7 +312,7 @@ // int coeff_last( int16_t *l ) function x264_coeff_last4_arm - ldrd r2, [r0] + ldrd r2, r3, [r0] subs r0, r3, #0 movne r0, #2 movne r2, r3 @@ -341,7 +341,7 @@ subs r1, ip, r1, lsr #2 addge r0, r1, #\size - 8 - sublts r0, r3, r0, lsr #2 + subslt r0, r3, r0, lsr #2 movlt r0, #0 bx lr .endfunc @@ -390,7 +390,7 @@ subs r1, ip, r1 addge r0, r1, #32 - sublts r0, ip, r0 + subslt r0, ip, r0 movlt r0, #0 bx lr .endfunc
View file
x264-snapshot-20130723-2245.tar.bz2/common/arm/quant.h -> x264-snapshot-20140321-2245.tar.bz2/common/arm/quant.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * quant.h: arm quantization and level-run ***************************************************************************** - * Copyright (C) 2005-2013 x264 project + * Copyright (C) 2005-2014 x264 project * * Authors: David Conrad <lessen42@gmail.com> *
View file
x264-snapshot-20130723-2245.tar.bz2/common/bitstream.c -> x264-snapshot-20140321-2245.tar.bz2/common/bitstream.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * bitstream.c: bitstream writing ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Jason Garrett-Glaser <darkshikari@gmail.com>
View file
x264-snapshot-20130723-2245.tar.bz2/common/bitstream.h -> x264-snapshot-20140321-2245.tar.bz2/common/bitstream.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * bitstream.h: bitstream writing ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Jason Garrett-Glaser <darkshikari@gmail.com>
View file
x264-snapshot-20130723-2245.tar.bz2/common/cabac.c -> x264-snapshot-20140321-2245.tar.bz2/common/cabac.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * cabac.c: arithmetic coder ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu>
View file
x264-snapshot-20130723-2245.tar.bz2/common/cabac.h -> x264-snapshot-20140321-2245.tar.bz2/common/cabac.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * cabac.h: arithmetic coder ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Laurent Aimar <fenrir@via.ecp.fr>
View file
x264-snapshot-20130723-2245.tar.bz2/common/common.c -> x264-snapshot-20140321-2245.tar.bz2/common/common.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * common.c: misc common functions ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Laurent Aimar <fenrir@via.ecp.fr> @@ -32,6 +32,9 @@ #if HAVE_MALLOC_H #include <malloc.h> #endif +#if HAVE_THP +#include <sys/mman.h> +#endif const int x264_bit_depth = BIT_DEPTH; @@ -342,7 +345,7 @@ param->analyse.i_luma_deadzone[1] = 6; param->rc.f_qcompress = 0.8; } - else if( !strncasecmp( s, "stillimage", 5 ) ) + else if( !strncasecmp( s, "stillimage", 10 ) ) { if( psy_tuning_used++ ) goto psy_failure; param->i_deblocking_filter_alphac0 = -3; @@ -668,6 +671,8 @@ } OPT("bluray-compat") p->b_bluray_compat = atobool(value); + OPT("avcintra-class") + p->i_avcintra_class = atoi(value); OPT("sar") { b_error = ( 2 != sscanf( value, "%d:%d", &p->vui.i_sar_width, &p->vui.i_sar_height ) && @@ -876,10 +881,6 @@ } OPT("log") p->i_log_level = atoi(value); -#if HAVE_VISUALIZE - OPT("visualize") - p->b_visualize = atobool(value); -#endif OPT("dump-yuv") p->psz_dump_yuv = strdup(value); OPT2("analyse", "partitions") @@ -1031,6 +1032,8 @@ p->b_vfr_input = !atobool(value); OPT("nal-hrd") b_error |= parse_enum( value, x264_nal_hrd_names, &p->i_nal_hrd ); + OPT("filler") + p->rc.b_filler = atobool(value); OPT("pic-struct") p->b_pic_struct = atobool(value); OPT("fake-interlaced") @@ -1099,7 +1102,7 @@ break; } fprintf( stderr, "x264 [%s]: ", psz_prefix ); - vfprintf( stderr, psz_fmt, arg ); + x264_vfprintf( stderr, psz_fmt, arg ); } /**************************************************************************** @@ -1141,7 +1144,7 @@ }; int csp = i_csp & X264_CSP_MASK; - if( csp <= X264_CSP_NONE || csp >= X264_CSP_MAX ) + if( csp <= X264_CSP_NONE || csp >= X264_CSP_MAX || csp == X264_CSP_V210 ) return -1; x264_picture_init( pic ); pic->img.i_csp 
= i_csp; @@ -1183,7 +1186,25 @@ { uint8_t *align_buf = NULL; #if HAVE_MALLOC_H - align_buf = memalign( NATIVE_ALIGN, i_size ); +#if HAVE_THP +#define HUGE_PAGE_SIZE 2*1024*1024 +#define HUGE_PAGE_THRESHOLD HUGE_PAGE_SIZE*7/8 /* FIXME: Is this optimal? */ + /* Attempt to allocate huge pages to reduce TLB misses. */ + if( i_size >= HUGE_PAGE_THRESHOLD ) + { + align_buf = memalign( HUGE_PAGE_SIZE, i_size ); + if( align_buf ) + { + /* Round up to the next huge page boundary if we are close enough. */ + size_t madv_size = (i_size + HUGE_PAGE_SIZE - HUGE_PAGE_THRESHOLD) & ~(HUGE_PAGE_SIZE-1); + madvise( align_buf, madv_size, MADV_HUGEPAGE ); + } + } + else +#undef HUGE_PAGE_SIZE +#undef HUGE_PAGE_THRESHOLD +#endif + align_buf = memalign( NATIVE_ALIGN, i_size ); #else uint8_t *buf = malloc( i_size + (NATIVE_ALIGN-1) + sizeof(void **) ); if( buf ) @@ -1246,7 +1267,7 @@ int b_error = 0; size_t i_size; char *buf; - FILE *fh = fopen( filename, "rb" ); + FILE *fh = x264_fopen( filename, "rb" ); if( !fh ) return NULL; b_error |= fseek( fh, 0, SEEK_END ) < 0; @@ -1383,7 +1404,7 @@ s += sprintf( s, " qp=%d", p->rc.i_qp_constant ); if( p->rc.i_vbv_buffer_size ) - s += sprintf( s, " nal_hrd=%s", x264_nal_hrd_names[p->i_nal_hrd] ); + s += sprintf( s, " nal_hrd=%s filler=%d", x264_nal_hrd_names[p->i_nal_hrd], p->rc.b_filler ); if( p->crop_rect.i_left | p->crop_rect.i_top | p->crop_rect.i_right | p->crop_rect.i_bottom ) s += sprintf( s, " crop_rect=%u,%u,%u,%u", p->crop_rect.i_left, p->crop_rect.i_top, p->crop_rect.i_right, p->crop_rect.i_bottom );
View file
x264-snapshot-20130723-2245.tar.bz2/common/common.h -> x264-snapshot-20140321-2245.tar.bz2/common/common.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * common.h: misc common functions ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu> @@ -54,6 +54,31 @@ memset( var, 0, size );\ } while( 0 ) +/* Macros for merging multiple allocations into a single large malloc, for improved + * use with huge pages. */ + +/* Needs to be enough to contain any set of buffers that use combined allocations */ +#define PREALLOC_BUF_SIZE 1024 + +#define PREALLOC_INIT\ + int prealloc_idx = 0;\ + size_t prealloc_size = 0;\ + uint8_t **preallocs[PREALLOC_BUF_SIZE]; + +#define PREALLOC( var, size )\ +do {\ + var = (void*)prealloc_size;\ + preallocs[prealloc_idx++] = (uint8_t**)&var;\ + prealloc_size += ALIGN(size, NATIVE_ALIGN);\ +} while(0) + +#define PREALLOC_END( ptr )\ +do {\ + CHECKED_MALLOC( ptr, prealloc_size );\ + while( prealloc_idx-- )\ + *preallocs[prealloc_idx] += (intptr_t)ptr;\ +} while(0) + #define ARRAY_SIZE(array) (sizeof(array)/sizeof(array[0])) #define X264_BFRAME_MAX 16 @@ -84,6 +109,7 @@ #define NALU_OVERHEAD 5 // startcode + NAL type costs 5 bytes per frame #define FILLER_OVERHEAD (NALU_OVERHEAD+1) +#define SEI_OVERHEAD (NALU_OVERHEAD - (h->param.b_annexb && !h->param.i_avcintra_class && (h->out.i_nal-1))) /**************************************************************************** * Includes @@ -491,6 +517,9 @@ uint8_t *nal_buffer; int nal_buffer_size; + x264_t *reconfig_h; + int reconfig; + /**** thread synchronization starts here ****/ /* frame number/poc */ @@ -523,15 +552,15 @@ int (*dequant4_mf[4])[16]; /* [4][6][16] */ int (*dequant8_mf[4])[64]; /* [4][6][64] */ /* quantization matrix for trellis, [cqm][qp][coef] */ - int (*unquant4_mf[4])[16]; /* [4][52][16] */ - int (*unquant8_mf[4])[64]; /* [4][52][64] */ + int 
(*unquant4_mf[4])[16]; /* [4][QP_MAX_SPEC+1][16] */ + int (*unquant8_mf[4])[64]; /* [4][QP_MAX_SPEC+1][64] */ /* quantization matrix for deadzone */ - udctcoef (*quant4_mf[4])[16]; /* [4][52][16] */ - udctcoef (*quant8_mf[4])[64]; /* [4][52][64] */ - udctcoef (*quant4_bias[4])[16]; /* [4][52][16] */ - udctcoef (*quant8_bias[4])[64]; /* [4][52][64] */ - udctcoef (*quant4_bias0[4])[16]; /* [4][52][16] */ - udctcoef (*quant8_bias0[4])[64]; /* [4][52][64] */ + udctcoef (*quant4_mf[4])[16]; /* [4][QP_MAX_SPEC+1][16] */ + udctcoef (*quant8_mf[4])[64]; /* [4][QP_MAX_SPEC+1][64] */ + udctcoef (*quant4_bias[4])[16]; /* [4][QP_MAX_SPEC+1][16] */ + udctcoef (*quant8_bias[4])[64]; /* [4][QP_MAX_SPEC+1][64] */ + udctcoef (*quant4_bias0[4])[16]; /* [4][QP_MAX_SPEC+1][16] */ + udctcoef (*quant8_bias0[4])[64]; /* [4][QP_MAX_SPEC+1][64] */ udctcoef (*nr_offset_emergency)[4][64]; /* mv/ref cost arrays. */ @@ -699,6 +728,7 @@ * and won't be copied from one thread to another */ /* mb table */ + uint8_t *base; /* base pointer for all malloced data in this mb */ int8_t *type; /* mb type */ uint8_t *partition; /* mb partition */ int8_t *qp; /* mb qp */ @@ -937,9 +967,6 @@ x264_deblock_function_t loopf; x264_bitstream_function_t bsf; -#if HAVE_VISUALIZE - struct visualize_t *visualize; -#endif x264_lookahead_t *lookahead; #if HAVE_OPENCL
View file
x264-snapshot-20130723-2245.tar.bz2/common/cpu.c -> x264-snapshot-20140321-2245.tar.bz2/common/cpu.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * cpu.c: cpu detection ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Laurent Aimar <fenrir@via.ecp.fr> @@ -74,7 +74,6 @@ #undef MMX2 {"Cache32", X264_CPU_CACHELINE_32}, {"Cache64", X264_CPU_CACHELINE_64}, - {"SSEMisalign", X264_CPU_SSE_MISALIGN}, {"LZCNT", X264_CPU_LZCNT}, {"BMI1", X264_CPU_BMI1}, {"BMI2", X264_CPU_BMI1|X264_CPU_BMI2}, @@ -123,7 +122,7 @@ uint32_t cpu = 0; uint32_t eax, ebx, ecx, edx; uint32_t vendor[4] = {0}; - uint32_t max_extended_cap; + uint32_t max_extended_cap, max_basic_cap; int cache; #if !ARCH_X86_64 @@ -132,7 +131,8 @@ #endif x264_cpu_cpuid( 0, &eax, vendor+0, vendor+2, vendor+1 ); - if( eax == 0 ) + max_basic_cap = eax; + if( max_basic_cap == 0 ) return 0; x264_cpu_cpuid( 1, &eax, &ebx, &ecx, &edx ); @@ -169,15 +169,18 @@ } } - x264_cpu_cpuid( 7, &eax, &ebx, &ecx, &edx ); - /* AVX2 requires OS support, but BMI1/2 don't. */ - if( (cpu&X264_CPU_AVX) && (ebx&0x00000020) ) - cpu |= X264_CPU_AVX2; - if( ebx&0x00000008 ) + if( max_basic_cap >= 7 ) { - cpu |= X264_CPU_BMI1; - if( ebx&0x00000100 ) - cpu |= X264_CPU_BMI2; + x264_cpu_cpuid( 7, &eax, &ebx, &ecx, &edx ); + /* AVX2 requires OS support, but BMI1/2 don't. 
*/ + if( (cpu&X264_CPU_AVX) && (ebx&0x00000020) ) + cpu |= X264_CPU_AVX2; + if( ebx&0x00000008 ) + { + cpu |= X264_CPU_BMI1; + if( ebx&0x00000100 ) + cpu |= X264_CPU_BMI2; + } } if( cpu & X264_CPU_SSSE3 ) @@ -210,12 +213,6 @@ } } - if( ecx&0x00000080 ) /* Misalign SSE */ - { - cpu |= X264_CPU_SSE_MISALIGN; - x264_cpu_mask_misalign_sse(); - } - if( cpu & X264_CPU_AVX ) { if( ecx&0x00000800 ) /* XOP */ @@ -274,7 +271,7 @@ x264_cpu_cpuid( 0x80000006, &eax, &ebx, &ecx, &edx ); cache = ecx&0xff; // cacheline size } - if( !cache ) + if( !cache && max_basic_cap >= 2 ) { // Cache and TLB Information static const char cache32_ids[] = { 0x0a, 0x0c, 0x41, 0x42, 0x43, 0x44, 0x45, 0x82, 0x83, 0x84, 0x85, 0 }; @@ -307,7 +304,7 @@ x264_log( NULL, X264_LOG_WARNING, "unable to determine cacheline size\n" ); } -#if BROKEN_STACK_ALIGNMENT +#if STACK_ALIGNMENT < 16 cpu |= X264_CPU_STACK_MOD4; #endif @@ -429,6 +426,10 @@ return sysconf( _SC_NPROCESSORS_ONLN ); #elif SYS_LINUX +#ifdef __ANDROID__ + // Android NDK does not expose sched_getaffinity + return sysconf( _SC_NPROCESSORS_CONF ); +#else cpu_set_t p_aff; memset( &p_aff, 0, sizeof(p_aff) ); if( sched_getaffinity( 0, sizeof(p_aff), &p_aff ) ) @@ -441,6 +442,7 @@ np += (((uint8_t *)&p_aff)[bit / 8] >> (bit % 8)) & 1; return np; #endif +#endif #elif SYS_BEOS system_info info;
View file
x264-snapshot-20130723-2245.tar.bz2/common/cpu.h -> x264-snapshot-20140321-2245.tar.bz2/common/cpu.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * cpu.h: cpu detection ***************************************************************************** - * Copyright (C) 2004-2013 x264 project + * Copyright (C) 2004-2014 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * @@ -45,7 +45,6 @@ #define x264_emms() #endif #define x264_sfence x264_cpu_sfence -void x264_cpu_mask_misalign_sse( void ); void x264_safe_intel_cpu_indicator_init( void ); /* kludge: @@ -58,8 +57,8 @@ * alignment between functions (osdep.h handles manual alignment of arrays * if it doesn't). */ -#if (ARCH_X86 || HAVE_32B_STACK_ALIGNMENT) && HAVE_MMX -int x264_stack_align( void (*func)(), ... ); +#if (ARCH_X86 || STACK_ALIGNMENT > 16) && HAVE_MMX +intptr_t x264_stack_align( void (*func)(), ... ); #define x264_stack_align(func,...) x264_stack_align((void (*)())func, __VA_ARGS__) #else #define x264_stack_align(func,...) func(__VA_ARGS__)
View file
x264-snapshot-20130723-2245.tar.bz2/common/dct.c -> x264-snapshot-20140321-2245.tar.bz2/common/dct.c
Changed
@@ -1,11 +1,11 @@ /***************************************************************************** * dct.c: transform and zigzag ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Laurent Aimar <fenrir@via.ecp.fr> - * Henrik Gramner <hengar-6@student.ltu.se> + * Henrik Gramner <henrik@gramner.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by
View file
x264-snapshot-20130723-2245.tar.bz2/common/dct.h -> x264-snapshot-20140321-2245.tar.bz2/common/dct.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * dct.h: transform and zigzag ***************************************************************************** - * Copyright (C) 2004-2013 x264 project + * Copyright (C) 2004-2014 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> *
View file
x264-snapshot-20130723-2245.tar.bz2/common/deblock.c -> x264-snapshot-20140321-2245.tar.bz2/common/deblock.c
Changed
@@ -1,12 +1,12 @@ /***************************************************************************** * deblock.c: deblocking ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu> * Jason Garrett-Glaser <darkshikari@gmail.com> - * Henrik Gramner <hengar-6@student.ltu.se> + * Henrik Gramner <henrik@gramner.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by
View file
x264-snapshot-20130723-2245.tar.bz2/common/frame.c -> x264-snapshot-20140321-2245.tar.bz2/common/frame.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * frame.c: frame handling ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu> @@ -53,6 +53,7 @@ case X264_CSP_NV16: case X264_CSP_I422: case X264_CSP_YV16: + case X264_CSP_V210: return X264_CSP_NV16; case X264_CSP_I444: case X264_CSP_YV24: @@ -86,6 +87,7 @@ #endif CHECKED_MALLOCZERO( frame, sizeof(x264_frame_t) ); + PREALLOC_INIT /* allocate frame data (+64 for extra data for me) */ i_width = h->mb.i_mb_width*16; @@ -124,7 +126,7 @@ for( int i = 0; i < h->param.i_bframe + 2; i++ ) for( int j = 0; j < h->param.i_bframe + 2; j++ ) - CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) ); + PREALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) ); frame->i_poc = -1; frame->i_type = X264_TYPE_AUTO; @@ -149,13 +151,9 @@ { int chroma_padv = i_padv >> (i_csp == X264_CSP_NV12); int chroma_plane_size = (frame->i_stride[1] * (frame->i_lines[1] + 2*chroma_padv)); - CHECKED_MALLOC( frame->buffer[1], chroma_plane_size * sizeof(pixel) ); - frame->plane[1] = frame->buffer[1] + frame->i_stride[1] * chroma_padv + PADH; + PREALLOC( frame->buffer[1], chroma_plane_size * sizeof(pixel) ); if( PARAM_INTERLACED ) - { - CHECKED_MALLOC( frame->buffer_fld[1], chroma_plane_size * sizeof(pixel) ); - frame->plane_fld[1] = frame->buffer_fld[1] + frame->i_stride[1] * chroma_padv + PADH; - } + PREALLOC( frame->buffer_fld[1], chroma_plane_size * sizeof(pixel) ); } /* all 4 luma planes allocated together, since the cacheline split code @@ -167,24 +165,15 @@ if( h->param.analyse.i_subpel_refine && b_fdec ) { /* FIXME: Don't allocate both buffers in non-adaptive MBAFF. 
*/ - CHECKED_MALLOC( frame->buffer[p], 4*luma_plane_size * sizeof(pixel) ); + PREALLOC( frame->buffer[p], 4*luma_plane_size * sizeof(pixel) ); if( PARAM_INTERLACED ) - CHECKED_MALLOC( frame->buffer_fld[p], 4*luma_plane_size * sizeof(pixel) ); - for( int i = 0; i < 4; i++ ) - { - frame->filtered[p][i] = frame->buffer[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH; - frame->filtered_fld[p][i] = frame->buffer_fld[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH; - } - frame->plane[p] = frame->filtered[p][0]; - frame->plane_fld[p] = frame->filtered_fld[p][0]; + PREALLOC( frame->buffer_fld[p], 4*luma_plane_size * sizeof(pixel) ); } else { - CHECKED_MALLOC( frame->buffer[p], luma_plane_size * sizeof(pixel) ); + PREALLOC( frame->buffer[p], luma_plane_size * sizeof(pixel) ); if( PARAM_INTERLACED ) - CHECKED_MALLOC( frame->buffer_fld[p], luma_plane_size * sizeof(pixel) ); - frame->filtered[p][0] = frame->plane[p] = frame->buffer[p] + frame->i_stride[p] * i_padv + PADH; - frame->filtered_fld[p][0] = frame->plane_fld[p] = frame->buffer_fld[p] + frame->i_stride[p] * i_padv + PADH; + PREALLOC( frame->buffer_fld[p], luma_plane_size * sizeof(pixel) ); } } @@ -192,36 +181,30 @@ if( b_fdec ) /* fdec frame */ { - CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t)); - CHECKED_MALLOC( frame->mb_partition, i_mb_count * sizeof(uint8_t)); - CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) ); - CHECKED_MALLOC( frame->mv16x16, 2*(i_mb_count+1) * sizeof(int16_t) ); - M32( frame->mv16x16[0] ) = 0; - frame->mv16x16++; - CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) ); + PREALLOC( frame->mb_type, i_mb_count * sizeof(int8_t) ); + PREALLOC( frame->mb_partition, i_mb_count * sizeof(uint8_t) ); + PREALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) ); + PREALLOC( frame->mv16x16, 2*(i_mb_count+1) * sizeof(int16_t) ); + PREALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) ); if( h->param.i_bframe ) { - CHECKED_MALLOC( 
frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) ); - CHECKED_MALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) ); + PREALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) ); + PREALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) ); } else { frame->mv[1] = NULL; frame->ref[1] = NULL; } - CHECKED_MALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) ); - CHECKED_MALLOC( frame->f_row_qp, i_lines/16 * sizeof(float) ); - CHECKED_MALLOC( frame->f_row_qscale, i_lines/16 * sizeof(float) ); + PREALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) ); + PREALLOC( frame->f_row_qp, i_lines/16 * sizeof(float) ); + PREALLOC( frame->f_row_qscale, i_lines/16 * sizeof(float) ); if( h->param.analyse.i_me_method >= X264_ME_ESA ) - { - CHECKED_MALLOC( frame->buffer[3], - frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) << h->frames.b_have_sub8x8_esa ); - frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH; - } + PREALLOC( frame->buffer[3], frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) << h->frames.b_have_sub8x8_esa ); if( PARAM_INTERLACED ) - CHECKED_MALLOC( frame->field, i_mb_count * sizeof(uint8_t) ); + PREALLOC( frame->field, i_mb_count * sizeof(uint8_t) ); if( h->param.analyse.b_mb_info ) - CHECKED_MALLOC( frame->effective_qp, i_mb_count * sizeof(uint8_t) ); + PREALLOC( frame->effective_qp, i_mb_count * sizeof(uint8_t) ); } else /* fenc frame */ { @@ -229,30 +212,85 @@ { int luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign ); - CHECKED_MALLOC( frame->buffer_lowres[0], 4 * luma_plane_size * sizeof(pixel) ); - for( int i = 0; i < 4; i++ ) - frame->lowres[i] = frame->buffer_lowres[0] + (frame->i_stride_lowres * PADV + PADH) + i * luma_plane_size; + PREALLOC( frame->buffer_lowres[0], 4 * luma_plane_size * sizeof(pixel) ); for( int j = 0; j <= !!h->param.i_bframe; j++ ) for( int i = 0; i <= h->param.i_bframe; i++ ) { - CHECKED_MALLOCZERO( 
frame->lowres_mvs[j][i], 2*h->mb.i_mb_count*sizeof(int16_t) ); - CHECKED_MALLOC( frame->lowres_mv_costs[j][i], h->mb.i_mb_count*sizeof(int) ); + PREALLOC( frame->lowres_mvs[j][i], 2*h->mb.i_mb_count*sizeof(int16_t) ); + PREALLOC( frame->lowres_mv_costs[j][i], h->mb.i_mb_count*sizeof(int) ); } - CHECKED_MALLOC( frame->i_propagate_cost, (i_mb_count+7) * sizeof(uint16_t) ); + PREALLOC( frame->i_propagate_cost, (i_mb_count+7) * sizeof(uint16_t) ); for( int j = 0; j <= h->param.i_bframe+1; j++ ) for( int i = 0; i <= h->param.i_bframe+1; i++ ) - CHECKED_MALLOC( frame->lowres_costs[j][i], (i_mb_count+3) * sizeof(uint16_t) ); - frame->i_intra_cost = frame->lowres_costs[0][0]; - memset( frame->i_intra_cost, -1, (i_mb_count+3) * sizeof(uint16_t) ); + PREALLOC( frame->lowres_costs[j][i], (i_mb_count+3) * sizeof(uint16_t) ); + } if( h->param.rc.i_aq_mode ) { - CHECKED_MALLOC( frame->f_qp_offset, h->mb.i_mb_count * sizeof(float) ); - CHECKED_MALLOC( frame->f_qp_offset_aq, h->mb.i_mb_count * sizeof(float) ); + PREALLOC( frame->f_qp_offset, h->mb.i_mb_count * sizeof(float) ); + PREALLOC( frame->f_qp_offset_aq, h->mb.i_mb_count * sizeof(float) ); if( h->frames.b_have_lowres ) + PREALLOC( frame->i_inv_qscale_factor, (h->mb.i_mb_count+3) * sizeof(uint16_t) ); + } + } + + PREALLOC_END( frame->base ); + + if( i_csp == X264_CSP_NV12 || i_csp == X264_CSP_NV16 ) + { + int chroma_padv = i_padv >> (i_csp == X264_CSP_NV12); + frame->plane[1] = frame->buffer[1] + frame->i_stride[1] * chroma_padv + PADH; + if( PARAM_INTERLACED ) + frame->plane_fld[1] = frame->buffer_fld[1] + frame->i_stride[1] * chroma_padv + PADH; + } + + for( int p = 0; p < luma_plane_count; p++ ) + { + int luma_plane_size = align_plane_size( frame->i_stride[p] * (frame->i_lines[p] + 2*i_padv), disalign ); + if( h->param.analyse.i_subpel_refine && b_fdec ) + { + for( int i = 0; i < 4; i++ ) + { + frame->filtered[p][i] = frame->buffer[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH; + frame->filtered_fld[p][i] = 
frame->buffer_fld[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH; + } + frame->plane[p] = frame->filtered[p][0]; + frame->plane_fld[p] = frame->filtered_fld[p][0]; + } + else + { + frame->filtered[p][0] = frame->plane[p] = frame->buffer[p] + frame->i_stride[p] * i_padv + PADH; + frame->filtered_fld[p][0] = frame->plane_fld[p] = frame->buffer_fld[p] + frame->i_stride[p] * i_padv + PADH; + } + } + + if( b_fdec ) + { + M32( frame->mv16x16[0] ) = 0; + frame->mv16x16++; + + if( h->param.analyse.i_me_method >= X264_ME_ESA ) + frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH; + } + else + { + if( h->frames.b_have_lowres ) + { + int luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign ); + for( int i = 0; i < 4; i++ ) + frame->lowres[i] = frame->buffer_lowres[0] + (frame->i_stride_lowres * PADV + PADH) + i * luma_plane_size; + + for( int j = 0; j <= !!h->param.i_bframe; j++ ) + for( int i = 0; i <= h->param.i_bframe; i++ ) + memset( frame->lowres_mvs[j][i], 0, 2*h->mb.i_mb_count*sizeof(int16_t) ); + + frame->i_intra_cost = frame->lowres_costs[0][0]; + memset( frame->i_intra_cost, -1, (i_mb_count+3) * sizeof(uint16_t) ); + + if( h->param.rc.i_aq_mode ) /* shouldn't really be initialized, just silences a valgrind false-positive in x264_mbtree_propagate_cost_sse2 */ - CHECKED_MALLOCZERO( frame->i_inv_qscale_factor, (h->mb.i_mb_count+3) * sizeof(uint16_t) ); + memset( frame->i_inv_qscale_factor, 0, (h->mb.i_mb_count+3) * sizeof(uint16_t) ); } } @@ -278,42 +316,8 @@ * so freeing those pointers would cause a double free later. 
*/ if( !frame->b_duplicate ) { - for( int i = 0; i < 4; i++ ) - { - x264_free( frame->buffer[i] ); - x264_free( frame->buffer_fld[i] ); - } - for( int i = 0; i < 4; i++ ) - x264_free( frame->buffer_lowres[i] ); - for( int i = 0; i < X264_BFRAME_MAX+2; i++ ) - for( int j = 0; j < X264_BFRAME_MAX+2; j++ ) - x264_free( frame->i_row_satds[i][j] ); - for( int j = 0; j < 2; j++ ) - for( int i = 0; i <= X264_BFRAME_MAX; i++ ) - { - x264_free( frame->lowres_mvs[j][i] ); - x264_free( frame->lowres_mv_costs[j][i] ); - } - x264_free( frame->i_propagate_cost ); - for( int j = 0; j <= X264_BFRAME_MAX+1; j++ ) - for( int i = 0; i <= X264_BFRAME_MAX+1; i++ ) - x264_free( frame->lowres_costs[j][i] ); - x264_free( frame->f_qp_offset ); - x264_free( frame->f_qp_offset_aq ); - x264_free( frame->i_inv_qscale_factor ); - x264_free( frame->i_row_bits ); - x264_free( frame->f_row_qp ); - x264_free( frame->f_row_qscale ); - x264_free( frame->field ); - x264_free( frame->effective_qp ); - x264_free( frame->mb_type ); - x264_free( frame->mb_partition ); - x264_free( frame->mv[0] ); - x264_free( frame->mv[1] ); - if( frame->mv16x16 ) - x264_free( frame->mv16x16-1 ); - x264_free( frame->ref[0] ); - x264_free( frame->ref[1] ); + x264_free( frame->base ); + if( frame->param && frame->param->param_free ) frame->param->param_free( frame->param ); if( frame->mb_info_free ) @@ -377,6 +381,12 @@ } #endif + if( BIT_DEPTH != 10 && i_csp == X264_CSP_V210 ) + { + x264_log( h, X264_LOG_ERROR, "v210 input is only compatible with bit-depth of 10 bits\n" ); + return -1; + } + dst->i_type = src->i_type; dst->i_qpplus1 = src->i_qpplus1; dst->i_pts = dst->i_reordered_pts = src->i_pts; @@ -389,7 +399,16 @@ uint8_t *pix[3]; int stride[3]; - if ( i_csp >= X264_CSP_BGR ) + if( i_csp == X264_CSP_V210 ) + { + stride[0] = src->img.i_stride[0]; + pix[0] = src->img.plane[0]; + + h->mc.plane_copy_deinterleave_v210( dst->plane[0], dst->i_stride[0], + dst->plane[1], dst->i_stride[1], + (uint32_t *)pix[0], 
stride[0]/sizeof(uint32_t), h->param.i_width, h->param.i_height ); + } + else if( i_csp >= X264_CSP_BGR ) { stride[0] = src->img.i_stride[0]; pix[0] = src->img.plane[0];
View file
x264-snapshot-20130723-2245.tar.bz2/common/frame.h -> x264-snapshot-20140321-2245.tar.bz2/common/frame.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * frame.h: frame handling ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu> @@ -35,6 +35,7 @@ typedef struct x264_frame { /* */ + uint8_t *base; /* Base pointer for all malloced data in this frame. */ int i_poc; int i_delta_poc[2]; int i_type;
View file
x264-snapshot-20130723-2245.tar.bz2/common/macroblock.c -> x264-snapshot-20140321-2245.tar.bz2/common/macroblock.c
Changed
@@ -1,12 +1,12 @@ /***************************************************************************** * macroblock.c: macroblock common functions ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Jason Garrett-Glaser <darkshikari@gmail.com> * Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu> - * Henrik Gramner <hengar-6@student.ltu.se> + * Henrik Gramner <henrik@gramner.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -256,25 +256,26 @@ h->mb.b_interlaced = PARAM_INTERLACED; - CHECKED_MALLOC( h->mb.qp, i_mb_count * sizeof(int8_t) ); - CHECKED_MALLOC( h->mb.cbp, i_mb_count * sizeof(int16_t) ); - CHECKED_MALLOC( h->mb.mb_transform_size, i_mb_count * sizeof(int8_t) ); - CHECKED_MALLOC( h->mb.slice_table, i_mb_count * sizeof(uint16_t) ); - memset( h->mb.slice_table, -1, i_mb_count * sizeof(uint16_t) ); + PREALLOC_INIT + + PREALLOC( h->mb.qp, i_mb_count * sizeof(int8_t) ); + PREALLOC( h->mb.cbp, i_mb_count * sizeof(int16_t) ); + PREALLOC( h->mb.mb_transform_size, i_mb_count * sizeof(int8_t) ); + PREALLOC( h->mb.slice_table, i_mb_count * sizeof(uint16_t) ); /* 0 -> 3 top(4), 4 -> 6 : left(3) */ - CHECKED_MALLOC( h->mb.intra4x4_pred_mode, i_mb_count * 8 * sizeof(int8_t) ); + PREALLOC( h->mb.intra4x4_pred_mode, i_mb_count * 8 * sizeof(int8_t) ); /* all coeffs */ - CHECKED_MALLOC( h->mb.non_zero_count, i_mb_count * 48 * sizeof(uint8_t) ); + PREALLOC( h->mb.non_zero_count, i_mb_count * 48 * sizeof(uint8_t) ); if( h->param.b_cabac ) { - CHECKED_MALLOC( h->mb.skipbp, i_mb_count * sizeof(int8_t) ); - CHECKED_MALLOC( h->mb.chroma_pred_mode, i_mb_count * sizeof(int8_t) ); - CHECKED_MALLOC( h->mb.mvd[0], i_mb_count * sizeof( **h->mb.mvd ) ); + PREALLOC( h->mb.skipbp, i_mb_count * sizeof(int8_t) ); + PREALLOC( h->mb.chroma_pred_mode, 
i_mb_count * sizeof(int8_t) ); + PREALLOC( h->mb.mvd[0], i_mb_count * sizeof( **h->mb.mvd ) ); if( h->param.i_bframe ) - CHECKED_MALLOC( h->mb.mvd[1], i_mb_count * sizeof( **h->mb.mvd ) ); + PREALLOC( h->mb.mvd[1], i_mb_count * sizeof( **h->mb.mvd ) ); } for( int i = 0; i < 2; i++ ) @@ -284,11 +285,7 @@ i_refs = X264_MIN(X264_REF_MAX, i_refs + 1 + (BIT_DEPTH == 8)); //smart weights add two duplicate frames, one in >8-bit for( int j = !i; j < i_refs; j++ ) - { - CHECKED_MALLOC( h->mb.mvr[i][j], 2 * (i_mb_count + 1) * sizeof(int16_t) ); - M32( h->mb.mvr[i][j][0] ) = 0; - h->mb.mvr[i][j]++; - } + PREALLOC( h->mb.mvr[i][j], 2 * (i_mb_count + 1) * sizeof(int16_t) ); } if( h->param.analyse.i_weighted_pred ) @@ -325,7 +322,24 @@ } for( int i = 0; i < numweightbuf; i++ ) - CHECKED_MALLOC( h->mb.p_weight_buf[i], luma_plane_size * sizeof(pixel) ); + PREALLOC( h->mb.p_weight_buf[i], luma_plane_size * sizeof(pixel) ); + } + + PREALLOC_END( h->mb.base ); + + memset( h->mb.slice_table, -1, i_mb_count * sizeof(uint16_t) ); + + for( int i = 0; i < 2; i++ ) + { + int i_refs = X264_MIN(X264_REF_MAX, (i ? 
1 + !!h->param.i_bframe_pyramid : h->param.i_frame_reference) ) << PARAM_INTERLACED; + if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART ) + i_refs = X264_MIN(X264_REF_MAX, i_refs + 1 + (BIT_DEPTH == 8)); //smart weights add two duplicate frames, one in >8-bit + + for( int j = !i; j < i_refs; j++ ) + { + M32( h->mb.mvr[i][j][0] ) = 0; + h->mb.mvr[i][j]++; + } } return 0; @@ -334,26 +348,7 @@ } void x264_macroblock_cache_free( x264_t *h ) { - for( int i = 0; i < 2; i++ ) - for( int j = !i; j < X264_REF_MAX*2; j++ ) - if( h->mb.mvr[i][j] ) - x264_free( h->mb.mvr[i][j]-1 ); - for( int i = 0; i < X264_REF_MAX; i++ ) - x264_free( h->mb.p_weight_buf[i] ); - - if( h->param.b_cabac ) - { - x264_free( h->mb.skipbp ); - x264_free( h->mb.chroma_pred_mode ); - x264_free( h->mb.mvd[0] ); - x264_free( h->mb.mvd[1] ); - } - x264_free( h->mb.slice_table ); - x264_free( h->mb.intra4x4_pred_mode ); - x264_free( h->mb.non_zero_count ); - x264_free( h->mb.mb_transform_size ); - x264_free( h->mb.cbp ); - x264_free( h->mb.qp ); + x264_free( h->mb.base ); } int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead ) @@ -394,7 +389,7 @@ ((me_range*2+24) * sizeof(int16_t) + (me_range+4) * (me_range+1) * 4 * sizeof(mvsad_t)); scratch_size = X264_MAX3( buf_hpel, buf_ssim, buf_tesa ); } - int buf_mbtree = h->param.rc.b_mb_tree * ((h->mb.i_mb_width+7)&~7) * sizeof(int); + int buf_mbtree = h->param.rc.b_mb_tree * ((h->mb.i_mb_width+7)&~7) * sizeof(int16_t); scratch_size = X264_MAX( scratch_size, buf_mbtree ); if( scratch_size ) CHECKED_MALLOC( h->scratch_buffer, scratch_size ); @@ -402,7 +397,9 @@ h->scratch_buffer = NULL; int buf_lookahead_threads = (h->mb.i_mb_height + (4 + 32) * h->param.i_lookahead_threads) * sizeof(int) * 2; - CHECKED_MALLOC( h->scratch_buffer2, buf_lookahead_threads ); + int buf_mbtree2 = buf_mbtree * 12; /* size of the internal propagate_list asm buffer */ + scratch_size = X264_MAX( buf_lookahead_threads, buf_mbtree2 ); + CHECKED_MALLOC( 
h->scratch_buffer2, scratch_size ); return 0; fail: @@ -1258,8 +1255,13 @@ } } - if( b_mbaff && mb_x == 0 && !(mb_y&1) && mb_y > 0 ) - h->mb.field_decoding_flag = h->mb.field[h->mb.i_mb_xy - h->mb.i_mb_stride]; + if( b_mbaff && mb_x == 0 && !(mb_y&1) ) + { + if( h->mb.i_mb_top_xy >= h->sh.i_first_mb ) + h->mb.field_decoding_flag = h->mb.field[h->mb.i_mb_top_xy]; + else + h->mb.field_decoding_flag = 0; + } /* Check whether skip here would cause decoder to predict interlace mode incorrectly. * FIXME: It might be better to change the interlace type rather than forcing a skip to be non-skip. */ @@ -1267,26 +1269,8 @@ if( b_mbaff ) { if( MB_INTERLACED != h->mb.field_decoding_flag && - h->mb.i_mb_prev_xy >= 0 && IS_SKIP(h->mb.type[h->mb.i_mb_prev_xy]) ) + (mb_y&1) && IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride]) ) h->mb.b_allow_skip = 0; - if( (mb_y&1) && IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride]) ) - { - if( h->mb.i_neighbour & MB_LEFT ) - { - if( h->mb.field[h->mb.i_mb_xy - 1] != MB_INTERLACED ) - h->mb.b_allow_skip = 0; - } - else if( h->mb.i_neighbour & MB_TOP ) - { - if( h->mb.field[h->mb.i_mb_top_xy] != MB_INTERLACED ) - h->mb.b_allow_skip = 0; - } - else // Frame mb pair is predicted - { - if( MB_INTERLACED ) - h->mb.b_allow_skip = 0; - } - } } if( h->param.b_cabac )
View file
x264-snapshot-20130723-2245.tar.bz2/common/macroblock.h -> x264-snapshot-20140321-2245.tar.bz2/common/macroblock.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * macroblock.h: macroblock common functions ***************************************************************************** - * Copyright (C) 2005-2013 x264 project + * Copyright (C) 2005-2014 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Laurent Aimar <fenrir@via.ecp.fr>
View file
x264-snapshot-20130723-2245.tar.bz2/common/mc.c -> x264-snapshot-20140321-2245.tar.bz2/common/mc.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * mc.c: motion compensation ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu> @@ -336,6 +336,34 @@ } } +void x264_plane_copy_deinterleave_v210_c( pixel *dsty, intptr_t i_dsty, + pixel *dstc, intptr_t i_dstc, + uint32_t *src, intptr_t i_src, int w, int h ) +{ + for( int l = 0; l < h; l++ ) + { + pixel *dsty0 = dsty; + pixel *dstc0 = dstc; + uint32_t *src0 = src; + + for( int n = 0; n < w; n += 3 ) + { + *(dstc0++) = *src0 & 0x03FF; + *(dsty0++) = ( *src0 >> 10 ) & 0x03FF; + *(dstc0++) = ( *src0 >> 20 ) & 0x03FF; + src0++; + *(dsty0++) = *src0 & 0x03FF; + *(dstc0++) = ( *src0 >> 10 ) & 0x03FF; + *(dsty0++) = ( *src0 >> 20 ) & 0x03FF; + src0++; + } + + dsty += i_dsty; + dstc += i_dstc; + src += i_src; + } +} + static void store_interleave_chroma( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height ) { for( int y=0; y<height; y++, dst+=i_dst, srcu+=FDEC_STRIDE, srcv+=FDEC_STRIDE ) @@ -455,20 +483,97 @@ /* Estimate the total amount of influence on future quality that could be had if we * were to improve the reference samples used to inter predict any given macroblock. 
*/ -static void mbtree_propagate_cost( int *dst, uint16_t *propagate_in, uint16_t *intra_costs, +static void mbtree_propagate_cost( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs, uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len ) { - float fps = *fps_factor / 256.f; + float fps = *fps_factor; for( int i = 0; i < len; i++ ) { - float intra_cost = intra_costs[i] * inv_qscales[i]; - float propagate_amount = propagate_in[i] + intra_cost*fps; - float propagate_num = intra_costs[i] - (inter_costs[i] & LOWRES_COST_MASK); - float propagate_denom = intra_costs[i]; - dst[i] = (int)(propagate_amount * propagate_num / propagate_denom + 0.5f); + int intra_cost = intra_costs[i]; + int inter_cost = X264_MIN(intra_costs[i], inter_costs[i] & LOWRES_COST_MASK); + float propagate_intra = intra_cost * inv_qscales[i]; + float propagate_amount = propagate_in[i] + propagate_intra*fps; + float propagate_num = intra_cost - inter_cost; + float propagate_denom = intra_cost; + dst[i] = X264_MIN((int)(propagate_amount * propagate_num / propagate_denom + 0.5f), 32767); } } +static void mbtree_propagate_list( x264_t *h, uint16_t *ref_costs, int16_t (*mvs)[2], + int16_t *propagate_amount, uint16_t *lowres_costs, + int bipred_weight, int mb_y, int len, int list ) +{ + unsigned stride = h->mb.i_mb_stride; + unsigned width = h->mb.i_mb_width; + unsigned height = h->mb.i_mb_height; + + for( unsigned i = 0; i < len; i++ ) + { +#define CLIP_ADD(s,x) (s) = X264_MIN((s)+(x),(1<<15)-1) + int lists_used = lowres_costs[i]>>LOWRES_COST_SHIFT; + + if( !(lists_used & (1 << list)) ) + continue; + + int listamount = propagate_amount[i]; + /* Apply bipred weighting. */ + if( lists_used == 3 ) + listamount = (listamount * bipred_weight + 32) >> 6; + + /* Early termination for simple case of mv0. 
*/ + if( !M32( mvs[i] ) ) + { + CLIP_ADD( ref_costs[mb_y*stride + i], listamount ); + continue; + } + + int x = mvs[i][0]; + int y = mvs[i][1]; + unsigned mbx = (x>>5)+i; + unsigned mby = (y>>5)+mb_y; + unsigned idx0 = mbx + mby * stride; + unsigned idx2 = idx0 + stride; + x &= 31; + y &= 31; + int idx0weight = (32-y)*(32-x); + int idx1weight = (32-y)*x; + int idx2weight = y*(32-x); + int idx3weight = y*x; + idx0weight = (idx0weight * listamount + 512) >> 10; + idx1weight = (idx1weight * listamount + 512) >> 10; + idx2weight = (idx2weight * listamount + 512) >> 10; + idx3weight = (idx3weight * listamount + 512) >> 10; + + if( mbx < width-1 && mby < height-1 ) + { + CLIP_ADD( ref_costs[idx0+0], idx0weight ); + CLIP_ADD( ref_costs[idx0+1], idx1weight ); + CLIP_ADD( ref_costs[idx2+0], idx2weight ); + CLIP_ADD( ref_costs[idx2+1], idx3weight ); + } + else + { + /* Note: this takes advantage of unsigned representation to + * catch negative mbx/mby. */ + if( mby < height ) + { + if( mbx < width ) + CLIP_ADD( ref_costs[idx0+0], idx0weight ); + if( mbx+1 < width ) + CLIP_ADD( ref_costs[idx0+1], idx1weight ); + } + if( mby+1 < height ) + { + if( mbx < width ) + CLIP_ADD( ref_costs[idx2+0], idx2weight ); + if( mbx+1 < width ) + CLIP_ADD( ref_costs[idx2+1], idx3weight ); + } + } + } +#undef CLIP_ADD +} + void x264_mc_init( int cpu, x264_mc_functions_t *pf, int cpu_independent ) { pf->mc_luma = mc_luma; @@ -507,6 +612,7 @@ pf->plane_copy_interleave = x264_plane_copy_interleave_c; pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_c; pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_c; + pf->plane_copy_deinterleave_v210 = x264_plane_copy_deinterleave_v210_c; pf->hpel_filter = hpel_filter; @@ -523,6 +629,7 @@ pf->integral_init8v = integral_init8v; pf->mbtree_propagate_cost = mbtree_propagate_cost; + pf->mbtree_propagate_list = mbtree_propagate_list; #if HAVE_MMX x264_mc_init_mmx( cpu, pf ); @@ -536,7 +643,10 @@ #endif if( cpu_independent ) + { 
pf->mbtree_propagate_cost = mbtree_propagate_cost; + pf->mbtree_propagate_list = mbtree_propagate_list; + } } void x264_frame_filter( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
View file
x264-snapshot-20130723-2245.tar.bz2/common/mc.h -> x264-snapshot-20140321-2245.tar.bz2/common/mc.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * mc.h: motion compensation ***************************************************************************** - * Copyright (C) 2004-2013 x264 project + * Copyright (C) 2004-2014 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * @@ -93,6 +93,9 @@ pixel *src, intptr_t i_src, int w, int h ); void (*plane_copy_deinterleave_rgb)( pixel *dsta, intptr_t i_dsta, pixel *dstb, intptr_t i_dstb, pixel *dstc, intptr_t i_dstc, pixel *src, intptr_t i_src, int pw, int w, int h ); + void (*plane_copy_deinterleave_v210)( pixel *dsty, intptr_t i_dsty, + pixel *dstc, intptr_t i_dstc, + uint32_t *src, intptr_t i_src, int w, int h ); void (*hpel_filter)( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src, intptr_t i_stride, int i_width, int i_height, int16_t *buf ); @@ -119,8 +122,12 @@ weight_fn_t *offsetsub; void (*weight_cache)( x264_t *, x264_weight_t * ); - void (*mbtree_propagate_cost)( int *dst, uint16_t *propagate_in, uint16_t *intra_costs, + void (*mbtree_propagate_cost)( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs, uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len ); + + void (*mbtree_propagate_list)( x264_t *h, uint16_t *ref_costs, int16_t (*mvs)[2], + int16_t *propagate_amount, uint16_t *lowres_costs, + int bipred_weight, int mb_y, int len, int list ); } x264_mc_functions_t; void x264_mc_init( int cpu, x264_mc_functions_t *pf, int cpu_independent );
View file
x264-snapshot-20130723-2245.tar.bz2/common/mvpred.c -> x264-snapshot-20140321-2245.tar.bz2/common/mvpred.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * mvpred.c: motion vector prediction ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Jason Garrett-Glaser <darkshikari@gmail.com>
View file
x264-snapshot-20130723-2245.tar.bz2/common/opencl.c -> x264-snapshot-20140321-2245.tar.bz2/common/opencl.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * opencl.c: OpenCL initialization and kernel compilation ***************************************************************************** - * Copyright (C) 2012-2013 x264 project + * Copyright (C) 2012-2014 x264 project * * Authors: Steve Borho <sborho@multicorewareinc.com> * Anton Mitrofanov <BugMaster@narod.ru> @@ -28,7 +28,7 @@ #ifdef _WIN32 #include <windows.h> -#define ocl_open LoadLibrary( "OpenCL" ) +#define ocl_open LoadLibraryW( L"OpenCL" ) #define ocl_close FreeLibrary #define ocl_address GetProcAddress #else @@ -119,10 +119,10 @@ /* Try to load the cached compiled program binary, verify the device context is * still valid before reuse */ -static cl_program x264_opencl_cache_load( x264_t *h, char *dev_name, char *dev_vendor, char *driver_version ) +static cl_program x264_opencl_cache_load( x264_t *h, const char *dev_name, const char *dev_vendor, const char *driver_version ) { /* try to load cached program binary */ - FILE *fp = fopen( h->param.psz_clbin_file, "rb" ); + FILE *fp = x264_fopen( h->param.psz_clbin_file, "rb" ); if( !fp ) return NULL; @@ -167,9 +167,9 @@ /* Save the compiled program binary to a file for later reuse. 
Device context * is also saved in the cache file so we do not reuse stale binaries */ -static void x264_opencl_cache_save( x264_t *h, cl_program program, char *dev_name, char *dev_vendor, char *driver_version ) +static void x264_opencl_cache_save( x264_t *h, cl_program program, const char *dev_name, const char *dev_vendor, const char *driver_version ) { - FILE *fp = fopen( h->param.psz_clbin_file, "wb" ); + FILE *fp = x264_fopen( h->param.psz_clbin_file, "wb" ); if( !fp ) { x264_log( h, X264_LOG_INFO, "OpenCL: unable to open clbin file for write\n" ); @@ -304,7 +304,7 @@ goto fail; } - FILE *log_file = fopen( "x264_kernel_build_log.txt", "w" ); + FILE *log_file = x264_fopen( "x264_kernel_build_log.txt", "w" ); if( !log_file ) { x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to create file x264_kernel_build_log.txt\n" ); @@ -672,9 +672,9 @@ int ret = 0; #ifdef _WIN32 - hDLL = LoadLibrary( "atiadlxx.dll" ); + hDLL = LoadLibraryW( L"atiadlxx.dll" ); if( !hDLL ) - hDLL = LoadLibrary( "atiadlxy.dll" ); + hDLL = LoadLibraryW( L"atiadlxy.dll" ); #else hDLL = dlopen( "libatiadlxx.so", RTLD_LAZY|RTLD_GLOBAL ); #endif @@ -685,7 +685,7 @@ ADL_Main_Control_Destroy = (ADL_MAIN_CONTROL_DESTROY)adl_address(hDLL, "ADL_Main_Control_Destroy"); ADL_Adapter_NumberOfAdapters_Get = (ADL_ADAPTER_NUMBEROFADAPTERS_GET)adl_address(hDLL, "ADL_Adapter_NumberOfAdapters_Get"); ADL_PowerXpress_Scheme_Get = (ADL_POWERXPRESS_SCHEME_GET)adl_address(hDLL, "ADL_PowerXpress_Scheme_Get"); - if( !ADL_Main_Control_Destroy || !ADL_Main_Control_Destroy || !ADL_Adapter_NumberOfAdapters_Get || + if( !ADL_Main_Control_Create || !ADL_Main_Control_Destroy || !ADL_Adapter_NumberOfAdapters_Get || !ADL_PowerXpress_Scheme_Get ) goto fail1;
View file
x264-snapshot-20130723-2245.tar.bz2/common/opencl.h -> x264-snapshot-20140321-2245.tar.bz2/common/opencl.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * opencl.h: OpenCL structures and defines ***************************************************************************** - * Copyright (C) 2012-2013 x264 project + * Copyright (C) 2012-2014 x264 project * * Authors: Steve Borho <sborho@multicorewareinc.com> * Anton Mitrofanov <BugMaster@narod.ru>
View file
x264-snapshot-20130723-2245.tar.bz2/common/osdep.c -> x264-snapshot-20140321-2245.tar.bz2/common/osdep.c
Changed
@@ -1,10 +1,11 @@ /***************************************************************************** * osdep.c: platform-specific code ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Steven Walters <kemuri9@gmail.com> * Laurent Aimar <fenrir@via.ecp.fr> + * Henrik Gramner <henrik@gramner.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -26,6 +27,11 @@ #include "common.h" +#ifdef _WIN32 +#include <windows.h> +#include <io.h> +#endif + #if SYS_WINDOWS #include <sys/types.h> #include <sys/timeb.h> @@ -35,8 +41,6 @@ #include <time.h> #if PTW32_STATIC_LIB -#define WIN32_LEAN_AND_MEAN -#include <windows.h> /* this is a global in pthread-win32 to indicate if it has been initialized or not */ extern int ptw32_processInitialized; #endif @@ -134,3 +138,73 @@ {} #endif #endif + +#ifdef _WIN32 +/* Functions for dealing with Unicode on Windows. */ +FILE *x264_fopen( const char *filename, const char *mode ) +{ + wchar_t filename_utf16[MAX_PATH]; + wchar_t mode_utf16[16]; + if( utf8_to_utf16( filename, filename_utf16 ) && utf8_to_utf16( mode, mode_utf16 ) ) + return _wfopen( filename_utf16, mode_utf16 ); + return NULL; +} + +int x264_rename( const char *oldname, const char *newname ) +{ + wchar_t oldname_utf16[MAX_PATH]; + wchar_t newname_utf16[MAX_PATH]; + if( utf8_to_utf16( oldname, oldname_utf16 ) && utf8_to_utf16( newname, newname_utf16 ) ) + { + /* POSIX says that rename() removes the destination, but Win32 doesn't. 
*/ + _wunlink( newname_utf16 ); + return _wrename( oldname_utf16, newname_utf16 ); + } + return -1; +} + +int x264_stat( const char *path, x264_struct_stat *buf ) +{ + wchar_t path_utf16[MAX_PATH]; + if( utf8_to_utf16( path, path_utf16 ) ) + return _wstati64( path_utf16, buf ); + return -1; +} + +int x264_vfprintf( FILE *stream, const char *format, va_list arg ) +{ + HANDLE console = NULL; + DWORD mode; + + if( stream == stdout ) + console = GetStdHandle( STD_OUTPUT_HANDLE ); + else if( stream == stderr ) + console = GetStdHandle( STD_ERROR_HANDLE ); + + /* Only attempt to convert to UTF-16 when writing to a non-redirected console screen buffer. */ + if( GetConsoleMode( console, &mode ) ) + { + char buf[4096]; + wchar_t buf_utf16[4096]; + + int length = vsnprintf( buf, sizeof(buf), format, arg ); + if( length > 0 && length < sizeof(buf) ) + { + /* WriteConsoleW is the most reliable way to output Unicode to a console. */ + int length_utf16 = MultiByteToWideChar( CP_UTF8, 0, buf, length, buf_utf16, sizeof(buf_utf16)/sizeof(wchar_t) ); + DWORD written; + WriteConsoleW( console, buf_utf16, length_utf16, &written, NULL ); + return length; + } + } + return vfprintf( stream, format, arg ); +} + +int x264_is_pipe( const char *path ) +{ + wchar_t path_utf16[MAX_PATH]; + if( utf8_to_utf16( path, path_utf16 ) ) + return WaitNamedPipeW( path_utf16, 0 ); + return 0; +} +#endif
View file
x264-snapshot-20130723-2245.tar.bz2/common/osdep.h -> x264-snapshot-20140321-2245.tar.bz2/common/osdep.h
Changed
@@ -1,10 +1,11 @@ /***************************************************************************** * osdep.h: platform-specific code ***************************************************************************** - * Copyright (C) 2007-2013 x264 project + * Copyright (C) 2007-2014 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Laurent Aimar <fenrir@via.ecp.fr> + * Henrik Gramner <henrik@gramner.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -32,19 +33,21 @@ #include <stdio.h> #include <sys/stat.h> #include <inttypes.h> +#include <stdarg.h> #include "config.h" +#ifdef __INTEL_COMPILER +#include <mathimf.h> +#else +#include <math.h> +#endif + #if !HAVE_LOG2F #define log2f(x) (logf(x)/0.693147180559945f) #define log2(x) (log(x)/0.693147180559945) #endif -#ifdef _WIN32 -#include <io.h> // _setmode() -#include <fcntl.h> // _O_BINARY -#endif - #ifdef __ICL #define inline __inline #define strcasecmp _stricmp @@ -54,12 +57,6 @@ #define S_ISREG(x) (((x) & S_IFMT) == S_IFREG) #endif -#ifdef __INTEL_COMPILER -#include <mathimf.h> -#else -#include <math.h> -#endif - #if (defined(__GNUC__) || defined(__INTEL_COMPILER)) && (ARCH_X86 || ARCH_X86_64) #define HAVE_X86_INLINE_ASM 1 #endif @@ -67,11 +64,29 @@ #if !defined(isfinite) && (SYS_OPENBSD || SYS_SunOS) #define isfinite finite #endif + #ifdef _WIN32 -#define rename(src,dst) (unlink(dst), rename(src,dst)) // POSIX says that rename() removes the destination, but win32 doesn't. 
#ifndef strtok_r #define strtok_r(str,delim,save) strtok(str,delim) #endif + +#define utf8_to_utf16( utf8, utf16 )\ + MultiByteToWideChar( CP_UTF8, MB_ERR_INVALID_CHARS, utf8, -1, utf16, sizeof(utf16)/sizeof(wchar_t) ) +FILE *x264_fopen( const char *filename, const char *mode ); +int x264_rename( const char *oldname, const char *newname ); +#define x264_struct_stat struct _stati64 +#define x264_fstat _fstati64 +int x264_stat( const char *path, x264_struct_stat *buf ); +int x264_vfprintf( FILE *stream, const char *format, va_list arg ); +int x264_is_pipe( const char *path ); +#else +#define x264_fopen fopen +#define x264_rename rename +#define x264_struct_stat struct stat +#define x264_fstat fstat +#define x264_stat stat +#define x264_vfprintf vfprintf +#define x264_is_pipe(x) 0 #endif #ifdef __ICL @@ -111,7 +126,7 @@ #define EXPAND(x) x -#if HAVE_32B_STACK_ALIGNMENT +#if STACK_ALIGNMENT >= 32 #define ALIGNED_ARRAY_32( type, name, sub1, ... )\ ALIGNED_32( type name sub1 __VA_ARGS__ ) #else @@ -364,19 +379,19 @@ #define x264_lower_thread_priority(p) #endif -static inline uint8_t x264_is_regular_file( FILE *filehandle ) +static inline int x264_is_regular_file( FILE *filehandle ) { - struct stat file_stat; - if( fstat( fileno( filehandle ), &file_stat ) ) - return -1; + x264_struct_stat file_stat; + if( x264_fstat( fileno( filehandle ), &file_stat ) ) + return 1; return S_ISREG( file_stat.st_mode ); } -static inline uint8_t x264_is_regular_file_path( const char *filename ) +static inline int x264_is_regular_file_path( const char *filename ) { - struct stat file_stat; - if( stat( filename, &file_stat ) ) - return -1; + x264_struct_stat file_stat; + if( x264_stat( filename, &file_stat ) ) + return !x264_is_pipe( filename ); return S_ISREG( file_stat.st_mode ); }
View file
x264-snapshot-20130723-2245.tar.bz2/common/pixel.c -> x264-snapshot-20140321-2245.tar.bz2/common/pixel.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * pixel.c: pixel metrics ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Laurent Aimar <fenrir@via.ecp.fr> @@ -36,6 +36,7 @@ #endif #if ARCH_ARM # include "arm/pixel.h" +# include "arm/predict.h" #endif #if ARCH_UltraSPARC # include "sparc/pixel.h" @@ -532,6 +533,10 @@ INTRA_MBCMP_8x8( sad, _mmx2, _c ) INTRA_MBCMP_8x8(sa8d, _sse2, _sse2 ) #endif +#if !HIGH_BIT_DEPTH && HAVE_ARMV6 +INTRA_MBCMP_8x8( sad, _neon, _neon ) +INTRA_MBCMP_8x8(sa8d, _neon, _neon ) +#endif #define INTRA_MBCMP( mbcmp, size, pred1, pred2, pred3, chroma, cpu, cpu2 )\ void x264_intra_##mbcmp##_x3_##size##chroma##cpu( pixel *fenc, pixel *fdec, int res[3] )\ @@ -555,16 +560,26 @@ #if HAVE_MMX #if HIGH_BIT_DEPTH +#define x264_predict_8x8c_v_mmx2 x264_predict_8x8c_v_mmx +#define x264_predict_8x16c_v_mmx2 x264_predict_8x16c_v_c #define x264_predict_8x8c_v_sse2 x264_predict_8x8c_v_sse #define x264_predict_8x16c_v_sse2 x264_predict_8x16c_v_sse #define x264_predict_16x16_v_sse2 x264_predict_16x16_v_sse INTRA_MBCMP( sad, 4x4, v, h, dc, , _mmx2, _c ) -INTRA_MBCMP( sad, 8x8, dc, h, v, c, _mmx2, _c ) +INTRA_MBCMP( sad, 8x8, dc, h, v, c, _mmx2, _mmx2 ) +INTRA_MBCMP( sad, 8x16, dc, h, v, c, _mmx2, _mmx2 ) +INTRA_MBCMP(satd, 8x16, dc, h, v, c, _mmx2, _mmx2 ) INTRA_MBCMP( sad, 16x16, v, h, dc, , _mmx2, _mmx2 ) INTRA_MBCMP( sad, 8x8, dc, h, v, c, _sse2, _sse2 ) +INTRA_MBCMP( sad, 8x16, dc, h, v, c, _sse2, _sse2 ) +INTRA_MBCMP(satd, 8x16, dc, h, v, c, _sse2, _sse2 ) INTRA_MBCMP( sad, 16x16, v, h, dc, , _sse2, _sse2 ) INTRA_MBCMP( sad, 8x8, dc, h, v, c, _ssse3, _sse2 ) +INTRA_MBCMP( sad, 8x16, dc, h, v, c, _ssse3, _sse2 ) +INTRA_MBCMP(satd, 8x16, dc, h, v, c, _ssse3, _sse2 ) INTRA_MBCMP( sad, 16x16, v, h, dc, , _ssse3, _sse2 ) +INTRA_MBCMP(satd, 8x16, dc, h, 
v, c, _sse4, _sse2 ) +INTRA_MBCMP(satd, 8x16, dc, h, v, c, _avx, _sse2 ) #else #define x264_predict_8x16c_v_mmx2 x264_predict_8x16c_v_mmx INTRA_MBCMP( sad, 8x16, dc, h, v, c, _mmx2, _mmx2 ) @@ -577,6 +592,16 @@ INTRA_MBCMP(satd, 8x16, dc, h, v, c, _xop, _mmx2 ) #endif #endif +#if !HIGH_BIT_DEPTH && HAVE_ARMV6 +INTRA_MBCMP( sad, 4x4, v, h, dc, , _neon, _c ) +INTRA_MBCMP(satd, 4x4, v, h, dc, , _neon, _c ) +INTRA_MBCMP( sad, 8x8, dc, h, v, c, _neon, _neon ) +INTRA_MBCMP(satd, 8x8, dc, h, v, c, _neon, _neon ) +INTRA_MBCMP( sad, 8x16, dc, h, v, c, _neon, _c ) +INTRA_MBCMP(satd, 8x16, dc, h, v, c, _neon, _c ) +INTRA_MBCMP( sad, 16x16, v, h, dc, , _neon, _neon ) +INTRA_MBCMP(satd, 16x16, v, h, dc, , _neon, _neon ) +#endif // No C implementation of intra_satd_x9. See checkasm for its behavior, // or see x264_mb_analyse_intra for the entirely different algorithm we @@ -868,6 +893,8 @@ pixf->intra_sad_x3_8x8 = x264_intra_sad_x3_8x8_mmx2; pixf->intra_sad_x3_8x8c = x264_intra_sad_x3_8x8c_mmx2; pixf->intra_satd_x3_8x8c = x264_intra_satd_x3_8x8c_mmx2; + pixf->intra_sad_x3_8x16c = x264_intra_sad_x3_8x16c_mmx2; + pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_mmx2; pixf->intra_sad_x3_16x16 = x264_intra_sad_x3_16x16_mmx2; pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_mmx2; } @@ -909,6 +936,8 @@ pixf->asd8 = x264_pixel_asd8_sse2; pixf->intra_sad_x3_8x8 = x264_intra_sad_x3_8x8_sse2; pixf->intra_sad_x3_8x8c = x264_intra_sad_x3_8x8c_sse2; + pixf->intra_sad_x3_8x16c = x264_intra_sad_x3_8x16c_sse2; + pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_sse2; pixf->intra_sad_x3_16x16 = x264_intra_sad_x3_16x16_sse2; } if( cpu&X264_CPU_SSE2_IS_FAST ) @@ -948,6 +977,8 @@ pixf->intra_sad_x3_4x4 = x264_intra_sad_x3_4x4_ssse3; pixf->intra_sad_x3_8x8 = x264_intra_sad_x3_8x8_ssse3; pixf->intra_sad_x3_8x8c = x264_intra_sad_x3_8x8c_ssse3; + pixf->intra_sad_x3_8x16c = x264_intra_sad_x3_8x16c_ssse3; + pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_ssse3; pixf->intra_sad_x3_16x16 = 
x264_intra_sad_x3_16x16_ssse3; } if( cpu&X264_CPU_SSE4 ) @@ -963,6 +994,7 @@ #if ARCH_X86_64 pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_sse4; #endif + pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_sse4; } if( cpu&X264_CPU_AVX ) { @@ -985,6 +1017,7 @@ #if ARCH_X86_64 pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_avx; #endif + pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_avx; } if( cpu&X264_CPU_XOP ) { @@ -1119,12 +1152,6 @@ pixf->sad_x4[PIXEL_8x16] = x264_pixel_sad_x4_8x16_cache64_sse2; } } - - if( cpu&X264_CPU_SSE_MISALIGN ) - { - INIT2( sad_x3, _sse2_misalign ); - INIT2( sad_x4, _sse2_misalign ); - } } if( cpu&X264_CPU_SSE2_IS_FAST && !(cpu&X264_CPU_CACHELINE_64) ) @@ -1201,9 +1228,8 @@ } else { - pixf->sad_x4[PIXEL_8x4] = x264_pixel_sad_x4_8x4_ssse3; - pixf->sad_x4[PIXEL_8x8] = x264_pixel_sad_x4_8x8_ssse3; - pixf->sad_x4[PIXEL_8x16] = x264_pixel_sad_x4_8x16_ssse3; + INIT2( sad_x3, _ssse3 ); + INIT5( sad_x4, _ssse3 ); } if( (cpu&X264_CPU_SLOW_ATOM) || (cpu&X264_CPU_SLOW_SHUFFLE) ) { @@ -1237,6 +1263,8 @@ if( cpu&X264_CPU_AVX ) { INIT2_NAME( sad_aligned, sad, _sse2 ); /* AVX-capable CPUs doesn't benefit from an aligned version */ + INIT2( sad_x3, _avx ); + INIT2( sad_x4, _avx ); INIT8( satd, _avx ); INIT7( satd_x3, _avx ); INIT7( satd_x4, _avx ); @@ -1334,8 +1362,21 @@ pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_neon; pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_neon; pixf->var[PIXEL_8x8] = x264_pixel_var_8x8_neon; + pixf->var[PIXEL_8x16] = x264_pixel_var_8x16_neon; pixf->var[PIXEL_16x16] = x264_pixel_var_16x16_neon; pixf->var2[PIXEL_8x8] = x264_pixel_var2_8x8_neon; + pixf->var2[PIXEL_8x16] = x264_pixel_var2_8x16_neon; + + pixf->intra_sad_x3_4x4 = x264_intra_sad_x3_4x4_neon; + pixf->intra_satd_x3_4x4 = x264_intra_satd_x3_4x4_neon; + pixf->intra_sad_x3_8x8 = x264_intra_sad_x3_8x8_neon; + pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_neon; + pixf->intra_sad_x3_8x8c = x264_intra_sad_x3_8x8c_neon; + 
pixf->intra_satd_x3_8x8c = x264_intra_satd_x3_8x8c_neon; + pixf->intra_sad_x3_8x16c = x264_intra_sad_x3_8x16c_neon; + pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_neon; + pixf->intra_sad_x3_16x16 = x264_intra_sad_x3_16x16_neon; + pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_neon; pixf->ssim_4x4x2_core = x264_pixel_ssim_4x4x2_core_neon; pixf->ssim_end4 = x264_pixel_ssim_end4_neon;
View file
x264-snapshot-20130723-2245.tar.bz2/common/pixel.h -> x264-snapshot-20140321-2245.tar.bz2/common/pixel.h
Changed
@@ -1,11 +1,11 @@ /***************************************************************************** * pixel.c: pixel metrics ***************************************************************************** - * Copyright (C) 2004-2013 x264 project + * Copyright (C) 2004-2014 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Jason Garrett-Glaser <darkshikari@gmail.com> - Henrik Gramner <hengar-6@student.ltu.se> + Henrik Gramner <henrik@gramner.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by
View file
x264-snapshot-20130723-2245.tar.bz2/common/ppc/dct.c -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/dct.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * dct.c: ppc transform and zigzag ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Guillaume Poirier <gpoirier@mplayerhq.hu> * Eric Petit <eric.petit@lapsus.org>
View file
x264-snapshot-20130723-2245.tar.bz2/common/ppc/dct.h -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/dct.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * dct.h: ppc transform and zigzag ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Eric Petit <eric.petit@lapsus.org> * Guillaume Poirier <gpoirier@mplayerhq.hu>
View file
x264-snapshot-20130723-2245.tar.bz2/common/ppc/deblock.c -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/deblock.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * deblock.c: ppc deblocking ***************************************************************************** - * Copyright (C) 2007-2013 x264 project + * Copyright (C) 2007-2014 x264 project * * Authors: Guillaume Poirier <gpoirier@mplayerhq.hu> *
View file
x264-snapshot-20130723-2245.tar.bz2/common/ppc/mc.c -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/mc.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * mc.c: ppc motion compensation ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Eric Petit <eric.petit@lapsus.org> * Guillaume Poirier <gpoirier@mplayerhq.hu>
View file
x264-snapshot-20130723-2245.tar.bz2/common/ppc/mc.h -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/mc.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * mc.h: ppc motion compensation ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Eric Petit <eric.petit@lapsus.org> *
View file
x264-snapshot-20130723-2245.tar.bz2/common/ppc/pixel.c -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/pixel.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * pixel.c: ppc pixel metrics ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Eric Petit <eric.petit@lapsus.org> * Guillaume Poirier <gpoirier@mplayerhq.hu>
View file
x264-snapshot-20130723-2245.tar.bz2/common/ppc/pixel.h -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/pixel.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * pixel.h: ppc pixel metrics ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Eric Petit <eric.petit@lapsus.org> *
View file
x264-snapshot-20130723-2245.tar.bz2/common/ppc/ppccommon.h -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/ppccommon.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * ppccommon.h: ppc utility macros ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Eric Petit <eric.petit@lapsus.org> *
View file
x264-snapshot-20130723-2245.tar.bz2/common/ppc/predict.c -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/predict.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * predict.c: ppc intra prediction ***************************************************************************** - * Copyright (C) 2007-2013 x264 project + * Copyright (C) 2007-2014 x264 project * * Authors: Guillaume Poirier <gpoirier@mplayerhq.hu> *
View file
x264-snapshot-20130723-2245.tar.bz2/common/ppc/predict.h -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/predict.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * predict.h: ppc intra prediction ***************************************************************************** - * Copyright (C) 2007-2013 x264 project + * Copyright (C) 2007-2014 x264 project * * Authors: Guillaume Poirier <gpoirier@mplayerhq.hu> *
View file
x264-snapshot-20130723-2245.tar.bz2/common/ppc/quant.c -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/quant.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * quant.c: ppc quantization ***************************************************************************** - * Copyright (C) 2007-2013 x264 project + * Copyright (C) 2007-2014 x264 project * * Authors: Guillaume Poirier <gpoirier@mplayerhq.hu> *
View file
x264-snapshot-20130723-2245.tar.bz2/common/ppc/quant.h -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/quant.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * quant.c: ppc quantization ***************************************************************************** - * Copyright (C) 2007-2013 x264 project + * Copyright (C) 2007-2014 x264 project * * Authors: Guillaume Poirier <gpoirier@mplayerhq.hu> *
View file
x264-snapshot-20130723-2245.tar.bz2/common/predict.c -> x264-snapshot-20140321-2245.tar.bz2/common/predict.c
Changed
@@ -1,12 +1,12 @@ /***************************************************************************** * predict.c: intra prediction ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu> * Jason Garrett-Glaser <darkshikari@gmail.com> - * Henrik Gramner <hengar-6@student.ltu.se> + * Henrik Gramner <henrik@gramner.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by
View file
x264-snapshot-20130723-2245.tar.bz2/common/predict.h -> x264-snapshot-20140321-2245.tar.bz2/common/predict.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * predict.h: intra prediction ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Laurent Aimar <fenrir@via.ecp.fr>
View file
x264-snapshot-20130723-2245.tar.bz2/common/quant.c -> x264-snapshot-20140321-2245.tar.bz2/common/quant.c
Changed
@@ -1,12 +1,12 @@ /***************************************************************************** * quant.c: quantization and level-run ***************************************************************************** - * Copyright (C) 2005-2013 x264 project + * Copyright (C) 2005-2014 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Jason Garrett-Glaser <darkshikari@gmail.com> * Christian Heine <sennindemokrit@gmx.net> - * Henrik Gramner <hengar-6@student.ltu.se> + * Henrik Gramner <henrik@gramner.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by
View file
x264-snapshot-20130723-2245.tar.bz2/common/quant.h -> x264-snapshot-20140321-2245.tar.bz2/common/quant.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * quant.h: quantization and level-run ***************************************************************************** - * Copyright (C) 2005-2013 x264 project + * Copyright (C) 2005-2014 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Jason Garrett-Glaser <darkshikari@gmail.com>
View file
x264-snapshot-20130723-2245.tar.bz2/common/rectangle.c -> x264-snapshot-20140321-2245.tar.bz2/common/rectangle.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * rectangle.c: rectangle filling ***************************************************************************** - * Copyright (C) 2010-2013 x264 project + * Copyright (C) 2010-2014 x264 project * * Authors: Jason Garrett-Glaser <darkshikari@gmail.com> *
View file
x264-snapshot-20130723-2245.tar.bz2/common/rectangle.h -> x264-snapshot-20140321-2245.tar.bz2/common/rectangle.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * rectangle.h: rectangle filling ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Jason Garrett-Glaser <darkshikari@gmail.com> * Loren Merritt <lorenm@u.washington.edu>
View file
x264-snapshot-20130723-2245.tar.bz2/common/set.c -> x264-snapshot-20140321-2245.tar.bz2/common/set.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * set.c: quantization init ***************************************************************************** - * Copyright (C) 2005-2013 x264 project + * Copyright (C) 2005-2014 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * @@ -105,9 +105,9 @@ }\ else\ {\ - CHECKED_MALLOC( h-> quant##w##_mf[i], (QP_MAX+1)*size*sizeof(udctcoef) );\ + CHECKED_MALLOC( h-> quant##w##_mf[i], (QP_MAX_SPEC+1)*size*sizeof(udctcoef) );\ CHECKED_MALLOC( h->dequant##w##_mf[i], 6*size*sizeof(int) );\ - CHECKED_MALLOC( h->unquant##w##_mf[i], (QP_MAX+1)*size*sizeof(int) );\ + CHECKED_MALLOC( h->unquant##w##_mf[i], (QP_MAX_SPEC+1)*size*sizeof(int) );\ }\ for( j = 0; j < i; j++ )\ if( deadzone[j] == deadzone[i] &&\ @@ -120,8 +120,8 @@ }\ else\ {\ - CHECKED_MALLOC( h->quant##w##_bias[i], (QP_MAX+1)*size*sizeof(udctcoef) );\ - CHECKED_MALLOC( h->quant##w##_bias0[i], (QP_MAX+1)*size*sizeof(udctcoef) );\ + CHECKED_MALLOC( h->quant##w##_bias[i], (QP_MAX_SPEC+1)*size*sizeof(udctcoef) );\ + CHECKED_MALLOC( h->quant##w##_bias0[i], (QP_MAX_SPEC+1)*size*sizeof(udctcoef) );\ }\ } @@ -159,7 +159,7 @@ quant8_mf[i_list][q][i] = DIV(def_quant8[q][i] * 16, h->pps->scaling_list[4+i_list][i]); } } - for( int q = 0; q < QP_MAX+1; q++ ) + for( int q = 0; q <= QP_MAX_SPEC; q++ ) { int j; for( int i_list = 0; i_list < 4; i_list++ )
View file
x264-snapshot-20130723-2245.tar.bz2/common/set.h -> x264-snapshot-20140321-2245.tar.bz2/common/set.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * set.h: quantization init ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Laurent Aimar <fenrir@via.ecp.fr> @@ -248,6 +248,98 @@ x264_cqm_jvt8i, x264_cqm_jvt8p }; +// 1080i25_avci50, 1080p25_avci50 +static const uint8_t x264_cqm_avci50_4ic[16] = +{ + 16,22,28,40, + 22,28,40,44, + 28,40,44,48, + 40,44,48,60 +}; + +// 1080i25_avci50, +static const uint8_t x264_cqm_avci50_1080i_8iy[64] = +{ + 16,18,19,21,27,33,81,87, + 18,19,21,24,30,33,81,87, + 19,21,24,27,30,78,84,90, + 21,24,27,30,33,78,84,90, + 24,27,30,33,78,81,84,90, + 24,27,30,33,78,81,84,93, + 27,30,33,78,78,81,87,93, + 30,33,33,78,81,84,87,96 +}; + +// 1080p25_avci50, 720p25_avci50, 720p50_avci50 +static const uint8_t x264_cqm_avci50_p_8iy[64] = +{ + 16,18,19,21,24,27,30,33, + 18,19,21,24,27,30,33,78, + 19,21,24,27,30,33,78,81, + 21,24,27,30,33,78,81,84, + 24,27,30,33,78,81,84,87, + 27,30,33,78,81,84,87,90, + 30,33,78,81,84,87,90,93, + 33,78,81,84,87,90,93,96 +}; + +// 1080i25_avci100, 1080p25_avci100 +static const uint8_t x264_cqm_avci100_1080_4ic[16] = +{ + 16,20,26,32, + 20,26,32,38, + 26,32,38,44, + 32,38,44,50 +}; + +// 720p25_avci100, 720p50_avci100 +static const uint8_t x264_cqm_avci100_720p_4ic[16] = +{ + 16,21,27,34, + 21,27,34,41, + 27,34,41,46, + 34,41,46,54 +}; + +// 1080i25_avci100, +static const uint8_t x264_cqm_avci100_1080i_8iy[64] = +{ + 16,19,20,23,24,26,32,42, + 18,19,22,24,26,32,36,42, + 18,20,23,24,26,32,36,63, + 19,20,23,26,32,36,42,63, + 20,22,24,26,32,36,59,63, + 22,23,24,26,32,36,59,68, + 22,23,24,26,32,42,59,68, + 22,23,24,26,36,42,59,72 +}; + +// 1080p25_avci100, +static const uint8_t x264_cqm_avci100_1080p_8iy[64] = +{ + 16,18,19,20,22,23,24,26, + 18,19,20,22,23,24,26,32, + 19,20,22,23,24,26,32,36, + 
20,22,23,24,26,32,36,42, + 22,23,24,26,32,36,42,59, + 23,24,26,32,36,42,59,63, + 24,26,32,36,42,59,63,68, + 26,32,36,42,59,63,68,72 +}; + +// 720p25_avci100, 720p50_avci100 +static const uint8_t x264_cqm_avci100_720p_8iy[64] = +{ + 16,18,19,21,22,24,26,32, + 18,19,19,21,22,24,26,32, + 19,19,21,22,22,24,26,32, + 21,21,22,22,23,24,26,34, + 22,22,22,23,24,25,26,34, + 24,24,24,24,25,26,34,36, + 26,26,26,26,26,34,36,38, + 32,32,32,34,34,36,38,42 +}; + int x264_cqm_init( x264_t *h ); void x264_cqm_delete( x264_t *h ); int x264_cqm_parse_file( x264_t *h, const char *filename );
View file
x264-snapshot-20130723-2245.tar.bz2/common/sparc/pixel.asm -> x264-snapshot-20140321-2245.tar.bz2/common/sparc/pixel.asm
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * pixel.asm: sparc pixel metrics ***************************************************************************** - * Copyright (C) 2005-2013 x264 project + * Copyright (C) 2005-2014 x264 project * * Authors: Phil Jensen <philj@csufresno.edu> *
View file
x264-snapshot-20130723-2245.tar.bz2/common/sparc/pixel.h -> x264-snapshot-20140321-2245.tar.bz2/common/sparc/pixel.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * pixel.h: sparc pixel metrics ***************************************************************************** - * Copyright (C) 2005-2013 x264 project + * Copyright (C) 2005-2014 x264 project * * Authors: Phil Jensen <philj@csufresno.edu> *
View file
x264-snapshot-20130723-2245.tar.bz2/common/threadpool.c -> x264-snapshot-20140321-2245.tar.bz2/common/threadpool.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * threadpool.c: thread pooling ***************************************************************************** - * Copyright (C) 2010-2013 x264 project + * Copyright (C) 2010-2014 x264 project * * Authors: Steven Walters <kemuri9@gmail.com> *
View file
x264-snapshot-20130723-2245.tar.bz2/common/threadpool.h -> x264-snapshot-20140321-2245.tar.bz2/common/threadpool.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * threadpool.h: thread pooling ***************************************************************************** - * Copyright (C) 2010-2013 x264 project + * Copyright (C) 2010-2014 x264 project * * Authors: Steven Walters <kemuri9@gmail.com> *
View file
x264-snapshot-20130723-2245.tar.bz2/common/vlc.c -> x264-snapshot-20140321-2245.tar.bz2/common/vlc.c
Changed
@@ -1,11 +1,11 @@ /***************************************************************************** * vlc.c : vlc tables ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Jason Garrett-Glaser <darkshikari@gmail.com> - * Henrik Gramner <hengar-6@student.ltu.se> + * Henrik Gramner <henrik@gramner.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by
View file
x264-snapshot-20130723-2245.tar.bz2/common/win32thread.c -> x264-snapshot-20140321-2245.tar.bz2/common/win32thread.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * win32thread.c: windows threading ***************************************************************************** - * Copyright (C) 2010-2013 x264 project + * Copyright (C) 2010-2014 x264 project * * Authors: Steven Walters <kemuri9@gmail.com> * Pegasys Inc. <http://www.pegasys-inc.com> @@ -261,7 +261,7 @@ int x264_win32_threading_init( void ) { /* find function pointers to API functions, if they exist */ - HANDLE kernel_dll = GetModuleHandle( TEXT( "kernel32.dll" ) ); + HANDLE kernel_dll = GetModuleHandleW( L"kernel32.dll" ); thread_control.cond_init = (void*)GetProcAddress( kernel_dll, "InitializeConditionVariable" ); if( thread_control.cond_init ) { @@ -288,7 +288,7 @@ * On platforms that support processor grouping, use GetThreadGroupAffinity to get the current thread's affinity instead. */ #if ARCH_X86_64 /* find function pointers to API functions specific to x86_64 platforms, if they exist */ - HANDLE kernel_dll = GetModuleHandle( TEXT( "kernel32.dll" ) ); + HANDLE kernel_dll = GetModuleHandleW( L"kernel32.dll" ); BOOL (*get_thread_affinity)( HANDLE thread, x264_group_affinity_t *group_affinity ) = (void*)GetProcAddress( kernel_dll, "GetThreadGroupAffinity" ); if( get_thread_affinity ) {
View file
x264-snapshot-20130723-2245.tar.bz2/common/win32thread.h -> x264-snapshot-20140321-2245.tar.bz2/common/win32thread.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * win32thread.h: windows threading ***************************************************************************** - * Copyright (C) 2010-2013 x264 project + * Copyright (C) 2010-2014 x264 project * * Authors: Steven Walters <kemuri9@gmail.com> * @@ -26,7 +26,6 @@ #ifndef X264_WIN32THREAD_H #define X264_WIN32THREAD_H -#define WIN32_LEAN_AND_MEAN #include <windows.h> /* the following macro is used within x264 */ #undef ERROR
View file
x264-snapshot-20130723-2245.tar.bz2/common/x86/bitstream-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/bitstream-a.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* bitstream-a.asm: x86 bitstream functions ;***************************************************************************** -;* Copyright (C) 2010-2013 x264 project +;* Copyright (C) 2010-2014 x264 project ;* ;* Authors: Jason Garrett-Glaser <darkshikari@gmail.com> ;* Henrik Gramner <henrik@gramner.com>
View file
x264-snapshot-20130723-2245.tar.bz2/common/x86/cabac-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/cabac-a.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* cabac-a.asm: x86 cabac ;***************************************************************************** -;* Copyright (C) 2008-2013 x264 project +;* Copyright (C) 2008-2014 x264 project ;* ;* Authors: Loren Merritt <lorenm@u.washington.edu> ;* Jason Garrett-Glaser <darkshikari@gmail.com>
View file
x264-snapshot-20130723-2245.tar.bz2/common/x86/const-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/const-a.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* const-a.asm: x86 global constants ;***************************************************************************** -;* Copyright (C) 2010-2013 x264 project +;* Copyright (C) 2010-2014 x264 project ;* ;* Authors: Loren Merritt <lorenm@u.washington.edu> ;* Jason Garrett-Glaser <darkshikari@gmail.com> @@ -36,6 +36,7 @@ const pw_512, times 16 dw 512 const pw_00ff, times 16 dw 0x00ff const pw_pixel_max,times 16 dw ((1 << BIT_DEPTH)-1) +const pw_0to15, dw 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 const pd_1, times 8 dd 1 const deinterleave_shufd, dd 0,4,1,5,2,6,3,7 const pb_unpackbd1, times 2 db 0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3
View file
x264-snapshot-20130723-2245.tar.bz2/common/x86/cpu-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/cpu-a.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* cpu-a.asm: x86 cpu utilities ;***************************************************************************** -;* Copyright (C) 2003-2013 x264 project +;* Copyright (C) 2003-2014 x264 project ;* ;* Authors: Laurent Aimar <fenrir@via.ecp.fr> ;* Loren Merritt <lorenm@u.washington.edu> @@ -146,17 +146,6 @@ sfence ret -;----------------------------------------------------------------------------- -; void cpu_mask_misalign_sse( void ) -;----------------------------------------------------------------------------- -cglobal cpu_mask_misalign_sse - sub rsp, 4 - stmxcsr [rsp] - or dword [rsp], 1<<17 - ldmxcsr [rsp] - add rsp, 4 - ret - cextern intel_cpu_indicator_init ;-----------------------------------------------------------------------------
View file
x264-snapshot-20130723-2245.tar.bz2/common/x86/dct-32.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/dct-32.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* dct-32.asm: x86_32 transform and zigzag ;***************************************************************************** -;* Copyright (C) 2003-2013 x264 project +;* Copyright (C) 2003-2014 x264 project ;* ;* Authors: Loren Merritt <lorenm@u.washington.edu> ;* Holger Lubitz <holger@lubitz.org>
View file
x264-snapshot-20130723-2245.tar.bz2/common/x86/dct-64.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/dct-64.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* dct-64.asm: x86_64 transform and zigzag ;***************************************************************************** -;* Copyright (C) 2003-2013 x264 project +;* Copyright (C) 2003-2014 x264 project ;* ;* Authors: Loren Merritt <lorenm@u.washington.edu> ;* Holger Lubitz <holger@lubitz.org>
View file
x264-snapshot-20130723-2245.tar.bz2/common/x86/dct-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/dct-a.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* dct-a.asm: x86 transform and zigzag ;***************************************************************************** -;* Copyright (C) 2003-2013 x264 project +;* Copyright (C) 2003-2014 x264 project ;* ;* Authors: Holger Lubitz <holger@lubitz.org> ;* Loren Merritt <lorenm@u.washington.edu> @@ -675,7 +675,7 @@ mova m6, [pw_pixel_max] mova m7, [pd_32] pxor m5, m5 -.loop +.loop: mova m3, [r1] paddd m3, m7 psrad m3, 6 ; dc0 0 dc1 0 dc2 0 dc3 0
View file
x264-snapshot-20130723-2245.tar.bz2/common/x86/dct.h -> x264-snapshot-20140321-2245.tar.bz2/common/x86/dct.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * dct.h: x86 transform and zigzag ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Laurent Aimar <fenrir@via.ecp.fr>
View file
x264-snapshot-20130723-2245.tar.bz2/common/x86/deblock-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/deblock-a.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* deblock-a.asm: x86 deblocking ;***************************************************************************** -;* Copyright (C) 2005-2013 x264 project +;* Copyright (C) 2005-2014 x264 project ;* ;* Authors: Loren Merritt <lorenm@u.washington.edu> ;* Jason Garrett-Glaser <darkshikari@gmail.com> @@ -621,7 +621,7 @@ mov r6, 2 mova m0, [pw_2] LOAD_AB aa, bb, r2d, r3d -.loop +.loop: mova p2, [r4+r1] mova p1, [r4+2*r1] mova p0, [r4+r5] @@ -671,7 +671,7 @@ add r4, r0 ; pix+4*stride mov r6, 2 mova m0, [pw_2] -.loop +.loop: movu q3, [r0-8] movu q2, [r0+r1-8] movu q1, [r0+r1*2-8] @@ -804,35 +804,6 @@ %define PASS8ROWS(base, base3, stride, stride3, offset) \ PASS8ROWS(base+offset, base3+offset, stride, stride3) -; in: 8 rows of 4 bytes in %4..%11 -; out: 4 rows of 8 bytes in m0..m3 -%macro TRANSPOSE4x8_LOAD 11 - movh m0, %4 - movh m2, %5 - movh m1, %6 - movh m3, %7 - punpckl%1 m0, m2 - punpckl%1 m1, m3 - mova m2, m0 - punpckl%2 m0, m1 - punpckh%2 m2, m1 - - movh m4, %8 - movh m6, %9 - movh m5, %10 - movh m7, %11 - punpckl%1 m4, m6 - punpckl%1 m5, m7 - mova m6, m4 - punpckl%2 m4, m5 - punpckh%2 m6, m5 - - punpckh%3 m1, m0, m4 - punpckh%3 m3, m2, m6 - punpckl%3 m0, m4 - punpckl%3 m2, m6 -%endmacro - ; in: 4 rows of 8 bytes in m0..m3 ; out: 8 rows of 4 bytes in %1..%8 %macro TRANSPOSE8x4B_STORE 8 @@ -844,24 +815,24 @@ punpcklbw m2, m3 punpcklwd m1, m0, m2 punpckhwd m0, m2 - movh %1, m1 + movd %1, m1 punpckhdq m1, m1 - movh %2, m1 - movh %3, m0 + movd %2, m1 + movd %3, m0 punpckhdq m0, m0 - movh %4, m0 + movd %4, m0 punpckhdq m3, m3 punpcklbw m4, m5 punpcklbw m6, m3 punpcklwd m5, m4, m6 punpckhwd m4, m6 - movh %5, m5 + movd %5, m5 punpckhdq m5, m5 - movh %6, m5 - movh %7, m4 + movd %6, m5 + movd %7, m4 punpckhdq m4, m4 - movh %8, m4 + movd %8, m4 %endmacro ; in: 8 rows of 4 bytes in %9..%10 @@ -877,34 +848,94 @@ pextrd %8, %10, 3 %endmacro -%macro TRANSPOSE4x8B_LOAD 8 - 
TRANSPOSE4x8_LOAD bw, wd, dq, %1, %2, %3, %4, %5, %6, %7, %8 -%endmacro - -%macro TRANSPOSE4x8W_LOAD 8 -%if mmsize==16 - TRANSPOSE4x8_LOAD wd, dq, qdq, %1, %2, %3, %4, %5, %6, %7, %8 -%else +; in: 4 rows of 4 words in %1..%4 +; out: 4 rows of 4 word in m0..m3 +; clobbers: m4 +%macro TRANSPOSE4x4W_LOAD 4-8 +%if mmsize==8 SWAP 1, 4, 2, 3 - mova m0, [t5] - mova m1, [t5+r1] - mova m2, [t5+r1*2] - mova m3, [t5+t6] + movq m0, %1 + movq m1, %2 + movq m2, %3 + movq m3, %4 TRANSPOSE4x4W 0, 1, 2, 3, 4 +%else + movq m0, %1 + movq m2, %2 + movq m1, %3 + movq m3, %4 + punpcklwd m0, m2 + punpcklwd m1, m3 + mova m2, m0 + punpckldq m0, m1 + punpckhdq m2, m1 + movhlps m1, m0 + movhlps m3, m2 %endif %endmacro -%macro TRANSPOSE8x2W_STORE 8 +; in: 2 rows of 4 words in m1..m2 +; out: 4 rows of 2 words in %1..%4 +; clobbers: m0, m1 +%macro TRANSPOSE4x2W_STORE 4-8 +%if mmsize==8 punpckhwd m0, m1, m2 punpcklwd m1, m2 -%if mmsize==8 +%else + punpcklwd m1, m2 + movhlps m0, m1 +%endif movd %3, m0 movd %1, m1 psrlq m1, 32 psrlq m0, 32 movd %2, m1 movd %4, m0 +%endmacro + +; in: 4/8 rows of 4 words in %1..%8 +; out: 4 rows of 4/8 word in m0..m3 +; clobbers: m4, m5, m6, m7 +%macro TRANSPOSE4x8W_LOAD 8 +%if mmsize==8 + TRANSPOSE4x4W_LOAD %1, %2, %3, %4 +%else + movq m0, %1 + movq m2, %2 + movq m1, %3 + movq m3, %4 + punpcklwd m0, m2 + punpcklwd m1, m3 + mova m2, m0 + punpckldq m0, m1 + punpckhdq m2, m1 + + movq m4, %5 + movq m6, %6 + movq m5, %7 + movq m7, %8 + punpcklwd m4, m6 + punpcklwd m5, m7 + mova m6, m4 + punpckldq m4, m5 + punpckhdq m6, m5 + + punpckhqdq m1, m0, m4 + punpckhqdq m3, m2, m6 + punpcklqdq m0, m4 + punpcklqdq m2, m6 +%endif +%endmacro + +; in: 2 rows of 4/8 words in m1..m2 +; out: 4/8 rows of 2 words in %1..%8 +; clobbers: m0, m1 +%macro TRANSPOSE8x2W_STORE 8 +%if mmsize==8 + TRANSPOSE4x2W_STORE %1, %2, %3, %4 %else + punpckhwd m0, m1, m2 + punpcklwd m1, m2 movd %5, m0 movd %1, m1 psrldq m1, 4 @@ -1118,7 +1149,7 @@ %endif mova m6, [pb_1] psubusb m4, m6 ; alpha - 1 - psubusb 
m5, m6 ; alpha - 2 + psubusb m5, m6 ; beta - 1 %if %0>2 mova %3, m4 %endif @@ -1361,19 +1392,18 @@ ;----------------------------------------------------------------------------- ; void deblock_h_luma( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ) ;----------------------------------------------------------------------------- - %if cpuflag(avx) INIT_XMM cpuname %else INIT_MMX cpuname %endif -cglobal deblock_h_luma, 0,5,8,0x60+HAVE_ALIGNED_STACK*12 - mov r0, r0mp +cglobal deblock_h_luma, 1,5,8,0x60+12 mov r3, r1m lea r4, [r3*3] sub r0, 4 lea r1, [r0+r4] - %define pix_tmp esp+12*HAVE_ALIGNED_STACK + %define pix_tmp esp+12 + ; esp is intentionally misaligned to make it aligned after pushing the arguments for deblock_%1_luma. ; transpose 6x16 -> tmp space TRANSPOSE6x8_MEM PASS8ROWS(r0, r1, r3, r4), pix_tmp @@ -2098,17 +2128,14 @@ ;----------------------------------------------------------------------------- %macro DEBLOCK_H_CHROMA_420_MBAFF 0 cglobal deblock_h_chroma_mbaff, 5,7,8 - sub r0, 4 - lea t6, [r1*3] - mov t5, r0 - add r0, t6 - TRANSPOSE4x8W_LOAD PASS8ROWS(t5, r0, r1, t6) + CHROMA_H_START + TRANSPOSE4x4W_LOAD PASS8ROWS(t5, r0, r1, t6) LOAD_MASK r2d, r3d movd m6, [r4] ; tc0 punpcklbw m6, m6 pand m7, m6 DEBLOCK_P0_Q0 - TRANSPOSE8x2W_STORE PASS8ROWS(t5, r0, r1, t6, 2) + TRANSPOSE4x2W_STORE PASS8ROWS(t5, r0, r1, t6, 2) RET %endmacro @@ -2249,9 +2276,9 @@ INIT_MMX mmx2 cglobal deblock_h_chroma_intra_mbaff, 4,6,8 CHROMA_H_START - TRANSPOSE4x8W_LOAD PASS8ROWS(t5, r0, r1, t6) + TRANSPOSE4x4W_LOAD PASS8ROWS(t5, r0, r1, t6) call chroma_intra_body - TRANSPOSE8x2W_STORE PASS8ROWS(t5, r0, r1, t6, 2) + TRANSPOSE4x2W_STORE PASS8ROWS(t5, r0, r1, t6, 2) RET %endif ; !HIGH_BIT_DEPTH
View file
x264-snapshot-20130723-2245.tar.bz2/common/x86/mc-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/mc-a.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* mc-a.asm: x86 motion compensation ;***************************************************************************** -;* Copyright (C) 2003-2013 x264 project +;* Copyright (C) 2003-2014 x264 project ;* ;* Authors: Loren Merritt <lorenm@u.washington.edu> ;* Jason Garrett-Glaser <darkshikari@gmail.com> @@ -1029,59 +1029,48 @@ jg .height_loop RET +INIT_XMM cglobal pixel_avg2_w16_sse2, 6,7 sub r4, r2 lea r6, [r4+r3] .height_loop: - movdqu xmm0, [r2] - movdqu xmm2, [r2+r3] - movdqu xmm1, [r2+r4] - movdqu xmm3, [r2+r6] + movu m0, [r2] + movu m2, [r2+r3] + movu m1, [r2+r4] + movu m3, [r2+r6] lea r2, [r2+r3*2] - pavgb xmm0, xmm1 - pavgb xmm2, xmm3 - movdqa [r0], xmm0 - movdqa [r0+r1], xmm2 + pavgb m0, m1 + pavgb m2, m3 + mova [r0], m0 + mova [r0+r1], m2 lea r0, [r0+r1*2] - sub r5d, 2 - jg .height_loop + sub r5d, 2 + jg .height_loop RET -%macro AVG2_W20 1 -cglobal pixel_avg2_w20_%1, 6,7 +cglobal pixel_avg2_w20_sse2, 6,7 sub r2, r4 lea r6, [r2+r3] .height_loop: - movdqu xmm0, [r4] - movdqu xmm2, [r4+r3] -%ifidn %1, sse2_misalign - movd mm4, [r4+16] - movd mm5, [r4+r3+16] - pavgb xmm0, [r4+r2] - pavgb xmm2, [r4+r6] -%else - movdqu xmm1, [r4+r2] - movdqu xmm3, [r4+r6] - movd mm4, [r4+16] - movd mm5, [r4+r3+16] - pavgb xmm0, xmm1 - pavgb xmm2, xmm3 -%endif - pavgb mm4, [r4+r2+16] - pavgb mm5, [r4+r6+16] + movu m0, [r4] + movu m2, [r4+r3] + movu m1, [r4+r2] + movu m3, [r4+r6] + movd mm4, [r4+16] + movd mm5, [r4+r3+16] + pavgb m0, m1 + pavgb m2, m3 + pavgb mm4, [r4+r2+16] + pavgb mm5, [r4+r6+16] lea r4, [r4+r3*2] - movdqa [r0], xmm0 - movd [r0+16], mm4 - movdqa [r0+r1], xmm2 - movd [r0+r1+16], mm5 + mova [r0], m0 + mova [r0+r1], m2 + movd [r0+16], mm4 + movd [r0+r1+16], mm5 lea r0, [r0+r1*2] - sub r5d, 2 - jg .height_loop + sub r5d, 2 + jg .height_loop RET -%endmacro - -AVG2_W20 sse2 -AVG2_W20 sse2_misalign INIT_YMM avx2 cglobal pixel_avg2_w20, 6,7 @@ -1524,7 +1513,7 @@ %endmacro %else ; 
!HIGH_BIT_DEPTH %macro UNPACK_UNALIGNED 3 -%if mmsize == 8 || cpuflag(misalign) +%if mmsize == 8 punpcklwd %1, %3 %else movh %2, %3 @@ -2130,8 +2119,6 @@ %else ; !HIGH_BIT_DEPTH INIT_MMX mmx2 MC_CHROMA -INIT_XMM sse2, misalign -MC_CHROMA INIT_XMM sse2 MC_CHROMA INIT_XMM ssse3
View file
x264-snapshot-20130723-2245.tar.bz2/common/x86/mc-a2.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/mc-a2.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* mc-a2.asm: x86 motion compensation ;***************************************************************************** -;* Copyright (C) 2005-2013 x264 project +;* Copyright (C) 2005-2014 x264 project ;* ;* Authors: Loren Merritt <lorenm@u.washington.edu> ;* Jason Garrett-Glaser <darkshikari@gmail.com> @@ -32,6 +32,7 @@ SECTION_RODATA 32 +pw_1024: times 16 dw 1024 filt_mul20: times 32 db 20 filt_mul15: times 16 db 1, -5 filt_mul51: times 16 db -5, 1 @@ -39,17 +40,25 @@ deinterleave_shuf: times 2 db 0,2,4,6,8,10,12,14,1,3,5,7,9,11,13,15 %if HIGH_BIT_DEPTH +v210_mask: times 4 dq 0xc00ffc003ff003ff +v210_luma_shuf: times 2 db 1,2,4,5,6,7,9,10,12,13,14,15,12,13,14,15 +v210_chroma_shuf: times 2 db 0,1,2,3,5,6,8,9,10,11,13,14,10,11,13,14 +; vpermd indices {0,1,2,4,5,7,_,_} merged in the 3 lsb of each dword to save a register +v210_mult: dw 0x2000,0x7fff,0x0801,0x2000,0x7ffa,0x0800,0x7ffc,0x0800 + dw 0x1ffd,0x7fff,0x07ff,0x2000,0x7fff,0x0800,0x7fff,0x0800 + deinterleave_shuf32a: SHUFFLE_MASK_W 0,2,4,6,8,10,12,14 deinterleave_shuf32b: SHUFFLE_MASK_W 1,3,5,7,9,11,13,15 %else +deinterleave_rgb_shuf: db 0,3,6,9,1,4,7,10,2,5,8,11,-1,-1,-1,-1 + db 0,4,8,12,1,5,9,13,2,6,10,14,-1,-1,-1,-1 + deinterleave_shuf32a: db 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30 deinterleave_shuf32b: db 1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31 -%endif -pw_1024: times 16 dw 1024 +%endif ; !HIGH_BIT_DEPTH pd_16: times 4 dd 16 pd_0f: times 4 dd 0xffff -pf_inv256: times 8 dd 0.00390625 pad10: times 8 dw 10*PIXEL_MAX pad20: times 8 dw 20*PIXEL_MAX @@ -60,16 +69,22 @@ tap2: times 4 dw 20, 20 tap3: times 4 dw -5, 1 +pw_0xc000: times 8 dw 0xc000 +pw_31: times 8 dw 31 +pd_4: times 4 dd 4 + SECTION .text cextern pb_0 cextern pw_1 +cextern pw_8 cextern pw_16 cextern pw_32 cextern pw_512 cextern pw_00ff cextern pw_3fff cextern pw_pixel_max +cextern pw_0to15 cextern pd_ffff %macro LOAD_ADD 4 @@ -482,7 +497,7 @@ %define 
pw_rnd [pw_32] %endif ; This doesn't seem to be faster (with AVX) on Sandy Bridge or Bulldozer... -%if cpuflag(misalign) || mmsize==32 +%if mmsize==32 .loop: movu m4, [src-4] movu m5, [src-2] @@ -630,8 +645,6 @@ HPEL_V 0 INIT_XMM sse2 HPEL_V 8 -INIT_XMM sse2, misalign -HPEL_C %if ARCH_X86_64 == 0 INIT_XMM sse2 HPEL_C @@ -1197,6 +1210,163 @@ RET %endmacro ; PLANE_DEINTERLEAVE +%macro PLANE_DEINTERLEAVE_RGB_CORE 9 ; pw, i_dsta, i_dstb, i_dstc, i_src, w, h, tmp1, tmp2 +%if cpuflag(ssse3) + mova m3, [deinterleave_rgb_shuf+(%1-3)*16] +%endif +%%loopy: + mov %8, r6 + mov %9, %6 +%%loopx: + movu m0, [%8] + movu m1, [%8+%1*mmsize/4] +%if cpuflag(ssse3) + pshufb m0, m3 ; b0 b1 b2 b3 g0 g1 g2 g3 r0 r1 r2 r3 + pshufb m1, m3 ; b4 b5 b6 b7 g4 g5 g6 g7 r4 r5 r6 r7 +%elif %1 == 3 + psrldq m2, m0, 6 + punpcklqdq m0, m1 ; b0 g0 r0 b1 g1 r1 __ __ b4 g4 r4 b5 g5 r5 + psrldq m1, 6 + punpcklqdq m2, m1 ; b2 g2 r2 b3 g3 r3 __ __ b6 g6 r6 b7 g7 r7 + psrlq m3, m0, 24 + psrlq m4, m2, 24 + punpckhbw m1, m0, m3 ; b4 b5 g4 g5 r4 r5 + punpcklbw m0, m3 ; b0 b1 g0 g1 r0 r1 + punpckhbw m3, m2, m4 ; b6 b7 g6 g7 r6 r7 + punpcklbw m2, m4 ; b2 b3 g2 g3 r2 r3 + punpcklwd m0, m2 ; b0 b1 b2 b3 g0 g1 g2 g3 r0 r1 r2 r3 + punpcklwd m1, m3 ; b4 b5 b6 b7 g4 g5 g6 g7 r4 r5 r6 r7 +%else + pshufd m3, m0, q2301 + pshufd m4, m1, q2301 + punpckhbw m2, m0, m3 ; b2 b3 g2 g3 r2 r3 + punpcklbw m0, m3 ; b0 b1 g0 g1 r0 r1 + punpckhbw m3, m1, m4 ; b6 b7 g6 g7 r6 r7 + punpcklbw m1, m4 ; b4 b5 g4 g5 r4 r5 + punpcklwd m0, m2 ; b0 b1 b2 b3 g0 g1 g2 g3 r0 r1 r2 r3 + punpcklwd m1, m3 ; b4 b5 b6 b7 g4 g5 g6 g7 r4 r5 r6 r7 +%endif + punpckldq m2, m0, m1 ; b0 b1 b2 b3 b4 b5 b6 b7 g0 g1 g2 g3 g4 g5 g6 g7 + punpckhdq m0, m1 ; r0 r1 r2 r3 r4 r5 r6 r7 + movh [r0+%9], m2 + movhps [r2+%9], m2 + movh [r4+%9], m0 + add %8, %1*mmsize/2 + add %9, mmsize/2 + jl %%loopx + add r0, %2 + add r2, %3 + add r4, %4 + add r6, %5 + dec %7d + jg %%loopy +%endmacro + +%macro PLANE_DEINTERLEAVE_RGB 0 
+;----------------------------------------------------------------------------- +; void x264_plane_copy_deinterleave_rgb( pixel *dsta, intptr_t i_dsta, +; pixel *dstb, intptr_t i_dstb, +; pixel *dstc, intptr_t i_dstc, +; pixel *src, intptr_t i_src, int pw, int w, int h ) +;----------------------------------------------------------------------------- +%if ARCH_X86_64 +cglobal plane_copy_deinterleave_rgb, 8,12 + %define %%args r1, r3, r5, r7, r8, r9, r10, r11 + mov r8d, r9m + mov r9d, r10m + add r0, r8 + add r2, r8 + add r4, r8 + neg r8 +%else +cglobal plane_copy_deinterleave_rgb, 1,7 + %define %%args r1m, r3m, r5m, r7m, r9m, r1, r3, r5 + mov r1, r9m + mov r2, r2m + mov r4, r4m + mov r6, r6m + add r0, r1 + add r2, r1 + add r4, r1 + neg r1 + mov r9m, r1 + mov r1, r10m +%endif + cmp dword r8m, 4 + je .pw4 + PLANE_DEINTERLEAVE_RGB_CORE 3, %%args ; BGR + jmp .ret +.pw4: + PLANE_DEINTERLEAVE_RGB_CORE 4, %%args ; BGRA +.ret: + REP_RET +%endmacro + +%if HIGH_BIT_DEPTH == 0 +INIT_XMM sse2 +PLANE_DEINTERLEAVE_RGB +INIT_XMM ssse3 +PLANE_DEINTERLEAVE_RGB +%endif ; !HIGH_BIT_DEPTH + +%macro PLANE_DEINTERLEAVE_V210 0 +;----------------------------------------------------------------------------- +; void x264_plane_copy_deinterleave_v210( uint16_t *dsty, intptr_t i_dsty, +; uint16_t *dstc, intptr_t i_dstc, +; uint32_t *src, intptr_t i_src, int w, int h ) +;----------------------------------------------------------------------------- +%if ARCH_X86_64 +cglobal plane_copy_deinterleave_v210, 8,10,7 +%define src r8 +%define org_w r9 +%define h r7d +%else +cglobal plane_copy_deinterleave_v210, 7,7,7 +%define src r4m +%define org_w r6m +%define h dword r7m +%endif + FIX_STRIDES r1, r3, r6d + shl r5, 2 + add r0, r6 + add r2, r6 + neg r6 + mov src, r4 + mov org_w, r6 + mova m2, [v210_mask] + mova m3, [v210_luma_shuf] + mova m4, [v210_chroma_shuf] + mova m5, [v210_mult] ; also functions as vpermd index for avx2 + pshufd m6, m5, q1102 + +ALIGN 16 +.loop: + movu m1, [r4] + pandn m0, m2, m1 + 
pand m1, m2 + pshufb m0, m3 + pshufb m1, m4 + pmulhrsw m0, m5 ; y0 y1 y2 y3 y4 y5 __ __ + pmulhrsw m1, m6 ; u0 v0 u1 v1 u2 v2 __ __ +%if mmsize == 32 + vpermd m0, m5, m0 + vpermd m1, m5, m1 +%endif + movu [r0+r6], m0 + movu [r2+r6], m1 + add r4, mmsize + add r6, 3*mmsize/4 + jl .loop + add r0, r1 + add r2, r3 + add src, r5 + mov r4, src + mov r6, org_w + dec h + jg .loop + RET +%endmacro ; PLANE_DEINTERLEAVE_V210 + %if HIGH_BIT_DEPTH INIT_MMX mmx2 PLANE_INTERLEAVE @@ -1205,9 +1375,14 @@ INIT_XMM sse2 PLANE_INTERLEAVE PLANE_DEINTERLEAVE +INIT_XMM ssse3 +PLANE_DEINTERLEAVE_V210 INIT_XMM avx PLANE_INTERLEAVE PLANE_DEINTERLEAVE +PLANE_DEINTERLEAVE_V210 +INIT_YMM avx2 +PLANE_DEINTERLEAVE_V210 %else INIT_MMX mmx2 PLANE_INTERLEAVE @@ -1813,62 +1988,64 @@ ; uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len ) ;----------------------------------------------------------------------------- %macro MBTREE 0 -cglobal mbtree_propagate_cost, 7,7,7 - add r6d, r6d - lea r0, [r0+r6*2] - add r1, r6 - add r2, r6 - add r3, r6 - add r4, r6 - neg r6 - pxor xmm4, xmm4 - movss xmm6, [r5] - shufps xmm6, xmm6, 0 - mulps xmm6, [pf_inv256] - movdqa xmm5, [pw_3fff] +cglobal mbtree_propagate_cost, 6,6,7 + movss m6, [r5] + mov r5d, r6m + lea r0, [r0+r5*2] + add r5d, r5d + add r1, r5 + add r2, r5 + add r3, r5 + add r4, r5 + neg r5 + pxor m4, m4 + shufps m6, m6, 0 + mova m5, [pw_3fff] .loop: - movq xmm2, [r2+r6] ; intra - movq xmm0, [r4+r6] ; invq - movq xmm3, [r3+r6] ; inter - movq xmm1, [r1+r6] ; prop - punpcklwd xmm2, xmm4 - punpcklwd xmm0, xmm4 - pmaddwd xmm0, xmm2 - pand xmm3, xmm5 - punpcklwd xmm1, xmm4 - punpcklwd xmm3, xmm4 + movq m2, [r2+r5] ; intra + movq m0, [r4+r5] ; invq + movq m3, [r3+r5] ; inter + movq m1, [r1+r5] ; prop + pand m3, m5 + pminsw m3, m2 + punpcklwd m2, m4 + punpcklwd m0, m4 + pmaddwd m0, m2 + punpcklwd m1, m4 + punpcklwd m3, m4 %if cpuflag(fma4) - cvtdq2ps xmm0, xmm0 - cvtdq2ps xmm1, xmm1 - fmaddps xmm0, xmm0, xmm6, xmm1 - cvtdq2ps xmm1, xmm2 - 
psubd xmm2, xmm3 - cvtdq2ps xmm2, xmm2 - rcpps xmm3, xmm1 - mulps xmm1, xmm3 - mulps xmm0, xmm2 - addps xmm2, xmm3, xmm3 - fnmaddps xmm3, xmm1, xmm3, xmm2 - mulps xmm0, xmm3 + cvtdq2ps m0, m0 + cvtdq2ps m1, m1 + fmaddps m0, m0, m6, m1 + cvtdq2ps m1, m2 + psubd m2, m3 + cvtdq2ps m2, m2 + rcpps m3, m1 + mulps m1, m3 + mulps m0, m2 + addps m2, m3, m3 + fnmaddps m3, m1, m3, m2 + mulps m0, m3 %else - cvtdq2ps xmm0, xmm0 - mulps xmm0, xmm6 ; intra*invq*fps_factor>>8 - cvtdq2ps xmm1, xmm1 ; prop - addps xmm0, xmm1 ; prop + (intra*invq*fps_factor>>8) - cvtdq2ps xmm1, xmm2 ; intra - psubd xmm2, xmm3 ; intra - inter - cvtdq2ps xmm2, xmm2 ; intra - inter - rcpps xmm3, xmm1 ; 1 / intra 1st approximation - mulps xmm1, xmm3 ; intra * (1/intra 1st approx) - mulps xmm1, xmm3 ; intra * (1/intra 1st approx)^2 - mulps xmm0, xmm2 ; (prop + (intra*invq*fps_factor>>8)) * (intra - inter) - addps xmm3, xmm3 ; 2 * (1/intra 1st approx) - subps xmm3, xmm1 ; 2nd approximation for 1/intra - mulps xmm0, xmm3 ; / intra -%endif - cvtps2dq xmm0, xmm0 - movdqa [r0+r6*2], xmm0 - add r6, 8 + cvtdq2ps m0, m0 + mulps m0, m6 ; intra*invq*fps_factor>>8 + cvtdq2ps m1, m1 ; prop + addps m0, m1 ; prop + (intra*invq*fps_factor>>8) + cvtdq2ps m1, m2 ; intra + psubd m2, m3 ; intra - inter + cvtdq2ps m2, m2 ; intra - inter + rcpps m3, m1 ; 1 / intra 1st approximation + mulps m1, m3 ; intra * (1/intra 1st approx) + mulps m1, m3 ; intra * (1/intra 1st approx)^2 + mulps m0, m2 ; (prop + (intra*invq*fps_factor>>8)) * (intra - inter) + addps m3, m3 ; 2 * (1/intra 1st approx) + subps m3, m1 ; 2nd approximation for 1/intra + mulps m0, m3 ; / intra +%endif + cvtps2dq m0, m0 + packssdw m0, m0 + movh [r0+r5], m0 + add r5, 8 jl .loop RET %endmacro @@ -1880,34 +2057,35 @@ MBTREE %macro INT16_UNPACK 1 - vpunpckhwd xm4, xm%1, xm7 - vpunpcklwd xm%1, xm7 - vinsertf128 m%1, m%1, xm4, 1 + punpckhwd xm4, xm%1, xm7 + punpcklwd xm%1, xm7 + vinsertf128 m%1, m%1, xm4, 1 %endmacro -; FIXME: align loads/stores to 16 bytes -%macro 
MBTREE_AVX 0 -cglobal mbtree_propagate_cost, 7,7,8 - add r6d, r6d - lea r0, [r0+r6*2] - add r1, r6 - add r2, r6 - add r3, r6 - add r4, r6 - neg r6 - mova xm5, [pw_3fff] - vbroadcastss m6, [r5] - mulps m6, [pf_inv256] +; FIXME: align loads to 16 bytes +%macro MBTREE_AVX 1 +cglobal mbtree_propagate_cost, 6,6,%1 + vbroadcastss m6, [r5] + mov r5d, r6m + lea r0, [r0+r5*2] + add r5d, r5d + add r1, r5 + add r2, r5 + add r3, r5 + add r4, r5 + neg r5 + mova xm5, [pw_3fff] %if notcpuflag(avx2) - pxor xm7, xm7 + pxor xm7, xm7 %endif .loop: %if cpuflag(avx2) - pmovzxwd m0, [r2+r6] ; intra - pmovzxwd m1, [r4+r6] ; invq - pmovzxwd m2, [r1+r6] ; prop - pand xm3, xm5, [r3+r6] ; inter + pmovzxwd m0, [r2+r5] ; intra + pmovzxwd m1, [r4+r5] ; invq + pmovzxwd m2, [r1+r5] ; prop + pand xm3, xm5, [r3+r5] ; inter pmovzxwd m3, xm3 + pminsd m3, m0 pmaddwd m1, m0 psubd m4, m0, m3 cvtdq2ps m0, m0 @@ -1922,10 +2100,11 @@ fnmaddps m4, m2, m3, m4 mulps m1, m4 %else - movu xm0, [r2+r6] - movu xm1, [r4+r6] - movu xm2, [r1+r6] - pand xm3, xm5, [r3+r6] + movu xm0, [r2+r5] + movu xm1, [r4+r5] + movu xm2, [r1+r5] + pand xm3, xm5, [r3+r5] + pminsw xm3, xm0 INT16_UNPACK 0 INT16_UNPACK 1 INT16_UNPACK 2 @@ -1947,13 +2126,107 @@ mulps m1, m3 ; / intra %endif vcvtps2dq m1, m1 - movu [r0+r6*2], m1 - add r6, 16 + vextractf128 xm2, m1, 1 + packssdw xm1, xm2 + mova [r0+r5], xm1 + add r5, 16 jl .loop RET %endmacro INIT_YMM avx -MBTREE_AVX +MBTREE_AVX 8 INIT_YMM avx2,fma3 -MBTREE_AVX +MBTREE_AVX 7 + +%macro MBTREE_PROPAGATE_LIST 0 +;----------------------------------------------------------------------------- +; void mbtree_propagate_list_internal( int16_t (*mvs)[2], int *propagate_amount, uint16_t *lowres_costs, +; int16_t *output, int bipred_weight, int mb_y, int len ) +;----------------------------------------------------------------------------- +cglobal mbtree_propagate_list_internal, 4,6,8 + movh m6, [pw_0to15] ; mb_x + movd m7, r5m + pshuflw m7, m7, 0 + punpcklwd m6, m7 ; 0 y 1 y 2 y 3 y + movd m7, r4m + 
SPLATW m7, m7 ; bipred_weight + psllw m7, 9 ; bipred_weight << 9 + + mov r5d, r6m + xor r4d, r4d +.loop: + mova m3, [r1+r4*2] + movu m4, [r2+r4*2] + mova m5, [pw_0xc000] + pand m4, m5 + pcmpeqw m4, m5 + pmulhrsw m5, m3, m7 ; propagate_amount = (propagate_amount * bipred_weight + 32) >> 6 +%if cpuflag(avx) + pblendvb m5, m3, m5, m4 +%else + pand m5, m4 + pandn m4, m3 + por m5, m4 ; if( lists_used == 3 ) + ; propagate_amount = (propagate_amount * bipred_weight + 32) >> 6 +%endif + + movu m0, [r0+r4*4] ; x,y + movu m1, [r0+r4*4+mmsize] + + psraw m2, m0, 5 + psraw m3, m1, 5 + mova m4, [pd_4] + paddw m2, m6 ; {mbx, mby} = ({x,y}>>5)+{h->mb.i_mb_x,h->mb.i_mb_y} + paddw m6, m4 ; {mbx, mby} += {4, 0} + paddw m3, m6 ; {mbx, mby} = ({x,y}>>5)+{h->mb.i_mb_x,h->mb.i_mb_y} + paddw m6, m4 ; {mbx, mby} += {4, 0} + + mova [r3+mmsize*0], m2 + mova [r3+mmsize*1], m3 + + mova m3, [pw_31] + pand m0, m3 ; x &= 31 + pand m1, m3 ; y &= 31 + packuswb m0, m1 + psrlw m1, m0, 3 + pand m0, m3 ; x + SWAP 1, 3 + pandn m1, m3 ; y premultiplied by (1<<5) for later use of pmulhrsw + + mova m3, [pw_32] + psubw m3, m0 ; 32 - x + mova m4, [pw_1024] + psubw m4, m1 ; (32 - y) << 5 + + pmullw m2, m3, m4 ; idx0weight = (32-y)*(32-x) << 5 + pmullw m4, m0 ; idx1weight = (32-y)*x << 5 + pmullw m0, m1 ; idx3weight = y*x << 5 + pmullw m1, m3 ; idx2weight = y*(32-x) << 5 + + ; avoid overflow in the input to pmulhrsw + psrlw m3, m2, 15 + psubw m2, m3 ; idx0weight -= (idx0weight == 32768) + + pmulhrsw m2, m5 ; idx0weight * propagate_amount + 512 >> 10 + pmulhrsw m4, m5 ; idx1weight * propagate_amount + 512 >> 10 + pmulhrsw m1, m5 ; idx2weight * propagate_amount + 512 >> 10 + pmulhrsw m0, m5 ; idx3weight * propagate_amount + 512 >> 10 + + SBUTTERFLY wd, 2, 4, 3 + SBUTTERFLY wd, 1, 0, 3 + mova [r3+mmsize*2], m2 + mova [r3+mmsize*3], m4 + mova [r3+mmsize*4], m1 + mova [r3+mmsize*5], m0 + add r4d, mmsize/2 + add r3, mmsize*6 + cmp r4d, r5d + jl .loop + REP_RET +%endmacro + +INIT_XMM ssse3 +MBTREE_PROPAGATE_LIST 
+INIT_XMM avx +MBTREE_PROPAGATE_LIST
View file
x264-snapshot-20130723-2245.tar.bz2/common/x86/mc-c.c -> x264-snapshot-20140321-2245.tar.bz2/common/x86/mc-c.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * mc-c.c: x86 motion compensation ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu> @@ -116,6 +116,23 @@ void x264_plane_copy_deinterleave_avx( uint16_t *dstu, intptr_t i_dstu, uint16_t *dstv, intptr_t i_dstv, uint16_t *src, intptr_t i_src, int w, int h ); +void x264_plane_copy_deinterleave_rgb_sse2 ( pixel *dsta, intptr_t i_dsta, + pixel *dstb, intptr_t i_dstb, + pixel *dstc, intptr_t i_dstc, + pixel *src, intptr_t i_src, int pw, int w, int h ); +void x264_plane_copy_deinterleave_rgb_ssse3( pixel *dsta, intptr_t i_dsta, + pixel *dstb, intptr_t i_dstb, + pixel *dstc, intptr_t i_dstc, + pixel *src, intptr_t i_src, int pw, int w, int h ); +void x264_plane_copy_deinterleave_v210_ssse3( uint16_t *dstu, intptr_t i_dstu, + uint16_t *dstv, intptr_t i_dstv, + uint32_t *src, intptr_t i_src, int w, int h ); +void x264_plane_copy_deinterleave_v210_avx ( uint16_t *dstu, intptr_t i_dstu, + uint16_t *dstv, intptr_t i_dstv, + uint32_t *src, intptr_t i_src, int w, int h ); +void x264_plane_copy_deinterleave_v210_avx2 ( uint16_t *dstu, intptr_t i_dstu, + uint16_t *dstv, intptr_t i_dstv, + uint32_t *src, intptr_t i_src, int w, int h ); void x264_store_interleave_chroma_mmx2( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height ); void x264_store_interleave_chroma_sse2( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height ); void x264_store_interleave_chroma_avx ( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height ); @@ -144,13 +161,13 @@ void x264_integral_init8v_mmx ( uint16_t *sum8, intptr_t stride ); void x264_integral_init8v_sse2( uint16_t *sum8, intptr_t stride ); void x264_integral_init8v_avx2( uint16_t *sum8, intptr_t stride ); -void 
x264_mbtree_propagate_cost_sse2( int *dst, uint16_t *propagate_in, uint16_t *intra_costs, +void x264_mbtree_propagate_cost_sse2( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs, uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len ); -void x264_mbtree_propagate_cost_avx ( int *dst, uint16_t *propagate_in, uint16_t *intra_costs, +void x264_mbtree_propagate_cost_avx ( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs, uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len ); -void x264_mbtree_propagate_cost_fma4( int *dst, uint16_t *propagate_in, uint16_t *intra_costs, +void x264_mbtree_propagate_cost_fma4( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs, uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len ); -void x264_mbtree_propagate_cost_avx2_fma3( int *dst, uint16_t *propagate_in, uint16_t *intra_costs, +void x264_mbtree_propagate_cost_avx2_fma3( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs, uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len ); #define MC_CHROMA(cpu)\ @@ -158,7 +175,6 @@ int dx, int dy, int i_width, int i_height ); MC_CHROMA(mmx2) MC_CHROMA(sse2) -MC_CHROMA(sse2_misalign) MC_CHROMA(ssse3) MC_CHROMA(ssse3_cache64) MC_CHROMA(avx) @@ -186,7 +202,6 @@ PIXEL_AVG_WALL(cache64_mmx2) PIXEL_AVG_WALL(cache64_sse2) PIXEL_AVG_WALL(sse2) -PIXEL_AVG_WALL(sse2_misalign) PIXEL_AVG_WALL(cache64_ssse3) PIXEL_AVG_WALL(avx2) @@ -227,7 +242,6 @@ PIXEL_AVG_WTAB(cache64_mmx2, mmx2, cache64_mmx2, cache64_mmx2, cache64_mmx2, cache64_mmx2) #endif PIXEL_AVG_WTAB(sse2, mmx2, mmx2, sse2, sse2, sse2) -PIXEL_AVG_WTAB(sse2_misalign, mmx2, mmx2, sse2, sse2, sse2_misalign) PIXEL_AVG_WTAB(cache64_sse2, mmx2, cache64_mmx2, cache64_sse2, cache64_sse2, cache64_sse2) PIXEL_AVG_WTAB(cache64_ssse3, mmx2, cache64_mmx2, cache64_ssse3, cache64_ssse3, cache64_sse2) PIXEL_AVG_WTAB(cache64_ssse3_atom, mmx2, mmx2, cache64_ssse3, cache64_ssse3, sse2) @@ -429,7 
+443,6 @@ GET_REF(cache32_mmx2) GET_REF(cache64_mmx2) #endif -GET_REF(sse2_misalign) GET_REF(cache64_sse2) GET_REF(cache64_ssse3) GET_REF(cache64_ssse3_atom) @@ -477,7 +490,6 @@ HPEL(16, avx, avx, avx, avx) HPEL(32, avx2, avx2, avx2, avx2) #endif -HPEL(16, sse2_misalign, sse2, sse2_misalign, sse2) #endif // HIGH_BIT_DEPTH static void x264_plane_copy_mmx2( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h ) @@ -521,6 +533,113 @@ PLANE_INTERLEAVE(avx) #endif +#if HAVE_X86_INLINE_ASM +#define CLIP_ADD(s,x)\ +do\ +{\ + int temp;\ + asm("movd %0, %%xmm0 \n"\ + "movd %2, %%xmm1 \n"\ + "paddsw %%xmm1, %%xmm0 \n"\ + "movd %%xmm0, %1 \n"\ + :"+m"(s), "=&r"(temp)\ + :"m"(x)\ + );\ + s = temp;\ +} while(0) + +#define CLIP_ADD2(s,x)\ +do\ +{\ + asm("movd %0, %%xmm0 \n"\ + "movd %1, %%xmm1 \n"\ + "paddsw %%xmm1, %%xmm0 \n"\ + "movd %%xmm0, %0 \n"\ + :"+m"(M32(s))\ + :"m"(M32(x))\ + );\ +} while(0) +#else +#define CLIP_ADD(s,x) (s) = X264_MIN((s)+(x),(1<<15)-1) +#define CLIP_ADD2(s,x)\ +do\ +{\ + CLIP_ADD((s)[0], (x)[0]);\ + CLIP_ADD((s)[1], (x)[1]);\ +} while(0) +#endif + +#define PROPAGATE_LIST(cpu)\ +void x264_mbtree_propagate_list_internal_##cpu( int16_t (*mvs)[2], int16_t *propagate_amount,\ + uint16_t *lowres_costs, int16_t *output,\ + int bipred_weight, int mb_y, int len );\ +\ +static void x264_mbtree_propagate_list_##cpu( x264_t *h, uint16_t *ref_costs, int16_t (*mvs)[2],\ + int16_t *propagate_amount, uint16_t *lowres_costs,\ + int bipred_weight, int mb_y, int len, int list )\ +{\ + int16_t *current = h->scratch_buffer2;\ +\ + x264_mbtree_propagate_list_internal_##cpu( mvs, propagate_amount, lowres_costs,\ + current, bipred_weight, mb_y, len );\ +\ + unsigned stride = h->mb.i_mb_stride;\ + unsigned width = h->mb.i_mb_width;\ + unsigned height = h->mb.i_mb_height;\ +\ + for( unsigned i = 0; i < len; current += 32 )\ + {\ + int end = X264_MIN( i+8, len );\ + for( ; i < end; i++, current += 2 )\ + {\ + if( !(lowres_costs[i] & (1 << 
(list+LOWRES_COST_SHIFT))) )\ + continue;\ +\ + unsigned mbx = current[0];\ + unsigned mby = current[1];\ + unsigned idx0 = mbx + mby * stride;\ + unsigned idx2 = idx0 + stride;\ +\ + /* Shortcut for the simple/common case of zero MV */\ + if( !M32( mvs[i] ) )\ + {\ + CLIP_ADD( ref_costs[idx0], current[16] );\ + continue;\ + }\ +\ + if( mbx < width-1 && mby < height-1 )\ + {\ + CLIP_ADD2( ref_costs+idx0, current+16 );\ + CLIP_ADD2( ref_costs+idx2, current+32 );\ + }\ + else\ + {\ + /* Note: this takes advantage of unsigned representation to\ + * catch negative mbx/mby. */\ + if( mby < height )\ + {\ + if( mbx < width )\ + CLIP_ADD( ref_costs[idx0+0], current[16] );\ + if( mbx+1 < width )\ + CLIP_ADD( ref_costs[idx0+1], current[17] );\ + }\ + if( mby+1 < height )\ + {\ + if( mbx < width )\ + CLIP_ADD( ref_costs[idx2+0], current[32] );\ + if( mbx+1 < width )\ + CLIP_ADD( ref_costs[idx2+1], current[33] );\ + }\ + }\ + }\ + }\ +} + +PROPAGATE_LIST(ssse3) +PROPAGATE_LIST(avx) +#undef CLIP_ADD +#undef CLIP_ADD2 + void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf ) { if( !(cpu&X264_CPU_MMX) ) @@ -632,6 +751,8 @@ return; pf->frame_init_lowres_core = x264_frame_init_lowres_core_ssse3; + pf->plane_copy_deinterleave_v210 = x264_plane_copy_deinterleave_v210_ssse3; + pf->mbtree_propagate_list = x264_mbtree_propagate_list_ssse3; if( !(cpu&(X264_CPU_SLOW_SHUFFLE|X264_CPU_SLOW_ATOM|X264_CPU_SLOW_PALIGNR)) ) pf->integral_init4v = x264_integral_init4v_ssse3; @@ -644,6 +765,7 @@ pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_avx; pf->plane_copy_interleave = x264_plane_copy_interleave_avx; pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_avx; + pf->plane_copy_deinterleave_v210 = x264_plane_copy_deinterleave_v210_avx; pf->store_interleave_chroma = x264_store_interleave_chroma_avx; pf->copy[PIXEL_16x16] = x264_mc_copy_w16_aligned_avx; @@ -654,7 +776,10 @@ pf->frame_init_lowres_core = x264_frame_init_lowres_core_xop; if( cpu&X264_CPU_AVX2 ) + { 
pf->mc_luma = mc_luma_avx2; + pf->plane_copy_deinterleave_v210 = x264_plane_copy_deinterleave_v210_avx2; + } #else // !HIGH_BIT_DEPTH #if ARCH_X86 // all x86_64 cpus with cacheline split issues use sse2 instead @@ -679,6 +804,7 @@ pf->integral_init8v = x264_integral_init8v_sse2; pf->hpel_filter = x264_hpel_filter_sse2_amd; pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_sse2; + pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_sse2; if( !(cpu&X264_CPU_SSE2_IS_SLOW) ) { @@ -696,8 +822,6 @@ pf->avg[PIXEL_8x8] = x264_pixel_avg_8x8_sse2; pf->avg[PIXEL_8x4] = x264_pixel_avg_8x4_sse2; pf->hpel_filter = x264_hpel_filter_sse2; - if( cpu&X264_CPU_SSE_MISALIGN ) - pf->hpel_filter = x264_hpel_filter_sse2_misalign; pf->frame_init_lowres_core = x264_frame_init_lowres_core_sse2; if( !(cpu&X264_CPU_STACK_MOD4) ) pf->mc_chroma = x264_mc_chroma_sse2; @@ -716,12 +840,6 @@ pf->mc_luma = mc_luma_cache64_sse2; pf->get_ref = get_ref_cache64_sse2; } - if( cpu&X264_CPU_SSE_MISALIGN ) - { - pf->get_ref = get_ref_sse2_misalign; - if( !(cpu&X264_CPU_STACK_MOD4) ) - pf->mc_chroma = x264_mc_chroma_sse2_misalign; - } } } @@ -737,6 +855,8 @@ pf->avg[PIXEL_4x8] = x264_pixel_avg_4x8_ssse3; pf->avg[PIXEL_4x4] = x264_pixel_avg_4x4_ssse3; pf->avg[PIXEL_4x2] = x264_pixel_avg_4x2_ssse3; + pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_ssse3; + pf->mbtree_propagate_list = x264_mbtree_propagate_list_ssse3; if( !(cpu&X264_CPU_SLOW_PSHUFB) ) { @@ -813,6 +933,7 @@ return; pf->memzero_aligned = x264_memzero_aligned_avx; pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_avx; + pf->mbtree_propagate_list = x264_mbtree_propagate_list_avx; if( cpu&X264_CPU_FMA4 ) pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_fma4;
View file
x264-snapshot-20130723-2245.tar.bz2/common/x86/mc.h -> x264-snapshot-20140321-2245.tar.bz2/common/x86/mc.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * mc.h: x86 motion compensation ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Laurent Aimar <fenrir@via.ecp.fr>
View file
x264-snapshot-20130723-2245.tar.bz2/common/x86/pixel-32.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/pixel-32.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* pixel-32.asm: x86_32 pixel metrics ;***************************************************************************** -;* Copyright (C) 2003-2013 x264 project +;* Copyright (C) 2003-2014 x264 project ;* ;* Authors: Loren Merritt <lorenm@u.washington.edu> ;* Laurent Aimar <fenrir@via.ecp.fr>
View file
x264-snapshot-20130723-2245.tar.bz2/common/x86/pixel-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/pixel-a.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* pixel.asm: x86 pixel metrics ;***************************************************************************** -;* Copyright (C) 2003-2013 x264 project +;* Copyright (C) 2003-2014 x264 project ;* ;* Authors: Loren Merritt <lorenm@u.washington.edu> ;* Holger Lubitz <holger@lubitz.org> @@ -205,7 +205,7 @@ mov r4d, %%n %endif pxor m0, m0 -.loop +.loop: mova m1, [r0] mova m2, [r0+offset0_1] mova m3, [r0+offset0_2] @@ -1265,7 +1265,7 @@ ; clobber: m3..m7 ; out: %1 = satd %macro SATD_4x4_MMX 3 - %xdefine %%n n%1 + %xdefine %%n nn%1 %assign offset %2*SIZEOF_PIXEL LOAD_DIFF m4, m3, none, [r0+ offset], [r2+ offset] LOAD_DIFF m5, m3, none, [r0+ r1+offset], [r2+ r3+offset]
View file
x264-snapshot-20130723-2245.tar.bz2/common/x86/pixel.h -> x264-snapshot-20140321-2245.tar.bz2/common/x86/pixel.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * pixel.h: x86 pixel metrics ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu> @@ -47,7 +47,6 @@ DECL_X1( sad, mmx2 ) DECL_X1( sad, sse2 ) -DECL_X4( sad, sse2_misalign ) DECL_X1( sad, sse3 ) DECL_X1( sad, sse2_aligned ) DECL_X1( sad, ssse3 ) @@ -57,6 +56,7 @@ DECL_X4( sad, sse2 ) DECL_X4( sad, sse3 ) DECL_X4( sad, ssse3 ) +DECL_X4( sad, avx ) DECL_X4( sad, avx2 ) DECL_X1( ssd, mmx ) DECL_X1( ssd, mmx2 )
View file
x264-snapshot-20130723-2245.tar.bz2/common/x86/predict-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/predict-a.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* predict-a.asm: x86 intra prediction ;***************************************************************************** -;* Copyright (C) 2005-2013 x264 project +;* Copyright (C) 2005-2014 x264 project ;* ;* Authors: Loren Merritt <lorenm@u.washington.edu> ;* Holger Lubitz <holger@lubitz.org> @@ -31,7 +31,6 @@ SECTION_RODATA 32 -pw_0to15: dw 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 pw_43210123: times 2 dw -3, -2, -1, 0, 1, 2, 3, 4 pw_m3: times 16 dw -3 pw_m7: times 16 dw -7 @@ -56,6 +55,7 @@ cextern pw_16 cextern pw_00ff cextern pw_pixel_max +cextern pw_0to15 %macro STORE8 1 mova [r0+0*FDEC_STRIDEB], %1
View file
x264-snapshot-20130723-2245.tar.bz2/common/x86/predict-c.c -> x264-snapshot-20140321-2245.tar.bz2/common/x86/predict-c.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * predict-c.c: intra prediction ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu>
View file
x264-snapshot-20130723-2245.tar.bz2/common/x86/predict.h -> x264-snapshot-20140321-2245.tar.bz2/common/x86/predict.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * predict.h: x86 intra prediction ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu>
View file
x264-snapshot-20130723-2245.tar.bz2/common/x86/quant-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/quant-a.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* quant-a.asm: x86 quantization and level-run ;***************************************************************************** -;* Copyright (C) 2005-2013 x264 project +;* Copyright (C) 2005-2014 x264 project ;* ;* Authors: Loren Merritt <lorenm@u.washington.edu> ;* Jason Garrett-Glaser <darkshikari@gmail.com>
View file
x264-snapshot-20130723-2245.tar.bz2/common/x86/quant.h -> x264-snapshot-20140321-2245.tar.bz2/common/x86/quant.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * quant.h: x86 quantization and level-run ***************************************************************************** - * Copyright (C) 2005-2013 x264 project + * Copyright (C) 2005-2014 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Jason Garrett-Glaser <darkshikari@gmail.com>
View file
x264-snapshot-20130723-2245.tar.bz2/common/x86/sad-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/sad-a.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* sad-a.asm: x86 sad functions ;***************************************************************************** -;* Copyright (C) 2003-2013 x264 project +;* Copyright (C) 2003-2014 x264 project ;* ;* Authors: Loren Merritt <lorenm@u.washington.edu> ;* Jason Garrett-Glaser <darkshikari@gmail.com> @@ -32,7 +32,6 @@ SECTION_RODATA 32 pb_shuf8x8c2: times 2 db 0,0,0,0,8,8,8,8,-1,-1,-1,-1,-1,-1,-1,-1 -deinterleave_sadx4: dd 0,4,2,6 hpred_shuf: db 0,0,2,2,8,8,10,10,1,1,3,3,9,9,11,11 SECTION .text @@ -1009,62 +1008,56 @@ ;============================================================================= %macro SAD_X3_START_1x16P_SSE2 0 -%if cpuflag(misalign) - mova xmm2, [r0] - movu xmm0, [r1] - movu xmm1, [r2] - psadbw xmm0, xmm2 - psadbw xmm1, xmm2 - psadbw xmm2, [r3] + mova m2, [r0] +%if cpuflag(avx) + psadbw m0, m2, [r1] + psadbw m1, m2, [r2] + psadbw m2, [r3] %else - mova xmm3, [r0] - movu xmm0, [r1] - movu xmm1, [r2] - movu xmm2, [r3] - psadbw xmm0, xmm3 - psadbw xmm1, xmm3 - psadbw xmm2, xmm3 + movu m0, [r1] + movu m1, [r2] + movu m3, [r3] + psadbw m0, m2 + psadbw m1, m2 + psadbw m2, m3 %endif %endmacro %macro SAD_X3_1x16P_SSE2 2 -%if cpuflag(misalign) - mova xmm3, [r0+%1] - movu xmm4, [r1+%2] - movu xmm5, [r2+%2] - psadbw xmm4, xmm3 - psadbw xmm5, xmm3 - psadbw xmm3, [r3+%2] - paddw xmm0, xmm4 - paddw xmm1, xmm5 - paddw xmm2, xmm3 + mova m3, [r0+%1] +%if cpuflag(avx) + psadbw m4, m3, [r1+%2] + psadbw m5, m3, [r2+%2] + psadbw m3, [r3+%2] %else - mova xmm3, [r0+%1] - movu xmm4, [r1+%2] - movu xmm5, [r2+%2] - movu xmm6, [r3+%2] - psadbw xmm4, xmm3 - psadbw xmm5, xmm3 - psadbw xmm6, xmm3 - paddw xmm0, xmm4 - paddw xmm1, xmm5 - paddw xmm2, xmm6 + movu m4, [r1+%2] + movu m5, [r2+%2] + movu m6, [r3+%2] + psadbw m4, m3 + psadbw m5, m3 + psadbw m3, m6 %endif + paddw m0, m4 + paddw m1, m5 + paddw m2, m3 %endmacro +%if ARCH_X86_64 + DECLARE_REG_TMP 6 +%else + DECLARE_REG_TMP 5 +%endif + 
%macro SAD_X3_4x16P_SSE2 2 %if %1==0 -%if UNIX64 - mov r6, r5 -%endif - lea r5, [r4*3] + lea t0, [r4*3] SAD_X3_START_1x16P_SSE2 %else SAD_X3_1x16P_SSE2 FENC_STRIDE*(0+(%1&1)*4), r4*0 %endif SAD_X3_1x16P_SSE2 FENC_STRIDE*(1+(%1&1)*4), r4*1 SAD_X3_1x16P_SSE2 FENC_STRIDE*(2+(%1&1)*4), r4*2 - SAD_X3_1x16P_SSE2 FENC_STRIDE*(3+(%1&1)*4), r5 + SAD_X3_1x16P_SSE2 FENC_STRIDE*(3+(%1&1)*4), t0 %if %1 != %2-1 %if (%1&1) != 0 add r0, 8*FENC_STRIDE @@ -1076,156 +1069,117 @@ %endmacro %macro SAD_X3_START_2x8P_SSE2 0 - movq xmm7, [r0] - movq xmm0, [r1] - movq xmm1, [r2] - movq xmm2, [r3] - movhps xmm7, [r0+FENC_STRIDE] - movhps xmm0, [r1+r4] - movhps xmm1, [r2+r4] - movhps xmm2, [r3+r4] - psadbw xmm0, xmm7 - psadbw xmm1, xmm7 - psadbw xmm2, xmm7 + movq m3, [r0] + movq m0, [r1] + movq m1, [r2] + movq m2, [r3] + movhps m3, [r0+FENC_STRIDE] + movhps m0, [r1+r4] + movhps m1, [r2+r4] + movhps m2, [r3+r4] + psadbw m0, m3 + psadbw m1, m3 + psadbw m2, m3 %endmacro %macro SAD_X3_2x8P_SSE2 4 - movq xmm7, [r0+%1] - movq xmm3, [r1+%2] - movq xmm4, [r2+%2] - movq xmm5, [r3+%2] - movhps xmm7, [r0+%3] - movhps xmm3, [r1+%4] - movhps xmm4, [r2+%4] - movhps xmm5, [r3+%4] - psadbw xmm3, xmm7 - psadbw xmm4, xmm7 - psadbw xmm5, xmm7 - paddw xmm0, xmm3 - paddw xmm1, xmm4 - paddw xmm2, xmm5 + movq m6, [r0+%1] + movq m3, [r1+%2] + movq m4, [r2+%2] + movq m5, [r3+%2] + movhps m6, [r0+%3] + movhps m3, [r1+%4] + movhps m4, [r2+%4] + movhps m5, [r3+%4] + psadbw m3, m6 + psadbw m4, m6 + psadbw m5, m6 + paddw m0, m3 + paddw m1, m4 + paddw m2, m5 %endmacro %macro SAD_X4_START_2x8P_SSE2 0 - movq xmm7, [r0] - movq xmm0, [r1] - movq xmm1, [r2] - movq xmm2, [r3] - movq xmm3, [r4] - movhps xmm7, [r0+FENC_STRIDE] - movhps xmm0, [r1+r5] - movhps xmm1, [r2+r5] - movhps xmm2, [r3+r5] - movhps xmm3, [r4+r5] - psadbw xmm0, xmm7 - psadbw xmm1, xmm7 - psadbw xmm2, xmm7 - psadbw xmm3, xmm7 + movq m4, [r0] + movq m0, [r1] + movq m1, [r2] + movq m2, [r3] + movq m3, [r4] + movhps m4, [r0+FENC_STRIDE] + movhps m0, [r1+r5] + 
movhps m1, [r2+r5] + movhps m2, [r3+r5] + movhps m3, [r4+r5] + psadbw m0, m4 + psadbw m1, m4 + psadbw m2, m4 + psadbw m3, m4 %endmacro %macro SAD_X4_2x8P_SSE2 4 - movq xmm7, [r0+%1] - movq xmm4, [r1+%2] - movq xmm5, [r2+%2] -%if ARCH_X86_64 - movq xmm6, [r3+%2] - movq xmm8, [r4+%2] - movhps xmm7, [r0+%3] - movhps xmm4, [r1+%4] - movhps xmm5, [r2+%4] - movhps xmm6, [r3+%4] - movhps xmm8, [r4+%4] - psadbw xmm4, xmm7 - psadbw xmm5, xmm7 - psadbw xmm6, xmm7 - psadbw xmm8, xmm7 - paddw xmm0, xmm4 - paddw xmm1, xmm5 - paddw xmm2, xmm6 - paddw xmm3, xmm8 -%else - movhps xmm7, [r0+%3] - movhps xmm4, [r1+%4] - movhps xmm5, [r2+%4] - psadbw xmm4, xmm7 - psadbw xmm5, xmm7 - paddw xmm0, xmm4 - paddw xmm1, xmm5 - movq xmm6, [r3+%2] - movq xmm4, [r4+%2] - movhps xmm6, [r3+%4] - movhps xmm4, [r4+%4] - psadbw xmm6, xmm7 - psadbw xmm4, xmm7 - paddw xmm2, xmm6 - paddw xmm3, xmm4 -%endif + movq m6, [r0+%1] + movq m4, [r1+%2] + movq m5, [r2+%2] + movhps m6, [r0+%3] + movhps m4, [r1+%4] + movhps m5, [r2+%4] + psadbw m4, m6 + psadbw m5, m6 + paddw m0, m4 + paddw m1, m5 + movq m4, [r3+%2] + movq m5, [r4+%2] + movhps m4, [r3+%4] + movhps m5, [r4+%4] + psadbw m4, m6 + psadbw m5, m6 + paddw m2, m4 + paddw m3, m5 %endmacro %macro SAD_X4_START_1x16P_SSE2 0 -%if cpuflag(misalign) - mova xmm3, [r0] - movu xmm0, [r1] - movu xmm1, [r2] - movu xmm2, [r3] - psadbw xmm0, xmm3 - psadbw xmm1, xmm3 - psadbw xmm2, xmm3 - psadbw xmm3, [r4] + mova m3, [r0] +%if cpuflag(avx) + psadbw m0, m3, [r1] + psadbw m1, m3, [r2] + psadbw m2, m3, [r3] + psadbw m3, [r4] %else - mova xmm7, [r0] - movu xmm0, [r1] - movu xmm1, [r2] - movu xmm2, [r3] - movu xmm3, [r4] - psadbw xmm0, xmm7 - psadbw xmm1, xmm7 - psadbw xmm2, xmm7 - psadbw xmm3, xmm7 + movu m0, [r1] + movu m1, [r2] + movu m2, [r3] + movu m4, [r4] + psadbw m0, m3 + psadbw m1, m3 + psadbw m2, m3 + psadbw m3, m4 %endif %endmacro %macro SAD_X4_1x16P_SSE2 2 -%if cpuflag(misalign) - mova xmm7, [r0+%1] - movu xmm4, [r1+%2] - movu xmm5, [r2+%2] - movu xmm6, [r3+%2] - 
psadbw xmm4, xmm7 - psadbw xmm5, xmm7 - psadbw xmm6, xmm7 - psadbw xmm7, [r4+%2] - paddw xmm0, xmm4 - paddw xmm1, xmm5 - paddw xmm2, xmm6 - paddw xmm3, xmm7 + mova m6, [r0+%1] +%if cpuflag(avx) + psadbw m4, m6, [r1+%2] + psadbw m5, m6, [r2+%2] %else - mova xmm7, [r0+%1] - movu xmm4, [r1+%2] - movu xmm5, [r2+%2] - movu xmm6, [r3+%2] -%if ARCH_X86_64 - movu xmm8, [r4+%2] - psadbw xmm4, xmm7 - psadbw xmm5, xmm7 - psadbw xmm6, xmm7 - psadbw xmm8, xmm7 - paddw xmm0, xmm4 - paddw xmm1, xmm5 - paddw xmm2, xmm6 - paddw xmm3, xmm8 -%else - psadbw xmm4, xmm7 - psadbw xmm5, xmm7 - paddw xmm0, xmm4 - psadbw xmm6, xmm7 - movu xmm4, [r4+%2] - paddw xmm1, xmm5 - psadbw xmm4, xmm7 - paddw xmm2, xmm6 - paddw xmm3, xmm4 + movu m4, [r1+%2] + movu m5, [r2+%2] + psadbw m4, m6 + psadbw m5, m6 %endif + paddw m0, m4 + paddw m1, m5 +%if cpuflag(avx) + psadbw m4, m6, [r3+%2] + psadbw m5, m6, [r4+%2] +%else + movu m4, [r3+%2] + movu m5, [r4+%2] + psadbw m4, m6 + psadbw m5, m6 %endif + paddw m2, m4 + paddw m3, m5 %endmacro %macro SAD_X4_4x16P_SSE2 2 @@ -1251,15 +1205,12 @@ %macro SAD_X3_4x8P_SSE2 2 %if %1==0 -%if UNIX64 - mov r6, r5 -%endif - lea r5, [r4*3] + lea t0, [r4*3] SAD_X3_START_2x8P_SSE2 %else SAD_X3_2x8P_SSE2 FENC_STRIDE*(0+(%1&1)*4), r4*0, FENC_STRIDE*(1+(%1&1)*4), r4*1 %endif - SAD_X3_2x8P_SSE2 FENC_STRIDE*(2+(%1&1)*4), r4*2, FENC_STRIDE*(3+(%1&1)*4), r5 + SAD_X3_2x8P_SSE2 FENC_STRIDE*(2+(%1&1)*4), r4*2, FENC_STRIDE*(3+(%1&1)*4), t0 %if %1 != %2-1 %if (%1&1) != 0 add r0, 8*FENC_STRIDE @@ -1290,78 +1241,86 @@ %endmacro %macro SAD_X3_END_SSE2 0 - movhlps xmm4, xmm0 - movhlps xmm5, xmm1 - movhlps xmm6, xmm2 - paddw xmm0, xmm4 - paddw xmm1, xmm5 - paddw xmm2, xmm6 -%if UNIX64 - movd [r6+0], xmm0 - movd [r6+4], xmm1 - movd [r6+8], xmm2 + movifnidn r5, r5mp +%if cpuflag(ssse3) + packssdw m0, m1 + packssdw m2, m2 + phaddd m0, m2 + mova [r5], m0 %else - mov r0, r5mp - movd [r0+0], xmm0 - movd [r0+4], xmm1 - movd [r0+8], xmm2 + movhlps m3, m0 + movhlps m4, m1 + movhlps m5, m2 + paddw m0, 
m3 + paddw m1, m4 + paddw m2, m5 + movd [r5+0], m0 + movd [r5+4], m1 + movd [r5+8], m2 %endif RET %endmacro %macro SAD_X4_END_SSE2 0 - mov r0, r6mp - psllq xmm1, 32 - psllq xmm3, 32 - paddw xmm0, xmm1 - paddw xmm2, xmm3 - movhlps xmm1, xmm0 - movhlps xmm3, xmm2 - paddw xmm0, xmm1 - paddw xmm2, xmm3 - movq [r0+0], xmm0 - movq [r0+8], xmm2 + mov r0, r6mp +%if cpuflag(ssse3) + packssdw m0, m1 + packssdw m2, m3 + phaddd m0, m2 + mova [r0], m0 +%else + psllq m1, 32 + psllq m3, 32 + paddw m0, m1 + paddw m2, m3 + movhlps m1, m0 + movhlps m3, m2 + paddw m0, m1 + paddw m2, m3 + movq [r0+0], m0 + movq [r0+8], m2 +%endif RET %endmacro %macro SAD_X4_START_2x8P_SSSE3 0 - movddup xmm4, [r0] - movq xmm0, [r1] - movq xmm1, [r3] - movhps xmm0, [r2] - movhps xmm1, [r4] - movddup xmm5, [r0+FENC_STRIDE] - movq xmm2, [r1+r5] - movq xmm3, [r3+r5] - movhps xmm2, [r2+r5] - movhps xmm3, [r4+r5] - psadbw xmm0, xmm4 - psadbw xmm1, xmm4 - psadbw xmm2, xmm5 - psadbw xmm3, xmm5 - paddw xmm0, xmm2 - paddw xmm1, xmm3 + movddup m4, [r0] + movq m0, [r1] + movq m1, [r3] + movhps m0, [r2] + movhps m1, [r4] + movddup m5, [r0+FENC_STRIDE] + movq m2, [r1+r5] + movq m3, [r3+r5] + movhps m2, [r2+r5] + movhps m3, [r4+r5] + psadbw m0, m4 + psadbw m1, m4 + psadbw m2, m5 + psadbw m3, m5 + paddw m0, m2 + paddw m1, m3 %endmacro %macro SAD_X4_2x8P_SSSE3 4 - movddup xmm6, [r0+%1] - movq xmm2, [r1+%2] - movq xmm3, [r3+%2] - movhps xmm2, [r2+%2] - movhps xmm3, [r4+%2] - movddup xmm7, [r0+%3] - movq xmm4, [r1+%4] - movq xmm5, [r3+%4] - movhps xmm4, [r2+%4] - movhps xmm5, [r4+%4] - psadbw xmm2, xmm6 - psadbw xmm3, xmm6 - psadbw xmm4, xmm7 - psadbw xmm5, xmm7 - paddw xmm0, xmm2 - paddw xmm1, xmm3 - paddw xmm0, xmm4 - paddw xmm1, xmm5 + movddup m6, [r0+%1] + movq m2, [r1+%2] + movq m3, [r3+%2] + movhps m2, [r2+%2] + movhps m3, [r4+%2] + movddup m7, [r0+%3] + movq m4, [r1+%4] + movq m5, [r3+%4] + movhps m4, [r2+%4] + movhps m5, [r4+%4] + psadbw m2, m6 + psadbw m3, m6 + psadbw m4, m7 + psadbw m5, m7 + paddw m0, m2 + 
paddw m1, m3 + paddw m0, m4 + paddw m1, m5 %endmacro %macro SAD_X4_4x8P_SSSE3 2 @@ -1384,9 +1343,9 @@ %endmacro %macro SAD_X4_END_SSSE3 0 - mov r0, r6mp - packssdw xmm0, xmm1 - movdqa [r0], xmm0 + mov r0, r6mp + packssdw m0, m1 + mova [r0], m0 RET %endmacro @@ -1421,15 +1380,12 @@ %macro SAD_X3_4x16P_AVX2 2 %if %1==0 -%if UNIX64 - mov r6, r5 -%endif - lea r5, [r4*3] + lea t0, [r4*3] SAD_X3_START_2x16P_AVX2 %else SAD_X3_2x16P_AVX2 FENC_STRIDE*(0+(%1&1)*4), r4*0, r4*1 %endif - SAD_X3_2x16P_AVX2 FENC_STRIDE*(2+(%1&1)*4), r4*2, r5 + SAD_X3_2x16P_AVX2 FENC_STRIDE*(2+(%1&1)*4), r4*2, t0 %if %1 != %2-1 %if (%1&1) != 0 add r0, 8*FENC_STRIDE @@ -1444,12 +1400,12 @@ vbroadcasti128 m4, [r0] vbroadcasti128 m5, [r0+FENC_STRIDE] movu xm0, [r1] - movu xm1, [r3] + movu xm1, [r2] movu xm2, [r1+r5] - movu xm3, [r3+r5] - vinserti128 m0, m0, [r2], 1 + movu xm3, [r2+r5] + vinserti128 m0, m0, [r3], 1 vinserti128 m1, m1, [r4], 1 - vinserti128 m2, m2, [r2+r5], 1 + vinserti128 m2, m2, [r3+r5], 1 vinserti128 m3, m3, [r4+r5], 1 psadbw m0, m4 psadbw m1, m4 @@ -1463,12 +1419,12 @@ vbroadcasti128 m6, [r0+%1] vbroadcasti128 m7, [r0+%3] movu xm2, [r1+%2] - movu xm3, [r3+%2] + movu xm3, [r2+%2] movu xm4, [r1+%4] - movu xm5, [r3+%4] - vinserti128 m2, m2, [r2+%2], 1 + movu xm5, [r2+%4] + vinserti128 m2, m2, [r3+%2], 1 vinserti128 m3, m3, [r4+%2], 1 - vinserti128 m4, m4, [r2+%4], 1 + vinserti128 m4, m4, [r3+%4], 1 vinserti128 m5, m5, [r4+%4], 1 psadbw m2, m6 psadbw m3, m6 @@ -1500,41 +1456,22 @@ %endmacro %macro SAD_X3_END_AVX2 0 - vextracti128 xm4, m0, 1 - vextracti128 xm5, m1, 1 - vextracti128 xm6, m2, 1 - paddw xm0, xm4 - paddw xm1, xm5 - paddw xm2, xm6 - movhlps xm4, xm0 - movhlps xm5, xm1 - movhlps xm6, xm2 - paddw xm0, xm4 - paddw xm1, xm5 - paddw xm2, xm6 -%if UNIX64 - movd [r6+0], xm0 - movd [r6+4], xm1 - movd [r6+8], xm2 -%else - mov r0, r5mp - movd [r0+0], xm0 - movd [r0+4], xm1 - movd [r0+8], xm2 -%endif + movifnidn r5, r5mp + packssdw m0, m1 ; 0 0 1 1 0 0 1 1 + packssdw m2, m2 ; 2 2 _ _ 2 
2 _ _ + phaddd m0, m2 ; 0 1 2 _ 0 1 2 _ + vextracti128 xm1, m0, 1 + paddd xm0, xm1 ; 0 1 2 _ + mova [r5], xm0 RET %endmacro %macro SAD_X4_END_AVX2 0 - mov r0, r6mp - punpckhqdq m2, m0, m0 - punpckhqdq m3, m1, m1 - paddw m0, m2 - paddw m1, m3 - packssdw m0, m1 - mova xm2, [deinterleave_sadx4] - vpermd m0, m2, m0 - mova [r0], xm0 + mov r0, r6mp + packssdw m0, m1 ; 0 0 1 1 2 2 3 3 + vextracti128 xm1, m0, 1 + phaddd xm0, xm1 ; 0 1 2 3 + mova [r0], xm0 RET %endmacro @@ -1542,8 +1479,8 @@ ; void pixel_sad_x3_16x16( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, ; uint8_t *pix2, intptr_t i_stride, int scores[3] ) ;----------------------------------------------------------------------------- -%macro SAD_X_SSE2 3 -cglobal pixel_sad_x%1_%2x%3, 2+%1,3+%1,9 +%macro SAD_X_SSE2 4 +cglobal pixel_sad_x%1_%2x%3, 2+%1,3+%1,%4 %assign x 0 %rep %3/4 SAD_X%1_4x%2P_SSE2 x, %3/4 @@ -1553,28 +1490,22 @@ %endmacro INIT_XMM sse2 -SAD_X_SSE2 3, 16, 16 -SAD_X_SSE2 3, 16, 8 -SAD_X_SSE2 3, 8, 16 -SAD_X_SSE2 3, 8, 8 -SAD_X_SSE2 3, 8, 4 -SAD_X_SSE2 4, 16, 16 -SAD_X_SSE2 4, 16, 8 -SAD_X_SSE2 4, 8, 16 -SAD_X_SSE2 4, 8, 8 -SAD_X_SSE2 4, 8, 4 - -INIT_XMM sse2, misalign -SAD_X_SSE2 3, 16, 16 -SAD_X_SSE2 3, 16, 8 -SAD_X_SSE2 4, 16, 16 -SAD_X_SSE2 4, 16, 8 +SAD_X_SSE2 3, 16, 16, 7 +SAD_X_SSE2 3, 16, 8, 7 +SAD_X_SSE2 3, 8, 16, 7 +SAD_X_SSE2 3, 8, 8, 7 +SAD_X_SSE2 3, 8, 4, 7 +SAD_X_SSE2 4, 16, 16, 7 +SAD_X_SSE2 4, 16, 8, 7 +SAD_X_SSE2 4, 8, 16, 7 +SAD_X_SSE2 4, 8, 8, 7 +SAD_X_SSE2 4, 8, 4, 7 INIT_XMM sse3 -SAD_X_SSE2 3, 16, 16 -SAD_X_SSE2 3, 16, 8 -SAD_X_SSE2 4, 16, 16 -SAD_X_SSE2 4, 16, 8 +SAD_X_SSE2 3, 16, 16, 7 +SAD_X_SSE2 3, 16, 8, 7 +SAD_X_SSE2 4, 16, 16, 7 +SAD_X_SSE2 4, 16, 8, 7 %macro SAD_X_SSSE3 3 cglobal pixel_sad_x%1_%2x%3, 2+%1,3+%1,8 @@ -1587,9 +1518,19 @@ %endmacro INIT_XMM ssse3 -SAD_X_SSSE3 4, 8, 16 -SAD_X_SSSE3 4, 8, 8 -SAD_X_SSSE3 4, 8, 4 +SAD_X_SSE2 3, 16, 16, 7 +SAD_X_SSE2 3, 16, 8, 7 +SAD_X_SSE2 4, 16, 16, 7 +SAD_X_SSE2 4, 16, 8, 7 +SAD_X_SSSE3 4, 8, 16 +SAD_X_SSSE3 4, 8, 8 +SAD_X_SSSE3 
4, 8, 4 + +INIT_XMM avx +SAD_X_SSE2 3, 16, 16, 6 +SAD_X_SSE2 3, 16, 8, 6 +SAD_X_SSE2 4, 16, 16, 7 +SAD_X_SSE2 4, 16, 8, 7 %macro SAD_X_AVX2 4 cglobal pixel_sad_x%1_%2x%3, 2+%1,3+%1,%4
View file
x264-snapshot-20130723-2245.tar.bz2/common/x86/sad16-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/sad16-a.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* sad16-a.asm: x86 high depth sad functions ;***************************************************************************** -;* Copyright (C) 2010-2013 x264 project +;* Copyright (C) 2010-2014 x264 project ;* ;* Authors: Oskar Arvidsson <oskar@irock.se> ;* Henrik Gramner <henrik@gramner.com>
View file
x264-snapshot-20130723-2245.tar.bz2/common/x86/trellis-64.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/trellis-64.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* trellis-64.asm: x86_64 trellis quantization ;***************************************************************************** -;* Copyright (C) 2012-2013 x264 project +;* Copyright (C) 2012-2014 x264 project ;* ;* Authors: Loren Merritt <lorenm@u.washington.edu> ;*
View file
x264-snapshot-20130723-2245.tar.bz2/common/x86/util.h -> x264-snapshot-20140321-2245.tar.bz2/common/x86/util.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * util.h: x86 inline asm ***************************************************************************** - * Copyright (C) 2008-2013 x264 project + * Copyright (C) 2008-2014 x264 project * * Authors: Jason Garrett-Glaser <darkshikari@gmail.com> * Loren Merritt <lorenm@u.washington.edu>
View file
x264-snapshot-20130723-2245.tar.bz2/common/x86/x86inc.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/x86inc.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* x86inc.asm: x264asm abstraction layer ;***************************************************************************** -;* Copyright (C) 2005-2013 x264 project +;* Copyright (C) 2005-2014 x264 project ;* ;* Authors: Loren Merritt <lorenm@u.washington.edu> ;* Anton Mitrofanov <BugMaster@narod.ru> @@ -42,6 +42,14 @@ %define public_prefix private_prefix %endif +%ifndef STACK_ALIGNMENT + %if ARCH_X86_64 + %define STACK_ALIGNMENT 16 + %else + %define STACK_ALIGNMENT 4 + %endif +%endif + %define WIN64 0 %define UNIX64 0 %if ARCH_X86_64 @@ -49,6 +57,8 @@ %define WIN64 1 %elifidn __OUTPUT_FORMAT__,win64 %define WIN64 1 + %elifidn __OUTPUT_FORMAT__,x64 + %define WIN64 1 %else %define UNIX64 1 %endif @@ -92,8 +102,9 @@ ; %1 = number of arguments. loads them from stack if needed. ; %2 = number of registers used. pushes callee-saved regs if needed. ; %3 = number of xmm registers used. pushes callee-saved xmm regs if needed. -; %4 = (optional) stack size to be allocated. If not aligned (x86-32 ICC 10.x, -; MSVC or YMM), the stack will be manually aligned (to 16 or 32 bytes), +; %4 = (optional) stack size to be allocated. The stack will be aligned before +; allocating the specified stack size. If the required stack alignment is +; larger than the known stack alignment the stack will be manually aligned ; and an extra register will be allocated to hold the original stack ; pointer (to not invalidate r0m etc.). To prevent the use of an extra ; register as stack pointer, request a negative stack size. @@ -101,8 +112,10 @@ ; PROLOGUE can also be invoked by adding the same options to cglobal ; e.g. 
-; cglobal foo, 2,3,0, dst, src, tmp -; declares a function (foo), taking two args (dst and src) and one local variable (tmp) +; cglobal foo, 2,3,7,0x40, dst, src, tmp +; declares a function (foo) that automatically loads two arguments (dst and +; src) into registers, uses one additional register (tmp) plus 7 vector +; registers (m0-m6) and allocates 0x40 bytes of stack space. ; TODO Some functions can use some args directly from the stack. If they're the ; last args then you can just not declare them, but if they're in the middle @@ -302,26 +315,28 @@ %assign n_arg_names %0 %endmacro +%define required_stack_alignment ((mmsize + 15) & ~15) + %macro ALLOC_STACK 1-2 0 ; stack_size, n_xmm_regs (for win64 only) %ifnum %1 %if %1 != 0 - %assign %%stack_alignment ((mmsize + 15) & ~15) + %assign %%pad 0 %assign stack_size %1 %if stack_size < 0 %assign stack_size -stack_size %endif - %assign stack_size_padded stack_size %if WIN64 - %assign stack_size_padded stack_size_padded + 32 ; reserve 32 bytes for shadow space + %assign %%pad %%pad + 32 ; shadow space %if mmsize != 8 %assign xmm_regs_used %2 %if xmm_regs_used > 8 - %assign stack_size_padded stack_size_padded + (xmm_regs_used-8)*16 + %assign %%pad %%pad + (xmm_regs_used-8)*16 ; callee-saved xmm registers %endif %endif %endif - %if mmsize <= 16 && HAVE_ALIGNED_STACK - %assign stack_size_padded stack_size_padded + %%stack_alignment - gprsize - (stack_offset & (%%stack_alignment - 1)) + %if required_stack_alignment <= STACK_ALIGNMENT + ; maintain the current stack alignment + %assign stack_size_padded stack_size + %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1)) SUB rsp, stack_size_padded %else %assign %%reg_num (regs_used - 1) @@ -330,17 +345,17 @@ ; it, i.e. in [rsp+stack_size_padded], so we can restore the ; stack in a single instruction (i.e. 
mov rsp, rstk or mov ; rsp, [rsp+stack_size_padded]) - mov rstk, rsp %if %1 < 0 ; need to store rsp on stack - sub rsp, gprsize+stack_size_padded - and rsp, ~(%%stack_alignment-1) - %xdefine rstkm [rsp+stack_size_padded] - mov rstkm, rstk + %xdefine rstkm [rsp + stack_size + %%pad] + %assign %%pad %%pad + gprsize %else ; can keep rsp in rstk during whole function - sub rsp, stack_size_padded - and rsp, ~(%%stack_alignment-1) %xdefine rstkm rstk %endif + %assign stack_size_padded stack_size + ((%%pad + required_stack_alignment-1) & ~(required_stack_alignment-1)) + mov rstk, rsp + and rsp, ~(required_stack_alignment-1) + sub rsp, stack_size_padded + movifnidn rstkm, rstk %endif WIN64_PUSH_XMM %endif @@ -349,7 +364,7 @@ %macro SETUP_STACK_POINTER 1 %ifnum %1 - %if %1 != 0 && (HAVE_ALIGNED_STACK == 0 || mmsize == 32) + %if %1 != 0 && required_stack_alignment > STACK_ALIGNMENT %if %1 > 0 %assign regs_used (regs_used + 1) %elif ARCH_X86_64 && regs_used == num_args && num_args <= 4 + UNIX64 * 2 @@ -423,7 +438,9 @@ %assign xmm_regs_used %1 ASSERT xmm_regs_used <= 16 %if xmm_regs_used > 8 - %assign stack_size_padded (xmm_regs_used-8)*16 + (~stack_offset&8) + 32 + ; Allocate stack space for callee-saved xmm registers plus shadow space and align the stack. 
+ %assign %%pad (xmm_regs_used-8)*16 + 32 + %assign stack_size_padded %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1)) SUB rsp, stack_size_padded %endif WIN64_PUSH_XMM @@ -439,7 +456,7 @@ %endrep %endif %if stack_size_padded > 0 - %if stack_size > 0 && (mmsize == 32 || HAVE_ALIGNED_STACK == 0) + %if stack_size > 0 && required_stack_alignment > STACK_ALIGNMENT mov rsp, rstkm %else add %1, stack_size_padded @@ -505,7 +522,7 @@ %macro RET 0 %if stack_size_padded > 0 -%if mmsize == 32 || HAVE_ALIGNED_STACK == 0 +%if required_stack_alignment > STACK_ALIGNMENT mov rsp, rstkm %else add rsp, stack_size_padded @@ -561,7 +578,7 @@ %macro RET 0 %if stack_size_padded > 0 -%if mmsize == 32 || HAVE_ALIGNED_STACK == 0 +%if required_stack_alignment > STACK_ALIGNMENT mov rsp, rstkm %else add rsp, stack_size_padded @@ -731,11 +748,10 @@ %assign cpuflags_cache64 (1<<17) %assign cpuflags_slowctz (1<<18) %assign cpuflags_lzcnt (1<<19) -%assign cpuflags_misalign (1<<20) -%assign cpuflags_aligned (1<<21) ; not a cpu feature, but a function variant -%assign cpuflags_atom (1<<22) -%assign cpuflags_bmi1 (1<<23)|cpuflags_lzcnt -%assign cpuflags_bmi2 (1<<24)|cpuflags_bmi1 +%assign cpuflags_aligned (1<<20) ; not a cpu feature, but a function variant +%assign cpuflags_atom (1<<21) +%assign cpuflags_bmi1 (1<<22)|cpuflags_lzcnt +%assign cpuflags_bmi2 (1<<23)|cpuflags_bmi1 %define cpuflag(x) ((cpuflags & (cpuflags_ %+ x)) == (cpuflags_ %+ x)) %define notcpuflag(x) ((cpuflags & (cpuflags_ %+ x)) != (cpuflags_ %+ x)) @@ -777,9 +793,9 @@ %endmacro ; Merge mmx and sse* -; m# is a simd regsiter of the currently selected size -; xm# is the corresponding xmmreg (if selcted xmm or ymm size), or mmreg (if selected mmx) -; ym# is the corresponding ymmreg (if selcted xmm or ymm size), or mmreg (if selected mmx) +; m# is a simd register of the currently selected size +; xm# is the corresponding xmm register if mmsize >= 16, otherwise the same as m# +; ym# is the corresponding ymm register if 
mmsize >= 32, otherwise the same as m# ; (All 3 remain in sync through SWAP.) %macro CAT_XDEFINE 3 @@ -802,12 +818,12 @@ %assign %%i 0 %rep 8 CAT_XDEFINE m, %%i, mm %+ %%i - CAT_XDEFINE nmm, %%i, %%i + CAT_XDEFINE nnmm, %%i, %%i %assign %%i %%i+1 %endrep %rep 8 CAT_UNDEF m, %%i - CAT_UNDEF nmm, %%i + CAT_UNDEF nnmm, %%i %assign %%i %%i+1 %endrep INIT_CPUFLAGS %1 @@ -828,7 +844,7 @@ %assign %%i 0 %rep num_mmregs CAT_XDEFINE m, %%i, xmm %+ %%i - CAT_XDEFINE nxmm, %%i, %%i + CAT_XDEFINE nnxmm, %%i, %%i %assign %%i %%i+1 %endrep INIT_CPUFLAGS %1 @@ -865,7 +881,7 @@ %define xmmxmm%1 xmm%1 %define xmmymm%1 xmm%1 %define ymmmm%1 mm%1 - %define ymmxmm%1 ymm%1 + %define ymmxmm%1 xmm%1 %define ymmymm%1 ymm%1 %define xm%1 xmm %+ m%1 %define ym%1 ymm %+ m%1 @@ -898,7 +914,7 @@ %endrep %rep %0/2 %xdefine m%1 %%tmp%2 - CAT_XDEFINE n, m%1, %1 + CAT_XDEFINE nn, m%1, %1 %rotate 2 %endrep %endmacro @@ -916,16 +932,16 @@ %xdefine %%tmp m%1 %xdefine m%1 m%2 %xdefine m%2 %%tmp - CAT_XDEFINE n, m%1, %1 - CAT_XDEFINE n, m%2, %2 + CAT_XDEFINE nn, m%1, %1 + CAT_XDEFINE nn, m%2, %2 %rotate 1 %endrep %endmacro %macro SWAP_INTERNAL_NAME 2-* - %xdefine %%args n %+ %1 + %xdefine %%args nn %+ %1 %rep %0-1 - %xdefine %%args %%args, n %+ %2 + %xdefine %%args %%args, nn %+ %2 %rotate 1 %endrep SWAP_INTERNAL_NUM %%args @@ -952,7 +968,7 @@ %assign %%i 0 %rep num_mmregs CAT_XDEFINE m, %%i, %1_m %+ %%i - CAT_XDEFINE n, m %+ %%i, %%i + CAT_XDEFINE nn, m %+ %%i, %%i %assign %%i %%i+1 %endrep %endif @@ -1031,25 +1047,25 @@ ;%5+: operands %macro RUN_AVX_INSTR 5-8+ %ifnum sizeof%6 - %assign %%sizeofreg sizeof%6 + %assign __sizeofreg sizeof%6 %elifnum sizeof%5 - %assign %%sizeofreg sizeof%5 + %assign __sizeofreg sizeof%5 %else - %assign %%sizeofreg mmsize + %assign __sizeofreg mmsize %endif - %assign %%emulate_avx 0 - %if avx_enabled && %%sizeofreg >= 16 - %xdefine %%instr v%1 + %assign __emulate_avx 0 + %if avx_enabled && __sizeofreg >= 16 + %xdefine __instr v%1 %else - %xdefine %%instr %1 + %xdefine 
__instr %1 %if %0 >= 7+%3 - %assign %%emulate_avx 1 + %assign __emulate_avx 1 %endif %endif - %if %%emulate_avx - %xdefine %%src1 %6 - %xdefine %%src2 %7 + %if __emulate_avx + %xdefine __src1 %6 + %xdefine __src2 %7 %ifnidn %5, %6 %if %0 >= 8 CHECK_AVX_INSTR_EMU {%1 %5, %6, %7, %8}, %5, %7, %8 @@ -1061,31 +1077,31 @@ ; 3-operand AVX instructions with a memory arg can only have it in src2, ; whereas SSE emulation prefers to have it in src1 (i.e. the mov). ; So, if the instruction is commutative with a memory arg, swap them. - %xdefine %%src1 %7 - %xdefine %%src2 %6 + %xdefine __src1 %7 + %xdefine __src2 %6 %endif %endif - %if %%sizeofreg == 8 - MOVQ %5, %%src1 + %if __sizeofreg == 8 + MOVQ %5, __src1 %elif %2 - MOVAPS %5, %%src1 + MOVAPS %5, __src1 %else - MOVDQA %5, %%src1 + MOVDQA %5, __src1 %endif %endif %if %0 >= 8 - %1 %5, %%src2, %8 + %1 %5, __src2, %8 %else - %1 %5, %%src2 + %1 %5, __src2 %endif %elif %0 >= 8 - %%instr %5, %6, %7, %8 + __instr %5, %6, %7, %8 %elif %0 == 7 - %%instr %5, %6, %7 + __instr %5, %6, %7 %elif %0 == 6 - %%instr %5, %6 + __instr %5, %6 %else - %%instr %5 + __instr %5 %endif %endmacro @@ -1384,15 +1400,18 @@ %macro %1 4-7 %1, %2, %3 %if cpuflag(xop) v%5 %1, %2, %3, %4 - %else + %elifnidn %1, %4 %6 %1, %2, %3 %7 %1, %4 + %else + %error non-xop emulation of ``%5 %1, %2, %3, %4'' is not supported %endif %endmacro %endmacro -FMA_INSTR pmacsdd, pmulld, paddd FMA_INSTR pmacsww, pmullw, paddw +FMA_INSTR pmacsdd, pmulld, paddd ; sse4 emulation +FMA_INSTR pmacsdql, pmuldq, paddq ; sse4 emulation FMA_INSTR pmadcswd, pmaddwd, paddd ; convert FMA4 to FMA3 if possible
View file
x264-snapshot-20130723-2245.tar.bz2/common/x86/x86util.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/x86util.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* x86util.asm: x86 utility macros ;***************************************************************************** -;* Copyright (C) 2008-2013 x264 project +;* Copyright (C) 2008-2014 x264 project ;* ;* Authors: Holger Lubitz <holger@lubitz.org> ;* Loren Merritt <lorenm@u.washington.edu>
View file
x264-snapshot-20130723-2245.tar.bz2/configure -> x264-snapshot-20140321-2245.tar.bz2/configure
Changed
@@ -30,7 +30,6 @@ --disable-thread disable multithreaded encoding --enable-win32thread use win32threads (windows only) --disable-interlaced disable interlaced encoding support - --enable-visualize enable visualization (X11 only) --bit-depth=BIT_DEPTH set output bit depth (8-10) [8] --chroma-format=FORMAT output chroma format (420, 422, 444, all) [all] @@ -52,6 +51,7 @@ --disable-lavf disable libavformat support --disable-ffms disable ffmpegsource support --disable-gpac disable gpac support + --disable-lsmash disable lsmash support EOF exit 1 @@ -264,6 +264,8 @@ lavf="auto" ffms="auto" gpac="auto" +lsmash="auto" +mp4="no" gpl="yes" thread="auto" swscale="auto" @@ -273,7 +275,6 @@ gprof="no" strip="no" pic="no" -vis="no" bit_depth="8" chroma_format="all" compiler="GNU" @@ -290,7 +291,8 @@ EXE="" # list of all preprocessor HAVE values we can define -CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F VISUALIZE SWSCALE LAVF FFMS GPAC GF_MALLOC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL" +CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F SWSCALE \ + LAVF FFMS GPAC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL THP LSMASH" # parse options @@ -342,6 +344,9 @@ --disable-gpac) gpac="no" ;; + --disable-lsmash) + lsmash="no" + ;; --disable-gpl) gpl="no" ;; @@ -380,9 +385,6 @@ --enable-pic) pic="yes" ;; - --enable-visualize) - vis="yes" - ;; --host=*) host="$optarg" ;; @@ -423,6 +425,7 @@ AR="${AR-${cross_prefix}ar}" RANLIB="${RANLIB-${cross_prefix}ranlib}" STRIP="${STRIP-${cross_prefix}strip}" +INSTALL="${INSTALL-install}" if [ "x$host" = x ]; then host=`${SRCPATH}/config.guess` @@ -503,12 +506,13 @@ CFLAGS="$CFLAGS -mno-cygwin" LDFLAGS="$LDFLAGS -mno-cygwin" fi - if cpp_check "" "" "defined(__CYGWIN32__)" ; then + if cpp_check "" "" "defined(__CYGWIN__)" ; then define HAVE_MALLOC_H SYS="CYGWIN" else SYS="WINDOWS" DEVNULL="NUL" + LDFLAGSCLI="$LDFLAGSCLI 
-lshell32" RC="${RC-${cross_prefix}windres}" fi ;; @@ -516,6 +520,7 @@ SYS="WINDOWS" EXE=".exe" DEVNULL="NUL" + LDFLAGSCLI="$LDFLAGSCLI -lshell32" [ $compiler = ICL ] && RC="${RC-rc}" || RC="${RC-${cross_prefix}windres}" ;; sunos*|solaris*) @@ -527,6 +532,15 @@ else LDFLAGS="$LDFLAGS /usr/lib/values-xpg6.o" fi + if test -x /usr/ucb/install ; then + INSTALL=/usr/ucb/install + elif test -x /usr/bin/ginstall ; then + # OpenSolaris + INSTALL=/usr/bin/ginstall + elif test -x /usr/gnu/bin/install ; then + # OpenSolaris + INSTALL=/usr/gnu/bin/install + fi HAVE_GETOPT_LONG=0 ;; *qnx*) @@ -543,7 +557,7 @@ LDFLAGS="$LDFLAGS $libm" -aligned_stack=1 +stack_alignment=16 case $host_cpu in i*86) ARCH="X86" @@ -563,8 +577,7 @@ if [ $SYS = LINUX ]; then # < 11 is completely incapable of keeping a mod16 stack if cpp_check "" "" "__INTEL_COMPILER < 1100" ; then - define BROKEN_STACK_ALIGNMENT - aligned_stack=0 + stack_alignment=4 # 11 <= x < 12 is capable of keeping a mod16 stack, but defaults to not doing so. elif cpp_check "" "" "__INTEL_COMPILER < 1200" ; then CFLAGS="$CFLAGS -falign-stack=assume-16-byte" @@ -572,7 +585,7 @@ # >= 12 defaults to a mod16 stack fi # icl on windows has no mod16 stack support - [ $SYS = WINDOWS ] && define BROKEN_STACK_ALIGNMENT && aligned_stack=0 + [ $SYS = WINDOWS ] && stack_alignment=4 fi if [ "$SYS" = MACOSX ]; then ASFLAGS="$ASFLAGS -f macho -DPREFIX" @@ -595,7 +608,7 @@ CFLAGS="$CFLAGS -arch x86_64" LDFLAGS="$LDFLAGS -arch x86_64" fi - elif [ "$SYS" = WINDOWS ]; then + elif [ "$SYS" = WINDOWS -o "$SYS" = CYGWIN ]; then ASFLAGS="$ASFLAGS -f win32 -m amd64" # only the GNU toolchain is inconsistent in prefixing function names with _ [ $compiler = GNU ] && cc_check "" "-S" && grep -q "_main:" conftest && ASFLAGS="$ASFLAGS -DPREFIX" @@ -667,7 +680,6 @@ ARCH="$(echo $host_cpu | tr a-z A-Z)" ;; esac -ASFLAGS="$ASFLAGS -DHAVE_ALIGNED_STACK=${aligned_stack}" if [ $SYS = WINDOWS ]; then if ! 
rc_check "0 RCDATA {0}" ; then @@ -719,10 +731,11 @@ echo "If you really want to compile without asm, configure with --disable-asm." exit 1 fi + ASFLAGS="$ASFLAGS -Worphan-labels" define HAVE_MMX - if cc_check '' -mpreferred-stack-boundary=5 ; then + if [ $compiler = GNU ] && cc_check '' -mpreferred-stack-boundary=5 ; then CFLAGS="$CFLAGS -mpreferred-stack-boundary=5" - define HAVE_32B_STACK_ALIGNMENT + stack_alignment=32 fi fi @@ -747,6 +760,9 @@ define ARCH_$ARCH define SYS_$SYS +define STACK_ALIGNMENT $stack_alignment +ASFLAGS="$ASFLAGS -DSTACK_ALIGNMENT=$stack_alignment" + # skip endianness check for Intel Compiler, as all supported platforms are little. the -ipo flag will also cause the check to fail if [ $compiler = GNU ]; then echo "int i[2] = {0x42494745,0}; double f[2] = {0x1.0656e6469616ep+102,0};" > conftest.c @@ -792,10 +808,15 @@ fi ;; QNX) - cc_check pthread.h -lc && thread="posix" && libpthread="-lc" + cc_check pthread.h -lc "pthread_create(0,0,0,0);" && thread="posix" && libpthread="-lc" ;; *) - cc_check pthread.h -lpthread && thread="posix" && libpthread="-lpthread" + if cc_check pthread.h -lpthread "pthread_create(0,0,0,0);" ; then + thread="posix" + libpthread="-lpthread" + else + cc_check pthread.h "" "pthread_create(0,0,0,0);" && thread="posix" && libpthread="" + fi ;; esac fi @@ -820,16 +841,8 @@ define HAVE_LOG2F fi -if [ "$vis" = "yes" ] ; then - save_CFLAGS="$CFLAGS" - CFLAGS="$CFLAGS -I/usr/X11R6/include" - if cc_check "X11/Xlib.h" "-L/usr/X11R6/lib -lX11" "XOpenDisplay(0);" ; then - LDFLAGS="-L/usr/X11R6/lib -lX11 $LDFLAGS" - define HAVE_VISUALIZE - else - vis="no" - CFLAGS="$save_CFLAGS" - fi +if [ "$SYS" = "LINUX" -a \( "$ARCH" = "X86" -o "$ARCH" = "X86_64" \) ] && cc_check "sys/mman.h" "" "MADV_HUGEPAGE;" ; then + define HAVE_THP fi if [ "$swscale" = "auto" ] ; then @@ -841,10 +854,10 @@ [ -z "$SWSCALE_LIBS" ] && SWSCALE_LIBS="-lswscale -lavutil" if cc_check "libswscale/swscale.h" "$SWSCALE_CFLAGS $SWSCALE_LIBS" 
"sws_init_context(0,0,0);" ; then - if cpp_check "libavutil/pixdesc.h" "$SWSCALE_CFLAGS $SWSCALE_LIBS" "defined(PIX_FMT_RGB)" ; then + if cpp_check "libavutil/pixdesc.h" "$SWSCALE_CFLAGS $SWSCALE_LIBS" "defined(AV_PIX_FMT_FLAG_RGB)" ; then swscale="yes" else - echo "Warning: PIX_FMT_RGB is missing from libavutil, update for swscale support" + echo "Warning: AV_PIX_FMT_FLAG_RGB is missing from libavutil, update for swscale support" fi fi fi @@ -857,7 +870,7 @@ fi if [ -z "$LAVF_LIBS" -a -z "$LAVF_CFLAGS" ]; then LAVF_LIBS="-lavformat" - for lib in -lpostproc -lavcodec -lavcore -lswscale -lavutil -lm -lz -lbz2 $libpthread -lavifil32; do + for lib in -lpostproc -lavcodec -lavcore -lswscale -lavutil -lm -lz -lbz2 $libpthread -lavifil32 -lws2_32; do cc_check "" $lib && LAVF_LIBS="$LAVF_LIBS $lib" done fi @@ -915,11 +928,30 @@ fi fi -if [ "$gpac" = "auto" ] ; then +if [ "$lsmash" = "auto" ] ; then + lsmash="no" + if ${cross_prefix}pkg-config --exists liblsmash 2>/dev/null; then + LSMASH_LIBS="$LSMASH_LIBS $(${cross_prefix}pkg-config --libs liblsmash)" + LSMASH_CFLAGS="$LSMASH_CFLAGS $(${cross_prefix}pkg-config --cflags liblsmash)" + fi + [ -z "$LSMASH_LIBS" ] && LSMASH_LIBS="-llsmash" + + if cc_check lsmash.h "$LSMASH_CFLAGS $LSMASH_LIBS" ; then + if cpp_check lsmash.h "$LSMASH_CFLAGS" "LSMASH_VERSION_MAJOR > 0 || (LSMASH_VERSION_MAJOR == 0 && LSMASH_VERSION_MINOR >= 1)" ; then + lsmash="yes" + else + echo "Warning: lsmash is too old, update to rev.751 or later" + fi + fi +fi + +if [ "$gpac" = "auto" -a "$lsmash" != "yes" ] ; then gpac="no" - cc_check "" -lz && GPAC_LIBS="-lgpac_static -lz" || GPAC_LIBS="-lgpac_static" + GPAC_LIBS="-lgpac_static" + cc_check "" -lz && GPAC_LIBS="$GPAC_LIBS -lz" if [ "$SYS" = "WINDOWS" ] ; then - GPAC_LIBS="$GPAC_LIBS -lwinmm" + cc_check "" -lws2_32 && GPAC_LIBS="$GPAC_LIBS -lws2_32" + cc_check "" -lwinmm && GPAC_LIBS="$GPAC_LIBS -lwinmm" fi if cc_check gpac/isomedia.h "$GPAC_LIBS" ; then if cc_check gpac/isomedia.h "$GPAC_LIBS" 
"gf_isom_set_pixel_aspect_ratio(0,0,0,0,0);" ; then @@ -929,18 +961,22 @@ fi fi fi -if [ "$gpac" = "yes" ] ; then + +if [ "$lsmash" = "yes" ] ; then + mp4="lsmash" + LDFLAGSCLI="$LSMASH_LIBS $LDFLAGSCLI" + CFLAGS="$CFLAGS $LSMASH_CFLAGS" + define HAVE_LSMASH +elif [ "$gpac" = "yes" ] ; then + mp4="gpac" define HAVE_GPAC - if cc_check gpac/isomedia.h "-Werror $GPAC_LIBS" "void *p; p = gf_malloc(1); gf_free(p);" ; then - define HAVE_GF_MALLOC - fi LDFLAGSCLI="$GPAC_LIBS $LDFLAGSCLI" fi if [ "$avs" = "auto" ] ; then avs="no" # cygwin can use avisynth if it can use LoadLibrary - if [ $SYS = WINDOWS ] || ([ $SYS = CYGWIN ] && cc_check windows.h "" "LoadLibrary(0);") ; then + if [ $SYS = WINDOWS ] || ([ $SYS = CYGWIN ] && cc_check windows.h "" "LoadLibraryW(0);") ; then avs="avisynth" define HAVE_AVS define USE_AVXSYNTH 0 @@ -1038,7 +1074,7 @@ fi log_ok # cygwin can use opencl if it can use LoadLibrary - if [ $SYS = WINDOWS ] || ([ $SYS = CYGWIN ] && cc_check windows.h "" "LoadLibrary(0);") ; then + if [ $SYS = WINDOWS ] || ([ $SYS = CYGWIN ] && cc_check windows.h "" "LoadLibraryW(0);") ; then opencl="yes" define HAVE_OPENCL elif [ "$SYS" = "LINUX" -o "$SYS" = "MACOSX" ] ; then @@ -1129,6 +1165,7 @@ AR=$AR RANLIB=$RANLIB STRIP=$STRIP +INSTALL=$INSTALL AS=$AS ASFLAGS=$ASFLAGS RC=$RC @@ -1219,8 +1256,8 @@ Name: x264 Description: H.264 (MPEG4 AVC) encoder library Version: $(grep POINTVER < x264_config.h | sed -e 's/.* "//; s/".*//') -Libs: -L$libdir -lx264 -Libs.private: $libpthread $libm $libdl +Libs: -L$libdir -lx264 $([ "$shared" = "yes" ] || echo $libpthread $libm $libdl) +Libs.private: $([ "$shared" = "yes" ] && echo $libpthread $libm $libdl) Cflags: -I$includedir EOF @@ -1241,7 +1278,7 @@ avs: $avs lavf: $lavf ffms: $ffms -gpac: $gpac +mp4: $mp4 gpl: $gpl thread: $thread opencl: $opencl @@ -1250,7 +1287,6 @@ gprof: $gprof strip: $strip PIC: $pic -visualize: $vis bit depth: $bit_depth chroma format: $chroma_format EOF
View file
x264-snapshot-20130723-2245.tar.bz2/encoder/analyse.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/analyse.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * analyse.c: macroblock analysis ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu> @@ -436,7 +436,7 @@ /* non-RD PCM decision is inaccurate (as is psy-rd), so don't do it. * PCM cost can overflow with high lambda2, so cap it at COST_MAX. */ uint64_t pcm_cost = ((uint64_t)X264_PCM_COST*a->i_lambda2 + 128) >> 8; - a->i_satd_pcm = !h->mb.i_psy_rd && a->i_mbrd && pcm_cost < COST_MAX ? pcm_cost : COST_MAX; + a->i_satd_pcm = !h->param.i_avcintra_class && !h->mb.i_psy_rd && a->i_mbrd && pcm_cost < COST_MAX ? pcm_cost : COST_MAX; a->b_fast_intra = 0; a->b_avoid_topright = 0; @@ -618,6 +618,24 @@ {I_PRED_CHROMA_V, I_PRED_CHROMA_H, I_PRED_CHROMA_DC, I_PRED_CHROMA_P, -1}, }; +static const int8_t i8x8_mode_available[2][5][10] = +{ + { + {I_PRED_4x4_DC_128, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {I_PRED_4x4_DC_LEFT, I_PRED_4x4_H, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1}, + {I_PRED_4x4_DC_TOP, I_PRED_4x4_V, I_PRED_4x4_DDL, I_PRED_4x4_VL, -1, -1, -1, -1, -1, -1}, + {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDL, I_PRED_4x4_VL, I_PRED_4x4_HU, -1, -1, -1, -1}, + {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDL, I_PRED_4x4_DDR, I_PRED_4x4_VR, I_PRED_4x4_HD, I_PRED_4x4_VL, I_PRED_4x4_HU, -1}, + }, + { + {I_PRED_4x4_DC_128, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {I_PRED_4x4_DC_LEFT, I_PRED_4x4_H, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1}, + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {I_PRED_4x4_H, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1, -1}, + {I_PRED_4x4_H, I_PRED_4x4_HD, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1}, + } +}; + static const int8_t i4x4_mode_available[2][5][10] = { { @@ -632,7 +650,7 @@ {I_PRED_4x4_DC_LEFT, I_PRED_4x4_H, I_PRED_4x4_HU, -1, -1, -1, -1, 
-1, -1, -1}, {I_PRED_4x4_DC_TOP, I_PRED_4x4_V, -1, -1, -1, -1, -1, -1, -1, -1}, {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1}, - {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDR, I_PRED_4x4_VR, I_PRED_4x4_HD, I_PRED_4x4_HU, -1}, + {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDR, I_PRED_4x4_VR, I_PRED_4x4_HD, I_PRED_4x4_HU, -1, -1, -1}, } }; @@ -655,7 +673,7 @@ int avoid_topright = force_intra && (i&1); int idx = i_neighbour & (MB_TOP|MB_LEFT|MB_TOPLEFT); idx = (idx == (MB_TOP|MB_LEFT|MB_TOPLEFT)) ? 4 : idx & (MB_TOP|MB_LEFT); - return i4x4_mode_available[avoid_topright][idx]; + return i8x8_mode_available[avoid_topright][idx]; } static ALWAYS_INLINE const int8_t *predict_4x4_mode_available( int force_intra, int i_neighbour, int i ) @@ -793,58 +811,60 @@ int lambda = a->i_lambda; /*---------------- Try all mode and calculate their score ---------------*/ + /* Disabled i16x16 for AVC-Intra compat */ + if( !h->param.i_avcintra_class ) + { + const int8_t *predict_mode = predict_16x16_mode_available( h->mb.i_neighbour_intra ); - /* 16x16 prediction selection */ - const int8_t *predict_mode = predict_16x16_mode_available( h->mb.i_neighbour_intra ); + /* Not heavily tuned */ + static const uint8_t i16x16_thresh_lut[11] = { 2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4 }; + int i16x16_thresh = a->b_fast_intra ? (i16x16_thresh_lut[h->mb.i_subpel_refine]*i_satd_inter)>>1 : COST_MAX; - /* Not heavily tuned */ - static const uint8_t i16x16_thresh_lut[11] = { 2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4 }; - int i16x16_thresh = a->b_fast_intra ? 
(i16x16_thresh_lut[h->mb.i_subpel_refine]*i_satd_inter)>>1 : COST_MAX; - - if( !h->mb.b_lossless && predict_mode[3] >= 0 ) - { - h->pixf.intra_mbcmp_x3_16x16( p_src, p_dst, a->i_satd_i16x16_dir ); - a->i_satd_i16x16_dir[0] += lambda * bs_size_ue(0); - a->i_satd_i16x16_dir[1] += lambda * bs_size_ue(1); - a->i_satd_i16x16_dir[2] += lambda * bs_size_ue(2); - COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[0], a->i_predict16x16, 0 ); - COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[1], a->i_predict16x16, 1 ); - COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[2], a->i_predict16x16, 2 ); - - /* Plane is expensive, so don't check it unless one of the previous modes was useful. */ - if( a->i_satd_i16x16 <= i16x16_thresh ) - { - h->predict_16x16[I_PRED_16x16_P]( p_dst ); - a->i_satd_i16x16_dir[I_PRED_16x16_P] = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE ); - a->i_satd_i16x16_dir[I_PRED_16x16_P] += lambda * bs_size_ue(3); - COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[I_PRED_16x16_P], a->i_predict16x16, 3 ); + if( !h->mb.b_lossless && predict_mode[3] >= 0 ) + { + h->pixf.intra_mbcmp_x3_16x16( p_src, p_dst, a->i_satd_i16x16_dir ); + a->i_satd_i16x16_dir[0] += lambda * bs_size_ue(0); + a->i_satd_i16x16_dir[1] += lambda * bs_size_ue(1); + a->i_satd_i16x16_dir[2] += lambda * bs_size_ue(2); + COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[0], a->i_predict16x16, 0 ); + COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[1], a->i_predict16x16, 1 ); + COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[2], a->i_predict16x16, 2 ); + + /* Plane is expensive, so don't check it unless one of the previous modes was useful. 
*/ + if( a->i_satd_i16x16 <= i16x16_thresh ) + { + h->predict_16x16[I_PRED_16x16_P]( p_dst ); + a->i_satd_i16x16_dir[I_PRED_16x16_P] = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE ); + a->i_satd_i16x16_dir[I_PRED_16x16_P] += lambda * bs_size_ue(3); + COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[I_PRED_16x16_P], a->i_predict16x16, 3 ); + } } - } - else - { - for( ; *predict_mode >= 0; predict_mode++ ) + else { - int i_satd; - int i_mode = *predict_mode; + for( ; *predict_mode >= 0; predict_mode++ ) + { + int i_satd; + int i_mode = *predict_mode; - if( h->mb.b_lossless ) - x264_predict_lossless_16x16( h, 0, i_mode ); - else - h->predict_16x16[i_mode]( p_dst ); + if( h->mb.b_lossless ) + x264_predict_lossless_16x16( h, 0, i_mode ); + else + h->predict_16x16[i_mode]( p_dst ); - i_satd = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE ) + - lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] ); - COPY2_IF_LT( a->i_satd_i16x16, i_satd, a->i_predict16x16, i_mode ); - a->i_satd_i16x16_dir[i_mode] = i_satd; + i_satd = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE ) + + lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] ); + COPY2_IF_LT( a->i_satd_i16x16, i_satd, a->i_predict16x16, i_mode ); + a->i_satd_i16x16_dir[i_mode] = i_satd; + } } - } - if( h->sh.i_type == SLICE_TYPE_B ) - /* cavlc mb type prefix */ - a->i_satd_i16x16 += lambda * i_mb_b_cost_table[I_16x16]; + if( h->sh.i_type == SLICE_TYPE_B ) + /* cavlc mb type prefix */ + a->i_satd_i16x16 += lambda * i_mb_b_cost_table[I_16x16]; - if( a->i_satd_i16x16 > i16x16_thresh ) - return; + if( a->i_satd_i16x16 > i16x16_thresh ) + return; + } uint16_t *cost_i4x4_mode = (uint16_t*)ALIGN((intptr_t)x264_cost_i4x4_mode,64) + a->i_qp*32 + 8; /* 8x8 prediction selection */ @@ -870,7 +890,7 @@ int i_best = COST_MAX; int i_pred_mode = x264_mb_predict_intra4x4_mode( h, 4*idx ); - predict_mode = predict_8x8_mode_available( a->b_avoid_topright, 
h->mb.i_neighbour8[idx], idx ); + const int8_t *predict_mode = predict_8x8_mode_available( a->b_avoid_topright, h->mb.i_neighbour8[idx], idx ); h->predict_8x8_filter( p_dst_by, edge, h->mb.i_neighbour8[idx], ALL_NEIGHBORS ); if( h->pixf.intra_mbcmp_x9_8x8 && predict_mode[8] >= 0 ) @@ -985,7 +1005,7 @@ int i_best = COST_MAX; int i_pred_mode = x264_mb_predict_intra4x4_mode( h, idx ); - predict_mode = predict_4x4_mode_available( a->b_avoid_topright, h->mb.i_neighbour4[idx], idx ); + const int8_t *predict_mode = predict_4x4_mode_available( a->b_avoid_topright, h->mb.i_neighbour4[idx], idx ); if( (h->mb.i_neighbour4[idx] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP ) /* emulate missing topright samples */ @@ -2101,7 +2121,7 @@ int cost00 = h->pixf.mbcmp[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE, pix0, 16 ) + ref_costs + l0_mv_cost + l1_mv_cost; - if( h->mb.b_chroma_me ) + if( h->mb.b_chroma_me && cost00 < a->i_cost16x16bi ) { ALIGNED_ARRAY_16( pixel, bi, [16*FENC_STRIDE] );
View file
x264-snapshot-20130723-2245.tar.bz2/encoder/analyse.h -> x264-snapshot-20140321-2245.tar.bz2/encoder/analyse.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * analyse.h: macroblock analysis ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu>
View file
x264-snapshot-20130723-2245.tar.bz2/encoder/cabac.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/cabac.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * cabac.c: cabac bitstream writing ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu>
View file
x264-snapshot-20130723-2245.tar.bz2/encoder/cavlc.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/cavlc.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * cavlc.c: cavlc bitstream writing ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu> @@ -500,6 +500,9 @@ && (!(h->mb.i_mb_y & 1) || IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride])) ) { bs_write1( s, MB_INTERLACED ); +#if !RDO_SKIP_BS + h->mb.field_decoding_flag = MB_INTERLACED; +#endif } #if !RDO_SKIP_BS
View file
x264-snapshot-20130723-2245.tar.bz2/encoder/encoder.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/encoder.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * encoder.c: top-level encoder functions ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu> @@ -33,10 +33,6 @@ #include "macroblock.h" #include "me.h" -#if HAVE_VISUALIZE -#include "common/visualize.h" -#endif - //#define DEBUG_MB_TYPE #define bs_write_ue bs_write_ue_big @@ -82,7 +78,7 @@ static void x264_frame_dump( x264_t *h ) { - FILE *f = fopen( h->param.psz_dump_yuv, "r+b" ); + FILE *f = x264_fopen( h->param.psz_dump_yuv, "r+b" ); if( !f ) return; @@ -403,21 +399,6 @@ { if( h->param.i_sync_lookahead ) x264_lower_thread_priority( 10 ); - -#if HAVE_MMX - /* Misalign mask has to be set separately for each thread. */ - if( h->param.cpu&X264_CPU_SSE_MISALIGN ) - x264_cpu_mask_misalign_sse(); -#endif -} - -static void x264_lookahead_thread_init( x264_t *h ) -{ -#if HAVE_MMX - /* Misalign mask has to be set separately for each thread. 
*/ - if( h->param.cpu&X264_CPU_SSE_MISALIGN ) - x264_cpu_mask_misalign_sse(); -#endif } #endif @@ -486,7 +467,7 @@ x264_log( h, X264_LOG_ERROR, "not compiled with 4:2:0 support\n" ); return -1; } - else if( CHROMA_FORMAT != CHROMA_422 && i_csp >= X264_CSP_I422 && i_csp <= X264_CSP_NV16 ) + else if( CHROMA_FORMAT != CHROMA_422 && i_csp >= X264_CSP_I422 && i_csp <= X264_CSP_V210 ) { x264_log( h, X264_LOG_ERROR, "not compiled with 4:2:2 support\n" ); return -1; @@ -532,6 +513,12 @@ return -1; } + if( h->param.vui.i_sar_width <= 0 || h->param.vui.i_sar_height <= 0 ) + { + h->param.vui.i_sar_width = 0; + h->param.vui.i_sar_height = 0; + } + if( h->param.i_threads == X264_THREADS_AUTO ) h->param.i_threads = x264_cpu_num_processors() * (h->param.b_sliced_threads?2:3)/2; int max_sliced_threads = X264_MAX( 1, (h->param.i_height+15)/16 / 4 ); @@ -583,6 +570,8 @@ { h->param.b_intra_refresh = 0; h->param.analyse.i_weighted_pred = 0; + h->param.i_frame_reference = 1; + h->param.i_dpb_size = 1; } h->param.i_frame_packing = x264_clip3( h->param.i_frame_packing, -1, 5 ); @@ -616,6 +605,188 @@ x264_log( h, X264_LOG_ERROR, "no ratecontrol method specified\n" ); return -1; } + + if( PARAM_INTERLACED ) + h->param.b_pic_struct = 1; + + if( h->param.i_avcintra_class ) + { + if( BIT_DEPTH != 10 ) + { + x264_log( h, X264_LOG_ERROR, "%2d-bit AVC-Intra is not widely compatible\n", BIT_DEPTH ); + x264_log( h, X264_LOG_ERROR, "10-bit x264 is required to encode AVC-Intra\n" ); + return -1; + } + + int type = h->param.i_avcintra_class == 200 ? 2 : + h->param.i_avcintra_class == 100 ? 1 : + h->param.i_avcintra_class == 50 ? 
0 : -1; + if( type < 0 ) + { + x264_log( h, X264_LOG_ERROR, "Invalid AVC-Intra class\n" ); + return -1; + } + + /* [50/100/200][res][fps] */ + static const struct + { + uint16_t fps_num; + uint16_t fps_den; + uint8_t interlaced; + uint16_t frame_size; + const uint8_t *cqm_4ic; + const uint8_t *cqm_8iy; + } avcintra_lut[3][2][7] = + { + {{{ 60000, 1001, 0, 912, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy }, + { 50, 1, 0, 1100, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy }, + { 30000, 1001, 0, 912, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy }, + { 25, 1, 0, 1100, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy }, + { 24000, 1001, 0, 912, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy }}, + {{ 30000, 1001, 1, 1820, x264_cqm_avci50_4ic, x264_cqm_avci50_1080i_8iy }, + { 25, 1, 1, 2196, x264_cqm_avci50_4ic, x264_cqm_avci50_1080i_8iy }, + { 60000, 1001, 0, 1820, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy }, + { 30000, 1001, 0, 1820, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy }, + { 50, 1, 0, 2196, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy }, + { 25, 1, 0, 2196, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy }, + { 24000, 1001, 0, 1820, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy }}}, + {{{ 60000, 1001, 0, 1848, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy }, + { 50, 1, 0, 2224, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy }, + { 30000, 1001, 0, 1848, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy }, + { 25, 1, 0, 2224, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy }, + { 24000, 1001, 0, 1848, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy }}, + {{ 30000, 1001, 1, 3692, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080i_8iy }, + { 25, 1, 1, 4444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080i_8iy }, + { 60000, 1001, 0, 3692, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy }, + { 30000, 1001, 0, 3692, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy }, + { 50, 1, 0, 4444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy }, + { 25, 1, 0, 
4444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy }, + { 24000, 1001, 0, 3692, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy }}}, + {{{ 60000, 1001, 0, 3724, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy }, + { 50, 1, 0, 4472, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy }}, + {{ 30000, 1001, 1, 7444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080i_8iy }, + { 25, 1, 1, 8940, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080i_8iy }, + { 60000, 1001, 0, 7444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy }, + { 30000, 1001, 0, 7444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy }, + { 50, 1, 0, 8940, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy }, + { 25, 1, 0, 8940, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy }, + { 24000, 1001, 0, 7444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy }}} + }; + + int res = -1; + if( i_csp >= X264_CSP_I420 && i_csp < X264_CSP_I422 && !type ) + { + if( h->param.i_width == 1440 && h->param.i_height == 1080 ) res = 1; + else if( h->param.i_width == 960 && h->param.i_height == 720 ) res = 0; + } + else if( i_csp >= X264_CSP_I422 && i_csp < X264_CSP_I444 && type ) + { + if( h->param.i_width == 1920 && h->param.i_height == 1080 ) res = 1; + else if( h->param.i_width == 1280 && h->param.i_height == 720 ) res = 0; + } + else + { + x264_log( h, X264_LOG_ERROR, "Invalid colorspace for AVC-Intra %d\n", h->param.i_avcintra_class ); + return -1; + } + + if( res < 0 ) + { + x264_log( h, X264_LOG_ERROR, "Resolution %dx%d invalid for AVC-Intra %d\n", + h->param.i_width, h->param.i_height, h->param.i_avcintra_class ); + return -1; + } + + if( h->param.nalu_process ) + { + x264_log( h, X264_LOG_ERROR, "nalu_process is not supported in AVC-Intra mode\n" ); + return -1; + } + + if( !h->param.b_repeat_headers ) + { + x264_log( h, X264_LOG_ERROR, "Separate headers not supported in AVC-Intra mode\n" ); + return -1; + } + + int i; + uint32_t fps_num = h->param.i_fps_num, fps_den = 
h->param.i_fps_den; + x264_reduce_fraction( &fps_num, &fps_den ); + for( i = 0; i < 7; i++ ) + { + if( avcintra_lut[type][res][i].fps_num == fps_num && + avcintra_lut[type][res][i].fps_den == fps_den && + avcintra_lut[type][res][i].interlaced == PARAM_INTERLACED ) + { + break; + } + } + if( i == 7 ) + { + x264_log( h, X264_LOG_ERROR, "FPS %d/%d%c not compatible with AVC-Intra\n", + h->param.i_fps_num, h->param.i_fps_den, PARAM_INTERLACED ? 'i' : 'p' ); + return -1; + } + + h->param.i_keyint_max = 1; + h->param.b_intra_refresh = 0; + h->param.analyse.i_weighted_pred = 0; + h->param.i_frame_reference = 1; + h->param.i_dpb_size = 1; + + h->param.b_bluray_compat = 0; + h->param.b_vfr_input = 0; + h->param.b_aud = 1; + h->param.vui.i_chroma_loc = 0; + h->param.i_nal_hrd = X264_NAL_HRD_NONE; + h->param.b_deblocking_filter = 0; + h->param.b_stitchable = 1; + h->param.b_pic_struct = 0; + h->param.analyse.b_transform_8x8 = 1; + h->param.analyse.intra = X264_ANALYSE_I8x8; + h->param.analyse.i_chroma_qp_offset = res && type ? 3 : 4; + h->param.b_cabac = !type; + h->param.rc.i_vbv_buffer_size = avcintra_lut[type][res][i].frame_size; + h->param.rc.i_vbv_max_bitrate = + h->param.rc.i_bitrate = h->param.rc.i_vbv_buffer_size * fps_num / fps_den; + h->param.rc.i_rc_method = X264_RC_ABR; + h->param.rc.f_vbv_buffer_init = 1.0; + h->param.rc.b_filler = 1; + h->param.i_cqm_preset = X264_CQM_CUSTOM; + memcpy( h->param.cqm_4iy, x264_cqm_jvt4i, sizeof(h->param.cqm_4iy) ); + memcpy( h->param.cqm_4ic, avcintra_lut[type][res][i].cqm_4ic, sizeof(h->param.cqm_4ic) ); + memcpy( h->param.cqm_8iy, avcintra_lut[type][res][i].cqm_8iy, sizeof(h->param.cqm_8iy) ); + + /* Need exactly 10 slices of equal MB count... why? $deity knows... 
*/ + h->param.i_slice_max_mbs = ((h->param.i_width + 15) / 16) * ((h->param.i_height + 15) / 16) / 10; + h->param.i_slice_max_size = 0; + /* The slice structure only allows a maximum of 2 threads for 1080i/p + * and 1 or 5 threads for 720p */ + if( h->param.b_sliced_threads ) + { + if( res ) + h->param.i_threads = X264_MIN( 2, h->param.i_threads ); + else + { + h->param.i_threads = X264_MIN( 5, h->param.i_threads ); + if( h->param.i_threads < 5 ) + h->param.i_threads = 1; + } + } + + if( type ) + h->param.vui.i_sar_width = h->param.vui.i_sar_height = 1; + else + { + h->param.vui.i_sar_width = 4; + h->param.vui.i_sar_height = 3; + } + + /* Official encoder doesn't appear to go under 13 + * and Avid cannot handle negative QPs */ + h->param.rc.i_qp_min = X264_MAX( h->param.rc.i_qp_min, QP_BD_OFFSET + 1 ); + } + h->param.rc.f_rf_constant = x264_clip3f( h->param.rc.f_rf_constant, -QP_BD_OFFSET, 51 ); h->param.rc.f_rf_constant_max = x264_clip3f( h->param.rc.f_rf_constant_max, -QP_BD_OFFSET, 51 ); h->param.rc.i_qp_constant = x264_clip3( h->param.rc.i_qp_constant, 0, QP_MAX ); @@ -917,10 +1088,10 @@ h->param.analyse.i_chroma_qp_offset += 6; /* Psy RDO increases overall quantizers to improve the quality of luma--this indirectly hurts chroma quality */ /* so we lower the chroma QP offset to compensate */ - if( b_open && h->mb.i_psy_rd ) + if( b_open && h->mb.i_psy_rd && !h->param.i_avcintra_class ) h->param.analyse.i_chroma_qp_offset -= h->param.analyse.f_psy_rd < 0.25 ? 1 : 2; /* Psy trellis has a similar effect. */ - if( b_open && h->mb.i_psy_trellis ) + if( b_open && h->mb.i_psy_trellis && !h->param.i_avcintra_class ) h->param.analyse.i_chroma_qp_offset -= h->param.analyse.f_psy_trellis < 0.25 ? 1 : 2; h->param.analyse.i_chroma_qp_offset = x264_clip3(h->param.analyse.i_chroma_qp_offset, -12, 12); /* MB-tree requires AQ to be on, even if the strength is zero. 
*/ @@ -1041,9 +1212,6 @@ h->param.i_sps_id &= 31; - if( PARAM_INTERLACED ) - h->param.b_pic_struct = 1; - h->param.i_nal_hrd = x264_clip3( h->param.i_nal_hrd, X264_NAL_HRD_NONE, X264_NAL_HRD_CBR ); if( h->param.i_nal_hrd && !h->param.rc.i_vbv_buffer_size ) @@ -1059,6 +1227,9 @@ h->param.i_nal_hrd = X264_NAL_HRD_VBR; } + if( h->param.i_nal_hrd == X264_NAL_HRD_CBR ) + h->param.rc.b_filler = 1; + /* ensure the booleans are 0 or 1 so they can be used in math */ #define BOOLIFY(x) h->param.x = !!h->param.x BOOLIFY( b_cabac ); @@ -1068,7 +1239,6 @@ BOOLIFY( b_sliced_threads ); BOOLIFY( b_interlaced ); BOOLIFY( b_intra_refresh ); - BOOLIFY( b_visualize ); BOOLIFY( b_aud ); BOOLIFY( b_repeat_headers ); BOOLIFY( b_annexb ); @@ -1094,6 +1264,7 @@ BOOLIFY( rc.b_stat_write ); BOOLIFY( rc.b_stat_read ); BOOLIFY( rc.b_mb_tree ); + BOOLIFY( rc.b_filler ); #undef BOOLIFY return 0; @@ -1187,7 +1358,6 @@ h->param.vui.i_sar_width = i_w; h->param.vui.i_sar_height = i_h; } - x264_sps_init( h->sps, h->param.i_sps_id, &h->param ); } } } @@ -1241,11 +1411,11 @@ goto fail; } + x264_set_aspect_ratio( h, &h->param, 1 ); + x264_sps_init( h->sps, h->param.i_sps_id, &h->param ); x264_pps_init( h->pps, h->param.i_sps_id, &h->param, h->sps ); - x264_set_aspect_ratio( h, &h->param, 1 ); - x264_validate_levels( h, 1 ); h->chroma_qp_table = i_chroma_qp_table + 12 + h->pps->i_chroma_qp_index_offset; @@ -1396,11 +1566,13 @@ h->nal_buffer_size = h->out.i_bitstream * 3/2 + 4 + 64; /* +4 for startcode, +64 for nal_escape assembly padding */ CHECKED_MALLOC( h->nal_buffer, h->nal_buffer_size ); + CHECKED_MALLOC( h->reconfig_h, sizeof(x264_t) ); + if( h->param.i_threads > 1 && x264_threadpool_init( &h->threadpool, h->param.i_threads, (void*)x264_encoder_thread_init, h ) ) goto fail; if( h->param.i_lookahead_threads > 1 && - x264_threadpool_init( &h->lookaheadpool, h->param.i_lookahead_threads, (void*)x264_lookahead_thread_init, h ) ) + x264_threadpool_init( &h->lookaheadpool, h->param.i_lookahead_threads, 
NULL, NULL ) ) goto fail; #if HAVE_OPENCL @@ -1424,6 +1596,7 @@ CHECKED_MALLOC( h->lookahead_thread[i], sizeof(x264_t) ); *h->lookahead_thread[i] = *h; } + *h->reconfig_h = *h; for( int i = 0; i < h->param.i_threads; i++ ) { @@ -1479,7 +1652,7 @@ if( h->param.psz_dump_yuv ) { /* create or truncate the reconstructed video file */ - FILE *f = fopen( h->param.psz_dump_yuv, "w" ); + FILE *f = x264_fopen( h->param.psz_dump_yuv, "w" ); if( !f ) { x264_log( h, X264_LOG_ERROR, "dump_yuv: can't write to %s\n", h->param.psz_dump_yuv ); @@ -1523,18 +1696,10 @@ return NULL; } -/**************************************************************************** - * x264_encoder_reconfig: - ****************************************************************************/ -int x264_encoder_reconfig( x264_t *h, x264_param_t *param ) +/****************************************************************************/ +static int x264_encoder_try_reconfig( x264_t *h, x264_param_t *param, int *rc_reconfig ) { - /* If the previous frame isn't done encoding, reconfiguring is probably dangerous. 
*/ - if( h->param.b_sliced_threads ) - if( x264_threadpool_wait_all( h ) < 0 ) - return -1; - - int rc_reconfig = 0; - h = h->thread[h->thread[0]->i_thread_phase]; + *rc_reconfig = 0; x264_set_aspect_ratio( h, param, 0 ); #define COPY(var) h->param.var = param->var COPY( i_frame_reference ); // but never uses more refs than initially specified @@ -1583,22 +1748,30 @@ if( h->param.rc.i_vbv_max_bitrate > 0 && h->param.rc.i_vbv_buffer_size > 0 && param->rc.i_vbv_max_bitrate > 0 && param->rc.i_vbv_buffer_size > 0 ) { - rc_reconfig |= h->param.rc.i_vbv_max_bitrate != param->rc.i_vbv_max_bitrate; - rc_reconfig |= h->param.rc.i_vbv_buffer_size != param->rc.i_vbv_buffer_size; - rc_reconfig |= h->param.rc.i_bitrate != param->rc.i_bitrate; + *rc_reconfig |= h->param.rc.i_vbv_max_bitrate != param->rc.i_vbv_max_bitrate; + *rc_reconfig |= h->param.rc.i_vbv_buffer_size != param->rc.i_vbv_buffer_size; + *rc_reconfig |= h->param.rc.i_bitrate != param->rc.i_bitrate; COPY( rc.i_vbv_max_bitrate ); COPY( rc.i_vbv_buffer_size ); COPY( rc.i_bitrate ); } - rc_reconfig |= h->param.rc.f_rf_constant != param->rc.f_rf_constant; - rc_reconfig |= h->param.rc.f_rf_constant_max != param->rc.f_rf_constant_max; + *rc_reconfig |= h->param.rc.f_rf_constant != param->rc.f_rf_constant; + *rc_reconfig |= h->param.rc.f_rf_constant_max != param->rc.f_rf_constant_max; COPY( rc.f_rf_constant ); COPY( rc.f_rf_constant_max ); #undef COPY - mbcmp_init( h ); + return x264_validate_parameters( h, 0 ); +} - int ret = x264_validate_parameters( h, 0 ); +int x264_encoder_reconfig_apply( x264_t *h, x264_param_t *param ) +{ + int rc_reconfig; + int ret = x264_encoder_try_reconfig( h, param, &rc_reconfig ); + + mbcmp_init( h ); + if( !ret ) + x264_sps_init( h->sps, h->param.i_sps_id, &h->param ); /* Supported reconfiguration options (1-pass only): * vbv-maxrate @@ -1612,6 +1785,25 @@ } /**************************************************************************** + * x264_encoder_reconfig: + 
****************************************************************************/ +int x264_encoder_reconfig( x264_t *h, x264_param_t *param ) +{ + h = h->thread[h->thread[0]->i_thread_phase]; + x264_param_t param_save = h->reconfig_h->param; + h->reconfig_h->param = h->param; + + int rc_reconfig; + int ret = x264_encoder_try_reconfig( h->reconfig_h, param, &rc_reconfig ); + if( !ret ) + h->reconfig = 1; + else + h->reconfig_h->param = param_save; + + return ret; +} + +/**************************************************************************** * x264_encoder_parameters: ****************************************************************************/ void x264_encoder_parameters( x264_t *h, x264_param_t *param ) @@ -1630,6 +1822,7 @@ nal->i_payload= 0; nal->p_payload= &h->out.p_bitstream[bs_pos( &h->out.bs ) / 8]; + nal->i_padding= 0; } /* if number of allocated nals is not enough, re-allocate a larger one. */ @@ -1663,6 +1856,30 @@ return x264_nal_check_buffer( h ); } +static int x264_check_encapsulated_buffer( x264_t *h, x264_t *h0, int start, + int previous_nal_size, int necessary_size ) +{ + if( h0->nal_buffer_size < necessary_size ) + { + necessary_size *= 2; + uint8_t *buf = x264_malloc( necessary_size ); + if( !buf ) + return -1; + if( previous_nal_size ) + memcpy( buf, h0->nal_buffer, previous_nal_size ); + + intptr_t delta = buf - h0->nal_buffer; + for( int i = 0; i < start; i++ ) + h->out.nal[i].p_payload += delta; + + x264_free( h0->nal_buffer ); + h0->nal_buffer = buf; + h0->nal_buffer_size = necessary_size; + } + + return 0; +} + static int x264_encoder_encapsulate_nals( x264_t *h, int start ) { x264_t *h0 = h->thread[0]; @@ -1683,31 +1900,31 @@ /* Worst-case NAL unit escaping: reallocate the buffer if it's too small. 
*/ int necessary_size = previous_nal_size + nal_size * 3/2 + h->out.i_nal * 4 + 4 + 64; - if( h0->nal_buffer_size < necessary_size ) - { - necessary_size *= 2; - uint8_t *buf = x264_malloc( necessary_size ); - if( !buf ) - return -1; - if( previous_nal_size ) - memcpy( buf, h0->nal_buffer, previous_nal_size ); - - intptr_t delta = buf - h0->nal_buffer; - for( int i = 0; i < start; i++ ) - h->out.nal[i].p_payload += delta; - - x264_free( h0->nal_buffer ); - h0->nal_buffer = buf; - h0->nal_buffer_size = necessary_size; - } + for( int i = start; i < h->out.i_nal; i++ ) + necessary_size += h->out.nal[i].i_padding; + if( x264_check_encapsulated_buffer( h, h0, start, previous_nal_size, necessary_size ) ) + return -1; uint8_t *nal_buffer = h0->nal_buffer + previous_nal_size; for( int i = start; i < h->out.i_nal; i++ ) { - h->out.nal[i].b_long_startcode = !i || h->out.nal[i].i_type == NAL_SPS || h->out.nal[i].i_type == NAL_PPS; + int old_payload_len = h->out.nal[i].i_payload; + h->out.nal[i].b_long_startcode = !i || h->out.nal[i].i_type == NAL_SPS || h->out.nal[i].i_type == NAL_PPS || + h->param.i_avcintra_class; x264_nal_encode( h, nal_buffer, &h->out.nal[i] ); nal_buffer += h->out.nal[i].i_payload; + if( h->param.i_avcintra_class ) + { + h->out.nal[i].i_padding -= h->out.nal[i].i_payload - (old_payload_len + NALU_OVERHEAD); + if( h->out.nal[i].i_padding > 0 ) + { + memset( nal_buffer, 0, h->out.nal[i].i_padding ); + nal_buffer += h->out.nal[i].i_padding; + h->out.nal[i].i_payload += h->out.nal[i].i_padding; + } + h->out.nal[i].i_padding = X264_MAX( h->out.nal[i].i_padding, 0 ); + } } x264_emms(); @@ -2340,7 +2557,7 @@ } } -static int x264_slice_write( x264_t *h ) +static intptr_t x264_slice_write( x264_t *h ) { int i_skip; int mb_xy, i_mb_x, i_mb_y; @@ -2350,7 +2567,8 @@ * other inaccuracies. */ int overhead_guess = (NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal)) + 1 + h->param.b_cabac + 5; int slice_max_size = h->param.i_slice_max_size > 0 ? 
(h->param.i_slice_max_size-overhead_guess)*8 : 0; - int back_up_bitstream = slice_max_size || (!h->param.b_cabac && h->sps->i_profile_idc < PROFILE_HIGH); + int back_up_bitstream_cavlc = !h->param.b_cabac && h->sps->i_profile_idc < PROFILE_HIGH; + int back_up_bitstream = slice_max_size || back_up_bitstream_cavlc; int starting_bits = bs_pos(&h->out.bs); int b_deblock = h->sh.i_disable_deblocking_filter_idc != 1; int b_hpel = h->fdec->b_kept_as_ref; @@ -2358,9 +2576,10 @@ int thread_last_mb = h->i_threadslice_end * h->mb.i_mb_width - 1; uint8_t *last_emu_check; #define BS_BAK_SLICE_MAX_SIZE 0 -#define BS_BAK_SLICE_MIN_MBS 1 -#define BS_BAK_ROW_VBV 2 - x264_bs_bak_t bs_bak[3]; +#define BS_BAK_CAVLC_OVERFLOW 1 +#define BS_BAK_SLICE_MIN_MBS 2 +#define BS_BAK_ROW_VBV 3 + x264_bs_bak_t bs_bak[4]; b_deblock &= b_hpel || h->param.b_full_recon || h->param.psz_dump_yuv; bs_realign( &h->out.bs ); @@ -2413,11 +2632,16 @@ x264_fdec_filter_row( h, i_mb_y, 0 ); } - if( !(i_mb_y & SLICE_MBAFF) && back_up_bitstream ) + if( back_up_bitstream ) { - x264_bitstream_backup( h, &bs_bak[BS_BAK_SLICE_MAX_SIZE], i_skip, 0 ); - if( slice_max_size && (thread_last_mb+1-mb_xy) == h->param.i_slice_min_mbs ) - x264_bitstream_backup( h, &bs_bak[BS_BAK_SLICE_MIN_MBS], i_skip, 0 ); + if( back_up_bitstream_cavlc ) + x264_bitstream_backup( h, &bs_bak[BS_BAK_CAVLC_OVERFLOW], i_skip, 0 ); + if( slice_max_size && !(i_mb_y & SLICE_MBAFF) ) + { + x264_bitstream_backup( h, &bs_bak[BS_BAK_SLICE_MAX_SIZE], i_skip, 0 ); + if( (thread_last_mb+1-mb_xy) == h->param.i_slice_min_mbs ) + x264_bitstream_backup( h, &bs_bak[BS_BAK_SLICE_MIN_MBS], i_skip, 0 ); + } } if( PARAM_INTERLACED ) @@ -2481,7 +2705,7 @@ h->mb.i_skip_intra = 0; h->mb.b_skip_mc = 0; h->mb.b_overflow = 0; - x264_bitstream_restore( h, &bs_bak[BS_BAK_SLICE_MAX_SIZE], &i_skip, 0 ); + x264_bitstream_restore( h, &bs_bak[BS_BAK_CAVLC_OVERFLOW], &i_skip, 0 ); goto reencode; } } @@ -2552,11 +2776,6 @@ cont: h->mb.b_reencode_mb = 0; -#if HAVE_VISUALIZE - if( 
h->param.b_visualize ) - x264_visualize_mb( h ); -#endif - /* save cache */ x264_macroblock_cache_save( h ); @@ -2732,10 +2951,11 @@ x264_frame_push_unused( src, dst->fdec ); // copy everything except the per-thread pointers and the constants. - memcpy( &dst->i_frame, &src->i_frame, offsetof(x264_t, mb.type) - offsetof(x264_t, i_frame) ); + memcpy( &dst->i_frame, &src->i_frame, offsetof(x264_t, mb.base) - offsetof(x264_t, i_frame) ); dst->param = src->param; dst->stat = src->stat; dst->pixf = src->pixf; + dst->reconfig = src->reconfig; } static void x264_thread_sync_stat( x264_t *dst, x264_t *src ) @@ -2750,12 +2970,6 @@ int i_slice_num = 0; int last_thread_mb = h->sh.i_last_mb; -#if HAVE_VISUALIZE - if( h->param.b_visualize ) - if( x264_visualize_init( h ) ) - goto fail; -#endif - /* init stats */ memset( &h->stat.frame, 0, sizeof(h->stat.frame) ); h->mb.b_reencode_mb = 0; @@ -2801,14 +3015,6 @@ h->sh.i_first_mb -= h->mb.i_mb_stride; } -#if HAVE_VISUALIZE - if( h->param.b_visualize ) - { - x264_visualize_show( h ); - x264_visualize_close( h ); - } -#endif - return (void *)0; fail: @@ -2949,10 +3155,6 @@ thread_current = thread_oldest = h; } -#if HAVE_MMX - if( h->param.cpu&X264_CPU_SSE_MISALIGN ) - x264_cpu_mask_misalign_sse(); -#endif h->i_cpb_delay_pir_offset = h->i_cpb_delay_pir_offset_next; /* no data out */ @@ -3058,9 +3260,14 @@ if( h->i_frame == h->i_thread_frames - 1 ) h->i_reordered_pts_delay = h->fenc->i_reordered_pts; + if( h->reconfig ) + { + x264_encoder_reconfig_apply( h, &h->reconfig_h->param ); + h->reconfig = 0; + } if( h->fenc->param ) { - x264_encoder_reconfig( h, h->fenc->param ); + x264_encoder_reconfig_apply( h, h->fenc->param ); if( h->fenc->param->param_free ) { h->fenc->param->param_free( h->fenc->param ); @@ -3207,7 +3414,7 @@ bs_rbsp_trailing( &h->out.bs ); if( x264_nal_end( h ) ) return -1; - overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal-1); + overhead += 
h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD; } h->i_nal_type = i_nal_type; @@ -3259,14 +3466,19 @@ x264_sps_write( &h->out.bs, h->sps ); if( x264_nal_end( h ) ) return -1; - overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD; + /* Pad AUD/SPS to 256 bytes like Panasonic */ + if( h->param.i_avcintra_class ) + h->out.nal[h->out.i_nal-1].i_padding = 256 - bs_pos( &h->out.bs ) / 8 - 2*NALU_OVERHEAD; + overhead += h->out.nal[h->out.i_nal-1].i_payload + h->out.nal[h->out.i_nal-1].i_padding + NALU_OVERHEAD; /* generate picture parameters */ x264_nal_start( h, NAL_PPS, NAL_PRIORITY_HIGHEST ); x264_pps_write( &h->out.bs, h->sps, h->pps ); if( x264_nal_end( h ) ) return -1; - overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD; + if( h->param.i_avcintra_class ) + h->out.nal[h->out.i_nal-1].i_padding = 256 - h->out.nal[h->out.i_nal-1].i_payload - NALU_OVERHEAD; + overhead += h->out.nal[h->out.i_nal-1].i_payload + h->out.nal[h->out.i_nal-1].i_padding + NALU_OVERHEAD; } /* when frame threading is used, buffering period sei is written in x264_encoder_frame_end */ @@ -3277,7 +3489,7 @@ x264_sei_buffering_period_write( h, &h->out.bs ); if( x264_nal_end( h ) ) return -1; - overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD; + overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD; } } @@ -3289,7 +3501,7 @@ h->fenc->extra_sei.payloads[i].payload_type ); if( x264_nal_end( h ) ) return -1; - overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal-1); + overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD; if( h->fenc->extra_sei.sei_free ) { h->fenc->extra_sei.sei_free( h->fenc->extra_sei.payloads[i].payload ); @@ -3306,7 +3518,8 @@ if( h->fenc->b_keyframe ) { - if( h->param.b_repeat_headers && h->fenc->i_frame == 0 ) + /* Avid's decoder strictly wants two SEIs for AVC-Intra so we can't insert the x264 SEI */ + if( h->param.b_repeat_headers && h->fenc->i_frame == 0 && 
!h->param.i_avcintra_class ) { /* identify ourself */ x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE ); @@ -3314,7 +3527,7 @@ return -1; if( x264_nal_end( h ) ) return -1; - overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal-1); + overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD; } if( h->fenc->i_type != X264_TYPE_IDR ) @@ -3324,16 +3537,16 @@ x264_sei_recovery_point_write( h, &h->out.bs, time_to_recovery ); if( x264_nal_end( h ) ) return -1; - overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal-1); + overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD; } - if ( h->param.i_frame_packing >= 0 ) + if( h->param.i_frame_packing >= 0 ) { x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE ); x264_sei_frame_packing_write( h, &h->out.bs ); if( x264_nal_end( h ) ) return -1; - overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal-1); + overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD; } } @@ -3344,7 +3557,7 @@ x264_sei_pic_timing_write( h, &h->out.bs ); if( x264_nal_end( h ) ) return -1; - overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal-1); + overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD; } /* As required by Blu-ray. 
*/ @@ -3355,12 +3568,54 @@ x264_sei_dec_ref_pic_marking_write( h, &h->out.bs ); if( x264_nal_end( h ) ) return -1; - overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal-1); + overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD; } if( h->fenc->b_keyframe && h->param.b_intra_refresh ) h->i_cpb_delay_pir_offset_next = h->fenc->i_cpb_delay; + /* Filler space: 10 or 18 SEIs' worth of space, depending on resolution */ + if( h->param.i_avcintra_class ) + { + /* Write an empty filler NAL to mimic the AUD in the P2 format*/ + x264_nal_start( h, NAL_FILLER, NAL_PRIORITY_DISPOSABLE ); + x264_filler_write( h, &h->out.bs, 0 ); + if( x264_nal_end( h ) ) + return -1; + overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD; + + /* All lengths are magic lengths that decoders expect to see */ + /* "UMID" SEI */ + x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE ); + if( x264_sei_avcintra_umid_write( h, &h->out.bs ) < 0 ) + return -1; + if( x264_nal_end( h ) ) + return -1; + overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD; + + int unpadded_len; + int total_len; + if( h->param.i_height == 1080 ) + { + unpadded_len = 5780; + total_len = 17*512; + } + else + { + unpadded_len = 2900; + total_len = 9*512; + } + /* "VANC" SEI */ + x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE ); + if( x264_sei_avcintra_vanc_write( h, &h->out.bs, unpadded_len ) < 0 ) + return -1; + if( x264_nal_end( h ) ) + return -1; + + h->out.nal[h->out.i_nal-1].i_padding = total_len - h->out.nal[h->out.i_nal-1].i_payload - SEI_OVERHEAD; + overhead += h->out.nal[h->out.i_nal-1].i_payload + h->out.nal[h->out.i_nal-1].i_padding + SEI_OVERHEAD; + } + /* Init the rate control */ /* FIXME: Include slice header bit cost. 
*/ x264_ratecontrol_start( h, h->fenc->i_qpplus1, overhead*8 ); @@ -3490,30 +3745,46 @@ pic_out->hrd_timing = h->fenc->hrd_timing; pic_out->prop.f_crf_avg = h->fdec->f_crf_avg; - while( filler > 0 ) + /* Filler in AVC-Intra mode is written as zero bytes to the last slice + * We don't know the size of the last slice until encapsulation so we add filler to the encapsulated NAL */ + if( h->param.i_avcintra_class ) + { + x264_t *h0 = h->thread[0]; + int ret = x264_check_encapsulated_buffer( h, h0, h->out.i_nal, frame_size, frame_size + filler ); + if( ret < 0 ) + return -1; + memset( h->out.nal[0].p_payload + frame_size, 0, filler ); + h->out.nal[h->out.i_nal-1].i_payload += filler; + h->out.nal[h->out.i_nal-1].i_padding = filler; + frame_size += filler; + } + else { - int f, overhead; - overhead = (FILLER_OVERHEAD - h->param.b_annexb); - if( h->param.i_slice_max_size && filler > h->param.i_slice_max_size ) - { - int next_size = filler - h->param.i_slice_max_size; - int overflow = X264_MAX( overhead - next_size, 0 ); - f = h->param.i_slice_max_size - overhead - overflow; - } - else - f = X264_MAX( 0, filler - overhead ); + while( filler > 0 ) + { + int f, overhead; + overhead = (FILLER_OVERHEAD - h->param.b_annexb); + if( h->param.i_slice_max_size && filler > h->param.i_slice_max_size ) + { + int next_size = filler - h->param.i_slice_max_size; + int overflow = X264_MAX( overhead - next_size, 0 ); + f = h->param.i_slice_max_size - overhead - overflow; + } + else + f = X264_MAX( 0, filler - overhead ); - if( x264_bitstream_check_buffer_filler( h, f ) ) - return -1; - x264_nal_start( h, NAL_FILLER, NAL_PRIORITY_DISPOSABLE ); - x264_filler_write( h, &h->out.bs, f ); - if( x264_nal_end( h ) ) - return -1; - int total_size = x264_encoder_encapsulate_nals( h, h->out.i_nal-1 ); - if( total_size < 0 ) - return -1; - frame_size += total_size; - filler -= total_size; + if( x264_bitstream_check_buffer_filler( h, f ) ) + return -1; + x264_nal_start( h, NAL_FILLER, 
NAL_PRIORITY_DISPOSABLE ); + x264_filler_write( h, &h->out.bs, f ); + if( x264_nal_end( h ) ) + return -1; + int total_size = x264_encoder_encapsulate_nals( h, h->out.i_nal-1 ); + if( total_size < 0 ) + return -1; + frame_size += total_size; + filler -= total_size; + } } /* End bitstream, set output */ @@ -3985,6 +4256,7 @@ x264_cqm_delete( h ); x264_free( h->nal_buffer ); + x264_free( h->reconfig_h ); x264_analyse_free_costs( h ); if( h->i_thread_frames > 1 )
View file
x264-snapshot-20130723-2245.tar.bz2/encoder/lookahead.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/lookahead.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * lookahead.c: high-level lookahead functions ***************************************************************************** - * Copyright (C) 2010-2013 Avail Media and x264 project + * Copyright (C) 2010-2014 Avail Media and x264 project * * Authors: Michael Kazmier <mkazmier@availmedia.com> * Alex Giladi <agiladi@availmedia.com> @@ -89,16 +89,11 @@ static void *x264_lookahead_thread( x264_t *h ) { - int shift; -#if HAVE_MMX - if( h->param.cpu&X264_CPU_SSE_MISALIGN ) - x264_cpu_mask_misalign_sse(); -#endif while( !h->lookahead->b_exit_thread ) { x264_pthread_mutex_lock( &h->lookahead->ifbuf.mutex ); x264_pthread_mutex_lock( &h->lookahead->next.mutex ); - shift = X264_MIN( h->lookahead->next.i_max_size - h->lookahead->next.i_size, h->lookahead->ifbuf.i_size ); + int shift = X264_MIN( h->lookahead->next.i_max_size - h->lookahead->next.i_size, h->lookahead->ifbuf.i_size ); x264_lookahead_shift( &h->lookahead->next, &h->lookahead->ifbuf, shift ); x264_pthread_mutex_unlock( &h->lookahead->next.mutex ); if( h->lookahead->next.i_size <= h->lookahead->i_slicetype_length + h->param.b_vfr_input )
View file
x264-snapshot-20130723-2245.tar.bz2/encoder/macroblock.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/macroblock.c
Changed
@@ -1,12 +1,12 @@ /***************************************************************************** * macroblock.c: macroblock encoding ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu> * Jason Garrett-Glaser <darkshikari@gmail.com> - * Henrik Gramner <hengar-6@student.ltu.se> + * Henrik Gramner <henrik@gramner.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -157,10 +157,7 @@ return; } - M32( &h->mb.cache.non_zero_count[x264_scan8[ 0+p*16]] ) = 0; - M32( &h->mb.cache.non_zero_count[x264_scan8[ 2+p*16]] ) = 0; - M32( &h->mb.cache.non_zero_count[x264_scan8[ 8+p*16]] ) = 0; - M32( &h->mb.cache.non_zero_count[x264_scan8[10+p*16]] ) = 0; + CLEAR_16x16_NNZ( p ); h->dctf.sub16x16_dct( dct4x4, p_src, p_dst );
View file
x264-snapshot-20130723-2245.tar.bz2/encoder/macroblock.h -> x264-snapshot-20140321-2245.tar.bz2/encoder/macroblock.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * macroblock.h: macroblock encoding ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Laurent Aimar <fenrir@via.ecp.fr> @@ -98,10 +98,10 @@ #define CLEAR_16x16_NNZ( p ) \ do\ {\ - M32( &h->mb.cache.non_zero_count[x264_scan8[16*p+ 0]] ) = 0;\ - M32( &h->mb.cache.non_zero_count[x264_scan8[16*p+ 2]] ) = 0;\ - M32( &h->mb.cache.non_zero_count[x264_scan8[16*p+ 8]] ) = 0;\ - M32( &h->mb.cache.non_zero_count[x264_scan8[16*p+10]] ) = 0;\ + M32( &h->mb.cache.non_zero_count[x264_scan8[16*p] + 0*8] ) = 0;\ + M32( &h->mb.cache.non_zero_count[x264_scan8[16*p] + 1*8] ) = 0;\ + M32( &h->mb.cache.non_zero_count[x264_scan8[16*p] + 2*8] ) = 0;\ + M32( &h->mb.cache.non_zero_count[x264_scan8[16*p] + 3*8] ) = 0;\ } while(0) /* A special for loop that iterates branchlessly over each set
View file
x264-snapshot-20130723-2245.tar.bz2/encoder/me.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/me.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * me.c: motion estimation ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Laurent Aimar <fenrir@via.ecp.fr> @@ -365,14 +365,14 @@ /* hexagon */ COST_MV_X3_DIR( -2,0, -1, 2, 1, 2, costs ); - COST_MV_X3_DIR( 2,0, 1,-2, -1,-2, costs+3 ); + COST_MV_X3_DIR( 2,0, 1,-2, -1,-2, costs+4 ); /* +4 for 16-byte alignment */ bcost <<= 3; COPY1_IF_LT( bcost, (costs[0]<<3)+2 ); COPY1_IF_LT( bcost, (costs[1]<<3)+3 ); COPY1_IF_LT( bcost, (costs[2]<<3)+4 ); - COPY1_IF_LT( bcost, (costs[3]<<3)+5 ); - COPY1_IF_LT( bcost, (costs[4]<<3)+6 ); - COPY1_IF_LT( bcost, (costs[5]<<3)+7 ); + COPY1_IF_LT( bcost, (costs[4]<<3)+5 ); + COPY1_IF_LT( bcost, (costs[5]<<3)+6 ); + COPY1_IF_LT( bcost, (costs[6]<<3)+7 ); if( bcost&7 ) { @@ -671,7 +671,7 @@ for( i = 0; i < xn-2; i += 3 ) { pixel *ref = p_fref_w+min_x+my*stride; - int sads[3]; + ALIGNED_ARRAY_16( int, sads,[4] ); /* padded to [4] for asm */ h->pixf.sad_x3[i_pixel]( p_fenc, ref+xs[i], ref+xs[i+1], ref+xs[i+2], stride, sads ); for( int j = 0; j < 3; j++ ) {
View file
x264-snapshot-20130723-2245.tar.bz2/encoder/me.h -> x264-snapshot-20140321-2245.tar.bz2/encoder/me.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * me.h: motion estimation ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Laurent Aimar <fenrir@via.ecp.fr>
View file
x264-snapshot-20130723-2245.tar.bz2/encoder/ratecontrol.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/ratecontrol.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * ratecontrol.c: ratecontrol ***************************************************************************** - * Copyright (C) 2005-2013 x264 project + * Copyright (C) 2005-2014 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Michael Niedermayer <michaelni@gmx.at> @@ -101,7 +101,7 @@ double vbv_max_rate; /* # of bits added to buffer_fill per second */ predictor_t *pred; /* predict frame size from satd */ int single_frame_vbv; - double rate_factor_max_increment; /* Don't allow RF above (CRF + this value). */ + float rate_factor_max_increment; /* Don't allow RF above (CRF + this value). */ /* ABR stuff */ int last_satd; @@ -653,8 +653,9 @@ h->param.rc.i_vbv_buffer_size ); } - int vbv_buffer_size = h->param.rc.i_vbv_buffer_size * 1000; - int vbv_max_bitrate = h->param.rc.i_vbv_max_bitrate * 1000; + int kilobit_size = h->param.i_avcintra_class ? 1024 : 1000; + int vbv_buffer_size = h->param.rc.i_vbv_buffer_size * kilobit_size; + int vbv_max_bitrate = h->param.rc.i_vbv_max_bitrate * kilobit_size; /* Init HRD */ if( h->param.i_nal_hrd && b_init ) @@ -666,15 +667,12 @@ #define BR_SHIFT 6 #define CPB_SHIFT 4 - int bitrate = 1000*h->param.rc.i_vbv_max_bitrate; - int bufsize = 1000*h->param.rc.i_vbv_buffer_size; - // normalize HRD size and rate to the value / scale notation - h->sps->vui.hrd.i_bit_rate_scale = x264_clip3( x264_ctz( bitrate ) - BR_SHIFT, 0, 15 ); - h->sps->vui.hrd.i_bit_rate_value = bitrate >> ( h->sps->vui.hrd.i_bit_rate_scale + BR_SHIFT ); + h->sps->vui.hrd.i_bit_rate_scale = x264_clip3( x264_ctz( vbv_max_bitrate ) - BR_SHIFT, 0, 15 ); + h->sps->vui.hrd.i_bit_rate_value = vbv_max_bitrate >> ( h->sps->vui.hrd.i_bit_rate_scale + BR_SHIFT ); h->sps->vui.hrd.i_bit_rate_unscaled = h->sps->vui.hrd.i_bit_rate_value << ( h->sps->vui.hrd.i_bit_rate_scale + BR_SHIFT ); - h->sps->vui.hrd.i_cpb_size_scale = x264_clip3( x264_ctz( bufsize ) - CPB_SHIFT, 0, 15 
); - h->sps->vui.hrd.i_cpb_size_value = bufsize >> ( h->sps->vui.hrd.i_cpb_size_scale + CPB_SHIFT ); + h->sps->vui.hrd.i_cpb_size_scale = x264_clip3( x264_ctz( vbv_buffer_size ) - CPB_SHIFT, 0, 15 ); + h->sps->vui.hrd.i_cpb_size_value = vbv_buffer_size >> ( h->sps->vui.hrd.i_cpb_size_scale + CPB_SHIFT ); h->sps->vui.hrd.i_cpb_size_unscaled = h->sps->vui.hrd.i_cpb_size_value << ( h->sps->vui.hrd.i_cpb_size_scale + CPB_SHIFT ); #undef CPB_SHIFT @@ -705,7 +703,7 @@ h->sps->vui.hrd.i_cpb_size_unscaled = vbv_buffer_size; if( rc->b_vbv_min_rate ) - rc->bitrate = h->param.rc.i_bitrate * 1000.; + rc->bitrate = (double)h->param.rc.i_bitrate * kilobit_size; rc->buffer_rate = vbv_max_bitrate / rc->fps; rc->vbv_max_rate = vbv_max_bitrate; rc->buffer_size = vbv_buffer_size; @@ -761,7 +759,7 @@ else rc->qcompress = h->param.rc.f_qcompress; - rc->bitrate = h->param.rc.i_bitrate * 1000.; + rc->bitrate = h->param.rc.i_bitrate * (h->param.i_avcintra_class ? 1024. : 1000.); rc->rate_tolerance = h->param.rc.f_rate_tolerance; rc->nmb = h->mb.i_mb_count; rc->last_non_b_pict_type = -1; @@ -872,7 +870,7 @@ char *mbtree_stats_in = x264_strcat_filename( h->param.rc.psz_stat_in, ".mbtree" ); if( !mbtree_stats_in ) return -1; - rc->p_mbtree_stat_file_in = fopen( mbtree_stats_in, "rb" ); + rc->p_mbtree_stat_file_in = x264_fopen( mbtree_stats_in, "rb" ); x264_free( mbtree_stats_in ); if( !rc->p_mbtree_stat_file_in ) { @@ -913,7 +911,7 @@ * so we'll at least try to roughly approximate this effect. 
*/ res_factor_bits = powf( res_factor, 0.7 ); - if( ( p = strstr( opts, "timebase=" ) ) && sscanf( p, "timebase=%u/%u", &k, &l ) != 2 ) + if( !( p = strstr( opts, "timebase=" ) ) || sscanf( p, "timebase=%u/%u", &k, &l ) != 2 ) { x264_log( h, X264_LOG_ERROR, "timebase specified in stats file not valid\n" ); return -1; @@ -1140,7 +1138,7 @@ if( !rc->psz_stat_file_tmpname ) return -1; - rc->p_stat_file_out = fopen( rc->psz_stat_file_tmpname, "wb" ); + rc->p_stat_file_out = x264_fopen( rc->psz_stat_file_tmpname, "wb" ); if( rc->p_stat_file_out == NULL ) { x264_log( h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n" ); @@ -1158,7 +1156,7 @@ if( !rc->psz_mbtree_stat_file_tmpname || !rc->psz_mbtree_stat_file_name ) return -1; - rc->p_mbtree_stat_file_out = fopen( rc->psz_mbtree_stat_file_tmpname, "wb" ); + rc->p_mbtree_stat_file_out = x264_fopen( rc->psz_mbtree_stat_file_tmpname, "wb" ); if( rc->p_mbtree_stat_file_out == NULL ) { x264_log( h, X264_LOG_ERROR, "ratecontrol_init: can't open mbtree stats file\n" ); @@ -1338,7 +1336,7 @@ b_regular_file = x264_is_regular_file( rc->p_stat_file_out ); fclose( rc->p_stat_file_out ); if( h->i_frame >= rc->num_entries && b_regular_file ) - if( rename( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out ) != 0 ) + if( x264_rename( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out ) != 0 ) { x264_log( h, X264_LOG_ERROR, "failed to rename \"%s\" to \"%s\"\n", rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out ); @@ -1350,7 +1348,7 @@ b_regular_file = x264_is_regular_file( rc->p_mbtree_stat_file_out ); fclose( rc->p_mbtree_stat_file_out ); if( h->i_frame >= rc->num_entries && b_regular_file ) - if( rename( rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name ) != 0 ) + if( x264_rename( rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name ) != 0 ) { x264_log( h, X264_LOG_ERROR, "failed to rename \"%s\" to \"%s\"\n", rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name ); @@ -1398,7 +1396,7 
@@ x264_emms(); if( zone && (!rc->prev_zone || zone->param != rc->prev_zone->param) ) - x264_encoder_reconfig( h, zone->param ); + x264_encoder_reconfig_apply( h, zone->param ); rc->prev_zone = zone; if( h->param.rc.b_stat_read ) @@ -2108,15 +2106,25 @@ rct->buffer_fill_final -= (uint64_t)bits * h->sps->vui.i_time_scale; if( rct->buffer_fill_final < 0 ) - x264_log( h, X264_LOG_WARNING, "VBV underflow (frame %d, %.0f bits)\n", h->i_frame, (double)rct->buffer_fill_final / h->sps->vui.i_time_scale ); + { + double underflow = (double)rct->buffer_fill_final / h->sps->vui.i_time_scale; + if( rcc->rate_factor_max_increment && rcc->qpm >= rcc->qp_novbv + rcc->rate_factor_max_increment ) + x264_log( h, X264_LOG_DEBUG, "VBV underflow due to CRF-max (frame %d, %.0f bits)\n", h->i_frame, underflow ); + else + x264_log( h, X264_LOG_WARNING, "VBV underflow (frame %d, %.0f bits)\n", h->i_frame, underflow ); + } rct->buffer_fill_final = X264_MAX( rct->buffer_fill_final, 0 ); - rct->buffer_fill_final += (uint64_t)bitrate * h->sps->vui.i_num_units_in_tick * h->fenc->i_cpb_duration; - if( h->sps->vui.hrd.b_cbr_hrd && rct->buffer_fill_final > buffer_size ) + if( h->param.i_avcintra_class ) + rct->buffer_fill_final += buffer_size; + else + rct->buffer_fill_final += (uint64_t)bitrate * h->sps->vui.i_num_units_in_tick * h->fenc->i_cpb_duration; + + if( h->param.rc.b_filler && rct->buffer_fill_final > buffer_size ) { int64_t scale = (int64_t)h->sps->vui.i_time_scale * 8; filler = (rct->buffer_fill_final - buffer_size + scale - 1) / scale; - bits = X264_MAX( (FILLER_OVERHEAD - h->param.b_annexb), filler ) * 8; + bits = h->param.i_avcintra_class ? filler * 8 : X264_MAX( (FILLER_OVERHEAD - h->param.b_annexb), filler ) * 8; rct->buffer_fill_final -= (uint64_t)bits * h->sps->vui.i_time_scale; } else @@ -2719,7 +2727,7 @@ * we're adding or removing bits), and starting on the earliest frame that * can influence the buffer fill of that end frame. 
*/ x264_ratecontrol_t *rcc = h->rc; - const double buffer_min = (over ? .1 : .1) * rcc->buffer_size; + const double buffer_min = .1 * rcc->buffer_size; const double buffer_max = .9 * rcc->buffer_size; double fill = fills[*t0-1]; double parity = over ? 1. : -1.;
View file
x264-snapshot-20130723-2245.tar.bz2/encoder/ratecontrol.h -> x264-snapshot-20140321-2245.tar.bz2/encoder/ratecontrol.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * ratecontrol.h: ratecontrol ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Laurent Aimar <fenrir@via.ecp.fr> @@ -43,6 +43,7 @@ void x264_ratecontrol_delete( x264_t * ); void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init ); +int x264_encoder_reconfig_apply( x264_t *h, x264_param_t *param ); void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame, float *quant_offsets ); int x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame, float *quant_offsets );
View file
x264-snapshot-20130723-2245.tar.bz2/encoder/rdo.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/rdo.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * rdo.c: rate-distortion optimization ***************************************************************************** - * Copyright (C) 2005-2013 x264 project + * Copyright (C) 2005-2014 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Jason Garrett-Glaser <darkshikari@gmail.com>
View file
x264-snapshot-20130723-2245.tar.bz2/encoder/set.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/set.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * set: header writing ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu> @@ -31,6 +31,7 @@ // Indexed by pic_struct values static const uint8_t num_clock_ts[10] = { 0, 1, 1, 1, 2, 2, 3, 3, 2, 3 }; +const static uint8_t avcintra_uuid[] = {0xF7, 0x49, 0x3E, 0xB3, 0xD4, 0x00, 0x47, 0x96, 0x86, 0x86, 0xC9, 0x70, 0x7B, 0x64, 0x37, 0x2A}; static void transpose( uint8_t *buf, int w ) { @@ -91,7 +92,7 @@ bs_write( s, 8, payload_size-i ); for( i = 0; i < payload_size; i++ ) - bs_write(s, 8, payload[i] ); + bs_write( s, 8, payload[i] ); bs_rbsp_trailing( s ); bs_flush( s ); @@ -227,7 +228,8 @@ } /* FIXME: not sufficient for interlaced video */ - sps->vui.b_chroma_loc_info_present = param->vui.i_chroma_loc > 0 && param->vui.i_chroma_loc <= 5; + sps->vui.b_chroma_loc_info_present = param->vui.i_chroma_loc > 0 && param->vui.i_chroma_loc <= 5 && + sps->i_chroma_format_idc == CHROMA_420; if( sps->vui.b_chroma_loc_info_present ) { sps->vui.i_chroma_loc_top = param->vui.i_chroma_loc; @@ -249,7 +251,7 @@ // NOTE: HRD related parts of the SPS are initialised in x264_ratecontrol_init_reconfigurable - sps->vui.b_bitstream_restriction = 1; + sps->vui.b_bitstream_restriction = param->i_keyint_max > 1; if( sps->vui.b_bitstream_restriction ) { sps->vui.b_motion_vectors_over_pic_boundaries = 1; @@ -421,7 +423,7 @@ pps->i_sps_id = sps->i_id; pps->b_cabac = param->b_cabac; - pps->b_pic_order = param->b_interlaced; + pps->b_pic_order = !param->i_avcintra_class && param->b_interlaced; pps->i_num_slice_groups = 1; pps->i_num_ref_idx_l0_default_active = param->i_frame_reference; @@ -575,7 +577,7 @@ memcpy( payload, uuid, 16 ); sprintf( payload+16, "x264 - core %d%s - H.264/MPEG-4 AVC codec - " - "Copy%s 
2003-2013 - http://www.videolan.org/x264.html - options: %s", + "Copy%s 2003-2014 - http://www.videolan.org/x264.html - options: %s", X264_BUILD, X264_VERSION, HAVE_GPL?"left":"right", opts ); length = strlen(payload)+1; @@ -725,6 +727,49 @@ x264_sei_write( s, tmp_buf, bs_pos( &q ) / 8, SEI_DEC_REF_PIC_MARKING ); } +int x264_sei_avcintra_umid_write( x264_t *h, bs_t *s ) +{ + uint8_t data[512]; + const char *msg = "UMID"; + const int len = 497; + + memset( data, 0xff, len ); + memcpy( data, avcintra_uuid, sizeof(avcintra_uuid) ); + memcpy( data+16, msg, strlen(msg) ); + + data[20] = 0x13; + /* These bytes appear to be some sort of frame/seconds counter in certain applications, + * but others jump around, so leave them as zero for now */ + data[21] = data[22] = 0; + + data[28] = 0x14; + data[36] = 0x60; + data[41] = 0x22; /* Believed to be some sort of end of basic UMID identifier */ + + x264_sei_write( &h->out.bs, data, len, SEI_USER_DATA_UNREGISTERED ); + + return 0; +} + +int x264_sei_avcintra_vanc_write( x264_t *h, bs_t *s, int len ) +{ + uint8_t data[6000]; + const char *msg = "VANC"; + if( len > sizeof(data) ) + { + x264_log( h, X264_LOG_ERROR, "AVC-Intra SEI is too large (%d)\n", len ); + return -1; + } + + memset( data, 0xff, len ); + memcpy( data, avcintra_uuid, sizeof(avcintra_uuid) ); + memcpy( data+16, msg, strlen(msg) ); + + x264_sei_write( &h->out.bs, data, len, SEI_USER_DATA_UNREGISTERED ); + + return 0; +} + const x264_level_t x264_levels[] = { { 10, 1485, 99, 396, 64, 175, 64, 64, 0, 2, 0, 0, 1 },
View file
x264-snapshot-20130723-2245.tar.bz2/encoder/set.h -> x264-snapshot-20140321-2245.tar.bz2/encoder/set.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * set.h: header writing ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu> @@ -38,6 +38,8 @@ void x264_sei_pic_timing_write( x264_t *h, bs_t *s ); void x264_sei_dec_ref_pic_marking_write( x264_t *h, bs_t *s ); void x264_sei_frame_packing_write( x264_t *h, bs_t *s ); +int x264_sei_avcintra_umid_write( x264_t *h, bs_t *s ); +int x264_sei_avcintra_vanc_write( x264_t *h, bs_t *s, int len ); void x264_sei_write( bs_t *s, uint8_t *payload, int payload_size, int payload_type ); void x264_filler_write( x264_t *h, bs_t *s, int filler );
View file
x264-snapshot-20130723-2245.tar.bz2/encoder/slicetype-cl.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/slicetype-cl.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * slicetype-cl.c: OpenCL slicetype decision code (lowres lookahead) ***************************************************************************** - * Copyright (C) 2012-2013 x264 project + * Copyright (C) 2012-2014 x264 project * * Authors: Steve Borho <sborho@multicorewareinc.com> *
View file
x264-snapshot-20130723-2245.tar.bz2/encoder/slicetype.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/slicetype.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * slicetype.c: lookahead analysis ***************************************************************************** - * Copyright (C) 2005-2013 x264 project + * Copyright (C) 2005-2014 x264 project * * Authors: Jason Garrett-Glaser <darkshikari@gmail.com> * Loren Merritt <lorenm@u.washington.edu> @@ -1022,9 +1022,12 @@ return i_score; } +/* Trade off precision in mbtree for increased range */ +#define MBTREE_PRECISION 0.5f + static void x264_macroblock_tree_finish( x264_t *h, x264_frame_t *frame, float average_duration, int ref0_distance ) { - int fps_factor = round( CLIP_DURATION(average_duration) / CLIP_DURATION(frame->f_duration) * 256 ); + int fps_factor = round( CLIP_DURATION(average_duration) / CLIP_DURATION(frame->f_duration) * 256 / MBTREE_PRECISION ); float weightdelta = 0.0; if( ref0_distance && frame->f_weighted_cost_delta[ref0_distance-1] > 0 ) weightdelta = (1.0 - frame->f_weighted_cost_delta[ref0_distance-1]); @@ -1051,11 +1054,12 @@ int i_bipred_weight = h->param.analyse.b_weighted_bipred ? 64 - (dist_scale_factor>>2) : 32; int16_t (*mvs[2])[2] = { frames[b]->lowres_mvs[0][b-p0-1], frames[b]->lowres_mvs[1][p1-b-1] }; int bipred_weights[2] = {i_bipred_weight, 64 - i_bipred_weight}; - int *buf = h->scratch_buffer; + int16_t *buf = h->scratch_buffer; uint16_t *propagate_cost = frames[b]->i_propagate_cost; + uint16_t *lowres_costs = frames[b]->lowres_costs[b-p0][p1-b]; x264_emms(); - float fps_factor = CLIP_DURATION(frames[b]->f_duration) / CLIP_DURATION(average_duration); + float fps_factor = CLIP_DURATION(frames[b]->f_duration) / (CLIP_DURATION(average_duration) * 256.0f) * MBTREE_PRECISION; /* For non-reffed frames the source costs are always zero, so just memset one row and re-use it. 
*/ if( !referenced ) @@ -1065,72 +1069,17 @@ { int mb_index = h->mb.i_mb_y*h->mb.i_mb_stride; h->mc.mbtree_propagate_cost( buf, propagate_cost, - frames[b]->i_intra_cost+mb_index, frames[b]->lowres_costs[b-p0][p1-b]+mb_index, + frames[b]->i_intra_cost+mb_index, lowres_costs+mb_index, frames[b]->i_inv_qscale_factor+mb_index, &fps_factor, h->mb.i_mb_width ); if( referenced ) propagate_cost += h->mb.i_mb_width; - for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->mb.i_mb_width; h->mb.i_mb_x++, mb_index++ ) + + h->mc.mbtree_propagate_list( h, ref_costs[0], &mvs[0][mb_index], buf, &lowres_costs[mb_index], + bipred_weights[0], h->mb.i_mb_y, h->mb.i_mb_width, 0 ); + if( b != p1 ) { - int propagate_amount = buf[h->mb.i_mb_x]; - /* Don't propagate for an intra block. */ - if( propagate_amount > 0 ) - { - /* Access width-2 bitfield. */ - int lists_used = frames[b]->lowres_costs[b-p0][p1-b][mb_index] >> LOWRES_COST_SHIFT; - /* Follow the MVs to the previous frame(s). */ - for( int list = 0; list < 2; list++ ) - if( (lists_used >> list)&1 ) - { -#define CLIP_ADD(s,x) (s) = X264_MIN((s)+(x),(1<<16)-1) - int listamount = propagate_amount; - /* Apply bipred weighting. */ - if( lists_used == 3 ) - listamount = (listamount * bipred_weights[list] + 32) >> 6; - - /* Early termination for simple case of mv0. */ - if( !M32( mvs[list][mb_index] ) ) - { - CLIP_ADD( ref_costs[list][mb_index], listamount ); - continue; - } - - int x = mvs[list][mb_index][0]; - int y = mvs[list][mb_index][1]; - int mbx = (x>>5)+h->mb.i_mb_x; - int mby = (y>>5)+h->mb.i_mb_y; - int idx0 = mbx + mby * h->mb.i_mb_stride; - int idx1 = idx0 + 1; - int idx2 = idx0 + h->mb.i_mb_stride; - int idx3 = idx0 + h->mb.i_mb_stride + 1; - x &= 31; - y &= 31; - int idx0weight = (32-y)*(32-x); - int idx1weight = (32-y)*x; - int idx2weight = y*(32-x); - int idx3weight = y*x; - - /* We could just clip the MVs, but pixels that lie outside the frame probably shouldn't - * be counted. 
*/ - if( mbx < h->mb.i_mb_width-1 && mby < h->mb.i_mb_height-1 && mbx >= 0 && mby >= 0 ) - { - CLIP_ADD( ref_costs[list][idx0], (listamount*idx0weight+512)>>10 ); - CLIP_ADD( ref_costs[list][idx1], (listamount*idx1weight+512)>>10 ); - CLIP_ADD( ref_costs[list][idx2], (listamount*idx2weight+512)>>10 ); - CLIP_ADD( ref_costs[list][idx3], (listamount*idx3weight+512)>>10 ); - } - else /* Check offsets individually */ - { - if( mbx < h->mb.i_mb_width && mby < h->mb.i_mb_height && mbx >= 0 && mby >= 0 ) - CLIP_ADD( ref_costs[list][idx0], (listamount*idx0weight+512)>>10 ); - if( mbx+1 < h->mb.i_mb_width && mby < h->mb.i_mb_height && mbx+1 >= 0 && mby >= 0 ) - CLIP_ADD( ref_costs[list][idx1], (listamount*idx1weight+512)>>10 ); - if( mbx < h->mb.i_mb_width && mby+1 < h->mb.i_mb_height && mbx >= 0 && mby+1 >= 0 ) - CLIP_ADD( ref_costs[list][idx2], (listamount*idx2weight+512)>>10 ); - if( mbx+1 < h->mb.i_mb_width && mby+1 < h->mb.i_mb_height && mbx+1 >= 0 && mby+1 >= 0 ) - CLIP_ADD( ref_costs[list][idx3], (listamount*idx3weight+512)>>10 ); - } - } - } + h->mc.mbtree_propagate_list( h, ref_costs[1], &mvs[1][mb_index], buf, &lowres_costs[mb_index], + bipred_weights[1], h->mb.i_mb_y, h->mb.i_mb_width, 1 ); } }
View file
x264-snapshot-20130723-2245.tar.bz2/filters/filters.c -> x264-snapshot-20140321-2245.tar.bz2/filters/filters.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * filters.c: common filter functions ***************************************************************************** - * Copyright (C) 2010-2013 x264 project + * Copyright (C) 2010-2014 x264 project * * Authors: Diogo Franco <diogomfranco@gmail.com> * Steven Walters <kemuri9@gmail.com>
View file
x264-snapshot-20130723-2245.tar.bz2/filters/filters.h -> x264-snapshot-20140321-2245.tar.bz2/filters/filters.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * filters.h: common filter functions ***************************************************************************** - * Copyright (C) 2010-2013 x264 project + * Copyright (C) 2010-2014 x264 project * * Authors: Diogo Franco <diogomfranco@gmail.com> * Steven Walters <kemuri9@gmail.com>
View file
x264-snapshot-20130723-2245.tar.bz2/filters/video/cache.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/cache.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * cache.c: cache video filter ***************************************************************************** - * Copyright (C) 2010-2013 x264 project + * Copyright (C) 2010-2014 x264 project * * Authors: Steven Walters <kemuri9@gmail.com> *
View file
x264-snapshot-20130723-2245.tar.bz2/filters/video/crop.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/crop.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * crop.c: crop video filter ***************************************************************************** - * Copyright (C) 2010-2013 x264 project + * Copyright (C) 2010-2014 x264 project * * Authors: Steven Walters <kemuri9@gmail.com> * James Darnley <james.darnley@gmail.com> @@ -105,8 +105,7 @@ for( int i = 0; i < output->img.planes; i++ ) { intptr_t offset = output->img.stride[i] * h->dims[1] * h->csp->height[i]; - offset += h->dims[0] * h->csp->width[i]; - offset *= x264_cli_csp_depth_factor( output->img.csp ); + offset += h->dims[0] * h->csp->width[i] * x264_cli_csp_depth_factor( output->img.csp ); output->img.plane[i] += offset; } return 0;
View file
x264-snapshot-20130723-2245.tar.bz2/filters/video/depth.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/depth.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * depth.c: bit-depth conversion video filter ***************************************************************************** - * Copyright (C) 2010-2013 x264 project + * Copyright (C) 2010-2014 x264 project * * Authors: Oskar Arvidsson <oskar@irock.se> *
View file
x264-snapshot-20130723-2245.tar.bz2/filters/video/fix_vfr_pts.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/fix_vfr_pts.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * fix_vfr_pts.c: vfr pts fixing video filter ***************************************************************************** - * Copyright (C) 2010-2013 x264 project + * Copyright (C) 2010-2014 x264 project * * Authors: Steven Walters <kemuri9@gmail.com> *
View file
x264-snapshot-20130723-2245.tar.bz2/filters/video/internal.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/internal.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * internal.c: video filter utilities ***************************************************************************** - * Copyright (C) 2010-2013 x264 project + * Copyright (C) 2010-2014 x264 project * * Authors: Steven Walters <kemuri9@gmail.com> *
View file
x264-snapshot-20130723-2245.tar.bz2/filters/video/internal.h -> x264-snapshot-20140321-2245.tar.bz2/filters/video/internal.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * internal.h: video filter utilities ***************************************************************************** - * Copyright (C) 2010-2013 x264 project + * Copyright (C) 2010-2014 x264 project * * Authors: Steven Walters <kemuri9@gmail.com> *
View file
x264-snapshot-20130723-2245.tar.bz2/filters/video/resize.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/resize.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * resize.c: resize video filter ***************************************************************************** - * Copyright (C) 2010-2013 x264 project + * Copyright (C) 2010-2014 x264 project * * Authors: Steven Walters <kemuri9@gmail.com> * @@ -45,8 +45,8 @@ #include <libavutil/opt.h> #include <libavutil/pixdesc.h> -#ifndef PIX_FMT_BGRA64 -#define PIX_FMT_BGRA64 PIX_FMT_NONE +#ifndef AV_PIX_FMT_BGRA64 +#define AV_PIX_FMT_BGRA64 AV_PIX_FMT_NONE #endif typedef struct @@ -94,9 +94,12 @@ for( int i = X264_CSP_NONE+1; i < X264_CSP_CLI_MAX; i++ ) { - printf( "%s", x264_cli_csps[i].name ); - if( i+1 < X264_CSP_CLI_MAX ) - printf( ", " ); + if( x264_cli_csps[i].name ) + { + printf( "%s", x264_cli_csps[i].name ); + if( i+1 < X264_CSP_CLI_MAX ) + printf( ", " ); + } } printf( "\n" " - depth: 8 or 16 bits per pixel [keep current]\n" @@ -143,19 +146,19 @@ switch( csp&X264_CSP_MASK ) { case X264_CSP_YV12: /* specially handled via swapping chroma */ - case X264_CSP_I420: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV420P16 : PIX_FMT_YUV420P; + case X264_CSP_I420: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_YUV420P16 : AV_PIX_FMT_YUV420P; case X264_CSP_YV16: /* specially handled via swapping chroma */ - case X264_CSP_I422: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV422P16 : PIX_FMT_YUV422P; + case X264_CSP_I422: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_YUV422P16 : AV_PIX_FMT_YUV422P; case X264_CSP_YV24: /* specially handled via swapping chroma */ - case X264_CSP_I444: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV444P16 : PIX_FMT_YUV444P; - case X264_CSP_RGB: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_RGB48 : PIX_FMT_RGB24; - case X264_CSP_BGR: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_BGR48 : PIX_FMT_BGR24; - case X264_CSP_BGRA: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_BGRA64 : PIX_FMT_BGRA; + case X264_CSP_I444: return csp&X264_CSP_HIGH_DEPTH ? 
AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_YUV444P; + case X264_CSP_RGB: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_RGB48 : AV_PIX_FMT_RGB24; + case X264_CSP_BGR: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_BGR48 : AV_PIX_FMT_BGR24; + case X264_CSP_BGRA: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_BGRA64 : AV_PIX_FMT_BGRA; /* the next csp has no equivalent 16bit depth in swscale */ - case X264_CSP_NV12: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_NONE : PIX_FMT_NV12; + case X264_CSP_NV12: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_NONE : AV_PIX_FMT_NV12; /* the next csp is no supported by swscale at all */ case X264_CSP_NV16: - default: return PIX_FMT_NONE; + default: return AV_PIX_FMT_NONE; } } @@ -175,12 +178,12 @@ int pix_fmt = convert_csp_to_pix_fmt( csp ); // first determine the base csp int ret = X264_CSP_NONE; - const AVPixFmtDescriptor *pix_desc = av_pix_fmt_descriptors+pix_fmt; - if( (unsigned)pix_fmt >= PIX_FMT_NB || !pix_desc->name ) + const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get( pix_fmt ); + if( !pix_desc || !pix_desc->name ) return ret; const char *pix_fmt_name = pix_desc->name; - int is_rgb = pix_desc->flags & (PIX_FMT_RGB | PIX_FMT_PAL); + int is_rgb = pix_desc->flags & (AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_PAL); int is_bgr = !!strstr( pix_fmt_name, "bgr" ); if( is_bgr || is_rgb ) { @@ -243,8 +246,11 @@ if( strlen( str_csp ) == 0 ) csp = info->csp & X264_CSP_MASK; else - for( csp = X264_CSP_CLI_MAX-1; x264_cli_csps[csp].name && strcasecmp( x264_cli_csps[csp].name, str_csp ); ) - csp--; + for( csp = X264_CSP_CLI_MAX-1; csp > X264_CSP_NONE; csp-- ) + { + if( x264_cli_csps[csp].name && !strcasecmp( x264_cli_csps[csp].name, str_csp ) ) + break; + } FAIL_IF_ERROR( csp == X264_CSP_NONE, "unsupported colorspace `%s'\n", str_csp ); h->dst_csp = csp; if( depth == 16 ) @@ -392,7 +398,7 @@ h->scale = input_prop; if( !h->buffer_allocated ) { - if( x264_cli_pic_alloc( &h->buffer, h->dst_csp, h->dst.width, h->dst.height ) ) + if( x264_cli_pic_alloc_aligned( 
&h->buffer, h->dst_csp, h->dst.width, h->dst.height ) ) return -1; h->buffer_allocated = 1; } @@ -462,11 +468,11 @@ int dst_pix_fmt_inv = convert_csp_to_pix_fmt( h->dst_csp ^ X264_CSP_HIGH_DEPTH ); /* confirm swscale can support this conversion */ - FAIL_IF_ERROR( src_pix_fmt == PIX_FMT_NONE && src_pix_fmt_inv != PIX_FMT_NONE, + FAIL_IF_ERROR( src_pix_fmt == AV_PIX_FMT_NONE && src_pix_fmt_inv != AV_PIX_FMT_NONE, "input colorspace %s with bit depth %d is not supported\n", av_get_pix_fmt_name( src_pix_fmt_inv ), info->csp & X264_CSP_HIGH_DEPTH ? 16 : 8 ); FAIL_IF_ERROR( !sws_isSupportedInput( src_pix_fmt ), "input colorspace %s is not supported\n", av_get_pix_fmt_name( src_pix_fmt ) ) - FAIL_IF_ERROR( h->dst.pix_fmt == PIX_FMT_NONE && dst_pix_fmt_inv != PIX_FMT_NONE, + FAIL_IF_ERROR( h->dst.pix_fmt == AV_PIX_FMT_NONE && dst_pix_fmt_inv != AV_PIX_FMT_NONE, "input colorspace %s with bit depth %d is not supported\n", av_get_pix_fmt_name( dst_pix_fmt_inv ), h->dst_csp & X264_CSP_HIGH_DEPTH ? 16 : 8 ); FAIL_IF_ERROR( !sws_isSupportedOutput( h->dst.pix_fmt ), "output colorspace %s is not supported\n", av_get_pix_fmt_name( h->dst.pix_fmt ) )
View file
x264-snapshot-20130723-2245.tar.bz2/filters/video/select_every.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/select_every.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * select_every.c: select-every video filter ***************************************************************************** - * Copyright (C) 2010-2013 x264 project + * Copyright (C) 2010-2014 x264 project * * Authors: Steven Walters <kemuri9@gmail.com> *
View file
x264-snapshot-20130723-2245.tar.bz2/filters/video/source.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/source.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * source.c: source video filter ***************************************************************************** - * Copyright (C) 2010-2013 x264 project + * Copyright (C) 2010-2014 x264 project * * Authors: Steven Walters <kemuri9@gmail.com> *
View file
x264-snapshot-20130723-2245.tar.bz2/filters/video/video.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/video.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * video.c: video filters ***************************************************************************** - * Copyright (C) 2010-2013 x264 project + * Copyright (C) 2010-2014 x264 project * * Authors: Steven Walters <kemuri9@gmail.com> *
View file
x264-snapshot-20130723-2245.tar.bz2/filters/video/video.h -> x264-snapshot-20140321-2245.tar.bz2/filters/video/video.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * video.h: video filters ***************************************************************************** - * Copyright (C) 2010-2013 x264 project + * Copyright (C) 2010-2014 x264 project * * Authors: Steven Walters <kemuri9@gmail.com> *
View file
x264-snapshot-20130723-2245.tar.bz2/input/avs.c -> x264-snapshot-20140321-2245.tar.bz2/input/avs.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * avs.c: avisynth input ***************************************************************************** - * Copyright (C) 2009-2013 x264 project + * Copyright (C) 2009-2014 x264 project * * Authors: Steven Walters <kemuri9@gmail.com> * @@ -35,7 +35,7 @@ #define avs_address dlsym #else #include <windows.h> -#define avs_open LoadLibrary( "avisynth" ) +#define avs_open LoadLibraryW( L"avisynth" ) #define avs_close FreeLibrary #define avs_address GetProcAddress #endif @@ -172,7 +172,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, cli_input_opt_t *opt ) { - FILE *fh = fopen( psz_filename, "r" ); + FILE *fh = x264_fopen( psz_filename, "r" ); if( !fh ) return -1; FAIL_IF_ERROR( !x264_is_regular_file( fh ), "AVS input is incompatible with non-regular file `%s'\n", psz_filename ); @@ -192,7 +192,16 @@ if( avs_version <= 0 ) return -1; x264_cli_log( "avs", X264_LOG_DEBUG, "using avisynth version %.2f\n", avs_version ); + +#ifdef _WIN32 + /* Avisynth doesn't support Unicode filenames. 
*/ + char ansi_filename[MAX_PATH]; + FAIL_IF_ERROR( !x264_ansi_filename( psz_filename, ansi_filename, MAX_PATH, 0 ), "invalid ansi filename\n" ); + AVS_Value arg = avs_new_value_string( ansi_filename ); +#else AVS_Value arg = avs_new_value_string( psz_filename ); +#endif + AVS_Value res; char *filename_ext = get_filename_extension( psz_filename ); @@ -329,11 +338,11 @@ info->csp = X264_CSP_I420; #if HAVE_SWSCALE else if( avs_is_yuy2( vi ) ) - info->csp = PIX_FMT_YUYV422 | X264_CSP_OTHER; + info->csp = AV_PIX_FMT_YUYV422 | X264_CSP_OTHER; else if( avs_is_yv411( vi ) ) - info->csp = PIX_FMT_YUV411P | X264_CSP_OTHER; + info->csp = AV_PIX_FMT_YUV411P | X264_CSP_OTHER; else if( avs_is_y8( vi ) ) - info->csp = PIX_FMT_GRAY8 | X264_CSP_OTHER; + info->csp = AV_PIX_FMT_GRAY8 | X264_CSP_OTHER; #endif else info->csp = X264_CSP_NONE; @@ -352,7 +361,7 @@ if( cli_csp ) pic->img.planes = cli_csp->planes; #if HAVE_SWSCALE - else if( csp == (PIX_FMT_YUV411P | X264_CSP_OTHER) ) + else if( csp == (AV_PIX_FMT_YUV411P | X264_CSP_OTHER) ) pic->img.planes = 3; else pic->img.planes = 1; //y8 and yuy2 are one plane
View file
x264-snapshot-20130723-2245.tar.bz2/input/ffms.c -> x264-snapshot-20140321-2245.tar.bz2/input/ffms.c
Changed
@@ -1,10 +1,11 @@ /***************************************************************************** * ffms.c: ffmpegsource input ***************************************************************************** - * Copyright (C) 2009-2013 x264 project + * Copyright (C) 2009-2014 x264 project * * Authors: Mike Gurlitz <mike.gurlitz@gmail.com> * Steven Walters <kemuri9@gmail.com> + * Henrik Gramner <henrik@gramner.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -34,8 +35,6 @@ #ifdef _WIN32 #include <windows.h> -#else -#define SetConsoleTitle(t) #endif typedef struct @@ -60,7 +59,7 @@ char buf[200]; sprintf( buf, "ffms [info]: indexing input file [%.1f%%]", 100.0 * current / total ); fprintf( stderr, "%s \r", buf+5 ); - SetConsoleTitle( buf ); + x264_cli_set_console_title( buf ); fflush( stderr ); return 0; } @@ -70,9 +69,9 @@ { switch( csp ) { - case PIX_FMT_YUVJ420P: *fullrange = 1; return PIX_FMT_YUV420P; - case PIX_FMT_YUVJ422P: *fullrange = 1; return PIX_FMT_YUV422P; - case PIX_FMT_YUVJ444P: *fullrange = 1; return PIX_FMT_YUV444P; + case AV_PIX_FMT_YUVJ420P: *fullrange = 1; return AV_PIX_FMT_YUV420P; + case AV_PIX_FMT_YUVJ422P: *fullrange = 1; return AV_PIX_FMT_YUV422P; + case AV_PIX_FMT_YUVJ444P: *fullrange = 1; return AV_PIX_FMT_YUV444P; default: return csp; } } @@ -82,7 +81,21 @@ ffms_hnd_t *h = calloc( 1, sizeof(ffms_hnd_t) ); if( !h ) return -1; + +#ifdef __MINGW32__ + /* FFMS supports UTF-8 filenames, but it uses std::fstream internally which is broken with Unicode in MinGW. 
*/ FFMS_Init( 0, 0 ); + char src_filename[MAX_PATH]; + char idx_filename[MAX_PATH]; + FAIL_IF_ERROR( !x264_ansi_filename( psz_filename, src_filename, MAX_PATH, 0 ), "invalid ansi filename\n" ); + if( opt->index_file ) + FAIL_IF_ERROR( !x264_ansi_filename( opt->index_file, idx_filename, MAX_PATH, 1 ), "invalid ansi filename\n" ); +#else + FFMS_Init( 0, 1 ); + char *src_filename = psz_filename; + char *idx_filename = opt->index_file; +#endif + FFMS_ErrorInfo e; e.BufferSize = 0; int seekmode = opt->seek ? FFMS_SEEK_NORMAL : FFMS_SEEK_LINEAR_NO_RW; @@ -90,29 +103,29 @@ FFMS_Index *idx = NULL; if( opt->index_file ) { - struct stat index_s, input_s; - if( !stat( opt->index_file, &index_s ) && !stat( psz_filename, &input_s ) && - input_s.st_mtime < index_s.st_mtime ) - idx = FFMS_ReadIndex( opt->index_file, &e ); + x264_struct_stat index_s, input_s; + if( !x264_stat( opt->index_file, &index_s ) && !x264_stat( psz_filename, &input_s ) && + input_s.st_mtime < index_s.st_mtime && index_s.st_size ) + idx = FFMS_ReadIndex( idx_filename, &e ); } if( !idx ) { if( opt->progress ) { - idx = FFMS_MakeIndex( psz_filename, 0, 0, NULL, NULL, 0, update_progress, &h->time, &e ); + idx = FFMS_MakeIndex( src_filename, 0, 0, NULL, NULL, 0, update_progress, &h->time, &e ); fprintf( stderr, " \r" ); } else - idx = FFMS_MakeIndex( psz_filename, 0, 0, NULL, NULL, 0, NULL, NULL, &e ); + idx = FFMS_MakeIndex( src_filename, 0, 0, NULL, NULL, 0, NULL, NULL, &e ); FAIL_IF_ERROR( !idx, "could not create index\n" ) - if( opt->index_file && FFMS_WriteIndex( opt->index_file, idx, &e ) ) + if( opt->index_file && FFMS_WriteIndex( idx_filename, idx, &e ) ) x264_cli_log( "ffms", X264_LOG_WARNING, "could not write index file\n" ); } int trackno = FFMS_GetFirstTrackOfType( idx, FFMS_TYPE_VIDEO, &e ); FAIL_IF_ERROR( trackno < 0, "could not find video track\n" ) - h->video_source = FFMS_CreateVideoSource( psz_filename, trackno, idx, 1, seekmode, &e ); + h->video_source = FFMS_CreateVideoSource( src_filename, 
trackno, idx, 1, seekmode, &e ); FAIL_IF_ERROR( !h->video_source, "could not create video source\n" ) h->track = FFMS_GetTrackFromVideo( h->video_source );
View file
x264-snapshot-20130723-2245.tar.bz2/input/input.c -> x264-snapshot-20140321-2245.tar.bz2/input/input.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * input.c: common input functions ***************************************************************************** - * Copyright (C) 2010-2013 x264 project + * Copyright (C) 2010-2014 x264 project * * Authors: Steven Walters <kemuri9@gmail.com> * @@ -42,7 +42,8 @@ int x264_cli_csp_is_invalid( int csp ) { int csp_mask = csp & X264_CSP_MASK; - return csp_mask <= X264_CSP_NONE || csp_mask >= X264_CSP_CLI_MAX || csp & X264_CSP_OTHER; + return csp_mask <= X264_CSP_NONE || csp_mask >= X264_CSP_CLI_MAX || + csp_mask == X264_CSP_V210 || csp & X264_CSP_OTHER; } int x264_cli_csp_depth_factor( int csp ) @@ -74,7 +75,7 @@ return size; } -int x264_cli_pic_alloc( cli_pic_t *pic, int csp, int width, int height ) +static int x264_cli_pic_alloc_internal( cli_pic_t *pic, int csp, int width, int height, int align ) { memset( pic, 0, sizeof(cli_pic_t) ); int csp_mask = csp & X264_CSP_MASK; @@ -87,15 +88,29 @@ pic->img.height = height; for( int i = 0; i < pic->img.planes; i++ ) { - pic->img.plane[i] = x264_malloc( x264_cli_pic_plane_size( csp, width, height, i ) ); - if( !pic->img.plane[i] ) - return -1; - pic->img.stride[i] = width * x264_cli_csps[csp_mask].width[i] * x264_cli_csp_depth_factor( csp ); + int stride = width * x264_cli_csps[csp_mask].width[i]; + stride *= x264_cli_csp_depth_factor( csp ); + stride = ALIGN( stride, align ); + uint64_t size = (uint64_t)(height * x264_cli_csps[csp_mask].height[i]) * stride; + pic->img.plane[i] = x264_malloc( size ); + if( !pic->img.plane[i] ) + return -1; + pic->img.stride[i] = stride; } return 0; } +int x264_cli_pic_alloc( cli_pic_t *pic, int csp, int width, int height ) +{ + return x264_cli_pic_alloc_internal( pic, csp, width, height, 1 ); +} + +int x264_cli_pic_alloc_aligned( cli_pic_t *pic, int csp, int width, int height ) +{ + return x264_cli_pic_alloc_internal( pic, csp, width, height, NATIVE_ALIGN ); +} + void x264_cli_pic_clean( cli_pic_t 
*pic ) { for( int i = 0; i < pic->img.planes; i++ )
View file
x264-snapshot-20130723-2245.tar.bz2/input/input.h -> x264-snapshot-20140321-2245.tar.bz2/input/input.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * input.h: file input ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu> @@ -124,6 +124,7 @@ int x264_cli_csp_is_invalid( int csp ); int x264_cli_csp_depth_factor( int csp ); int x264_cli_pic_alloc( cli_pic_t *pic, int csp, int width, int height ); +int x264_cli_pic_alloc_aligned( cli_pic_t *pic, int csp, int width, int height ); void x264_cli_pic_clean( cli_pic_t *pic ); uint64_t x264_cli_pic_plane_size( int csp, int width, int height, int plane ); uint64_t x264_cli_pic_size( int csp, int width, int height );
View file
x264-snapshot-20130723-2245.tar.bz2/input/lavf.c -> x264-snapshot-20140321-2245.tar.bz2/input/lavf.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * lavf.c: libavformat input ***************************************************************************** - * Copyright (C) 2009-2013 x264 project + * Copyright (C) 2009-2014 x264 project * * Authors: Mike Gurlitz <mike.gurlitz@gmail.com> * Steven Walters <kemuri9@gmail.com> @@ -53,9 +53,9 @@ { switch( csp ) { - case PIX_FMT_YUVJ420P: *fullrange = 1; return PIX_FMT_YUV420P; - case PIX_FMT_YUVJ422P: *fullrange = 1; return PIX_FMT_YUV422P; - case PIX_FMT_YUVJ444P: *fullrange = 1; return PIX_FMT_YUV444P; + case AV_PIX_FMT_YUVJ420P: *fullrange = 1; return AV_PIX_FMT_YUV420P; + case AV_PIX_FMT_YUVJ422P: *fullrange = 1; return AV_PIX_FMT_YUV422P; + case AV_PIX_FMT_YUVJ444P: *fullrange = 1; return AV_PIX_FMT_YUV444P; default: return csp; } } @@ -162,7 +162,7 @@ if( opt->resolution ) { av_dict_set( &options, "video_size", opt->resolution, 0 ); - const char *csp = opt->colorspace ? opt->colorspace : av_get_pix_fmt_name( PIX_FMT_YUV420P ); + const char *csp = opt->colorspace ? opt->colorspace : av_get_pix_fmt_name( AV_PIX_FMT_YUV420P ); av_dict_set( &options, "pixel_format", csp, 0 ); } @@ -210,7 +210,7 @@ /* avisynth stores rgb data vertically flipped. */ if( !strcasecmp( get_filename_extension( psz_filename ), "avs" ) && - (c->pix_fmt == PIX_FMT_BGRA || c->pix_fmt == PIX_FMT_BGR24) ) + (c->pix_fmt == AV_PIX_FMT_BGRA || c->pix_fmt == AV_PIX_FMT_BGR24) ) info->csp |= X264_CSP_VFLIP; *p_handle = h;
View file
x264-snapshot-20130723-2245.tar.bz2/input/raw.c -> x264-snapshot-20140321-2245.tar.bz2/input/raw.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * raw.c: raw input ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu> @@ -55,8 +55,11 @@ FAIL_IF_ERROR( !info->width || !info->height, "raw input requires a resolution.\n" ) if( opt->colorspace ) { - for( info->csp = X264_CSP_CLI_MAX-1; x264_cli_csps[info->csp].name && strcasecmp( x264_cli_csps[info->csp].name, opt->colorspace ); ) - info->csp--; + for( info->csp = X264_CSP_CLI_MAX-1; info->csp > X264_CSP_NONE; info->csp-- ) + { + if( x264_cli_csps[info->csp].name && !strcasecmp( x264_cli_csps[info->csp].name, opt->colorspace ) ) + break; + } FAIL_IF_ERROR( info->csp == X264_CSP_NONE, "unsupported colorspace `%s'\n", opt->colorspace ); } else /* default */ @@ -70,7 +73,7 @@ if( !strcmp( psz_filename, "-" ) ) h->fh = stdin; else - h->fh = fopen( psz_filename, "rb" ); + h->fh = x264_fopen( psz_filename, "rb" ); if( h->fh == NULL ) return -1; @@ -99,14 +102,14 @@ return 0; } -static int read_frame_internal( cli_pic_t *pic, raw_hnd_t *h ) +static int read_frame_internal( cli_pic_t *pic, raw_hnd_t *h, int bit_depth_uc ) { int error = 0; int pixel_depth = x264_cli_csp_depth_factor( pic->img.csp ); for( int i = 0; i < pic->img.planes && !error; i++ ) { error |= fread( pic->img.plane[i], pixel_depth, h->plane_size[i], h->fh ) != h->plane_size[i]; - if( h->bit_depth & 7 ) + if( bit_depth_uc ) { /* upconvert non 16bit high depth planes to 16bit using the same * algorithm as used in the depth filter. 
*/ @@ -131,13 +134,13 @@ else while( i_frame > h->next_frame ) { - if( read_frame_internal( pic, h ) ) + if( read_frame_internal( pic, h, 0 ) ) return -1; h->next_frame++; } } - if( read_frame_internal( pic, h ) ) + if( read_frame_internal( pic, h, h->bit_depth & 7 ) ) return -1; h->next_frame = i_frame+1;
View file
x264-snapshot-20130723-2245.tar.bz2/input/thread.c -> x264-snapshot-20140321-2245.tar.bz2/input/thread.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * thread.c: threaded input ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu>
View file
x264-snapshot-20130723-2245.tar.bz2/input/timecode.c -> x264-snapshot-20140321-2245.tar.bz2/input/timecode.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * timecode.c: timecode file input ***************************************************************************** - * Copyright (C) 2010-2013 x264 project + * Copyright (C) 2010-2014 x264 project * * Authors: Yusuke Nakamura <muken.the.vfrmaniac@gmail.com> * @@ -368,7 +368,7 @@ timecode_input.picture_alloc = h->input.picture_alloc; timecode_input.picture_clean = h->input.picture_clean; - tcfile_in = fopen( psz_filename, "rb" ); + tcfile_in = x264_fopen( psz_filename, "rb" ); FAIL_IF_ERROR( !tcfile_in, "can't open `%s'\n", psz_filename ) else if( !x264_is_regular_file( tcfile_in ) ) {
View file
x264-snapshot-20130723-2245.tar.bz2/input/y4m.c -> x264-snapshot-20140321-2245.tar.bz2/input/y4m.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * y4m.c: y4m input ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu> @@ -81,7 +81,7 @@ if( !strcmp( psz_filename, "-" ) ) h->fh = stdin; else - h->fh = fopen(psz_filename, "rb"); + h->fh = x264_fopen(psz_filename, "rb"); if( h->fh == NULL ) return -1; @@ -223,7 +223,7 @@ return 0; } -static int read_frame_internal( cli_pic_t *pic, y4m_hnd_t *h ) +static int read_frame_internal( cli_pic_t *pic, y4m_hnd_t *h, int bit_depth_uc ) { size_t slen = strlen( Y4M_FRAME_MAGIC ); int pixel_depth = x264_cli_csp_depth_factor( pic->img.csp ); @@ -249,7 +249,7 @@ for( i = 0; i < pic->img.planes && !error; i++ ) { error |= fread( pic->img.plane[i], pixel_depth, h->plane_size[i], h->fh ) != h->plane_size[i]; - if( h->bit_depth & 7 ) + if( bit_depth_uc ) { /* upconvert non 16bit high depth planes to 16bit using the same * algorithm as used in the depth filter. */ @@ -274,13 +274,13 @@ else while( i_frame > h->next_frame ) { - if( read_frame_internal( pic, h ) ) + if( read_frame_internal( pic, h, 0 ) ) return -1; h->next_frame++; } } - if( read_frame_internal( pic, h ) ) + if( read_frame_internal( pic, h, h->bit_depth & 7 ) ) return -1; h->next_frame = i_frame+1;
View file
x264-snapshot-20130723-2245.tar.bz2/output/flv.c -> x264-snapshot-20140321-2245.tar.bz2/output/flv.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * flv.c: flv muxer ***************************************************************************** - * Copyright (C) 2009-2013 x264 project + * Copyright (C) 2009-2014 x264 project * * Authors: Kieran Kunhya <kieran@kunhya.com> * @@ -75,11 +75,10 @@ static int open_file( char *psz_filename, hnd_t *p_handle, cli_output_opt_t *opt ) { - flv_hnd_t *p_flv = malloc( sizeof(*p_flv) ); *p_handle = NULL; + flv_hnd_t *p_flv = calloc( 1, sizeof(flv_hnd_t) ); if( !p_flv ) return -1; - memset( p_flv, 0, sizeof(*p_flv) ); p_flv->b_dts_compress = opt->use_dts_compress;
View file
x264-snapshot-20130723-2245.tar.bz2/output/flv_bytestream.c -> x264-snapshot-20140321-2245.tar.bz2/output/flv_bytestream.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * flv_bytestream.c: flv muxer utilities ***************************************************************************** - * Copyright (C) 2009-2013 x264 project + * Copyright (C) 2009-2014 x264 project * * Authors: Kieran Kunhya <kieran@kunhya.com> * @@ -87,16 +87,14 @@ flv_buffer *flv_create_writer( const char *filename ) { - flv_buffer *c = malloc( sizeof(*c) ); - + flv_buffer *c = calloc( 1, sizeof(flv_buffer) ); if( !c ) return NULL; - memset( c, 0, sizeof(*c) ); if( !strcmp( filename, "-" ) ) c->fp = stdout; else - c->fp = fopen( filename, "wb" ); + c->fp = x264_fopen( filename, "wb" ); if( !c->fp ) { free( c );
View file
x264-snapshot-20130723-2245.tar.bz2/output/flv_bytestream.h -> x264-snapshot-20140321-2245.tar.bz2/output/flv_bytestream.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * flv_bytestream.h: flv muxer utilities ***************************************************************************** - * Copyright (C) 2009-2013 x264 project + * Copyright (C) 2009-2014 x264 project * * Authors: Kieran Kunhya <kieran@kunhya.com> *
View file
x264-snapshot-20130723-2245.tar.bz2/output/matroska.c -> x264-snapshot-20140321-2245.tar.bz2/output/matroska.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * matroska.c: matroska muxer ***************************************************************************** - * Copyright (C) 2005-2013 x264 project + * Copyright (C) 2005-2014 x264 project * * Authors: Mike Matsnev <mike@haali.su> * @@ -33,6 +33,7 @@ int width, height, d_width, d_height; int display_size_units; + int stereo_mode; int64_t frame_duration; @@ -44,16 +45,11 @@ static int open_file( char *psz_filename, hnd_t *p_handle, cli_output_opt_t *opt ) { - mkv_hnd_t *p_mkv; - *p_handle = NULL; - - p_mkv = malloc( sizeof(*p_mkv) ); + mkv_hnd_t *p_mkv = calloc( 1, sizeof(mkv_hnd_t) ); if( !p_mkv ) return -1; - memset( p_mkv, 0, sizeof(*p_mkv) ); - p_mkv->w = mk_create_writer( psz_filename ); if( !p_mkv->w ) { @@ -84,6 +80,7 @@ p_mkv->width = p_mkv->d_width = p_param->i_width; p_mkv->height = p_mkv->d_height = p_param->i_height; p_mkv->display_size_units = DS_PIXELS; + p_mkv->stereo_mode = p_param->i_frame_packing; if( p_param->vui.i_sar_width && p_param->vui.i_sar_height && p_param->vui.i_sar_width != p_param->vui.i_sar_height ) @@ -152,7 +149,7 @@ ret = mk_write_header( p_mkv->w, "x264" X264_VERSION, "V_MPEG4/ISO/AVC", avcC, avcC_len, p_mkv->frame_duration, 50000, p_mkv->width, p_mkv->height, - p_mkv->d_width, p_mkv->d_height, p_mkv->display_size_units ); + p_mkv->d_width, p_mkv->d_height, p_mkv->display_size_units, p_mkv->stereo_mode ); if( ret < 0 ) return ret;
View file
x264-snapshot-20130723-2245.tar.bz2/output/matroska_ebml.c -> x264-snapshot-20140321-2245.tar.bz2/output/matroska_ebml.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * matroska_ebml.c: matroska muxer utilities ***************************************************************************** - * Copyright (C) 2005-2013 x264 project + * Copyright (C) 2005-2014 x264 project * * Authors: Mike Matsnev <mike@haali.su> * @@ -74,10 +74,9 @@ } else { - c = malloc( sizeof(*c) ); + c = calloc( 1, sizeof(mk_context) ); if( !c ) return NULL; - memset( c, 0, sizeof(*c) ); } c->parent = parent; @@ -291,12 +290,10 @@ mk_writer *mk_create_writer( const char *filename ) { - mk_writer *w = malloc( sizeof(*w) ); + mk_writer *w = calloc( 1, sizeof(mk_writer) ); if( !w ) return NULL; - memset( w, 0, sizeof(*w) ); - w->root = mk_create_context( w, NULL, 0 ); if( !w->root ) { @@ -307,7 +304,7 @@ if( !strcmp( filename, "-" ) ) w->fp = stdout; else - w->fp = fopen( filename, "wb" ); + w->fp = x264_fopen( filename, "wb" ); if( !w->fp ) { mk_destroy_contexts( w ); @@ -320,13 +317,15 @@ return w; } +static const uint8_t mk_stereo_modes[6] = {5,9,7,1,3,13}; + int mk_write_header( mk_writer *w, const char *writing_app, const char *codec_id, const void *codec_private, unsigned codec_private_size, int64_t default_frame_duration, int64_t timescale, unsigned width, unsigned height, - unsigned d_width, unsigned d_height, int display_size_units ) + unsigned d_width, unsigned d_height, int display_size_units, int stereo_mode ) { mk_context *c, *ti, *v; @@ -382,6 +381,8 @@ CHECK( mk_write_uint( v, 0x54b2, display_size_units ) ); CHECK( mk_write_uint( v, 0x54b0, d_width ) ); CHECK( mk_write_uint( v, 0x54ba, d_height ) ); + if( stereo_mode >= 0 && stereo_mode <= 5 ) + CHECK( mk_write_uint( v, 0x53b8, mk_stereo_modes[stereo_mode] ) ); CHECK( mk_close_context( v, 0 ) ); CHECK( mk_close_context( ti, 0 ) );
View file
x264-snapshot-20130723-2245.tar.bz2/output/matroska_ebml.h -> x264-snapshot-20140321-2245.tar.bz2/output/matroska_ebml.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * matroska_ebml.h: matroska muxer utilities ***************************************************************************** - * Copyright (C) 2005-2013 x264 project + * Copyright (C) 2005-2014 x264 project * * Authors: Mike Matsnev <mike@haali.su> * @@ -42,7 +42,7 @@ int64_t default_frame_duration, int64_t timescale, unsigned width, unsigned height, - unsigned d_width, unsigned d_height, int display_size_units ); + unsigned d_width, unsigned d_height, int display_size_units, int stereo_mode ); int mk_start_frame( mk_writer *w ); int mk_add_frame_data( mk_writer *w, const void *data, unsigned size );
View file
x264-snapshot-20130723-2245.tar.bz2/output/mp4.c -> x264-snapshot-20140321-2245.tar.bz2/output/mp4.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * mp4.c: mp4 muxer ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu> @@ -27,13 +27,8 @@ #include "output.h" #include <gpac/isomedia.h> -#if HAVE_GF_MALLOC -#undef malloc -#undef free -#undef realloc -#define malloc gf_malloc -#define free gf_free -#define realloc gf_realloc +#ifdef _WIN32 +#include <windows.h> #endif typedef struct @@ -170,20 +165,25 @@ static int open_file( char *psz_filename, hnd_t *p_handle, cli_output_opt_t *opt ) { - mp4_hnd_t *p_mp4; - *p_handle = NULL; - FILE *fh = fopen( psz_filename, "w" ); + FILE *fh = x264_fopen( psz_filename, "w" ); if( !fh ) return -1; FAIL_IF_ERR( !x264_is_regular_file( fh ), "mp4", "MP4 output is incompatible with non-regular file `%s'\n", psz_filename ) fclose( fh ); - if( !(p_mp4 = malloc( sizeof(mp4_hnd_t) )) ) + mp4_hnd_t *p_mp4 = calloc( 1, sizeof(mp4_hnd_t) ); + if( !p_mp4 ) return -1; - memset( p_mp4, 0, sizeof(mp4_hnd_t) ); +#ifdef _WIN32 + /* GPAC doesn't support Unicode filenames. */ + char ansi_filename[MAX_PATH]; + FAIL_IF_ERR( !x264_ansi_filename( psz_filename, ansi_filename, MAX_PATH, 1 ), "mp4", "invalid ansi filename\n" ) + p_mp4->p_file = gf_isom_open( ansi_filename, GF_ISOM_OPEN_WRITE, NULL ); +#else p_mp4->p_file = gf_isom_open( psz_filename, GF_ISOM_OPEN_WRITE, NULL ); +#endif p_mp4->b_dts_compress = opt->use_dts_compress;
View file
x264-snapshot-20140321-2245.tar.bz2/output/mp4_lsmash.c
Added
@@ -0,0 +1,419 @@ +/***************************************************************************** + * mp4_lsmash.c: mp4 muxer using L-SMASH + ***************************************************************************** + * Copyright (C) 2003-2014 x264 project + * + * Authors: Laurent Aimar <fenrir@via.ecp.fr> + * Loren Merritt <lorenm@u.washington.edu> + * Yusuke Nakamura <muken.the.vfrmaniac@gmail.com> + * Takashi Hirata <silverfilain@gmail.com> + * golgol7777 <golgol7777@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. + *****************************************************************************/ + +#include "output.h" +#include <lsmash.h> + +#define H264_NALU_LENGTH_SIZE 4 + +/*******************/ + +#define MP4_LOG_ERROR( ... ) x264_cli_log( "mp4", X264_LOG_ERROR, __VA_ARGS__ ) +#define MP4_LOG_WARNING( ... ) x264_cli_log( "mp4", X264_LOG_WARNING, __VA_ARGS__ ) +#define MP4_LOG_INFO( ... ) x264_cli_log( "mp4", X264_LOG_INFO, __VA_ARGS__ ) +#define MP4_FAIL_IF_ERR( cond, ... ) FAIL_IF_ERR( cond, "mp4", __VA_ARGS__ ) + +/* For close_file() */ +#define MP4_LOG_IF_ERR( cond, ... 
)\ +if( cond )\ +{\ + MP4_LOG_ERROR( __VA_ARGS__ );\ +} + +/* For open_file() */ +#define MP4_FAIL_IF_ERR_EX( cond, ... )\ +if( cond )\ +{\ + remove_mp4_hnd( p_mp4 );\ + MP4_LOG_ERROR( __VA_ARGS__ );\ + return -1;\ +} + +/*******************/ + +typedef struct +{ + lsmash_root_t *p_root; + lsmash_video_summary_t *summary; + int b_stdout; + uint32_t i_movie_timescale; + uint32_t i_video_timescale; + uint32_t i_track; + uint32_t i_sample_entry; + uint64_t i_time_inc; + int64_t i_start_offset; + uint64_t i_first_cts; + uint64_t i_prev_dts; + uint32_t i_sei_size; + uint8_t *p_sei_buffer; + int i_numframe; + int64_t i_init_delta; + int i_delay_frames; + int b_dts_compress; + int i_dts_compress_multiplier; + int b_use_recovery; + int b_fragments; +} mp4_hnd_t; + +/*******************/ + +static void remove_mp4_hnd( hnd_t handle ) +{ + mp4_hnd_t *p_mp4 = handle; + if( !p_mp4 ) + return; + if( p_mp4->p_sei_buffer ) + { + free( p_mp4->p_sei_buffer ); + p_mp4->p_sei_buffer = NULL; + } + if( p_mp4->p_root ) + { + lsmash_destroy_root( p_mp4->p_root ); + p_mp4->p_root = NULL; + } + free( p_mp4 ); +} + +/*******************/ + +static int close_file( hnd_t handle, int64_t largest_pts, int64_t second_largest_pts ) +{ + mp4_hnd_t *p_mp4 = handle; + + if( !p_mp4 ) + return 0; + + if( p_mp4->p_root ) + { + double actual_duration = 0; + if( p_mp4->i_track ) + { + /* Flush the rest of samples and add the last sample_delta. */ + uint32_t last_delta = largest_pts - second_largest_pts; + MP4_LOG_IF_ERR( lsmash_flush_pooled_samples( p_mp4->p_root, p_mp4->i_track, (last_delta ? 
last_delta : 1) * p_mp4->i_time_inc ), + "failed to flush the rest of samples.\n" ); + + if( p_mp4->i_movie_timescale != 0 && p_mp4->i_video_timescale != 0 ) /* avoid zero division */ + actual_duration = ((double)((largest_pts + last_delta) * p_mp4->i_time_inc) / p_mp4->i_video_timescale) * p_mp4->i_movie_timescale; + else + MP4_LOG_ERROR( "timescale is broken.\n" ); + + /* + * Declare the explicit time-line mapping. + * A segment_duration is given by movie timescale, while a media_time that is the start time of this segment + * is given by not the movie timescale but rather the media timescale. + * The reason is that ISO media have two time-lines, presentation and media time-line, + * and an edit maps the presentation time-line to the media time-line. + * According to QuickTime file format specification and the actual playback in QuickTime Player, + * if the Edit Box doesn't exist in the track, the ratio of the summation of sample durations and track's duration becomes + * the track's media_rate so that the entire media can be used by the track. + * So, we add Edit Box here to avoid this implicit media_rate could distort track's presentation timestamps slightly. + * Note: Any demuxers should follow the Edit List Box if it exists. 
+ */ + lsmash_edit_t edit; + edit.duration = actual_duration; + edit.start_time = p_mp4->i_first_cts; + edit.rate = ISOM_EDIT_MODE_NORMAL; + if( !p_mp4->b_fragments ) + { + MP4_LOG_IF_ERR( lsmash_create_explicit_timeline_map( p_mp4->p_root, p_mp4->i_track, edit ), + "failed to set timeline map for video.\n" ); + } + else if( !p_mp4->b_stdout ) + MP4_LOG_IF_ERR( lsmash_modify_explicit_timeline_map( p_mp4->p_root, p_mp4->i_track, 1, edit ), + "failed to update timeline map for video.\n" ); + } + + MP4_LOG_IF_ERR( lsmash_finish_movie( p_mp4->p_root, NULL ), "failed to finish movie.\n" ); + } + + remove_mp4_hnd( p_mp4 ); /* including lsmash_destroy_root( p_mp4->p_root ); */ + + return 0; +} + +static int open_file( char *psz_filename, hnd_t *p_handle, cli_output_opt_t *opt ) +{ + *p_handle = NULL; + + int b_regular = strcmp( psz_filename, "-" ); + b_regular = b_regular && x264_is_regular_file_path( psz_filename ); + if( b_regular ) + { + FILE *fh = x264_fopen( psz_filename, "wb" ); + MP4_FAIL_IF_ERR( !fh, "cannot open output file `%s'.\n", psz_filename ); + b_regular = x264_is_regular_file( fh ); + fclose( fh ); + } + + mp4_hnd_t *p_mp4 = calloc( 1, sizeof(mp4_hnd_t) ); + MP4_FAIL_IF_ERR( !p_mp4, "failed to allocate memory for muxer information.\n" ); + + p_mp4->b_dts_compress = opt->use_dts_compress; + p_mp4->b_use_recovery = 0; // we don't really support recovery + p_mp4->b_fragments = !b_regular; + p_mp4->b_stdout = !strcmp( psz_filename, "-" ); + + p_mp4->p_root = lsmash_open_movie( psz_filename, p_mp4->b_fragments ? 
LSMASH_FILE_MODE_WRITE_FRAGMENTED : LSMASH_FILE_MODE_WRITE ); + MP4_FAIL_IF_ERR_EX( !p_mp4->p_root, "failed to create root.\n" ); + + p_mp4->summary = (lsmash_video_summary_t *)lsmash_create_summary( LSMASH_SUMMARY_TYPE_VIDEO ); + MP4_FAIL_IF_ERR_EX( !p_mp4->summary, + "failed to allocate memory for summary information of video.\n" ); + p_mp4->summary->sample_type = ISOM_CODEC_TYPE_AVC1_VIDEO; + + *p_handle = p_mp4; + + return 0; +} + +static int set_param( hnd_t handle, x264_param_t *p_param ) +{ + mp4_hnd_t *p_mp4 = handle; + uint64_t i_media_timescale; + + p_mp4->i_delay_frames = p_param->i_bframe ? (p_param->i_bframe_pyramid ? 2 : 1) : 0; + p_mp4->i_dts_compress_multiplier = p_mp4->b_dts_compress * p_mp4->i_delay_frames + 1; + + i_media_timescale = (uint64_t)p_param->i_timebase_den * p_mp4->i_dts_compress_multiplier; + p_mp4->i_time_inc = (uint64_t)p_param->i_timebase_num * p_mp4->i_dts_compress_multiplier; + MP4_FAIL_IF_ERR( i_media_timescale > UINT32_MAX, "MP4 media timescale %"PRIu64" exceeds maximum\n", i_media_timescale ); + + /* Select brands. */ + lsmash_brand_type brands[6] = { 0 }; + uint32_t brand_count = 0; + brands[brand_count++] = ISOM_BRAND_TYPE_MP42; + brands[brand_count++] = ISOM_BRAND_TYPE_MP41; + brands[brand_count++] = ISOM_BRAND_TYPE_ISOM; + if( p_mp4->b_use_recovery ) + { + brands[brand_count++] = ISOM_BRAND_TYPE_AVC1; /* sdtp, sgpd, sbgp and visual roll recovery grouping */ + if( p_param->b_open_gop ) + brands[brand_count++] = ISOM_BRAND_TYPE_ISO6; /* cslg and visual random access grouping */ + } + + /* Set movie parameters. 
*/ + lsmash_movie_parameters_t movie_param; + lsmash_initialize_movie_parameters( &movie_param ); + movie_param.major_brand = ISOM_BRAND_TYPE_MP42; + movie_param.brands = brands; + movie_param.number_of_brands = brand_count; + MP4_FAIL_IF_ERR( lsmash_set_movie_parameters( p_mp4->p_root, &movie_param ), + "failed to set movie parameters.\n" ); + p_mp4->i_movie_timescale = lsmash_get_movie_timescale( p_mp4->p_root ); + MP4_FAIL_IF_ERR( !p_mp4->i_movie_timescale, "movie timescale is broken.\n" ); + + /* Create a video track. */ + p_mp4->i_track = lsmash_create_track( p_mp4->p_root, ISOM_MEDIA_HANDLER_TYPE_VIDEO_TRACK ); + MP4_FAIL_IF_ERR( !p_mp4->i_track, "failed to create a video track.\n" ); + + p_mp4->summary->width = p_param->i_width; + p_mp4->summary->height = p_param->i_height; + uint32_t i_display_width = p_param->i_width << 16; + uint32_t i_display_height = p_param->i_height << 16; + if( p_param->vui.i_sar_width && p_param->vui.i_sar_height ) + { + double sar = (double)p_param->vui.i_sar_width / p_param->vui.i_sar_height; + if( sar > 1.0 ) + i_display_width *= sar; + else + i_display_height /= sar; + p_mp4->summary->par_h = p_param->vui.i_sar_width; + p_mp4->summary->par_v = p_param->vui.i_sar_height; + } + p_mp4->summary->color.primaries_index = p_param->vui.i_colorprim; + p_mp4->summary->color.transfer_index = p_param->vui.i_transfer; + p_mp4->summary->color.matrix_index = p_param->vui.i_colmatrix >= 0 ? p_param->vui.i_colmatrix : ISOM_MATRIX_INDEX_UNSPECIFIED; + p_mp4->summary->color.full_range = p_param->vui.b_fullrange >= 0 ? p_param->vui.b_fullrange : 0; + + /* Set video track parameters. 
*/ + lsmash_track_parameters_t track_param; + lsmash_initialize_track_parameters( &track_param ); + lsmash_track_mode track_mode = ISOM_TRACK_ENABLED | ISOM_TRACK_IN_MOVIE | ISOM_TRACK_IN_PREVIEW; + track_param.mode = track_mode; + track_param.display_width = i_display_width; + track_param.display_height = i_display_height; + MP4_FAIL_IF_ERR( lsmash_set_track_parameters( p_mp4->p_root, p_mp4->i_track, &track_param ), + "failed to set track parameters for video.\n" ); + + /* Set video media parameters. */ + lsmash_media_parameters_t media_param; + lsmash_initialize_media_parameters( &media_param ); + media_param.timescale = i_media_timescale; + media_param.media_handler_name = "L-SMASH Video Media Handler"; + if( p_mp4->b_use_recovery ) + { + media_param.roll_grouping = p_param->b_intra_refresh; + media_param.rap_grouping = p_param->b_open_gop; + } + MP4_FAIL_IF_ERR( lsmash_set_media_parameters( p_mp4->p_root, p_mp4->i_track, &media_param ), + "failed to set media parameters for video.\n" ); + p_mp4->i_video_timescale = lsmash_get_media_timescale( p_mp4->p_root, p_mp4->i_track ); + MP4_FAIL_IF_ERR( !p_mp4->i_video_timescale, "media timescale for video is broken.\n" ); + + return 0; +} + +static int write_headers( hnd_t handle, x264_nal_t *p_nal ) +{ + mp4_hnd_t *p_mp4 = handle; + + uint32_t sps_size = p_nal[0].i_payload - H264_NALU_LENGTH_SIZE; + uint32_t pps_size = p_nal[1].i_payload - H264_NALU_LENGTH_SIZE; + uint32_t sei_size = p_nal[2].i_payload; + + uint8_t *sps = p_nal[0].p_payload + H264_NALU_LENGTH_SIZE; + uint8_t *pps = p_nal[1].p_payload + H264_NALU_LENGTH_SIZE; + uint8_t *sei = p_nal[2].p_payload; + + lsmash_codec_specific_t *cs = lsmash_create_codec_specific_data( LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_VIDEO_H264, + LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ); + + lsmash_h264_specific_parameters_t *param = (lsmash_h264_specific_parameters_t *)cs->data.structured; + param->lengthSizeMinusOne = H264_NALU_LENGTH_SIZE - 1; + + /* SPS + * The remaining parameters 
are automatically set by SPS. */ + if( lsmash_append_h264_parameter_set( param, H264_PARAMETER_SET_TYPE_SPS, sps, sps_size ) ) + { + MP4_LOG_ERROR( "failed to append SPS.\n" ); + return -1; + } + + /* PPS */ + if( lsmash_append_h264_parameter_set( param, H264_PARAMETER_SET_TYPE_PPS, pps, pps_size ) ) + { + MP4_LOG_ERROR( "failed to append PPS.\n" ); + return -1; + } + + if( lsmash_add_codec_specific_data( (lsmash_summary_t *)p_mp4->summary, cs ) ) + { + MP4_LOG_ERROR( "failed to add H.264 specific info.\n" ); + return -1; + } + + lsmash_destroy_codec_specific_data( cs ); + + /* Additional extensions */ + /* Bitrate info */ + cs = lsmash_create_codec_specific_data( LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_VIDEO_H264_BITRATE, + LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED ); + if( cs ) + lsmash_add_codec_specific_data( (lsmash_summary_t *)p_mp4->summary, cs ); + lsmash_destroy_codec_specific_data( cs ); + + p_mp4->i_sample_entry = lsmash_add_sample_entry( p_mp4->p_root, p_mp4->i_track, p_mp4->summary ); + MP4_FAIL_IF_ERR( !p_mp4->i_sample_entry, + "failed to add sample entry for video.\n" ); + + /* SEI */ + p_mp4->p_sei_buffer = malloc( sei_size ); + MP4_FAIL_IF_ERR( !p_mp4->p_sei_buffer, + "failed to allocate sei transition buffer.\n" ); + memcpy( p_mp4->p_sei_buffer, sei, sei_size ); + p_mp4->i_sei_size = sei_size; + + return sei_size + sps_size + pps_size; +} + +static int write_frame( hnd_t handle, uint8_t *p_nalu, int i_size, x264_picture_t *p_picture ) +{ + mp4_hnd_t *p_mp4 = handle; + uint64_t dts, cts; + + if( !p_mp4->i_numframe ) + { + p_mp4->i_start_offset = p_picture->i_dts * -1; + p_mp4->i_first_cts = p_mp4->b_dts_compress ? 0 : p_mp4->i_start_offset * p_mp4->i_time_inc; + if( p_mp4->b_fragments ) + { + lsmash_edit_t edit; + edit.duration = ISOM_EDIT_DURATION_UNKNOWN32; /* QuickTime doesn't support 64bit duration. 
*/ + edit.start_time = p_mp4->i_first_cts; + edit.rate = ISOM_EDIT_MODE_NORMAL; + MP4_LOG_IF_ERR( lsmash_create_explicit_timeline_map( p_mp4->p_root, p_mp4->i_track, edit ), + "failed to set timeline map for video.\n" ); + } + } + + lsmash_sample_t *p_sample = lsmash_create_sample( i_size + p_mp4->i_sei_size ); + MP4_FAIL_IF_ERR( !p_sample, + "failed to create a video sample data.\n" ); + + if( p_mp4->p_sei_buffer ) + { + memcpy( p_sample->data, p_mp4->p_sei_buffer, p_mp4->i_sei_size ); + free( p_mp4->p_sei_buffer ); + p_mp4->p_sei_buffer = NULL; + } + + memcpy( p_sample->data + p_mp4->i_sei_size, p_nalu, i_size ); + p_mp4->i_sei_size = 0; + + if( p_mp4->b_dts_compress ) + { + if( p_mp4->i_numframe == 1 ) + p_mp4->i_init_delta = (p_picture->i_dts + p_mp4->i_start_offset) * p_mp4->i_time_inc; + dts = p_mp4->i_numframe > p_mp4->i_delay_frames + ? p_picture->i_dts * p_mp4->i_time_inc + : p_mp4->i_numframe * (p_mp4->i_init_delta / p_mp4->i_dts_compress_multiplier); + cts = p_picture->i_pts * p_mp4->i_time_inc; + } + else + { + dts = (p_picture->i_dts + p_mp4->i_start_offset) * p_mp4->i_time_inc; + cts = (p_picture->i_pts + p_mp4->i_start_offset) * p_mp4->i_time_inc; + } + + p_sample->dts = dts; + p_sample->cts = cts; + p_sample->index = p_mp4->i_sample_entry; + p_sample->prop.ra_flags = p_picture->b_keyframe ? ISOM_SAMPLE_RANDOM_ACCESS_FLAG_SYNC : ISOM_SAMPLE_RANDOM_ACCESS_FLAG_NONE; + + if( p_mp4->b_fragments && p_mp4->i_numframe && p_sample->prop.ra_flags != ISOM_SAMPLE_RANDOM_ACCESS_FLAG_NONE ) + { + MP4_FAIL_IF_ERR( lsmash_flush_pooled_samples( p_mp4->p_root, p_mp4->i_track, p_sample->dts - p_mp4->i_prev_dts ), + "failed to flush the rest of samples.\n" ); + MP4_FAIL_IF_ERR( lsmash_create_fragment_movie( p_mp4->p_root ), + "failed to create a movie fragment.\n" ); + } + + /* Append data per sample. 
*/ + MP4_FAIL_IF_ERR( lsmash_append_sample( p_mp4->p_root, p_mp4->i_track, p_sample ), + "failed to append a video frame.\n" ); + + p_mp4->i_prev_dts = dts; + p_mp4->i_numframe++; + + return i_size; +} + +const cli_output_t mp4_output = { open_file, set_param, write_headers, write_frame, close_file };
View file
x264-snapshot-20130723-2245.tar.bz2/output/output.h -> x264-snapshot-20140321-2245.tar.bz2/output/output.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * output.h: x264 file output modules ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu>
View file
x264-snapshot-20130723-2245.tar.bz2/output/raw.c -> x264-snapshot-20140321-2245.tar.bz2/output/raw.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * raw.c: raw muxer ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu> @@ -30,7 +30,7 @@ { if( !strcmp( psz_filename, "-" ) ) *p_handle = stdout; - else if( !(*p_handle = fopen( psz_filename, "w+b" )) ) + else if( !(*p_handle = x264_fopen( psz_filename, "w+b" )) ) return -1; return 0;
View file
x264-snapshot-20130723-2245.tar.bz2/tools/checkasm-a.asm -> x264-snapshot-20140321-2245.tar.bz2/tools/checkasm-a.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* checkasm-a.asm: assembly check tool ;***************************************************************************** -;* Copyright (C) 2008-2013 x264 project +;* Copyright (C) 2008-2014 x264 project ;* ;* Authors: Loren Merritt <lorenm@u.washington.edu> ;* Henrik Gramner <henrik@gramner.com>
View file
x264-snapshot-20130723-2245.tar.bz2/tools/checkasm.c -> x264-snapshot-20140321-2245.tar.bz2/tools/checkasm.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * checkasm.c: assembly check tool ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Laurent Aimar <fenrir@via.ecp.fr> @@ -191,7 +191,6 @@ b->cpu&X264_CPU_SLOW_ATOM && b->cpu&X264_CPU_CACHELINE_64 ? "_c64_atom" : b->cpu&X264_CPU_CACHELINE_64 ? "_c64" : b->cpu&X264_CPU_SLOW_SHUFFLE ? "_slowshuffle" : - b->cpu&X264_CPU_SSE_MISALIGN ? "_misalign" : b->cpu&X264_CPU_LZCNT ? "_lzcnt" : b->cpu&X264_CPU_BMI2 ? "_bmi2" : b->cpu&X264_CPU_BMI1 ? "_bmi1" : @@ -201,7 +200,7 @@ b->cpu&X264_CPU_FAST_NEON_MRC ? "_fast_mrc" : #endif "", - ((int64_t)10*b->cycles/b->den - nop_time)/4 ); + (int64_t)(10*b->cycles/b->den - nop_time)/4 ); } } @@ -407,7 +406,7 @@ } \ else \ call_a( pixel_asm.sad_x3[i], pbuf1, pix2, pix2+6, pix2+1, (intptr_t)64, res_asm ); \ - if( memcmp(res_c, res_asm, sizeof(res_c)) ) \ + if( memcmp(res_c, res_asm, N*sizeof(int)) ) \ { \ ok = 0; \ fprintf( stderr, "sad_x"#N"[%d]: %d,%d,%d,%d != %d,%d,%d,%d [FAILED]\n", \ @@ -1452,8 +1451,66 @@ } } } + + if( mc_a.plane_copy_deinterleave_rgb != mc_ref.plane_copy_deinterleave_rgb ) + { + set_func_name( "plane_copy_deinterleave_rgb" ); + used_asm = 1; + for( int i = 0; i < sizeof(plane_specs)/sizeof(*plane_specs); i++ ) + { + int w = (plane_specs[i].w + 2) >> 2; + int h = plane_specs[i].h; + intptr_t src_stride = plane_specs[i].src_stride; + intptr_t dst_stride = ALIGN( w, 16 ); + intptr_t offv = dst_stride*h + 16; + + for( int pw = 3; pw <= 4; pw++ ) + { + memset( pbuf3, 0, 0x1000 ); + memset( pbuf4, 0, 0x1000 ); + call_c( mc_c.plane_copy_deinterleave_rgb, pbuf3, dst_stride, pbuf3+offv, dst_stride, pbuf3+2*offv, dst_stride, pbuf1, src_stride, pw, w, h ); + call_a( mc_a.plane_copy_deinterleave_rgb, pbuf4, dst_stride, pbuf4+offv, dst_stride, pbuf4+2*offv, dst_stride, 
pbuf1, src_stride, pw, w, h ); + for( int y = 0; y < h; y++ ) + if( memcmp( pbuf3+y*dst_stride+0*offv, pbuf4+y*dst_stride+0*offv, w ) || + memcmp( pbuf3+y*dst_stride+1*offv, pbuf4+y*dst_stride+1*offv, w ) || + memcmp( pbuf3+y*dst_stride+2*offv, pbuf4+y*dst_stride+2*offv, w ) ) + { + ok = 0; + fprintf( stderr, "plane_copy_deinterleave_rgb FAILED: w=%d h=%d stride=%d pw=%d\n", w, h, (int)src_stride, pw ); + break; + } + } + } + } report( "plane_copy :" ); + if( mc_a.plane_copy_deinterleave_v210 != mc_ref.plane_copy_deinterleave_v210 ) + { + set_func_name( "plane_copy_deinterleave_v210" ); + used_asm = 1; + for( int i = 0; i < sizeof(plane_specs)/sizeof(*plane_specs); i++ ) + { + int w = (plane_specs[i].w + 1) >> 1; + int h = plane_specs[i].h; + intptr_t dst_stride = ALIGN( w, 16 ); + intptr_t src_stride = (w + 47) / 48 * 128 / sizeof(uint32_t); + intptr_t offv = dst_stride*h + 32; + memset( pbuf3, 0, 0x1000 ); + memset( pbuf4, 0, 0x1000 ); + call_c( mc_c.plane_copy_deinterleave_v210, pbuf3, dst_stride, pbuf3+offv, dst_stride, (uint32_t *)buf1, src_stride, w, h ); + call_a( mc_a.plane_copy_deinterleave_v210, pbuf4, dst_stride, pbuf4+offv, dst_stride, (uint32_t *)buf1, src_stride, w, h ); + for( int y = 0; y < h; y++ ) + if( memcmp( pbuf3+y*dst_stride, pbuf4+y*dst_stride, w*sizeof(uint16_t) ) || + memcmp( pbuf3+y*dst_stride+offv, pbuf4+y*dst_stride+offv, w*sizeof(uint16_t) ) ) + { + ok = 0; + fprintf( stderr, "plane_copy_deinterleave_v210 FAILED: w=%d h=%d stride=%d\n", w, h, (int)src_stride ); + break; + } + } + } + report( "v210 :" ); + if( mc_a.hpel_filter != mc_ref.hpel_filter ) { pixel *srchpel = pbuf1+8+2*64; @@ -1541,16 +1598,17 @@ INTEGRAL_INIT( integral_init8v, 9, sum, stride ); report( "integral init :" ); + ok = 1; used_asm = 0; if( mc_a.mbtree_propagate_cost != mc_ref.mbtree_propagate_cost ) { - ok = 1; used_asm = 1; + used_asm = 1; x264_emms(); for( int i = 0; i < 10; i++ ) { - float fps_factor = (rand()&65535) / 256.; - set_func_name( "mbtree_propagate" 
); - int *dsta = (int*)buf3; - int *dstc = dsta+400; + float fps_factor = (rand()&65535) / 65535.0f; + set_func_name( "mbtree_propagate_cost" ); + int16_t *dsta = (int16_t*)buf3; + int16_t *dstc = dsta+400; uint16_t *prop = (uint16_t*)buf1; uint16_t *intra = (uint16_t*)buf4; uint16_t *inter = intra+128; @@ -1572,12 +1630,60 @@ { ok &= abs( dstc[j]-dsta[j] ) <= 1 || fabs( (double)dstc[j]/dsta[j]-1 ) < 1e-4; if( !ok ) - fprintf( stderr, "mbtree_propagate FAILED: %f !~= %f\n", (double)dstc[j], (double)dsta[j] ); + fprintf( stderr, "mbtree_propagate_cost FAILED: %f !~= %f\n", (double)dstc[j], (double)dsta[j] ); } } - report( "mbtree propagate :" ); } + if( mc_a.mbtree_propagate_list != mc_ref.mbtree_propagate_list ) + { + used_asm = 1; + for( int i = 0; i < 8; i++ ) + { + set_func_name( "mbtree_propagate_list" ); + x264_t h; + int height = 4; + int width = 128; + int size = width*height; + h.mb.i_mb_stride = width; + h.mb.i_mb_width = width; + h.mb.i_mb_height = height; + + uint16_t *ref_costsc = (uint16_t*)buf3; + uint16_t *ref_costsa = (uint16_t*)buf4; + int16_t (*mvs)[2] = (int16_t(*)[2])(ref_costsc + size); + int16_t *propagate_amount = (int16_t*)(mvs + width); + uint16_t *lowres_costs = (uint16_t*)(propagate_amount + width); + h.scratch_buffer2 = (uint8_t*)(ref_costsa + size); + int bipred_weight = (rand()%63)+1; + int list = i&1; + for( int j = 0; j < size; j++ ) + ref_costsc[j] = ref_costsa[j] = rand()&32767; + for( int j = 0; j < width; j++ ) + { + static const uint8_t list_dist[2][8] = {{0,1,1,1,1,1,1,1},{1,1,3,3,3,3,3,2}}; + for( int k = 0; k < 2; k++ ) + mvs[j][k] = (rand()&127) - 64; + propagate_amount[j] = rand()&32767; + lowres_costs[j] = list_dist[list][rand()&7] << LOWRES_COST_SHIFT; + } + + call_c1( mc_c.mbtree_propagate_list, &h, ref_costsc, mvs, propagate_amount, lowres_costs, bipred_weight, 0, width, list ); + call_a1( mc_a.mbtree_propagate_list, &h, ref_costsa, mvs, propagate_amount, lowres_costs, bipred_weight, 0, width, list ); + + for( int j = 
0; j < size && ok; j++ ) + { + ok &= abs(ref_costsa[j] - ref_costsc[j]) <= 1; + if( !ok ) + fprintf( stderr, "mbtree_propagate_list FAILED at %d: %d !~= %d\n", j, ref_costsc[j], ref_costsa[j] ); + } + + call_c2( mc_c.mbtree_propagate_list, &h, ref_costsc, mvs, propagate_amount, lowres_costs, bipred_weight, 0, width, list ); + call_a2( mc_a.mbtree_propagate_list, &h, ref_costsa, mvs, propagate_amount, lowres_costs, bipred_weight, 0, width, list ); + } + } + report( "mbtree :" ); + if( mc_a.memcpy_aligned != mc_ref.memcpy_aligned ) { set_func_name( "memcpy_aligned" ); @@ -1779,7 +1885,7 @@ } h->param.rc.i_qp_min = 0; - h->param.rc.i_qp_max = QP_MAX; + h->param.rc.i_qp_max = QP_MAX_SPEC; x264_cqm_init( h ); x264_quant_init( h, 0, &qf_c ); x264_quant_init( h, cpu_ref, &qf_ref ); @@ -2504,7 +2610,7 @@ { *cpu_ref = *cpu_new; *cpu_new |= flags; -#if BROKEN_STACK_ALIGNMENT +#if STACK_ALIGNMENT < 16 *cpu_new |= X264_CPU_STACK_MOD4; #endif if( *cpu_new & X264_CPU_SSE2_IS_FAST ) @@ -2549,11 +2655,6 @@ ret |= add_flags( &cpu0, &cpu1, X264_CPU_SLOW_CTZ, "SSE2 SlowCTZ" ); cpu1 &= ~X264_CPU_SLOW_CTZ; } - if( x264_cpu_detect() & X264_CPU_SSE_MISALIGN ) - { - ret |= add_flags( &cpu0, &cpu1, X264_CPU_SSE_MISALIGN, "SSE_Misalign" ); - cpu1 &= ~X264_CPU_SSE_MISALIGN; - } if( x264_cpu_detect() & X264_CPU_LZCNT ) { ret |= add_flags( &cpu0, &cpu1, X264_CPU_LZCNT, "SSE_LZCNT" );
View file
x264-snapshot-20130723-2245.tar.bz2/tools/cltostr.pl -> x264-snapshot-20140321-2245.tar.bz2/tools/cltostr.pl
Changed
@@ -1,6 +1,6 @@ # Perl script used for compiling OpenCL src into x264 binary # -# Copyright (C) 2013 x264 project +# Copyright (C) 2013-2014 x264 project # Authors: Steve Borho <sborho@multicorewareinc.com> use Digest::MD5 qw(md5_hex);
View file
x264-snapshot-20130723-2245.tar.bz2/version.sh -> x264-snapshot-20140321-2245.tar.bz2/version.sh
Changed
@@ -1,5 +1,8 @@ -#!/bin/bash +#!/bin/sh [ -n "$1" ] && cd $1 + +git_version() { +trap 'rm -f config.git-hash' EXIT git rev-list HEAD | sort > config.git-hash LOCALVER=`wc -l config.git-hash | awk '{print $1}'` if [ $LOCALVER \> 1 ] ; then @@ -14,11 +17,13 @@ VER="${VER}M" fi VER="$VER $(git rev-list HEAD -n 1 | cut -c 1-7)" - echo "#define X264_VERSION \" r$VER\"" -else - echo "#define X264_VERSION \"\"" - VER="x" + VERSION=" r$VER" fi -rm -f config.git-hash +} + +VER="x" +VERSION="" +[ -d .git ] && (type git >/dev/null 2>&1) && git_version +echo "#define X264_VERSION \"$VERSION\"" API=`grep '#define X264_BUILD' < x264.h | sed -e 's/.* \([1-9][0-9]*\).*/\1/'` echo "#define X264_POINTVER \"0.$API.$VER\""
View file
x264-snapshot-20130723-2245.tar.bz2/x264.c -> x264-snapshot-20140321-2245.tar.bz2/x264.c
Changed
@@ -1,13 +1,14 @@ /***************************************************************************** * x264: top-level x264cli functions ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Laurent Aimar <fenrir@via.ecp.fr> * Steven Walters <kemuri9@gmail.com> * Jason Garrett-Glaser <darkshikari@gmail.com> * Kieran Kunhya <kieran@kunhya.com> + * Henrik Gramner <henrik@gramner.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -27,6 +28,15 @@ * For more information, contact us at licensing@x264.com. *****************************************************************************/ +#ifdef _WIN32 +/* The following two defines must be located before the inclusion of any system header files. */ +#define WINVER 0x0500 +#define _WIN32_WINNT 0x0500 +#include <windows.h> +#include <io.h> /* _setmode() */ +#include <fcntl.h> /* _O_BINARY */ +#endif + #include <signal.h> #define _GNU_SOURCE #include <getopt.h> @@ -38,13 +48,6 @@ #define FAIL_IF_ERROR( cond, ... 
) FAIL_IF_ERR( cond, "x264", __VA_ARGS__ ) -#ifdef _WIN32 -#include <windows.h> -#else -#define GetConsoleTitle(t,n) -#define SetConsoleTitle(t) -#endif - #if HAVE_LAVF #undef DECLARE_ALIGNED #include <libavformat/avformat.h> @@ -61,18 +64,89 @@ #include <ffms.h> #endif +#ifdef _WIN32 +#define CONSOLE_TITLE_SIZE 200 +static wchar_t org_console_title[CONSOLE_TITLE_SIZE] = L""; + +void x264_cli_set_console_title( const char *title ) +{ + wchar_t title_utf16[CONSOLE_TITLE_SIZE]; + if( utf8_to_utf16( title, title_utf16 ) ) + SetConsoleTitleW( title_utf16 ); +} + +static int utf16_to_ansi( const wchar_t *utf16, char *ansi, int size ) +{ + int invalid; + return WideCharToMultiByte( CP_ACP, WC_NO_BEST_FIT_CHARS, utf16, -1, ansi, size, NULL, &invalid ) && !invalid; +} + +/* Some external libraries doesn't support Unicode in filenames, + * as a workaround we can try to get an ANSI filename instead. */ +int x264_ansi_filename( const char *filename, char *ansi_filename, int size, int create_file ) +{ + wchar_t filename_utf16[MAX_PATH]; + if( utf8_to_utf16( filename, filename_utf16 ) ) + { + if( create_file ) + { + /* Create the file using the Unicode filename if it doesn't already exist. */ + FILE *fh = _wfopen( filename_utf16, L"ab" ); + if( fh ) + fclose( fh ); + } + + /* Check if the filename already is valid ANSI. */ + if( utf16_to_ansi( filename_utf16, ansi_filename, size ) ) + return 1; + + /* Check for a legacy 8.3 short filename. */ + int short_length = GetShortPathNameW( filename_utf16, filename_utf16, MAX_PATH ); + if( short_length > 0 && short_length < MAX_PATH ) + if( utf16_to_ansi( filename_utf16, ansi_filename, size ) ) + return 1; + } + return 0; +} + +/* Retrieve command line arguments as UTF-8. 
*/ +static int get_argv_utf8( int *argc_ptr, char ***argv_ptr ) +{ + int ret = 0; + wchar_t **argv_utf16 = CommandLineToArgvW( GetCommandLineW(), argc_ptr ); + if( argv_utf16 ) + { + int argc = *argc_ptr; + int offset = (argc+1) * sizeof(char*); + int size = offset; + + for( int i = 0; i < argc; i++ ) + size += WideCharToMultiByte( CP_UTF8, 0, argv_utf16[i], -1, NULL, 0, NULL, NULL ); + + char **argv = *argv_ptr = malloc( size ); + if( argv ) + { + for( int i = 0; i < argc; i++ ) + { + argv[i] = (char*)argv + offset; + offset += WideCharToMultiByte( CP_UTF8, 0, argv_utf16[i], -1, argv[i], size-offset, NULL, NULL ); + } + argv[argc] = NULL; + ret = 1; + } + LocalFree( argv_utf16 ); + } + return ret; +} +#endif + /* Ctrl-C handler */ static volatile int b_ctrl_c = 0; -static int b_exit_on_ctrl_c = 0; static void sigint_handler( int a ) { - if( b_exit_on_ctrl_c ) - exit(0); b_ctrl_c = 1; } -static char UNUSED originalCTitle[200] = ""; - typedef struct { int b_progress; int i_seek; @@ -114,7 +188,7 @@ "raw", "mkv", "flv", -#if HAVE_GPAC +#if HAVE_GPAC || HAVE_LSMASH "mp4", #endif 0 @@ -211,7 +285,7 @@ fprintf( stderr, "%s [%s]: ", name, s_level ); va_list arg; va_start( arg, fmt ); - vfprintf( stderr, fmt, arg ); + x264_vfprintf( stderr, fmt, arg ); va_end( arg ); } @@ -221,7 +295,7 @@ return; va_list arg; va_start( arg, fmt ); - vfprintf( stderr, fmt, arg ); + x264_vfprintf( stderr, fmt, arg ); va_end( arg ); } @@ -275,18 +349,22 @@ FAIL_IF_ERROR( x264_threading_init(), "unable to initialize threading\n" ) #ifdef _WIN32 - _setmode(_fileno(stdin), _O_BINARY); - _setmode(_fileno(stdout), _O_BINARY); -#endif + FAIL_IF_ERROR( !get_argv_utf8( &argc, &argv ), "unable to convert command line to UTF-8\n" ) - GetConsoleTitle( originalCTitle, sizeof(originalCTitle) ); + GetConsoleTitleW( org_console_title, CONSOLE_TITLE_SIZE ); + _setmode( _fileno( stdin ), _O_BINARY ); + _setmode( _fileno( stdout ), _O_BINARY ); + _setmode( _fileno( stderr ), _O_BINARY ); +#endif /* Parse 
command line */ if( parse( argc, argv, &param, &opt ) < 0 ) ret = -1; +#ifdef _WIN32 /* Restore title; it can be changed by input modules */ - SetConsoleTitle( originalCTitle ); + SetConsoleTitleW( org_console_title ); +#endif /* Control-C handler */ signal( SIGINT, sigint_handler ); @@ -306,7 +384,10 @@ if( opt.qpfile ) fclose( opt.qpfile ); - SetConsoleTitle( originalCTitle ); +#ifdef _WIN32 + SetConsoleTitleW( org_console_title ); + free( argv ); +#endif return ret; } @@ -339,16 +420,19 @@ printf( INDENT ); for( int i = X264_CSP_NONE+1; i < X264_CSP_CLI_MAX; i++ ) { - printf( "%s", x264_cli_csps[i].name ); - if( i+1 < X264_CSP_CLI_MAX ) - printf( ", " ); + if( x264_cli_csps[i].name ) + { + printf( "%s", x264_cli_csps[i].name ); + if( i+1 < X264_CSP_CLI_MAX ) + printf( ", " ); + } } #if HAVE_LAVF printf( "\n" ); printf( " - valid csps for `lavf' demuxer:\n" ); printf( INDENT ); size_t line_len = strlen( INDENT ); - for( enum PixelFormat i = PIX_FMT_NONE+1; i < PIX_FMT_NB; i++ ) + for( enum PixelFormat i = AV_PIX_FMT_NONE+1; i < AV_PIX_FMT_NB; i++ ) { const char *pfname = av_get_pix_fmt_name( i ); if( pfname ) @@ -361,7 +445,7 @@ } printf( "%s", pfname ); line_len += name_len; - if( i+1 < PIX_FMT_NB ) + if( i+1 < AV_PIX_FMT_NB ) { printf( ", " ); line_len += 2; @@ -389,7 +473,7 @@ " .264 -> Raw bytestream\n" " .mkv -> Matroska\n" " .flv -> Flash Video\n" - " .mp4 -> MP4 if compiled with GPAC support (%s)\n" + " .mp4 -> MP4 if compiled with GPAC or L-SMASH support (%s)\n" "Output bit depth: %d (configured at compile time)\n" "\n" "Options:\n" @@ -415,7 +499,9 @@ "no", #endif #if HAVE_GPAC - "yes", + "gpac", +#elif HAVE_LSMASH + "lsmash", #else "no", #endif @@ -764,6 +850,8 @@ H2( " --nal-hrd <string> Signal HRD information (requires vbv-bufsize)\n" " - none, vbr, cbr (cbr not allowed in .mp4)\n" ); + H2( " --filler Force hard-CBR and generate filler (implied by\n" + " --nal-hrd cbr)\n" ); H2( " --pic-struct Force pic_struct in Picture Timing SEI\n" ); H2( " --crop-rect 
<string> Add 'left,top,right,bottom' to the bitstream-level\n" " cropping rectangle\n" ); @@ -792,6 +880,8 @@ H0( " --frames <integer> Maximum number of frames to encode\n" ); H0( " --level <string> Specify level (as defined by Annex A)\n" ); H1( " --bluray-compat Enable compatibility hacks for Blu-ray support\n" ); + H1( " --avcintra-class <integer> Use compatibility hacks for AVC-Intra class\n" + " - 50, 100, 200\n" ); H1( " --stitchable Don't optimize headers based on video content\n" " Ensures ability to recombine a segmented encode\n" ); H1( "\n" ); @@ -815,8 +905,7 @@ H2( " --no-asm Disable all CPU optimizations\n" ); H2( " --opencl Enable use of OpenCL\n" ); H2( " --opencl-clbin <string> Specify path of compiled OpenCL kernel cache\n" ); - H2( " --opencl-device <integer> Specify OpenCL device ordinal\n" ); - H2( " --visualize Show MB types overlayed on the encoded video\n" ); + H2( " --opencl-device <integer> Specify OpenCL device ordinal\n" ); H2( " --dump-yuv <string> Save reconstructed frames\n" ); H2( " --sps-id <integer> Set SPS and PPS id numbers [%d]\n", defaults->i_sps_id ); H2( " --aud Use access unit delimiters\n" ); @@ -848,7 +937,6 @@ OPT_THREAD_INPUT, OPT_QUIET, OPT_NOPROGRESS, - OPT_VISUALIZE, OPT_LONGHELP, OPT_PROFILE, OPT_PRESET, @@ -895,6 +983,7 @@ { "b-pyramid", required_argument, NULL, 0 }, { "open-gop", no_argument, NULL, 0 }, { "bluray-compat", no_argument, NULL, 0 }, + { "avcintra-class", required_argument, NULL, 0 }, { "min-keyint", required_argument, NULL, 'i' }, { "keyint", required_argument, NULL, 'I' }, { "intra-refresh", no_argument, NULL, 0 }, @@ -997,7 +1086,6 @@ { "verbose", no_argument, NULL, 'v' }, { "log-level", required_argument, NULL, OPT_LOG_LEVEL }, { "no-progress", no_argument, NULL, OPT_NOPROGRESS }, - { "visualize", no_argument, NULL, OPT_VISUALIZE }, { "dump-yuv", required_argument, NULL, 0 }, { "sps-id", required_argument, NULL, 0 }, { "aud", no_argument, NULL, 0 }, @@ -1041,6 +1129,7 @@ { "output-csp", 
required_argument, NULL, OPT_OUTPUT_CSP }, { "input-range", required_argument, NULL, OPT_INPUT_RANGE }, { "stitchable", no_argument, NULL, 0 }, + { "filler", no_argument, NULL, 0 }, {0, 0, 0, 0} }; @@ -1052,7 +1141,7 @@ if( !strcasecmp( ext, "mp4" ) ) { -#if HAVE_GPAC +#if HAVE_GPAC || HAVE_LSMASH cli_output = mp4_output; param->b_annexb = 0; param->b_repeat_headers = 0; @@ -1094,7 +1183,7 @@ b_regular = b_regular && x264_is_regular_file_path( filename ); if( b_regular ) { - FILE *f = fopen( filename, "r" ); + FILE *f = x264_fopen( filename, "r" ); if( f ) { b_regular = x264_is_regular_file( f ); @@ -1197,7 +1286,7 @@ int csp = info->csp & X264_CSP_MASK; if( output_csp == X264_CSP_I420 && (csp < X264_CSP_I420 || csp > X264_CSP_NV12) ) param->i_csp = X264_CSP_I420; - else if( output_csp == X264_CSP_I422 && (csp < X264_CSP_I422 || csp > X264_CSP_NV16) ) + else if( output_csp == X264_CSP_I422 && (csp < X264_CSP_I422 || csp > X264_CSP_V210) ) param->i_csp = X264_CSP_I422; else if( output_csp == X264_CSP_I444 && (csp < X264_CSP_I444 || csp > X264_CSP_YV24) ) param->i_csp = X264_CSP_I444; @@ -1338,7 +1427,7 @@ input_opt.index_file = optarg; break; case OPT_QPFILE: - opt->qpfile = fopen( optarg, "rb" ); + opt->qpfile = x264_fopen( optarg, "rb" ); FAIL_IF_ERROR( !opt->qpfile, "can't open qpfile `%s'\n", optarg ) if( !x264_is_regular_file( opt->qpfile ) ) { @@ -1366,14 +1455,6 @@ case OPT_NOPROGRESS: opt->b_progress = 0; break; - case OPT_VISUALIZE: -#if HAVE_VISUALIZE - param->b_visualize = 1; - b_exit_on_ctrl_c = 1; -#else - x264_cli_log( "x264", X264_LOG_WARNING, "not compiled with visualization support\n" ); -#endif - break; case OPT_TUNE: case OPT_PRESET: break; @@ -1397,7 +1478,7 @@ tcfile_name = optarg; break; case OPT_TCFILE_OUT: - opt->tcfile_out = fopen( optarg, "wb" ); + opt->tcfile_out = x264_fopen( optarg, "wb" ); FAIL_IF_ERROR( !opt->tcfile_out, "can't open `%s'\n", optarg ) break; case OPT_TIMEBASE: @@ -1498,8 +1579,11 @@ info.fps_den = param->i_fps_den; 
info.fullrange = input_opt.input_range == RANGE_PC; info.interlaced = param->b_interlaced; - info.sar_width = param->vui.i_sar_width; - info.sar_height = param->vui.i_sar_height; + if( param->vui.i_sar_width > 0 && param->vui.i_sar_height > 0 ) + { + info.sar_width = param->vui.i_sar_width; + info.sar_height = param->vui.i_sar_height; + } info.tff = param->b_tff; info.vfr = param->b_vfr_input; @@ -1542,7 +1626,7 @@ #endif /* override detected values by those specified by the user */ - if( param->vui.i_sar_width && param->vui.i_sar_height ) + if( param->vui.i_sar_width > 0 && param->vui.i_sar_height > 0 ) { info.sar_width = param->vui.i_sar_width; info.sar_height = param->vui.i_sar_height; @@ -1721,11 +1805,9 @@ eta/3600, (eta/60)%60, eta%60 ); } else - { sprintf( buf, "x264 %d frames: %.2f fps, %.2f kb/s", i_frame, fps, bitrate ); - } fprintf( stderr, "%s \r", buf+5 ); - SetConsoleTitle( buf ); + x264_cli_set_console_title( buf ); fflush( stderr ); // needed in windows return i_time; }
View file
x264-snapshot-20130723-2245.tar.bz2/x264.h -> x264-snapshot-20140321-2245.tar.bz2/x264.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * x264.h: x264 public header ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu> @@ -41,7 +41,7 @@ #include "x264_config.h" -#define X264_BUILD 135 +#define X264_BUILD 142 /* Application developers planning to link against a shared library version of * libx264 from a Microsoft Visual Studio or similar development environment @@ -98,12 +98,15 @@ int i_first_mb; /* If this NAL is a slice, the index of the first MB in the slice. */ int i_last_mb; /* If this NAL is a slice, the index of the last MB in the slice. */ - /* Size of payload in bytes. */ + /* Size of payload (including any padding) in bytes. */ int i_payload; /* If param->b_annexb is set, Annex-B bytestream with startcode. * Otherwise, startcode is replaced with a 4-byte size. * This size is the size used in mp4/similar muxing; it is equal to i_payload-4 */ uint8_t *p_payload; + + /* Size of padding in bytes. */ + int i_padding; } x264_nal_t; /**************************************************************************** @@ -122,30 +125,29 @@ #define X264_CPU_SSSE3 0x0000040 #define X264_CPU_SSE4 0x0000080 /* SSE4.1 */ #define X264_CPU_SSE42 0x0000100 /* SSE4.2 */ -#define X264_CPU_SSE_MISALIGN 0x0000200 /* Phenom support for misaligned SSE instruction arguments */ -#define X264_CPU_LZCNT 0x0000400 /* Phenom support for "leading zero count" instruction. */ -#define X264_CPU_AVX 0x0000800 /* AVX support: requires OS support even if YMM registers aren't used. 
*/ -#define X264_CPU_XOP 0x0001000 /* AMD XOP */ -#define X264_CPU_FMA4 0x0002000 /* AMD FMA4 */ -#define X264_CPU_AVX2 0x0004000 /* AVX2 */ -#define X264_CPU_FMA3 0x0008000 /* Intel FMA3 */ -#define X264_CPU_BMI1 0x0010000 /* BMI1 */ -#define X264_CPU_BMI2 0x0020000 /* BMI2 */ +#define X264_CPU_LZCNT 0x0000200 /* Phenom support for "leading zero count" instruction. */ +#define X264_CPU_AVX 0x0000400 /* AVX support: requires OS support even if YMM registers aren't used. */ +#define X264_CPU_XOP 0x0000800 /* AMD XOP */ +#define X264_CPU_FMA4 0x0001000 /* AMD FMA4 */ +#define X264_CPU_AVX2 0x0002000 /* AVX2 */ +#define X264_CPU_FMA3 0x0004000 /* Intel FMA3 */ +#define X264_CPU_BMI1 0x0008000 /* BMI1 */ +#define X264_CPU_BMI2 0x0010000 /* BMI2 */ /* x86 modifiers */ -#define X264_CPU_CACHELINE_32 0x0040000 /* avoid memory loads that span the border between two cachelines */ -#define X264_CPU_CACHELINE_64 0x0080000 /* 32/64 is the size of a cacheline in bytes */ -#define X264_CPU_SSE2_IS_SLOW 0x0100000 /* avoid most SSE2 functions on Athlon64 */ -#define X264_CPU_SSE2_IS_FAST 0x0200000 /* a few functions are only faster on Core2 and Phenom */ -#define X264_CPU_SLOW_SHUFFLE 0x0400000 /* The Conroe has a slow shuffle unit (relative to overall SSE performance) */ -#define X264_CPU_STACK_MOD4 0x0800000 /* if stack is only mod4 and not mod16 */ -#define X264_CPU_SLOW_CTZ 0x1000000 /* BSR/BSF x86 instructions are really slow on some CPUs */ -#define X264_CPU_SLOW_ATOM 0x2000000 /* The Atom is terrible: slow SSE unaligned loads, slow +#define X264_CPU_CACHELINE_32 0x0020000 /* avoid memory loads that span the border between two cachelines */ +#define X264_CPU_CACHELINE_64 0x0040000 /* 32/64 is the size of a cacheline in bytes */ +#define X264_CPU_SSE2_IS_SLOW 0x0080000 /* avoid most SSE2 functions on Athlon64 */ +#define X264_CPU_SSE2_IS_FAST 0x0100000 /* a few functions are only faster on Core2 and Phenom */ +#define X264_CPU_SLOW_SHUFFLE 0x0200000 /* The Conroe has a slow 
shuffle unit (relative to overall SSE performance) */ +#define X264_CPU_STACK_MOD4 0x0400000 /* if stack is only mod4 and not mod16 */ +#define X264_CPU_SLOW_CTZ 0x0800000 /* BSR/BSF x86 instructions are really slow on some CPUs */ +#define X264_CPU_SLOW_ATOM 0x1000000 /* The Atom is terrible: slow SSE unaligned loads, slow * SIMD multiplies, slow SIMD variable shifts, slow pshufb, * cacheline split penalties -- gather everything here that * isn't shared by other CPUs to avoid making half a dozen * new SLOW flags. */ -#define X264_CPU_SLOW_PSHUFB 0x4000000 /* such as on the Intel Atom */ -#define X264_CPU_SLOW_PALIGNR 0x8000000 /* such as on the AMD Bobcat */ +#define X264_CPU_SLOW_PSHUFB 0x2000000 /* such as on the Intel Atom */ +#define X264_CPU_SLOW_PALIGNR 0x4000000 /* such as on the AMD Bobcat */ /* PowerPC */ #define X264_CPU_ALTIVEC 0x0000001 @@ -213,12 +215,13 @@ #define X264_CSP_I422 0x0004 /* yuv 4:2:2 planar */ #define X264_CSP_YV16 0x0005 /* yvu 4:2:2 planar */ #define X264_CSP_NV16 0x0006 /* yuv 4:2:2, with one y plane and one packed u+v */ -#define X264_CSP_I444 0x0007 /* yuv 4:4:4 planar */ -#define X264_CSP_YV24 0x0008 /* yvu 4:4:4 planar */ -#define X264_CSP_BGR 0x0009 /* packed bgr 24bits */ -#define X264_CSP_BGRA 0x000a /* packed bgr 32bits */ -#define X264_CSP_RGB 0x000b /* packed rgb 24bits */ -#define X264_CSP_MAX 0x000c /* end of list */ +#define X264_CSP_V210 0x0007 /* 10-bit yuv 4:2:2 packed in 32 */ +#define X264_CSP_I444 0x0008 /* yuv 4:4:4 planar */ +#define X264_CSP_YV24 0x0009 /* yvu 4:4:4 planar */ +#define X264_CSP_BGR 0x000a /* packed bgr 24bits */ +#define X264_CSP_BGRA 0x000b /* packed bgr 32bits */ +#define X264_CSP_RGB 0x000c /* packed rgb 24bits */ +#define X264_CSP_MAX 0x000d /* end of list */ #define X264_CSP_VFLIP 0x1000 /* the csp is vertically flipped */ #define X264_CSP_HIGH_DEPTH 0x2000 /* the csp has a depth of 16 bits per pixel component */ @@ -319,6 +322,7 @@ int i_bframe_pyramid; /* Keep some B-frames as references: 
0=off, 1=strict hierarchical, 2=normal */ int b_open_gop; int b_bluray_compat; + int i_avcintra_class; int b_deblocking_filter; int i_deblocking_filter_alphac0; /* [-6, 6] -6 light filter, 6 strong */ @@ -331,7 +335,7 @@ int b_constrained_intra; int i_cqm_preset; - char *psz_cqm_file; /* JM format */ + char *psz_cqm_file; /* filename (in UTF-8) of CQM file, JM format */ uint8_t cqm_4iy[16]; /* used only if i_cqm_preset == X264_CQM_CUSTOM */ uint8_t cqm_4py[16]; uint8_t cqm_4ic[16]; @@ -345,9 +349,8 @@ void (*pf_log)( void *, int i_level, const char *psz, va_list ); void *p_log_private; int i_log_level; - int b_visualize; int b_full_recon; /* fully reconstruct frames, even when not necessary for encoding. Implied by psz_dump_yuv */ - char *psz_dump_yuv; /* filename for reconstructed frames */ + char *psz_dump_yuv; /* filename (in UTF-8) for reconstructed frames */ /* Encoder analyser parameters */ struct @@ -406,6 +409,10 @@ float f_ip_factor; float f_pb_factor; + /* VBV filler: force CBR VBV and use filler bytes to ensure hard-CBR. + * Implied by NAL-HRD CBR. */ + int b_filler; + int i_aq_mode; /* psy adaptive QP. (X264_AQ_*) */ float f_aq_strength; int b_mb_tree; /* Macroblock-tree ratecontrol. 
*/ @@ -413,9 +420,9 @@ /* 2pass */ int b_stat_write; /* Enable stat writing in psz_stat_out */ - char *psz_stat_out; + char *psz_stat_out; /* output filename (in UTF-8) of the 2pass stats file */ int b_stat_read; /* Read stat from psz_stat_in and use it */ - char *psz_stat_in; + char *psz_stat_in; /* input filename (in UTF-8) of the 2pass stats file */ /* 2pass params (same as ffmpeg ones) */ float f_qcompress; /* 0.0 => cbr, 1.0 => constant qp */ @@ -483,7 +490,7 @@ int b_opencl; /* use OpenCL when available */ int i_opencl_device; /* specify count of GPU devices to skip, for CLI users */ void *opencl_device_id; /* pass explicit cl_device_id as void*, for API users */ - char *psz_clbin_file; /* compiled OpenCL kernel cache file */ + char *psz_clbin_file; /* filename (in UTF-8) of the compiled OpenCL kernel cache file */ /* Slicing parameters */ int i_slice_max_size; /* Max size per slice in bytes; includes estimated NAL overhead. */ @@ -877,13 +884,15 @@ /* x264_encoder_headers: * return the SPS and PPS that will be used for the whole stream. * *pi_nal is the number of NAL units outputted in pp_nal. + * returns the number of bytes in the returned NALs. * returns negative on error. * the payloads of all output NALs are guaranteed to be sequential in memory. */ int x264_encoder_headers( x264_t *, x264_nal_t **pp_nal, int *pi_nal ); /* x264_encoder_encode: * encode one picture. * *pi_nal is the number of NAL units outputted in pp_nal. - * returns negative on error, zero if no NAL units returned. + * returns the number of bytes in the returned NALs. + * returns negative on error and zero if no NAL units returned. * the payloads of all output NALs are guaranteed to be sequential in memory. */ int x264_encoder_encode( x264_t *, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t *pic_out ); /* x264_encoder_close:
View file
x264-snapshot-20130723-2245.tar.bz2/x264cli.h -> x264-snapshot-20140321-2245.tar.bz2/x264cli.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * x264cli.h: x264cli common ***************************************************************************** - * Copyright (C) 2003-2013 x264 project + * Copyright (C) 2003-2014 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu> @@ -63,6 +63,13 @@ void x264_cli_log( const char *name, int i_level, const char *fmt, ... ); void x264_cli_printf( int i_level, const char *fmt, ... ); +#ifdef _WIN32 +void x264_cli_set_console_title( const char *title ); +int x264_ansi_filename( const char *filename, char *ansi_filename, int size, int create_file ); +#else +#define x264_cli_set_console_title( title ) +#endif + #define RETURN_IF_ERR( cond, name, ret, ... )\ if( cond )\ {\
View file
x264-snapshot-20130723-2245.tar.bz2/x264dll.c -> x264-snapshot-20140321-2245.tar.bz2/x264dll.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * x264dll: x264 DLLMain for win32 ***************************************************************************** - * Copyright (C) 2009-2013 x264 project + * Copyright (C) 2009-2014 x264 project * * Authors: Anton Mitrofanov <BugMaster@narod.ru> * @@ -27,7 +27,7 @@ #include <windows.h> /* Callback for our DLL so we can initialize pthread */ -BOOL WINAPI DllMain( HANDLE hinstDLL, DWORD fdwReason, LPVOID lpvReserved ) +BOOL WINAPI DllMain( HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved ) { #if PTW32_STATIC_LIB switch( fdwReason )
View file
x264-snapshot-20130723-2245.tar.bz2/x264res.rc -> x264-snapshot-20140321-2245.tar.bz2/x264res.rc
Changed
@@ -1,9 +1,9 @@ /***************************************************************************** * x264res.rc: windows resource file ***************************************************************************** - * Copyright (C) 2012-2013 x264 project + * Copyright (C) 2012-2014 x264 project * - * Authors: Henrik Gramner <hengar-6@student.ltu.se> + * Authors: Henrik Gramner <henrik@gramner.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -50,7 +50,7 @@ BEGIN BLOCK "StringFileInfo" BEGIN - BLOCK "040904E4" + BLOCK "040904B0" BEGIN VALUE "CompanyName", "x264 project" #ifdef DLL @@ -60,7 +60,7 @@ #endif VALUE "FileVersion", X264_POINTVER VALUE "InternalName", "x264" - VALUE "LegalCopyright", "Copyright (C) 2003-2013 x264 project" + VALUE "LegalCopyright", "Copyright (C) 2003-2014 x264 project" #ifdef DLL VALUE "OriginalFilename", "libx264-" xstr(X264_BUILD) ".dll" #else @@ -73,6 +73,6 @@ BLOCK "VarFileInfo" BEGIN - VALUE "Translation", 0x0409, 0x04E4 + VALUE "Translation", 0x0409, 0x04B0 /* U.S. English (Unicode) */ END END
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS)
is an
openSUSE project
.