Changes of Revision 6
libx264.changes
Changed
x
1
2
-------------------------------------------------------------------
3
+Sat Mar 22 17:10:14 UTC 2014 - i@margueirte.su
4
+
5
+- update version 20140321.
6
+
7
+-------------------------------------------------------------------
8
Tue Nov 19 07:53:08 UTC 2013 - obs@botter.cc
9
10
- add -fno-aggressive-loop-optimizations to extra-cflags in
11
libx264.spec
Changed
12
1
2
# Please submit bugfixes or comments via http://bugs.links2linux.org/
3
4
Name: libx264
5
-%define soname 135
6
-%define svn 20130723
7
+%define soname 142
8
+%define svn 20140321
9
Version: 0.%{soname}svn%{svn}
10
Release: 1
11
License: GPL-2.0+
12
x264-snapshot-20130723-2245.tar.bz2/common/display-x11.c
Deleted
220
1
2
-/*****************************************************************************
3
- * display-x11.c: x11 interface
4
- *****************************************************************************
5
- * Copyright (C) 2005-2013 x264 project
6
- *
7
- * Authors: Tuukka Toivonen <tuukkat@ee.oulu.fi>
8
- *
9
- * This program is free software; you can redistribute it and/or modify
10
- * it under the terms of the GNU General Public License as published by
11
- * the Free Software Foundation; either version 2 of the License, or
12
- * (at your option) any later version.
13
- *
14
- * This program is distributed in the hope that it will be useful,
15
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
- * GNU General Public License for more details.
18
- *
19
- * You should have received a copy of the GNU General Public License
20
- * along with this program; if not, write to the Free Software
21
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
22
- *
23
- * This program is also available under a commercial proprietary license.
24
- * For more information, contact us at licensing@x264.com.
25
- *****************************************************************************/
26
-
27
-#include <X11/Xlib.h>
28
-#include <X11/Xutil.h>
29
-#include <stdio.h>
30
-#include <stdlib.h>
31
-#include <string.h>
32
-
33
-#include "common.h"
34
-#include "display.h"
35
-
36
-static long event_mask = ConfigureNotify|ExposureMask|KeyPressMask|ButtonPressMask|StructureNotifyMask|ResizeRedirectMask;
37
-
38
-static Display *disp_display = NULL;
39
-static struct disp_window
40
-{
41
- int init;
42
- Window window;
43
-} disp_window[10];
44
-
45
-static inline void disp_chkerror( int cond, char *e )
46
-{
47
- if( !cond )
48
- return;
49
- fprintf( stderr, "error: %s\n", e ? e : "?" );
50
- abort();
51
-}
52
-
53
-static void disp_init_display( void )
54
-{
55
- Visual *visual;
56
- int dpy_class;
57
- int screen;
58
- int dpy_depth;
59
-
60
- if( disp_display )
61
- return;
62
- memset( &disp_window, 0, sizeof(disp_window) );
63
- disp_display = XOpenDisplay( "" );
64
- disp_chkerror( !disp_display, "no display" );
65
- screen = DefaultScreen( disp_display );
66
- visual = DefaultVisual( disp_display, screen );
67
- dpy_class = visual->class;
68
- dpy_depth = DefaultDepth( disp_display, screen );
69
- disp_chkerror( !((dpy_class == TrueColor && dpy_depth == 32)
70
- || (dpy_class == TrueColor && dpy_depth == 24)
71
- || (dpy_class == TrueColor && dpy_depth == 16)
72
- || (dpy_class == PseudoColor && dpy_depth == 8)),
73
- "requires 8 bit PseudoColor or 16/24/32 bit TrueColor display" );
74
-}
75
-
76
-static void disp_init_window( int num, int width, int height, const unsigned char *title )
77
-{
78
- XSetWindowAttributes xswa;
79
- XEvent xev;
80
- int screen = DefaultScreen(disp_display);
81
- Visual *visual = DefaultVisual (disp_display, screen);
82
- char buf[200];
83
- Window window;
84
-
85
- if( title )
86
- snprintf( buf, 200, "%s: %i/disp", title, num );
87
- else
88
- snprintf( buf, 200, "%i/disp", num );
89
-
90
- XSizeHints *shint = XAllocSizeHints();
91
- disp_chkerror( !shint, "memerror" );
92
- shint->min_width = shint->max_width = shint->width = width;
93
- shint->min_height = shint->max_height = shint->height = height;
94
- shint->flags = PSize | PMinSize | PMaxSize;
95
- disp_chkerror( num < 0 || num >= 10, "bad win num" );
96
- if( !disp_window[num].init )
97
- {
98
- unsigned int mask = 0;
99
- disp_window[num].init = 1;
100
- unsigned int bg = WhitePixel( disp_display, screen );
101
- unsigned int fg = BlackPixel( disp_display, screen );
102
- int dpy_depth = DefaultDepth( disp_display, screen );
103
- if( dpy_depth==32 || dpy_depth==24 || dpy_depth==16 )
104
- {
105
- mask |= CWColormap;
106
- xswa.colormap = XCreateColormap( disp_display, DefaultRootWindow( disp_display ), visual, AllocNone );
107
- }
108
- xswa.background_pixel = bg;
109
- xswa.border_pixel = fg;
110
- xswa.backing_store = Always;
111
- xswa.backing_planes = -1;
112
- xswa.bit_gravity = NorthWestGravity;
113
- mask = CWBackPixel | CWBorderPixel | CWBackingStore | CWBackingPlanes | CWBitGravity;
114
- window = XCreateWindow( disp_display, DefaultRootWindow( disp_display ),
115
- shint->x, shint->y, shint->width, shint->height,
116
- 1, dpy_depth, InputOutput, visual, mask, &xswa );
117
- disp_window[num].window = window;
118
-
119
- XSelectInput( disp_display, window, event_mask );
120
- XSetStandardProperties( disp_display, window, buf, buf, None, NULL, 0, shint );
121
- XMapWindow( disp_display, window );
122
-
123
- do {
124
- XNextEvent( disp_display, &xev );
125
- } while( xev.type != MapNotify || xev.xmap.event != window );
126
- }
127
- window = disp_window[num].window;
128
- XSetStandardProperties( disp_display, window, buf, buf, None, NULL, 0, shint );
129
- XResizeWindow( disp_display, window, width, height );
130
- XSync( disp_display, 1 );
131
- XFree( shint );
132
-}
133
-
134
-void disp_sync( void )
135
-{
136
- XSync( disp_display, 1 );
137
-}
138
-
139
-void disp_setcolor( unsigned char *name )
140
-{
141
- XColor c_exact, c_nearest;
142
-
143
- int screen = DefaultScreen( disp_display );
144
- GC gc = DefaultGC( disp_display, screen );
145
- Colormap cm = DefaultColormap( disp_display, screen );
146
- Status st = XAllocNamedColor( disp_display, cm, name, &c_nearest, &c_exact );
147
- disp_chkerror( st != 1, "XAllocNamedColor error" );
148
- XSetForeground( disp_display, gc, c_nearest.pixel );
149
-}
150
-
151
-void disp_gray( int num, char *data, int width, int height, int stride, const unsigned char *title )
152
-{
153
- char dummy;
154
-
155
- disp_init_display();
156
- disp_init_window( num, width, height, title );
157
- int screen = DefaultScreen( disp_display );
158
- Visual *visual = DefaultVisual( disp_display, screen );
159
- int dpy_depth = DefaultDepth( disp_display, screen );
160
- XImage *ximage = XCreateImage( disp_display, visual, dpy_depth, ZPixmap, 0, &dummy, width, height, 8, 0 );
161
- disp_chkerror( !ximage, "no ximage" );
162
-#if WORDS_BIGENDIAN
163
- ximage->byte_order = MSBFirst;
164
- ximage->bitmap_bit_order = MSBFirst;
165
-#else
166
- ximage->byte_order = LSBFirst;
167
- ximage->bitmap_bit_order = LSBFirst;
168
-#endif
169
-
170
- int pixelsize = dpy_depth>8 ? sizeof(int) : sizeof(unsigned char);
171
- uint8_t *image = malloc( width * height * pixelsize );
172
- disp_chkerror( !image, "malloc failed" );
173
- for( int y = 0; y < height; y++ )
174
- for( int x = 0; x < width; x++ )
175
- memset( &image[(width*y + x)*pixelsize], data[y*stride+x], pixelsize );
176
- ximage->data = image;
177
- GC gc = DefaultGC( disp_display, screen );
178
-
179
- XPutImage( disp_display, disp_window[num].window, gc, ximage, 0, 0, 0, 0, width, height );
180
- XPutImage( disp_display, disp_window[num].window, gc, ximage, 0, 0, 0, 0, width, height );
181
-
182
- XDestroyImage( ximage );
183
- XSync( disp_display, 1 );
184
-
185
-}
186
-
187
-void disp_gray_zoom(int num, char *data, int width, int height, int stride, const unsigned char *title, int zoom)
188
-{
189
- unsigned char *dataz = malloc( width*zoom * height*zoom );
190
- disp_chkerror( !dataz, "malloc" );
191
- for( int y = 0; y < height; y++ )
192
- for( int x = 0; x < width; x++ )
193
- for( int y0 = 0; y0 < zoom; y0++ )
194
- for( int x0 = 0; x0 < zoom; x0++ )
195
- dataz[(y*zoom + y0)*width*zoom + x*zoom + x0] = data[y*stride+x];
196
- disp_gray( num, dataz, width*zoom, height*zoom, width*zoom, title );
197
- free( dataz );
198
-}
199
-
200
-void disp_point( int num, int x1, int y1 )
201
-{
202
- int screen = DefaultScreen( disp_display );
203
- GC gc = DefaultGC( disp_display, screen );
204
- XDrawPoint( disp_display, disp_window[num].window, gc, x1, y1 );
205
-}
206
-
207
-void disp_line( int num, int x1, int y1, int x2, int y2 )
208
-{
209
- int screen = DefaultScreen( disp_display );
210
- GC gc = DefaultGC( disp_display, screen );
211
- XDrawLine( disp_display, disp_window[num].window, gc, x1, y1, x2, y2 );
212
-}
213
-
214
-void disp_rect( int num, int x1, int y1, int x2, int y2 )
215
-{
216
- int screen = DefaultScreen( disp_display );
217
- GC gc = DefaultGC( disp_display, screen );
218
- XDrawRectangle( disp_display, disp_window[num].window, gc, x1, y1, x2-x1, y2-y1 );
219
-}
220
x264-snapshot-20130723-2245.tar.bz2/common/display.h
Deleted
43
1
2
-/*****************************************************************************
3
- * display.h: x11 visualization interface
4
- *****************************************************************************
5
- * Copyright (C) 2005-2013 x264 project
6
- *
7
- * Authors: Tuukka Toivonen <tuukkat@ee.oulu.fi>
8
- *
9
- * This program is free software; you can redistribute it and/or modify
10
- * it under the terms of the GNU General Public License as published by
11
- * the Free Software Foundation; either version 2 of the License, or
12
- * (at your option) any later version.
13
- *
14
- * This program is distributed in the hope that it will be useful,
15
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
- * GNU General Public License for more details.
18
- *
19
- * You should have received a copy of the GNU General Public License
20
- * along with this program; if not, write to the Free Software
21
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
22
- *
23
- * This program is also available under a commercial proprietary license.
24
- * For more information, contact us at licensing@x264.com.
25
- *****************************************************************************/
26
-
27
-#ifndef X264_DISPLAY_H
28
-#define X264_DISPLAY_H
29
-
30
-void disp_sync(void);
31
-void disp_setcolor(unsigned char *name);
32
-/* Display a region of byte wide memory as a grayscale image.
33
- * num is the window to use for displaying. */
34
-void disp_gray(int num, char *data, int width, int height,
35
- int stride, const unsigned char *title);
36
-void disp_gray_zoom(int num, char *data, int width, int height,
37
- int stride, const unsigned char *title, int zoom);
38
-void disp_point(int num, int x1, int y1);
39
-void disp_line(int num, int x1, int y1, int x2, int y2);
40
-void disp_rect(int num, int x1, int y1, int x2, int y2);
41
-
42
-#endif
43
x264-snapshot-20130723-2245.tar.bz2/common/visualize.c
Deleted
343
1
2
-/*****************************************************************************
3
- * visualize.c: visualization
4
- *****************************************************************************
5
- * Copyright (C) 2005-2013 x264 project
6
- *
7
- * Authors: Tuukka Toivonen <tuukkat@ee.oulu.fi>
8
- *
9
- * This program is free software; you can redistribute it and/or modify
10
- * it under the terms of the GNU General Public License as published by
11
- * the Free Software Foundation; either version 2 of the License, or
12
- * (at your option) any later version.
13
- *
14
- * This program is distributed in the hope that it will be useful,
15
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
- * GNU General Public License for more details.
18
- *
19
- * You should have received a copy of the GNU General Public License
20
- * along with this program; if not, write to the Free Software
21
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
22
- *
23
- * This program is also available under a commercial proprietary license.
24
- * For more information, contact us at licensing@x264.com.
25
- *****************************************************************************/
26
-
27
-/*
28
- * Some explanation of the symbols used:
29
- * Red/pink: intra block
30
- * Blue: inter block
31
- * Green: skip block
32
- * Yellow: B-block (not visualized properly yet)
33
- *
34
- * Motion vectors have black dot at their target (ie. at the MB center),
35
- * instead of arrowhead. The black dot is enclosed in filled diamond with radius
36
- * depending on reference frame number (one frame back = zero width, normal case).
37
- *
38
- * The intra blocks have generally lines drawn perpendicular
39
- * to the prediction direction, so for example, if there is a pink block
40
- * with horizontal line at the top of it, it is interpolated by assuming
41
- * luma to be vertically constant.
42
- * DC predicted blocks have both horizontal and vertical lines,
43
- * pink blocks with a diagonal line are predicted using the planar function.
44
- */
45
-
46
-#include "common.h"
47
-#include "visualize.h"
48
-#include "display.h"
49
-
50
-typedef struct
51
-{
52
- int i_type;
53
- int i_partition;
54
- int i_sub_partition[4];
55
- int i_intra16x16_pred_mode;
56
- int intra4x4_pred_mode[4][4];
57
- int8_t ref[2][4][4]; /* [list][y][x] */
58
- int16_t mv[2][4][4][2]; /* [list][y][x][mvxy] */
59
-} visualize_t;
60
-
61
-/* Return string from stringlist corresponding to the given code */
62
-#define GET_STRING(sl, code) get_string((sl), sizeof(sl)/sizeof(*(sl)), code)
63
-
64
-typedef struct
65
-{
66
- int code;
67
- char *string;
68
-} stringlist_t;
69
-
70
-static char *get_string( const stringlist_t *sl, int entries, int code )
71
-{
72
- for( int i = 0; i < entries; i++ )
73
- if( sl[i].code == code )
74
- return sl[i].string;
75
- return "?";
76
-}
77
-
78
-/* Plot motion vector */
79
-static void mv( int x0, int y0, int16_t dmv[2], int ref, int zoom, char *col )
80
-{
81
- int dx = dmv[0];
82
- int dy = dmv[1];
83
-
84
- dx = (dx * zoom + 2) >> 2;
85
- dy = (dy * zoom + 2) >> 2;
86
- disp_line( 0, x0, y0, x0+dx, y0+dy );
87
- for( int i = 1; i < ref; i++ )
88
- {
89
- disp_line( 0, x0 , y0-i, x0+i, y0 );
90
- disp_line( 0, x0+i, y0 , x0 , y0+i );
91
- disp_line( 0, x0 , y0+i, x0-i, y0 );
92
- disp_line( 0, x0-i, y0 , x0 , y0-i );
93
- }
94
- disp_setcolor( "black" );
95
- disp_point( 0, x0, y0 );
96
- disp_setcolor( col );
97
-}
98
-
99
-int x264_visualize_init( x264_t *h )
100
-{
101
- CHECKED_MALLOC( h->visualize, h->mb.i_mb_width * h->mb.i_mb_height * sizeof(visualize_t) );
102
- return 0;
103
-fail:
104
- return -1;
105
-}
106
-
107
-void x264_visualize_mb( x264_t *h )
108
-{
109
- visualize_t *v = (visualize_t*)h->visualize + h->mb.i_mb_xy;
110
-
111
- /* Save all data for the MB that we need for drawing the visualization */
112
- v->i_type = h->mb.i_type;
113
- v->i_partition = h->mb.i_partition;
114
- for( int i = 0; i < 4; i++ )
115
- v->i_sub_partition[i] = h->mb.i_sub_partition[i];
116
- for( int y = 0; y < 4; y++ )
117
- for( int x = 0; x < 4; x++ )
118
- v->intra4x4_pred_mode[y][x] = h->mb.cache.intra4x4_pred_mode[X264_SCAN8_0+y*8+x];
119
- for( int l = 0; l < 2; l++ )
120
- for( int y = 0; y < 4; y++ )
121
- for( int x = 0; x < 4; x++ )
122
- {
123
- for( int i = 0; i < 2; i++ )
124
- v->mv[l][y][x][i] = h->mb.cache.mv[l][X264_SCAN8_0+y*8+x][i];
125
- v->ref[l][y][x] = h->mb.cache.ref[l][X264_SCAN8_0+y*8+x];
126
- }
127
- v->i_intra16x16_pred_mode = h->mb.i_intra16x16_pred_mode;
128
-}
129
-
130
-void x264_visualize_close( x264_t *h )
131
-{
132
- x264_free(h->visualize);
133
-}
134
-
135
-/* Display visualization (block types, MVs) of the encoded frame */
136
-/* FIXME: B-type MBs not handled yet properly */
137
-void x264_visualize_show( x264_t *h )
138
-{
139
- static const stringlist_t mb_types[] =
140
- {
141
- /* Block types marked as NULL will not be drawn */
142
- { I_4x4 , "red" },
143
- { I_8x8 , "#ff5640" },
144
- { I_16x16 , "#ff8060" },
145
- { I_PCM , "violet" },
146
- { P_L0 , "SlateBlue" },
147
- { P_8x8 , "blue" },
148
- { P_SKIP , "green" },
149
- { B_DIRECT, "yellow" },
150
- { B_L0_L0 , "yellow" },
151
- { B_L0_L1 , "yellow" },
152
- { B_L0_BI , "yellow" },
153
- { B_L1_L0 , "yellow" },
154
- { B_L1_L1 , "yellow" },
155
- { B_L1_BI , "yellow" },
156
- { B_BI_L0 , "yellow" },
157
- { B_BI_L1 , "yellow" },
158
- { B_BI_BI , "yellow" },
159
- { B_8x8 , "yellow" },
160
- { B_SKIP , "yellow" },
161
- };
162
-
163
- static const int waitkey = 1; /* Wait for enter after each frame */
164
- static const int drawbox = 1; /* Draw box around each block */
165
- static const int borders = 0; /* Display extrapolated borders outside frame */
166
- static const int zoom = 2; /* Zoom factor */
167
-
168
- static const int pad = 32;
169
- pixel *const frame = h->fdec->plane[0];
170
- const int width = h->param.i_width;
171
- const int height = h->param.i_height;
172
- const int stride = h->fdec->i_stride[0];
173
-
174
- if( borders )
175
- disp_gray_zoom( 0, frame - pad*stride - pad, width+2*pad, height+2*pad, stride, "fdec", zoom );
176
- else
177
- disp_gray_zoom( 0, frame, width, height, stride, "fdec", zoom );
178
-
179
- for( int mb_xy = 0; mb_xy < h->mb.i_mb_width * h->mb.i_mb_height; mb_xy++ )
180
- {
181
- visualize_t *const v = (visualize_t*)h->visualize + mb_xy;
182
- const int mb_y = mb_xy / h->mb.i_mb_width;
183
- const int mb_x = mb_xy % h->mb.i_mb_width;
184
- char *const col = GET_STRING( mb_types, v->i_type );
185
- int x = mb_x*16*zoom;
186
- int y = mb_y*16*zoom;
187
- int l = 0;
188
-
189
- if( !col )
190
- continue;
191
-
192
- if( borders )
193
- {
194
- x += pad*zoom;
195
- y += pad*zoom;
196
- }
197
-
198
- disp_setcolor( col );
199
- if( drawbox ) disp_rect( 0, x, y, x+16*zoom-1, y+16*zoom-1 );
200
-
201
- if( v->i_type==P_L0 || v->i_type==P_8x8 || v->i_type==P_SKIP )
202
- {
203
- /* Predicted (inter) mode, with motion vector */
204
- if( v->i_partition == D_16x16 || v->i_type == P_SKIP )
205
- mv( x+8*zoom, y+8*zoom, v->mv[l][0][0], v->ref[l][0][0], zoom, col );
206
- else if (v->i_partition == D_16x8)
207
- {
208
- if( drawbox ) disp_rect( 0, x, y, x+16*zoom, y+8*zoom );
209
- mv( x+8*zoom, y+4*zoom, v->mv[l][0][0], v->ref[l][0][0], zoom, col );
210
- if( drawbox ) disp_rect( 0, x, y+8*zoom, x+16*zoom, y+16*zoom );
211
- mv( x+8*zoom, y+12*zoom, v->mv[l][2][0], v->ref[l][2][0], zoom, col );
212
- }
213
- else if( v->i_partition==D_8x16 )
214
- {
215
- if( drawbox ) disp_rect( 0, x, y, x+8*zoom, y+16*zoom );
216
- mv( x+4*zoom, y+8*zoom, v->mv[l][0][0], v->ref[l][0][0], zoom, col );
217
- if( drawbox ) disp_rect( 0, x+8*zoom, y, x+16*zoom, y+16*zoom );
218
- mv( x+12*zoom, y+8*zoom, v->mv[l][0][2], v->ref[l][0][2], zoom, col );
219
- }
220
- else if( v->i_partition==D_8x8 )
221
- {
222
- for( int i = 0; i < 2; i++ )
223
- for( int j = 0; j < 2; j++ )
224
- {
225
- int sp = v->i_sub_partition[i*2+j];
226
- const int x0 = x + j*8*zoom;
227
- const int y0 = y + i*8*zoom;
228
- l = x264_mb_partition_listX_table[0][sp] ? 0 : 1; /* FIXME: not tested if this works */
229
- if( IS_SUB8x8(sp) )
230
- {
231
- if( drawbox ) disp_rect( 0, x0, y0, x0+8*zoom, y0+8*zoom );
232
- mv( x0+4*zoom, y0+4*zoom, v->mv[l][2*i][2*j], v->ref[l][2*i][2*j], zoom, col );
233
- }
234
- else if( IS_SUB8x4(sp) )
235
- {
236
- if( drawbox ) disp_rect( 0, x0, y0, x0+8*zoom, y0+4*zoom );
237
- if( drawbox ) disp_rect( 0, x0, y0+4*zoom, x0+8*zoom, y0+8*zoom );
238
- mv( x0+4*zoom, y0+2*zoom, v->mv[l][2*i][2*j], v->ref[l][2*i][2*j], zoom, col );
239
- mv( x0+4*zoom, y0+6*zoom, v->mv[l][2*i+1][2*j], v->ref[l][2*i+1][2*j], zoom, col );
240
- }
241
- else if( IS_SUB4x8(sp) )
242
- {
243
- if( drawbox ) disp_rect( 0, x0, y0, x0+4*zoom, y0+8*zoom );
244
- if( drawbox ) disp_rect( 0, x0+4*zoom, y0, x0+8*zoom, y0+8*zoom );
245
- mv( x0+2*zoom, y0+4*zoom, v->mv[l][2*i][2*j], v->ref[l][2*i][2*j], zoom, col );
246
- mv( x0+6*zoom, y0+4*zoom, v->mv[l][2*i][2*j+1], v->ref[l][2*i][2*j+1], zoom, col );
247
- }
248
- else if( IS_SUB4x4(sp) )
249
- {
250
- if( drawbox ) disp_rect( 0, x0, y0, x0+4*zoom, y0+4*zoom );
251
- if( drawbox ) disp_rect( 0, x0+4*zoom, y0, x0+8*zoom, y0+4*zoom );
252
- if( drawbox ) disp_rect( 0, x0, y0+4*zoom, x0+4*zoom, y0+8*zoom );
253
- if( drawbox ) disp_rect( 0, x0+4*zoom, y0+4*zoom, x0+8*zoom, y0+8*zoom );
254
- mv( x0+2*zoom, y0+2*zoom, v->mv[l][2*i][2*j], v->ref[l][2*i][2*j], zoom, col );
255
- mv( x0+6*zoom, y0+2*zoom, v->mv[l][2*i][2*j+1], v->ref[l][2*i][2*j+1], zoom, col );
256
- mv( x0+2*zoom, y0+6*zoom, v->mv[l][2*i+1][2*j], v->ref[l][2*i+1][2*j], zoom, col );
257
- mv( x0+6*zoom, y0+6*zoom, v->mv[l][2*i+1][2*j+1], v->ref[l][2*i+1][2*j+1], zoom, col );
258
- }
259
- }
260
- }
261
- }
262
-
263
- if( IS_INTRA(v->i_type) || v->i_type == I_PCM )
264
- {
265
- /* Intra coded */
266
- if( v->i_type == I_16x16 )
267
- {
268
- switch (v->i_intra16x16_pred_mode) {
269
- case I_PRED_16x16_V:
270
- disp_line( 0, x+2*zoom, y+2*zoom, x+14*zoom, y+2*zoom );
271
- break;
272
- case I_PRED_16x16_H:
273
- disp_line( 0, x+2*zoom, y+2*zoom, x+2*zoom, y+14*zoom );
274
- break;
275
- case I_PRED_16x16_DC:
276
- case I_PRED_16x16_DC_LEFT:
277
- case I_PRED_16x16_DC_TOP:
278
- case I_PRED_16x16_DC_128:
279
- disp_line( 0, x+2*zoom, y+2*zoom, x+14*zoom, y+2*zoom );
280
- disp_line( 0, x+2*zoom, y+2*zoom, x+2*zoom, y+14*zoom );
281
- break;
282
- case I_PRED_16x16_P:
283
- disp_line( 0, x+2*zoom, y+2*zoom, x+8*zoom, y+8*zoom );
284
- break;
285
- }
286
- }
287
- if( v->i_type==I_4x4 || v->i_type==I_8x8 )
288
- {
289
- const int di = v->i_type == I_8x8 ? 2 : 1;
290
- const int zoom2 = zoom * di;
291
- for( int i = 0; i < 4; i += di )
292
- for( int j = 0; j < 4; j += di )
293
- {
294
- const int x0 = x + j*4*zoom;
295
- const int y0 = y + i*4*zoom;
296
- if( drawbox ) disp_rect( 0, x0, y0, x0+4*zoom2, y0+4*zoom2 );
297
- switch( v->intra4x4_pred_mode[i][j] )
298
- {
299
- case I_PRED_4x4_V: /* Vertical */
300
- disp_line( 0, x0+0*zoom2, y0+1*zoom2, x0+4*zoom2, y0+1*zoom2 );
301
- break;
302
- case I_PRED_4x4_H: /* Horizontal */
303
- disp_line( 0, x0+1*zoom2, y0+0*zoom2, x0+1*zoom2, y0+4*zoom2 );
304
- break;
305
- case I_PRED_4x4_DC: /* DC, average from top and left sides */
306
- case I_PRED_4x4_DC_LEFT:
307
- case I_PRED_4x4_DC_TOP:
308
- case I_PRED_4x4_DC_128:
309
- disp_line( 0, x0+1*zoom2, y0+1*zoom2, x0+4*zoom2, y0+1*zoom2 );
310
- disp_line( 0, x0+1*zoom2, y0+1*zoom2, x0+1*zoom2, y0+4*zoom2 );
311
- break;
312
- case I_PRED_4x4_DDL: /* Topright-bottomleft */
313
- disp_line( 0, x0+0*zoom2, y0+0*zoom2, x0+4*zoom2, y0+4*zoom2 );
314
- break;
315
- case I_PRED_4x4_DDR: /* Topleft-bottomright */
316
- disp_line( 0, x0+0*zoom2, y0+4*zoom2, x0+4*zoom2, y0+0*zoom2 );
317
- break;
318
- case I_PRED_4x4_VR: /* Mix of topleft-bottomright and vertical */
319
- disp_line( 0, x0+0*zoom2, y0+2*zoom2, x0+4*zoom2, y0+1*zoom2 );
320
- break;
321
- case I_PRED_4x4_HD: /* Mix of topleft-bottomright and horizontal */
322
- disp_line( 0, x0+2*zoom2, y0+0*zoom2, x0+1*zoom2, y0+4*zoom2 );
323
- break;
324
- case I_PRED_4x4_VL: /* Mix of topright-bottomleft and vertical */
325
- disp_line( 0, x0+0*zoom2, y0+1*zoom2, x0+4*zoom2, y0+2*zoom2 );
326
- break;
327
- case I_PRED_4x4_HU: /* Mix of topright-bottomleft and horizontal */
328
- disp_line( 0, x0+1*zoom2, y0+0*zoom2, x0+2*zoom2, y0+4*zoom2 );
329
- break;
330
- }
331
- }
332
- }
333
- }
334
- }
335
-
336
- disp_sync();
337
- if( waitkey )
338
- getchar();
339
-}
340
-/* }}} */
341
-
342
-//EOF
343
x264-snapshot-20130723-2245.tar.bz2/common/visualize.h
Deleted
38
1
2
-/*****************************************************************************
3
- * visualize.h: visualization
4
- *****************************************************************************
5
- * Copyright (C) 2005-2013 x264 project
6
- *
7
- * Authors: Tuukka Toivonen <tuukkat@ee.oulu.fi>
8
- *
9
- * This program is free software; you can redistribute it and/or modify
10
- * it under the terms of the GNU General Public License as published by
11
- * the Free Software Foundation; either version 2 of the License, or
12
- * (at your option) any later version.
13
- *
14
- * This program is distributed in the hope that it will be useful,
15
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
- * GNU General Public License for more details.
18
- *
19
- * You should have received a copy of the GNU General Public License
20
- * along with this program; if not, write to the Free Software
21
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
22
- *
23
- * This program is also available under a commercial proprietary license.
24
- * For more information, contact us at licensing@x264.com.
25
- *****************************************************************************/
26
-
27
-#ifndef X264_VISUALIZE_H
28
-#define X264_VISUALIZE_H
29
-
30
-#include "common/common.h"
31
-
32
-int x264_visualize_init( x264_t *h );
33
-void x264_visualize_mb( x264_t *h );
34
-void x264_visualize_show( x264_t *h );
35
-void x264_visualize_close( x264_t *h );
36
-
37
-#endif
38
x264-snapshot-20130723-2245.tar.bz2/tools/xyuv.c
Deleted
794
1
2
-/*****************************************************************************
3
- * xyuv.c: a SDL yuv 420 planer viewer.
4
- *****************************************************************************
5
- * Copyright (C) 2004 Laurent Aimar <fenrir@via.ecp.fr>
6
- *
7
- * This program is free software; you can redistribute it and/or modify
8
- * it under the terms of the GNU General Public License as published by
9
- * the Free Software Foundation; either version 2 of the License, or
10
- * (at your option) any later version.
11
- *
12
- * This program is distributed in the hope that it will be useful,
13
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- * GNU General Public License for more details.
16
- *
17
- * You should have received a copy of the GNU General Public License
18
- * along with this program; if not, write to the Free Software
19
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
20
- *****************************************************************************/
21
-
22
-#include <stdlib.h>
23
-#include <stdio.h>
24
-#include <string.h>
25
-#include <stdint.h>
26
-
27
-#include <SDL/SDL.h>
28
-
29
-#define YUV_MAX 20
30
-#define SDL_TITLE "xyuv: %s - %d/%d - %.2ffps"
31
-typedef struct
32
-{
33
- /* globals */
34
- int i_width;
35
- int i_height;
36
- int i_frame_size;
37
- int i_frame;
38
- int i_frames;
39
- float f_fps;
40
-
41
- float f_y;
42
-
43
- int b_pause;
44
- int b_grid;
45
- int b_split;
46
- int b_diff;
47
- int i_join;
48
-
49
- /* Constructed picture */
50
- int i_wall_width; /* in picture count */
51
-
52
- /* YUV files */
53
- int i_yuv;
54
- struct
55
- {
56
- char *name;
57
- FILE *f; /* handles */
58
- int i_frames; /* frames count */
59
-
60
- /* Position in the whole picture */
61
- int x, y;
62
- } yuv[YUV_MAX];
63
-
64
- /* SDL */
65
- int i_sdl_width;
66
- int i_sdl_height;
67
-
68
- int i_display_width;
69
- int i_display_height;
70
- char *title;
71
-
72
- SDL_Surface *screen;
73
- SDL_Overlay *overlay;
74
-
75
- /* */
76
- uint8_t *pic;
77
-
78
-} xyuv_t;
79
-
80
-xyuv_t xyuv = {
81
- .i_width = 0,
82
- .i_height = 0,
83
- .i_frame = 1,
84
- .i_frames = 0,
85
- .f_fps = 25.0,
86
- .f_y = 0.0,
87
- .i_wall_width = 0,
88
-
89
- .i_yuv = 0,
90
-
91
- .b_pause = 0,
92
- .b_split = 0,
93
- .b_diff = 0,
94
- .i_join = -1,
95
-
96
- .title = NULL,
97
- .pic = NULL,
98
-};
99
-
100
-static void help( void )
101
-{
102
- fprintf( stderr,
103
- "Syntax: xyuv [options] file [file2 ...]\n"
104
- "\n"
105
- " --help Print this help\n"
106
- "\n"
107
- " -s, --size <WIDTHxHEIGHT> Set input size\n"
108
- " -w, --width <integer> Set width\n"
109
- " -h, --height <integer> Set height\n"
110
- "\n"
111
- " -S, --split Show splited Y/U/V planes\n"
112
- " -d, --diff Show difference (only 2 files) in split mode\n"
113
- " -j, --joint <integer>\n"
114
- "\n"
115
- " -y <float> Set Y factor\n"
116
- "\n"
117
- " -g, --grid Show a grid (macroblock 16x16)\n"
118
- " -W <integer> Set wall width (in picture count)\n"
119
- " -f, --fps <float> Set fps\n"
120
- "\n" );
121
-}
122
-
123
-static void xyuv_count_frames( xyuv_t *xyuv );
124
-static void xyuv_detect( int *pi_width, int *pi_height );
125
-static void xyuv_display( xyuv_t *xyuv, int i_frame );
126
-
127
-int main( int argc, char **argv )
128
-{
129
- int i;
130
-
131
- /* Parse commande line */
132
- for( i = 1; i < argc; i++ ) {
133
- if( !strcasecmp( argv[i], "--help" ) ) {
134
- help();
135
- return 0;
136
- }
137
- if( !strcmp( argv[i], "-d" ) || !strcasecmp( argv[i], "--diff" ) ) {
138
- xyuv.b_diff = 1;
139
- } else if( !strcmp( argv[i], "-S" ) || !strcasecmp( argv[i], "--split" ) ) {
140
- xyuv.b_split = 1;
141
- } else if( !strcmp( argv[i], "-f" ) || !strcasecmp( argv[i], "--fps" ) ) {
142
- if( i >= argc -1 ) goto err_missing_arg;
143
- xyuv.f_fps = atof( argv[++i] );
144
- } else if( !strcmp( argv[i], "-h" ) || !strcasecmp( argv[i], "--height" ) ) {
145
- if( i >= argc -1 ) goto err_missing_arg;
146
- xyuv.i_height = atoi( argv[++i] );
147
- } else if( !strcmp( argv[i], "-w" ) || !strcasecmp( argv[i], "--width" ) ) {
148
- if( i >= argc -1 ) goto err_missing_arg;
149
- xyuv.i_width = atoi( argv[++i] );
150
- } else if( !strcmp( argv[i], "-s" ) || !strcasecmp( argv[i], "--size" ) ) {
151
- char *p;
152
-
153
- if( i >= argc -1 ) goto err_missing_arg;
154
-
155
- xyuv.i_width = strtol( argv[++i], &p, 0 );
156
- p++;
157
- xyuv.i_height = atoi( p );
158
- } else if( !strcmp( argv[i], "-W" ) ) {
159
- if( i >= argc -1 ) goto err_missing_arg;
160
- xyuv.i_wall_width = atoi( argv[++i] );
161
- } else if( !strcmp( argv[i], "-y" ) ) {
162
- if( i >= argc -1 ) goto err_missing_arg;
163
- xyuv.f_y = atof( argv[++i] );
164
- } else if( !strcmp( argv[i], "-j" ) || !strcasecmp( argv[i], "--join" ) ) {
165
- if( i >= argc -1 ) goto err_missing_arg;
166
- xyuv.i_join = atoi( argv[++i] );
167
- } else if( !strcmp( argv[i], "-g" ) || !strcasecmp( argv[i], "--grid" ) ) {
168
- xyuv.b_grid = 1;
169
- } else {
170
- FILE *f = fopen( argv[i], "rb" );
171
- if( !f ) {
172
- fprintf( stderr, "cannot open YUV %s\n", argv[i] );
173
- } else {
174
- xyuv.yuv[xyuv.i_yuv].name = strdup( argv[i] );
175
- xyuv.yuv[xyuv.i_yuv].f = f;
176
- xyuv.yuv[xyuv.i_yuv].i_frames = 0;
177
-
178
- xyuv.i_yuv++;
179
- }
180
- }
181
- }
182
-
183
- if( xyuv.i_yuv == 0 ) {
184
- fprintf( stderr, "no file to display\n" );
185
- return -1;
186
- }
187
- if( xyuv.i_width == 0 || xyuv.i_height == 0 ) {
188
- char *psz = xyuv.yuv[0].name;
189
- char *num;
190
- char *x;
191
- /* See if we find widthxheight in the file name */
192
- for( ;; ) {
193
- if( !( x = strchr( psz+1, 'x' ) ) ) {
194
- break;
195
- }
196
- num = x;
197
- while( num > psz && num[-1] >= '0' && num[-1] <= '9' )
198
- num--;
199
-
200
- if( num != x && x[1] >= '0' && x[1] <= '9' ) {
201
- xyuv.i_width = atoi( num );
202
- xyuv.i_height = atoi( x+1 );
203
- break;
204
- }
205
- psz = x;
206
- }
207
- fprintf( stderr, "file name gives %dx%d\n", xyuv.i_width, xyuv.i_height );
208
- }
209
-
210
- if( xyuv.i_width == 0 || xyuv.i_height == 0 ) {
211
- xyuv_detect( &xyuv.i_width, &xyuv.i_height );
212
- }
213
-
214
- if( xyuv.i_width == 0 || xyuv.i_height == 0 ) {
215
- fprintf( stderr, "invalid or missing frames size\n" );
216
- return -1;
217
- }
218
- if( xyuv.b_diff && xyuv.i_yuv != 2 ) {
219
- fprintf( stderr, "--diff works only with 2 files\n" );
220
- return -1;
221
- }
222
- if( (xyuv.i_join == 0 || xyuv.i_join >= xyuv.i_width) && xyuv.i_yuv != 2 ) {
223
- fprintf( stderr, "--join woeks only with two files and range is [1, width-1]\n" );
224
- return -1;
225
- }
226
- if( xyuv.i_join % 2 != 0 ) {
227
- if( xyuv.i_join + 1 < xyuv.i_width )
228
- xyuv.i_join++;
229
- else
230
- xyuv.i_join--;
231
- }
232
-
233
- /* Now check frames */
234
- fprintf( stderr, "displaying :\n" );
235
- xyuv.i_frame_size = 3 * xyuv.i_width * xyuv.i_height / 2;
236
- xyuv_count_frames( &xyuv );
237
- for( i = 0; i < xyuv.i_yuv; i++ ) {
238
- fprintf( stderr, " - '%s' : %d frames\n", xyuv.yuv[i].name, xyuv.yuv[i].i_frames );
239
- }
240
-
241
- if( xyuv.i_frames == 0 ) {
242
- fprintf( stderr, "no frames to display\n" );
243
- }
244
-
245
- xyuv.pic = malloc( xyuv.i_frame_size );
246
-
247
- /* calculate SDL view */
248
- if( xyuv.i_wall_width > xyuv.i_yuv ) {
249
- xyuv.i_wall_width = xyuv.i_yuv;
250
- }
251
- if( xyuv.i_wall_width == 0 ) {
252
- while( xyuv.i_wall_width < xyuv.i_yuv && xyuv.i_wall_width * xyuv.i_wall_width < xyuv.i_yuv ) {
253
- xyuv.i_wall_width++;
254
- }
255
- }
256
-
257
- for( i = 0; i < xyuv.i_yuv; i++ ) {
258
- if( xyuv.b_diff || xyuv.i_join > 0 ) {
259
- xyuv.yuv[i].x = 0;
260
- xyuv.yuv[i].y = 0;
261
- } else if( xyuv.b_split ) {
262
- xyuv.yuv[i].x = (i%xyuv.i_wall_width) * 3 * xyuv.i_width / 2;
263
- xyuv.yuv[i].y = (i/xyuv.i_wall_width) * xyuv.i_height;
264
- } else {
265
- xyuv.yuv[i].x = (i%xyuv.i_wall_width) * xyuv.i_width;
266
- xyuv.yuv[i].y = (i/xyuv.i_wall_width) * xyuv.i_height;
267
- }
268
- }
269
- if( xyuv.b_diff ) {
270
- xyuv.i_sdl_width = 3 * xyuv.i_width / 2;
271
- xyuv.i_sdl_height= xyuv.i_height;
272
- } else if( xyuv.i_join > 0 ) {
273
- xyuv.i_sdl_width = xyuv.i_width;
274
- xyuv.i_sdl_height= xyuv.i_height;
275
- } else if( xyuv.b_split ) {
276
- xyuv.i_sdl_width = xyuv.i_wall_width * 3 * xyuv.i_width / 2;
277
- xyuv.i_sdl_height= xyuv.i_height * ( ( xyuv.i_yuv + xyuv.i_wall_width - 1 ) / xyuv.i_wall_width );
278
- } else {
279
- xyuv.i_sdl_width = xyuv.i_wall_width * xyuv.i_width;
280
- xyuv.i_sdl_height= xyuv.i_height * ( ( xyuv.i_yuv + xyuv.i_wall_width - 1 ) / xyuv.i_wall_width );
281
- }
282
- xyuv.i_display_width = xyuv.i_sdl_width;
283
- xyuv.i_display_height = xyuv.i_sdl_height;
284
-
285
- /* Open SDL */
286
- if( SDL_Init( SDL_INIT_EVENTTHREAD|SDL_INIT_NOPARACHUTE|SDL_INIT_VIDEO) ) {
287
- fprintf( stderr, "cannot init SDL\n" );
288
- return -1;
289
- }
290
-
291
- SDL_EnableKeyRepeat(SDL_DEFAULT_REPEAT_DELAY, 100 );
292
- SDL_EventState( SDL_KEYUP, SDL_IGNORE );
293
-
294
- xyuv.screen = SDL_SetVideoMode( xyuv.i_sdl_width, xyuv.i_sdl_height, 0,
295
- SDL_HWSURFACE|SDL_RESIZABLE|
296
- SDL_ASYNCBLIT|SDL_HWACCEL );
297
- if( xyuv.screen == NULL ) {
298
- fprintf( stderr, "SDL_SetVideoMode failed\n" );
299
- return -1;
300
- }
301
-
302
- SDL_LockSurface( xyuv.screen );
303
- xyuv.overlay = SDL_CreateYUVOverlay( xyuv.i_sdl_width, xyuv.i_sdl_height,
304
- SDL_YV12_OVERLAY,
305
- xyuv.screen );
306
- /* reset with black */
307
- memset( xyuv.overlay->pixels[0], 0, xyuv.overlay->pitches[0] * xyuv.i_sdl_height );
308
- memset( xyuv.overlay->pixels[1], 128, xyuv.overlay->pitches[1] * xyuv.i_sdl_height / 2);
309
- memset( xyuv.overlay->pixels[2], 128, xyuv.overlay->pitches[2] * xyuv.i_sdl_height / 2);
310
- SDL_UnlockSurface( xyuv.screen );
311
-
312
- if( xyuv.overlay == NULL ) {
313
- fprintf( stderr, "recon: SDL_CreateYUVOverlay failed\n" );
314
- return -1;
315
- }
316
-
317
- for( ;; ) {
318
- SDL_Event event;
319
- static int b_fullscreen = 0;
320
- int64_t i_start = SDL_GetTicks();
321
- int i_wait;
322
-
323
- if( !xyuv.b_pause ) {
324
- xyuv_display( &xyuv, xyuv.i_frame );
325
- }
326
-
327
- for( ;; ) {
328
- int b_refresh = 0;
329
- while( SDL_PollEvent( &event ) ) {
330
- switch( event.type )
331
- {
332
- case SDL_QUIT:
333
- if( b_fullscreen )
334
- SDL_WM_ToggleFullScreen( xyuv.screen );
335
- exit( 1 );
336
-
337
- case SDL_KEYDOWN:
338
- switch( event.key.keysym.sym )
339
- {
340
- case SDLK_q:
341
- case SDLK_ESCAPE:
342
- if( b_fullscreen )
343
- SDL_WM_ToggleFullScreen( xyuv.screen );
344
- exit(1);
345
-
346
- case SDLK_f:
347
- if( SDL_WM_ToggleFullScreen( xyuv.screen ) )
348
- b_fullscreen = 1 - b_fullscreen;
349
- break;
350
-
351
- case SDLK_g:
352
- if( xyuv.b_grid )
353
- xyuv.b_grid = 0;
354
- else
355
- xyuv.b_grid = 1;
356
- if( xyuv.b_pause )
357
- b_refresh = 1;
358
- break;
359
-
360
- case SDLK_SPACE:
361
- if( xyuv.b_pause )
362
- xyuv.b_pause = 0;
363
- else
364
- xyuv.b_pause = 1;
365
- break;
366
- case SDLK_LEFT:
367
- if( xyuv.i_frame > 1 ) xyuv.i_frame--;
368
- b_refresh = 1;
369
- break;
370
-
371
- case SDLK_RIGHT:
372
- if( xyuv.i_frame >= xyuv.i_frames )
373
- xyuv_count_frames( &xyuv );
374
- if( xyuv.i_frame < xyuv.i_frames ) xyuv.i_frame++;
375
- b_refresh = 1;
376
- break;
377
-
378
- case SDLK_HOME:
379
- xyuv.i_frame = 1;
380
- if( xyuv.b_pause )
381
- b_refresh = 1;
382
- break;
383
-
384
- case SDLK_END:
385
- xyuv_count_frames( &xyuv );
386
- xyuv.i_frame = xyuv.i_frames;
387
- b_refresh = 1;
388
- break;
389
-
390
- case SDLK_UP:
391
- xyuv.i_frame += xyuv.i_frames / 20;
392
-
393
- if( xyuv.i_frame >= xyuv.i_frames )
394
- xyuv_count_frames( &xyuv );
395
-
396
- if( xyuv.i_frame > xyuv.i_frames )
397
- xyuv.i_frame = xyuv.i_frames;
398
- b_refresh = 1;
399
- break;
400
-
401
- case SDLK_DOWN:
402
- xyuv.i_frame -= xyuv.i_frames / 20;
403
- if( xyuv.i_frame < 1 )
404
- xyuv.i_frame = 1;
405
- b_refresh = 1;
406
- break;
407
-
408
- case SDLK_PAGEUP:
409
- xyuv.i_frame += xyuv.i_frames / 10;
410
-
411
- if( xyuv.i_frame >= xyuv.i_frames )
412
- xyuv_count_frames( &xyuv );
413
-
414
- if( xyuv.i_frame > xyuv.i_frames )
415
- xyuv.i_frame = xyuv.i_frames;
416
- b_refresh = 1;
417
- break;
418
-
419
- case SDLK_PAGEDOWN:
420
- xyuv.i_frame -= xyuv.i_frames / 10;
421
- if( xyuv.i_frame < 1 )
422
- xyuv.i_frame = 1;
423
- b_refresh = 1;
424
- break;
425
-
426
- default:
427
- break;
428
- }
429
- break;
430
- case SDL_VIDEORESIZE:
431
- xyuv.i_display_width = event.resize.w;
432
- xyuv.i_display_height = event.resize.h;
433
- xyuv.screen = SDL_SetVideoMode( xyuv.i_display_width, xyuv.i_display_height, 0,
434
- SDL_HWSURFACE|SDL_RESIZABLE|
435
- SDL_ASYNCBLIT|SDL_HWACCEL );
436
- xyuv_display( &xyuv, xyuv.i_frame );
437
- break;
438
-
439
- default:
440
- break;
441
- }
442
- }
443
- if( b_refresh ) {
444
- xyuv.b_pause = 1;
445
- xyuv_display( &xyuv, xyuv.i_frame );
446
- }
447
- /* wait */
448
- i_wait = 1000 / xyuv.f_fps - ( SDL_GetTicks() - i_start);
449
- if( i_wait < 0 )
450
- break;
451
- else if( i_wait > 200 )
452
- SDL_Delay( 200 );
453
- else {
454
- SDL_Delay( i_wait );
455
- break;
456
- }
457
- }
458
- if( !xyuv.b_pause ) {
459
- /* next frame */
460
- if( xyuv.i_frame == xyuv.i_frames )
461
- xyuv.b_pause = 1;
462
- else if( xyuv.i_frame < xyuv.i_frames )
463
- xyuv.i_frame++;
464
- }
465
- }
466
-
467
-
468
- return 0;
469
-
470
-err_missing_arg:
471
- fprintf( stderr, "missing arg for option=%s\n", argv[i] );
472
- return -1;
473
-}
474
-
475
-
476
-static void xyuv_display( xyuv_t *xyuv, int i_frame )
477
-{
478
- SDL_Rect rect;
479
- int i_picture = 0;
480
- int i;
481
-
482
- if( i_frame > xyuv->i_frames )
483
- return;
484
-
485
- xyuv->i_frame = i_frame;
486
-
487
- /* Load and copy pictue data */
488
- for( i = 0; i < xyuv->i_yuv; i++ ) {
489
- int i_plane;
490
-
491
- fprintf( stderr, "yuv[%d] %d/%d\n", i, i_frame, xyuv->yuv[i].i_frames );
492
- if( i_frame - 1 >= xyuv->yuv[i].i_frames ) {
493
- xyuv_count_frames( xyuv );
494
- if( i_frame - 1 >= xyuv->yuv[i].i_frames )
495
- continue;
496
- }
497
- i_picture++;
498
-
499
- fseek( xyuv->yuv[i].f, (xyuv->i_frame-1) * xyuv->i_frame_size, SEEK_SET );
500
- fread( xyuv->pic, xyuv->i_frame_size, 1, xyuv->yuv[i].f );
501
-
502
- SDL_LockYUVOverlay( xyuv->overlay );
503
-
504
- if( xyuv->b_diff || xyuv->b_split ) {
505
- /* Reset UV */
506
- for( i_plane = 1; i_plane < 3; i_plane++ ) {
507
- memset( xyuv->overlay->pixels[i_plane], 128, xyuv->overlay->pitches[i_plane] * xyuv->overlay->h / 2 );
508
- }
509
- /* Show diff in Y plane of overlay */
510
-
511
- for( i_plane = 0; i_plane < 3; i_plane++ ) {
512
- int div = i_plane == 0 ? 1 : 2;
513
- uint8_t *src = xyuv->pic;
514
- uint8_t *dst = xyuv->overlay->pixels[0] +
515
- (xyuv->yuv[i].x + xyuv->yuv[i].y * xyuv->overlay->pitches[0] );
516
- int j;
517
- if( i_plane == 1 ) {
518
- src += 5*xyuv->i_width * xyuv->i_height/4;
519
- dst += xyuv->i_width;
520
- } else if( i_plane == 2 ) {
521
- src += xyuv->i_width * xyuv->i_height;
522
- dst += xyuv->i_width + xyuv->i_height / 2 * xyuv->overlay->pitches[0];
523
- }
524
-
525
- for( j = 0; j < xyuv->i_height / div; j++ ) {
526
- if( i_picture == 1 || xyuv->b_split ) {
527
- memcpy( dst, src, xyuv->i_width / div );
528
- } else {
529
- int k;
530
- for( k = 0; k < xyuv->i_width / div; k++ ) {
531
- dst[k] = abs( dst[k] - src[k]);
532
- }
533
- }
534
- src += xyuv->i_width / div;
535
- dst += xyuv->overlay->pitches[0];
536
- }
537
- }
538
- } else {
539
- for( i_plane = 0; i_plane < 3; i_plane++ ) {
540
- int div = i_plane == 0 ? 1 : 2;
541
- uint8_t *src = xyuv->pic;
542
- uint8_t *dst = xyuv->overlay->pixels[i_plane] +
543
- ((xyuv->yuv[i].x + xyuv->yuv[i].y * xyuv->overlay->pitches[i_plane] ) / div );
544
- int w = xyuv->i_width / div;
545
- int j;
546
-
547
- if( i_plane == 1 ) {
548
- src += 5*xyuv->i_width * xyuv->i_height/4;
549
- } else if( i_plane == 2 ) {
550
- src += xyuv->i_width * xyuv->i_height;
551
- }
552
- if( xyuv->i_join > 0 ) {
553
- if( i_picture > 1 ) {
554
- src += xyuv->i_join / div;
555
- dst += xyuv->i_join / div;
556
- w = (xyuv->i_width - xyuv->i_join) /div;
557
- } else {
558
- w = xyuv->i_join / div;
559
- }
560
- }
561
-
562
- for( j = 0; j < xyuv->i_height / div; j++ ) {
563
- memcpy( dst, src, w );
564
- src += xyuv->i_width / div;
565
- dst += xyuv->overlay->pitches[i_plane];
566
- }
567
- }
568
- }
569
-
570
- SDL_UnlockYUVOverlay( xyuv->overlay );
571
- }
572
-
573
- if( xyuv->f_y != 0.0 ) {
574
- uint8_t *pix = xyuv->overlay->pixels[0];
575
- int j;
576
-
577
- for( j = 0; j < xyuv->i_sdl_height; j++ ) {
578
- int k;
579
- for( k = 0; k < xyuv->i_sdl_width; k++ ) {
580
- int v= pix[k] * xyuv->f_y;
581
- if( v > 255 )
582
- pix[k] = 255;
583
- else if( v < 0 )
584
- pix[k] = 0;
585
- else
586
- pix[k] = v;
587
- }
588
- pix += xyuv->overlay->pitches[0];
589
- }
590
- }
591
- if( xyuv->b_grid ) {
592
- int x, y;
593
-
594
- for( y = 0; y < xyuv->i_sdl_height; y += 4 ) {
595
- uint8_t *p = xyuv->overlay->pixels[0] + y * xyuv->overlay->pitches[0];
596
- for( x = 0; x < xyuv->i_sdl_width; x += 4 ) {
597
- if( x%16== 0 || y%16 == 0 )
598
- p[x] = 0;
599
- }
600
- }
601
- }
602
-
603
- /* Update display */
604
- rect.x = 0;
605
- rect.y = 0;
606
- rect.w = xyuv->i_display_width;
607
- rect.h = xyuv->i_display_height;
608
- SDL_DisplayYUVOverlay( xyuv->overlay, &rect );
609
-
610
- /* Display title */
611
- if( xyuv->title )
612
- free( xyuv->title );
613
- asprintf( &xyuv->title, SDL_TITLE, xyuv->yuv[0].name, xyuv->i_frame, xyuv->i_frames, xyuv->f_fps );
614
- SDL_WM_SetCaption( xyuv->title, "" );
615
-}
616
-
617
-static void xyuv_count_frames( xyuv_t *xyuv )
618
-{
619
- int i;
620
-
621
- xyuv->i_frames = 0;
622
- if( xyuv->i_frame_size <= 0 )
623
- return;
624
-
625
- for( i = 0; i < xyuv->i_yuv; i++ ) {
626
- /* Beurk but avoid using fstat */
627
- fseek( xyuv->yuv[i].f, 0, SEEK_END );
628
-
629
- xyuv->yuv[i].i_frames = ftell( xyuv->yuv[i].f ) / xyuv->i_frame_size;
630
- fprintf( stderr, "count (%d) -> %d\n", i, xyuv->yuv[i].i_frames );
631
-
632
- fseek( xyuv->yuv[i].f, 0, SEEK_SET );
633
-
634
- if( xyuv->i_frames < xyuv->yuv[i].i_frames )
635
- xyuv->i_frames = xyuv->yuv[i].i_frames;
636
- }
637
-}
638
-
639
-static inline int ssd( int a ) { return a*a; }
640
-
641
-static void xyuv_detect( int *pi_width, int *pi_height )
642
-{
643
- static const int pi_size[][2] = {
644
- {128, 96},
645
- {160,120},
646
- {320,244},
647
- {320,288},
648
-
649
- /* PAL */
650
- {176,144}, // QCIF
651
- {352,288}, // CIF
652
- {352,576}, // 1/2 D1
653
- {480,576}, // 2/3 D1
654
- {544,576},
655
- {640,576}, // VGA
656
- {704,576}, // D1
657
- {720,576}, // D1
658
-
659
- /* NTSC */
660
- {176,112}, // QCIF
661
- {320,240}, // MPEG I
662
- {352,240}, // CIF
663
- {352,480}, // 1/2 D1
664
- {480,480}, // 2/3 D1
665
- {544,480},
666
- {640,480}, // VGA
667
- {704,480}, // D1
668
- {720,480}, // D1
669
-
670
- /* */
671
- {0,0},
672
- };
673
- int i_max;
674
- int i_size_max;
675
- uint8_t *pic;
676
- int i;
677
-
678
- *pi_width = 0;
679
- *pi_height = 0;
680
-
681
- /* Compute size max */
682
- for( i_max = 0, i_size_max = 0;
683
- pi_size[i_max][0] != 0 && pi_size[i_max][1] != 0; i_max++ ) {
684
- int s = pi_size[i_max][0] * pi_size[i_max][1] * 3 / 2;
685
-
686
- if( i_size_max < s )
687
- i_size_max = s;
688
- }
689
-
690
- /* Temporary buffer */
691
- i_size_max *= 3;
692
- pic = malloc( i_size_max );
693
-
694
- fprintf( stderr, "guessing size for:\n" );
695
- for( i = 0; i < xyuv.i_yuv; i++ ) {
696
- int j;
697
- int i_read;
698
- double dbest = 255*255;
699
- int i_best = i_max;
700
- int64_t t;
701
-
702
- fprintf( stderr, " - %s\n", xyuv.yuv[i].name );
703
-
704
- i_read = fread( pic, 1, i_size_max, xyuv.yuv[i].f );
705
- if( i_read < 0 )
706
- continue;
707
-
708
- /* Check if file size is at least compatible with one format
709
- * (if not, ignore file size)*/
710
- fseek( xyuv.yuv[i].f, 0, SEEK_END );
711
- t = ftell( xyuv.yuv[i].f );
712
- fseek( xyuv.yuv[i].f, 0, SEEK_SET );
713
- for( j = 0; j < i_max; j++ ) {
714
- const int w = pi_size[j][0];
715
- const int h = pi_size[j][1];
716
- const int s = w * h * 3 / 2;
717
-
718
- if( t % s == 0 )
719
- break;
720
- }
721
- if( j == i_max )
722
- t = 0;
723
-
724
-
725
- /* Try all size */
726
- for( j = 0; j < i_max; j++ ) {
727
- const int w = pi_size[j][0];
728
- const int h = pi_size[j][1];
729
- const int s = w * h * 3 / 2;
730
- double dd;
731
-
732
- int x, y, n;
733
- int64_t d;
734
-
735
- /* To small */
736
- if( i_read < 3*s )
737
- continue;
738
- /* Check file size */
739
- if( ( t > 0 && (t % s) != 0 ) ) {
740
- fprintf( stderr, " * %dx%d ignored (incompatible file size)\n", w, h );
741
- continue;
742
- }
743
-
744
-
745
- /* We do a simple ssd between 2 consecutives lines */
746
- d = 0;
747
- for( n = 0; n < 3; n++ ) {
748
- uint8_t *p;
749
-
750
- /* Y */
751
- p = &pic[n*s];
752
- for( y = 0; y < h-1; y++ ) {
753
- for( x = 0; x < w; x++ )
754
- d += ssd( p[x] - p[w+x] );
755
- p += w;
756
- }
757
-
758
- /* U */
759
- p = &pic[n*s+w*h];
760
- for( y = 0; y < h/2-1; y++ ) {
761
- for( x = 0; x < w/2; x++ )
762
- d += ssd( p[x] - p[(w/2)+x] );
763
- p += w/2;
764
- }
765
-
766
- /* V */
767
- p = &pic[n*s+5*w*h/4];
768
- for( y = 0; y < h/2-1; y++ ) {
769
- for( x = 0; x < w/2; x++ )
770
- d += ssd( p[x] - p[(w/2)+x] );
771
- p += w/2;
772
- }
773
- }
774
- dd = (double)d / (3*w*h*3/2);
775
- fprintf( stderr, " * %dx%d d=%f\n", w, h, dd );
776
-
777
- if( dd < dbest ) {
778
- i_best = j;
779
- dbest = dd;
780
- }
781
- }
782
-
783
- fseek( xyuv.yuv[i].f, 0, SEEK_SET );
784
-
785
- if( i_best < i_max ) {
786
- fprintf( stderr, " -> %dx%d\n", pi_size[i_best][0], pi_size[i_best][1] );
787
- *pi_width = pi_size[i_best][0];
788
- *pi_height = pi_size[i_best][1];
789
- }
790
- }
791
-
792
- free( pic );
793
-}
794
x264-snapshot-20130723-2245.tar.bz2/AUTHORS -> x264-snapshot-20140321-2245.tar.bz2/AUTHORS
Changed
20
1
2
S: Brittany, France
3
4
N: Henrik Gramner
5
-E: hengar-6 AT student DOT ltu DOT se
6
-D: 4:2:2 chroma subsampling, x86 asm
7
+E: henrik AT gramner DOT com
8
+D: 4:2:2 chroma subsampling, x86 asm, Windows improvements, bugfixes
9
S: Sweden
10
11
N: Jason Garrett-Glaser
12
13
E: radoslaw AT syskin DOT cjb DOT net
14
D: Cached motion compensation
15
16
-N: Tuukka Toivonen
17
-E: tuukkat AT ee DOT oulu DOT fi
18
-D: Visualization
19
-
20
x264-snapshot-20130723-2245.tar.bz2/Makefile -> x264-snapshot-20140321-2245.tar.bz2/Makefile
Changed
56
1
2
SRCCLI += output/mp4.c
3
endif
4
5
-# Visualization sources
6
-ifneq ($(findstring HAVE_VISUALIZE 1, $(CONFIG)),)
7
-SRCS += common/visualize.c common/display-x11.c
8
+ifneq ($(findstring HAVE_LSMASH 1, $(CONFIG)),)
9
+SRCCLI += output/mp4_lsmash.c
10
endif
11
12
# MMX/SSE optims
13
14
rm -f config.mak x264_config.h config.h config.log x264.pc x264.def
15
16
install-cli: cli
17
- install -d $(DESTDIR)$(bindir)
18
- install x264$(EXE) $(DESTDIR)$(bindir)
19
+ $(INSTALL) -d $(DESTDIR)$(bindir)
20
+ $(INSTALL) x264$(EXE) $(DESTDIR)$(bindir)
21
22
install-lib-dev:
23
- install -d $(DESTDIR)$(includedir)
24
- install -d $(DESTDIR)$(libdir)
25
- install -d $(DESTDIR)$(libdir)/pkgconfig
26
- install -m 644 $(SRCPATH)/x264.h $(DESTDIR)$(includedir)
27
- install -m 644 x264_config.h $(DESTDIR)$(includedir)
28
- install -m 644 x264.pc $(DESTDIR)$(libdir)/pkgconfig
29
+ $(INSTALL) -d $(DESTDIR)$(includedir)
30
+ $(INSTALL) -d $(DESTDIR)$(libdir)
31
+ $(INSTALL) -d $(DESTDIR)$(libdir)/pkgconfig
32
+ $(INSTALL) -m 644 $(SRCPATH)/x264.h $(DESTDIR)$(includedir)
33
+ $(INSTALL) -m 644 x264_config.h $(DESTDIR)$(includedir)
34
+ $(INSTALL) -m 644 x264.pc $(DESTDIR)$(libdir)/pkgconfig
35
36
install-lib-static: lib-static install-lib-dev
37
- install -m 644 $(LIBX264) $(DESTDIR)$(libdir)
38
+ $(INSTALL) -m 644 $(LIBX264) $(DESTDIR)$(libdir)
39
$(if $(RANLIB), $(RANLIB) $(DESTDIR)$(libdir)/$(LIBX264))
40
41
install-lib-shared: lib-shared install-lib-dev
42
ifneq ($(IMPLIBNAME),)
43
- install -d $(DESTDIR)$(bindir)
44
- install -m 755 $(SONAME) $(DESTDIR)$(bindir)
45
- install -m 644 $(IMPLIBNAME) $(DESTDIR)$(libdir)
46
+ $(INSTALL) -d $(DESTDIR)$(bindir)
47
+ $(INSTALL) -m 755 $(SONAME) $(DESTDIR)$(bindir)
48
+ $(INSTALL) -m 644 $(IMPLIBNAME) $(DESTDIR)$(libdir)
49
else ifneq ($(SONAME),)
50
ln -f -s $(SONAME) $(DESTDIR)$(libdir)/libx264.$(SOSUFFIX)
51
- install -m 755 $(SONAME) $(DESTDIR)$(libdir)
52
+ $(INSTALL) -m 755 $(SONAME) $(DESTDIR)$(libdir)
53
endif
54
55
uninstall:
56
x264-snapshot-20130723-2245.tar.bz2/common/arm/asm.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/asm.S
Changed
19
1
2
/*****************************************************************************
3
* asm.S: arm utility macros
4
*****************************************************************************
5
- * Copyright (C) 2008-2013 x264 project
6
+ * Copyright (C) 2008-2014 x264 project
7
*
8
* Authors: Mans Rullgard <mans@mansr.com>
9
* David Conrad <lessen42@gmail.com>
10
11
12
#include "config.h"
13
14
+.syntax unified
15
+
16
#ifdef PREFIX
17
# define EXTERN_ASM _
18
#else
19
x264-snapshot-20130723-2245.tar.bz2/common/arm/cpu-a.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/cpu-a.S
Changed
28
1
2
/*****************************************************************************
3
* cpu-a.S: arm cpu detection
4
*****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
*
8
* Authors: David Conrad <lessen42@gmail.com>
9
*
10
11
#include "asm.S"
12
13
.fpu neon
14
-.align
15
+.align 2
16
17
// done in gas because .fpu neon overrides the refusal to assemble
18
// instructions the selected -march/-mcpu doesn't support
19
20
sub r2, r2, r1
21
cmpgt r2, #30 << 3 // assume context switch if it took over 30 cycles
22
addle r3, r3, r2
23
- subles ip, ip, #1
24
+ subsle ip, ip, #1
25
bgt average_loop
26
27
// disable counters if we enabled them
28
x264-snapshot-20130723-2245.tar.bz2/common/arm/dct-a.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/dct-a.S
Changed
10
1
2
/****************************************************************************
3
* dct-a.S: arm transform and zigzag
4
*****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
*
8
* Authors: David Conrad <lessen42@gmail.com>
9
*
10
x264-snapshot-20130723-2245.tar.bz2/common/arm/dct.h -> x264-snapshot-20140321-2245.tar.bz2/common/arm/dct.h
Changed
10
1
2
/*****************************************************************************
3
* dct.h: arm transform and zigzag
4
*****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
*
8
* Authors: David Conrad <lessen42@gmail.com>
9
*
10
x264-snapshot-20130723-2245.tar.bz2/common/arm/deblock-a.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/deblock-a.S
Changed
10
1
2
/*****************************************************************************
3
* deblock.S: arm deblocking
4
*****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
*
8
* Authors: Mans Rullgard <mans@mansr.com>
9
*
10
x264-snapshot-20130723-2245.tar.bz2/common/arm/mc-a.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/mc-a.S
Changed
50
1
2
/*****************************************************************************
3
* mc.S: arm motion compensation
4
*****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
*
8
* Authors: David Conrad <lessen42@gmail.com>
9
* Mans Rullgard <mans@mansr.com>
10
11
ldr ip, [sp, #8]
12
push {r4-r6,lr}
13
cmp ip, #32
14
- ldrd r4, [sp, #16]
15
+ ldrd r4, r5, [sp, #16]
16
mov lr, #\h
17
beq x264_pixel_avg_w\w\()_neon
18
rsbs r6, ip, #64
19
20
.ifc \type, full
21
ldr lr, [r4, #32] // denom
22
.endif
23
- ldrd r4, [r4, #32+4] // scale, offset
24
+ ldrd r4, r5, [r4, #32+4] // scale, offset
25
vdup.16 q0, r4
26
vdup.16 q1, r5
27
.ifc \type, full
28
29
function x264_mc_chroma_neon
30
push {r4-r8, lr}
31
vpush {d8-d11}
32
- ldrd r4, [sp, #56]
33
- ldrd r6, [sp, #64]
34
+ ldrd r4, r5, [sp, #56]
35
+ ldrd r6, r7, [sp, #64]
36
37
asr lr, r6, #3
38
mul lr, r4, lr
39
40
function x264_frame_init_lowres_core_neon
41
push {r4-r10,lr}
42
vpush {d8-d15}
43
- ldrd r4, [sp, #96]
44
- ldrd r6, [sp, #104]
45
+ ldrd r4, r5, [sp, #96]
46
+ ldrd r6, r7, [sp, #104]
47
ldr lr, [sp, #112]
48
sub r10, r6, r7 // dst_stride - width
49
and r10, r10, #~15
50
x264-snapshot-20130723-2245.tar.bz2/common/arm/mc-c.c -> x264-snapshot-20140321-2245.tar.bz2/common/arm/mc-c.c
Changed
10
1
2
/*****************************************************************************
3
* mc-c.c: arm motion compensation
4
*****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
*
8
* Authors: David Conrad <lessen42@gmail.com>
9
*
10
x264-snapshot-20130723-2245.tar.bz2/common/arm/mc.h -> x264-snapshot-20140321-2245.tar.bz2/common/arm/mc.h
Changed
10
1
2
/*****************************************************************************
3
* mc.h: arm motion compensation
4
*****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
*
8
* Authors: David Conrad <lessen42@gmail.com>
9
*
10
x264-snapshot-20130723-2245.tar.bz2/common/arm/pixel-a.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/pixel-a.S
Changed
119
1
2
/*****************************************************************************
3
* pixel.S: arm pixel metrics
4
*****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
*
8
* Authors: David Conrad <lessen42@gmail.com>
9
*
10
11
function x264_pixel_sad_x\x\()_\w\()x\h\()_neon
12
push {r6-r7,lr}
13
.if \x == 3
14
- ldrd r6, [sp, #12]
15
+ ldrd r6, r7, [sp, #12]
16
.else
17
- ldrd r6, [sp, #16]
18
+ ldrd r6, r7, [sp, #16]
19
ldr r12, [sp, #12]
20
.endif
21
mov lr, #FENC_STRIDE
22
23
b x264_var_end
24
.endfunc
25
26
+function x264_pixel_var_8x16_neon
27
+ vld1.64 {d16}, [r0,:64], r1
28
+ vld1.64 {d18}, [r0,:64], r1
29
+ vmull.u8 q1, d16, d16
30
+ vmovl.u8 q0, d16
31
+ vld1.64 {d20}, [r0,:64], r1
32
+ vmull.u8 q2, d18, d18
33
+ vaddw.u8 q0, q0, d18
34
+
35
+ mov ip, #12
36
+
37
+ vld1.64 {d22}, [r0,:64], r1
38
+ VAR_SQR_SUM q1, q1, q14, d20, vpaddl.u16
39
+ vld1.64 {d16}, [r0,:64], r1
40
+ VAR_SQR_SUM q2, q2, q15, d22, vpaddl.u16
41
+
42
+1: subs ip, ip, #4
43
+ vld1.64 {d18}, [r0,:64], r1
44
+ VAR_SQR_SUM q1, q14, q12, d16
45
+ vld1.64 {d20}, [r0,:64], r1
46
+ VAR_SQR_SUM q2, q15, q13, d18
47
+ vld1.64 {d22}, [r0,:64], r1
48
+ VAR_SQR_SUM q1, q12, q14, d20
49
+ beq 2f
50
+ vld1.64 {d16}, [r0,:64], r1
51
+ VAR_SQR_SUM q2, q13, q15, d22
52
+ b 1b
53
+2:
54
+ VAR_SQR_SUM q2, q13, q15, d22
55
+ b x264_var_end
56
+.endfunc
57
+
58
function x264_pixel_var_16x16_neon
59
vld1.64 {d16-d17}, [r0,:128], r1
60
vmull.u8 q12, d16, d16
61
62
vadd.s32 d1, d2, d3
63
vpadd.s32 d0, d0, d1
64
65
- vmov.32 r0, r1, d0
66
+ vmov r0, r1, d0
67
vst1.32 {d0[1]}, [ip,:32]
68
mul r0, r0, r0
69
sub r0, r1, r0, lsr #6
70
bx lr
71
.endfunc
72
73
+function x264_pixel_var2_8x16_neon
74
+ vld1.64 {d16}, [r0,:64], r1
75
+ vld1.64 {d17}, [r2,:64], r3
76
+ vld1.64 {d18}, [r0,:64], r1
77
+ vld1.64 {d19}, [r2,:64], r3
78
+ vsubl.u8 q10, d16, d17
79
+ vsubl.u8 q11, d18, d19
80
+ SQR_ACC q1, d20, d21, vmull.s16
81
+ vld1.64 {d16}, [r0,:64], r1
82
+ vadd.s16 q0, q10, q11
83
+ vld1.64 {d17}, [r2,:64], r3
84
+ SQR_ACC q2, d22, d23, vmull.s16
85
+ mov ip, #14
86
+1: subs ip, ip, #2
87
+ vld1.64 {d18}, [r0,:64], r1
88
+ vsubl.u8 q10, d16, d17
89
+ vld1.64 {d19}, [r2,:64], r3
90
+ vadd.s16 q0, q0, q10
91
+ SQR_ACC q1, d20, d21
92
+ vsubl.u8 q11, d18, d19
93
+ beq 2f
94
+ vld1.64 {d16}, [r0,:64], r1
95
+ vadd.s16 q0, q0, q11
96
+ vld1.64 {d17}, [r2,:64], r3
97
+ SQR_ACC q2, d22, d23
98
+ b 1b
99
+2:
100
+ vadd.s16 q0, q0, q11
101
+ SQR_ACC q2, d22, d23
102
+
103
+ ldr ip, [sp]
104
+ vadd.s16 d0, d0, d1
105
+ vadd.s32 q1, q1, q2
106
+ vpaddl.s16 d0, d0
107
+ vadd.s32 d1, d2, d3
108
+ vpadd.s32 d0, d0, d1
109
+
110
+ vmov r0, r1, d0
111
+ vst1.32 {d0[1]}, [ip,:32]
112
+ mul r0, r0, r0
113
+ sub r0, r1, r0, lsr #7
114
+ bx lr
115
+.endfunc
116
117
.macro LOAD_DIFF_8x4 q0 q1 q2 q3
118
vld1.32 {d1}, [r2], r3
119
x264-snapshot-20130723-2245.tar.bz2/common/arm/pixel.h -> x264-snapshot-20140321-2245.tar.bz2/common/arm/pixel.h
Changed
22
1
2
/*****************************************************************************
3
* pixel.h: arm pixel metrics
4
*****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
*
8
* Authors: David Conrad <lessen42@gmail.com>
9
*
10
11
int x264_pixel_sa8d_16x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t );
12
13
uint64_t x264_pixel_var_8x8_neon ( uint8_t *, intptr_t );
14
+uint64_t x264_pixel_var_8x16_neon ( uint8_t *, intptr_t );
15
uint64_t x264_pixel_var_16x16_neon( uint8_t *, intptr_t );
16
-int x264_pixel_var2_8x8_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int * );
17
+int x264_pixel_var2_8x8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, int * );
18
+int x264_pixel_var2_8x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int * );
19
20
uint64_t x264_pixel_hadamard_ac_8x8_neon ( uint8_t *, intptr_t );
21
uint64_t x264_pixel_hadamard_ac_8x16_neon ( uint8_t *, intptr_t );
22
x264-snapshot-20130723-2245.tar.bz2/common/arm/predict-a.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/predict-a.S
Changed
22
1
2
/*****************************************************************************
3
* predict.S: arm intra prediction
4
*****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
*
8
* Authors: David Conrad <lessen42@gmail.com>
9
* Mans Rullgard <mans@mansr.com>
10
11
12
function x264_predict_8x8_dc_neon
13
mov ip, #0
14
- ldrd r2, [r1, #8]
15
+ ldrd r2, r3, [r1, #8]
16
push {r4-r5,lr}
17
- ldrd r4, [r1, #16]
18
+ ldrd r4, r5, [r1, #16]
19
lsl r3, r3, #8
20
ldrb lr, [r1, #7]
21
usad8 r2, r2, ip
22
x264-snapshot-20130723-2245.tar.bz2/common/arm/predict-c.c -> x264-snapshot-20140321-2245.tar.bz2/common/arm/predict-c.c
Changed
10
1
2
/*****************************************************************************
3
* predict.c: arm intra prediction
4
*****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
*
8
* Authors: David Conrad <lessen42@gmail.com>
9
*
10
x264-snapshot-20130723-2245.tar.bz2/common/arm/predict.h -> x264-snapshot-20140321-2245.tar.bz2/common/arm/predict.h
Changed
27
1
2
/*****************************************************************************
3
* predict.h: arm intra prediction
4
*****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
*
8
* Authors: David Conrad <lessen42@gmail.com>
9
*
10
11
#ifndef X264_ARM_PREDICT_H
12
#define X264_ARM_PREDICT_H
13
14
+void x264_predict_8x8_v_neon( pixel *src, pixel edge[36] );
15
+void x264_predict_8x8_h_neon( pixel *src, pixel edge[36] );
16
+void x264_predict_8x8_dc_neon( pixel *src, pixel edge[36] );
17
+void x264_predict_8x8c_dc_neon( pixel *src );
18
+void x264_predict_8x8c_h_neon( pixel *src );
19
+void x264_predict_8x8c_v_neon( pixel *src );
20
+void x264_predict_16x16_v_neon( pixel *src );
21
+void x264_predict_16x16_h_neon( pixel *src );
22
+void x264_predict_16x16_dc_neon( pixel *src );
23
+
24
void x264_predict_4x4_init_arm( int cpu, x264_predict_t pf[12] );
25
void x264_predict_8x8_init_arm( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter );
26
void x264_predict_8x8c_init_arm( int cpu, x264_predict_t pf[7] );
27
x264-snapshot-20130723-2245.tar.bz2/common/arm/quant-a.S -> x264-snapshot-20140321-2245.tar.bz2/common/arm/quant-a.S
Changed
37
1
2
/****************************************************************************
3
* quant.S: arm quantization and level-run
4
*****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
*
8
* Authors: David Conrad <lessen42@gmail.com>
9
*
10
11
12
// int coeff_last( int16_t *l )
13
function x264_coeff_last4_arm
14
- ldrd r2, [r0]
15
+ ldrd r2, r3, [r0]
16
subs r0, r3, #0
17
movne r0, #2
18
movne r2, r3
19
20
21
subs r1, ip, r1, lsr #2
22
addge r0, r1, #\size - 8
23
- sublts r0, r3, r0, lsr #2
24
+ subslt r0, r3, r0, lsr #2
25
movlt r0, #0
26
bx lr
27
.endfunc
28
29
30
subs r1, ip, r1
31
addge r0, r1, #32
32
- sublts r0, ip, r0
33
+ subslt r0, ip, r0
34
movlt r0, #0
35
bx lr
36
.endfunc
37
x264-snapshot-20130723-2245.tar.bz2/common/arm/quant.h -> x264-snapshot-20140321-2245.tar.bz2/common/arm/quant.h
Changed
10
1
2
/*****************************************************************************
3
* quant.h: arm quantization and level-run
4
*****************************************************************************
5
- * Copyright (C) 2005-2013 x264 project
6
+ * Copyright (C) 2005-2014 x264 project
7
*
8
* Authors: David Conrad <lessen42@gmail.com>
9
*
10
x264-snapshot-20130723-2245.tar.bz2/common/bitstream.c -> x264-snapshot-20140321-2245.tar.bz2/common/bitstream.c
Changed
10
1
2
/*****************************************************************************
3
* bitstream.c: bitstream writing
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
* Jason Garrett-Glaser <darkshikari@gmail.com>
10
x264-snapshot-20130723-2245.tar.bz2/common/bitstream.h -> x264-snapshot-20140321-2245.tar.bz2/common/bitstream.h
Changed
10
1
2
/*****************************************************************************
3
* bitstream.h: bitstream writing
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Loren Merritt <lorenm@u.washington.edu>
9
* Jason Garrett-Glaser <darkshikari@gmail.com>
10
x264-snapshot-20130723-2245.tar.bz2/common/cabac.c -> x264-snapshot-20140321-2245.tar.bz2/common/cabac.c
Changed
10
1
2
/*****************************************************************************
3
* cabac.c: arithmetic coder
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
* Loren Merritt <lorenm@u.washington.edu>
10
x264-snapshot-20130723-2245.tar.bz2/common/cabac.h -> x264-snapshot-20140321-2245.tar.bz2/common/cabac.h
Changed
10
1
2
/*****************************************************************************
3
* cabac.h: arithmetic coder
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Loren Merritt <lorenm@u.washington.edu>
9
* Laurent Aimar <fenrir@via.ecp.fr>
10
x264-snapshot-20130723-2245.tar.bz2/common/common.c -> x264-snapshot-20140321-2245.tar.bz2/common/common.c
Changed
121
1
2
/*****************************************************************************
3
* common.c: misc common functions
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Loren Merritt <lorenm@u.washington.edu>
9
* Laurent Aimar <fenrir@via.ecp.fr>
10
11
#if HAVE_MALLOC_H
12
#include <malloc.h>
13
#endif
14
+#if HAVE_THP
15
+#include <sys/mman.h>
16
+#endif
17
18
const int x264_bit_depth = BIT_DEPTH;
19
20
21
param->analyse.i_luma_deadzone[1] = 6;
22
param->rc.f_qcompress = 0.8;
23
}
24
- else if( !strncasecmp( s, "stillimage", 5 ) )
25
+ else if( !strncasecmp( s, "stillimage", 10 ) )
26
{
27
if( psy_tuning_used++ ) goto psy_failure;
28
param->i_deblocking_filter_alphac0 = -3;
29
30
}
31
OPT("bluray-compat")
32
p->b_bluray_compat = atobool(value);
33
+ OPT("avcintra-class")
34
+ p->i_avcintra_class = atoi(value);
35
OPT("sar")
36
{
37
b_error = ( 2 != sscanf( value, "%d:%d", &p->vui.i_sar_width, &p->vui.i_sar_height ) &&
38
39
}
40
OPT("log")
41
p->i_log_level = atoi(value);
42
-#if HAVE_VISUALIZE
43
- OPT("visualize")
44
- p->b_visualize = atobool(value);
45
-#endif
46
OPT("dump-yuv")
47
p->psz_dump_yuv = strdup(value);
48
OPT2("analyse", "partitions")
49
50
p->b_vfr_input = !atobool(value);
51
OPT("nal-hrd")
52
b_error |= parse_enum( value, x264_nal_hrd_names, &p->i_nal_hrd );
53
+ OPT("filler")
54
+ p->rc.b_filler = atobool(value);
55
OPT("pic-struct")
56
p->b_pic_struct = atobool(value);
57
OPT("fake-interlaced")
58
59
break;
60
}
61
fprintf( stderr, "x264 [%s]: ", psz_prefix );
62
- vfprintf( stderr, psz_fmt, arg );
63
+ x264_vfprintf( stderr, psz_fmt, arg );
64
}
65
66
/****************************************************************************
67
68
};
69
70
int csp = i_csp & X264_CSP_MASK;
71
- if( csp <= X264_CSP_NONE || csp >= X264_CSP_MAX )
72
+ if( csp <= X264_CSP_NONE || csp >= X264_CSP_MAX || csp == X264_CSP_V210 )
73
return -1;
74
x264_picture_init( pic );
75
pic->img.i_csp = i_csp;
76
77
{
78
uint8_t *align_buf = NULL;
79
#if HAVE_MALLOC_H
80
- align_buf = memalign( NATIVE_ALIGN, i_size );
81
+#if HAVE_THP
82
+#define HUGE_PAGE_SIZE 2*1024*1024
83
+#define HUGE_PAGE_THRESHOLD HUGE_PAGE_SIZE*7/8 /* FIXME: Is this optimal? */
84
+ /* Attempt to allocate huge pages to reduce TLB misses. */
85
+ if( i_size >= HUGE_PAGE_THRESHOLD )
86
+ {
87
+ align_buf = memalign( HUGE_PAGE_SIZE, i_size );
88
+ if( align_buf )
89
+ {
90
+ /* Round up to the next huge page boundary if we are close enough. */
91
+ size_t madv_size = (i_size + HUGE_PAGE_SIZE - HUGE_PAGE_THRESHOLD) & ~(HUGE_PAGE_SIZE-1);
92
+ madvise( align_buf, madv_size, MADV_HUGEPAGE );
93
+ }
94
+ }
95
+ else
96
+#undef HUGE_PAGE_SIZE
97
+#undef HUGE_PAGE_THRESHOLD
98
+#endif
99
+ align_buf = memalign( NATIVE_ALIGN, i_size );
100
#else
101
uint8_t *buf = malloc( i_size + (NATIVE_ALIGN-1) + sizeof(void **) );
102
if( buf )
103
104
int b_error = 0;
105
size_t i_size;
106
char *buf;
107
- FILE *fh = fopen( filename, "rb" );
108
+ FILE *fh = x264_fopen( filename, "rb" );
109
if( !fh )
110
return NULL;
111
b_error |= fseek( fh, 0, SEEK_END ) < 0;
112
113
s += sprintf( s, " qp=%d", p->rc.i_qp_constant );
114
115
if( p->rc.i_vbv_buffer_size )
116
- s += sprintf( s, " nal_hrd=%s", x264_nal_hrd_names[p->i_nal_hrd] );
117
+ s += sprintf( s, " nal_hrd=%s filler=%d", x264_nal_hrd_names[p->i_nal_hrd], p->rc.b_filler );
118
if( p->crop_rect.i_left | p->crop_rect.i_top | p->crop_rect.i_right | p->crop_rect.i_bottom )
119
s += sprintf( s, " crop_rect=%u,%u,%u,%u", p->crop_rect.i_left, p->crop_rect.i_top,
120
p->crop_rect.i_right, p->crop_rect.i_bottom );
121
x264-snapshot-20130723-2245.tar.bz2/common/common.h -> x264-snapshot-20140321-2245.tar.bz2/common/common.h
Changed
102
1
2
/*****************************************************************************
3
* common.h: misc common functions
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
* Loren Merritt <lorenm@u.washington.edu>
10
11
memset( var, 0, size );\
12
} while( 0 )
13
14
+/* Macros for merging multiple allocations into a single large malloc, for improved
15
+ * use with huge pages. */
16
+
17
+/* Needs to be enough to contain any set of buffers that use combined allocations */
18
+#define PREALLOC_BUF_SIZE 1024
19
+
20
+#define PREALLOC_INIT\
21
+ int prealloc_idx = 0;\
22
+ size_t prealloc_size = 0;\
23
+ uint8_t **preallocs[PREALLOC_BUF_SIZE];
24
+
25
+#define PREALLOC( var, size )\
26
+do {\
27
+ var = (void*)prealloc_size;\
28
+ preallocs[prealloc_idx++] = (uint8_t**)&var;\
29
+ prealloc_size += ALIGN(size, NATIVE_ALIGN);\
30
+} while(0)
31
+
32
+#define PREALLOC_END( ptr )\
33
+do {\
34
+ CHECKED_MALLOC( ptr, prealloc_size );\
35
+ while( prealloc_idx-- )\
36
+ *preallocs[prealloc_idx] += (intptr_t)ptr;\
37
+} while(0)
38
+
39
#define ARRAY_SIZE(array) (sizeof(array)/sizeof(array[0]))
40
41
#define X264_BFRAME_MAX 16
42
43
44
#define NALU_OVERHEAD 5 // startcode + NAL type costs 5 bytes per frame
45
#define FILLER_OVERHEAD (NALU_OVERHEAD+1)
46
+#define SEI_OVERHEAD (NALU_OVERHEAD - (h->param.b_annexb && !h->param.i_avcintra_class && (h->out.i_nal-1)))
47
48
/****************************************************************************
49
* Includes
50
51
uint8_t *nal_buffer;
52
int nal_buffer_size;
53
54
+ x264_t *reconfig_h;
55
+ int reconfig;
56
+
57
/**** thread synchronization starts here ****/
58
59
/* frame number/poc */
60
61
int (*dequant4_mf[4])[16]; /* [4][6][16] */
62
int (*dequant8_mf[4])[64]; /* [4][6][64] */
63
/* quantization matrix for trellis, [cqm][qp][coef] */
64
- int (*unquant4_mf[4])[16]; /* [4][52][16] */
65
- int (*unquant8_mf[4])[64]; /* [4][52][64] */
66
+ int (*unquant4_mf[4])[16]; /* [4][QP_MAX_SPEC+1][16] */
67
+ int (*unquant8_mf[4])[64]; /* [4][QP_MAX_SPEC+1][64] */
68
/* quantization matrix for deadzone */
69
- udctcoef (*quant4_mf[4])[16]; /* [4][52][16] */
70
- udctcoef (*quant8_mf[4])[64]; /* [4][52][64] */
71
- udctcoef (*quant4_bias[4])[16]; /* [4][52][16] */
72
- udctcoef (*quant8_bias[4])[64]; /* [4][52][64] */
73
- udctcoef (*quant4_bias0[4])[16]; /* [4][52][16] */
74
- udctcoef (*quant8_bias0[4])[64]; /* [4][52][64] */
75
+ udctcoef (*quant4_mf[4])[16]; /* [4][QP_MAX_SPEC+1][16] */
76
+ udctcoef (*quant8_mf[4])[64]; /* [4][QP_MAX_SPEC+1][64] */
77
+ udctcoef (*quant4_bias[4])[16]; /* [4][QP_MAX_SPEC+1][16] */
78
+ udctcoef (*quant8_bias[4])[64]; /* [4][QP_MAX_SPEC+1][64] */
79
+ udctcoef (*quant4_bias0[4])[16]; /* [4][QP_MAX_SPEC+1][16] */
80
+ udctcoef (*quant8_bias0[4])[64]; /* [4][QP_MAX_SPEC+1][64] */
81
udctcoef (*nr_offset_emergency)[4][64];
82
83
/* mv/ref cost arrays. */
84
85
* and won't be copied from one thread to another */
86
87
/* mb table */
88
+ uint8_t *base; /* base pointer for all malloced data in this mb */
89
int8_t *type; /* mb type */
90
uint8_t *partition; /* mb partition */
91
int8_t *qp; /* mb qp */
92
93
x264_deblock_function_t loopf;
94
x264_bitstream_function_t bsf;
95
96
-#if HAVE_VISUALIZE
97
- struct visualize_t *visualize;
98
-#endif
99
x264_lookahead_t *lookahead;
100
101
#if HAVE_OPENCL
102
x264-snapshot-20130723-2245.tar.bz2/common/cpu.c -> x264-snapshot-20140321-2245.tar.bz2/common/cpu.c
Changed
114
1
2
/*****************************************************************************
3
* cpu.c: cpu detection
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Loren Merritt <lorenm@u.washington.edu>
9
* Laurent Aimar <fenrir@via.ecp.fr>
10
11
#undef MMX2
12
{"Cache32", X264_CPU_CACHELINE_32},
13
{"Cache64", X264_CPU_CACHELINE_64},
14
- {"SSEMisalign", X264_CPU_SSE_MISALIGN},
15
{"LZCNT", X264_CPU_LZCNT},
16
{"BMI1", X264_CPU_BMI1},
17
{"BMI2", X264_CPU_BMI1|X264_CPU_BMI2},
18
19
uint32_t cpu = 0;
20
uint32_t eax, ebx, ecx, edx;
21
uint32_t vendor[4] = {0};
22
- uint32_t max_extended_cap;
23
+ uint32_t max_extended_cap, max_basic_cap;
24
int cache;
25
26
#if !ARCH_X86_64
27
28
#endif
29
30
x264_cpu_cpuid( 0, &eax, vendor+0, vendor+2, vendor+1 );
31
- if( eax == 0 )
32
+ max_basic_cap = eax;
33
+ if( max_basic_cap == 0 )
34
return 0;
35
36
x264_cpu_cpuid( 1, &eax, &ebx, &ecx, &edx );
37
38
}
39
}
40
41
- x264_cpu_cpuid( 7, &eax, &ebx, &ecx, &edx );
42
- /* AVX2 requires OS support, but BMI1/2 don't. */
43
- if( (cpu&X264_CPU_AVX) && (ebx&0x00000020) )
44
- cpu |= X264_CPU_AVX2;
45
- if( ebx&0x00000008 )
46
+ if( max_basic_cap >= 7 )
47
{
48
- cpu |= X264_CPU_BMI1;
49
- if( ebx&0x00000100 )
50
- cpu |= X264_CPU_BMI2;
51
+ x264_cpu_cpuid( 7, &eax, &ebx, &ecx, &edx );
52
+ /* AVX2 requires OS support, but BMI1/2 don't. */
53
+ if( (cpu&X264_CPU_AVX) && (ebx&0x00000020) )
54
+ cpu |= X264_CPU_AVX2;
55
+ if( ebx&0x00000008 )
56
+ {
57
+ cpu |= X264_CPU_BMI1;
58
+ if( ebx&0x00000100 )
59
+ cpu |= X264_CPU_BMI2;
60
+ }
61
}
62
63
if( cpu & X264_CPU_SSSE3 )
64
65
}
66
}
67
68
- if( ecx&0x00000080 ) /* Misalign SSE */
69
- {
70
- cpu |= X264_CPU_SSE_MISALIGN;
71
- x264_cpu_mask_misalign_sse();
72
- }
73
-
74
if( cpu & X264_CPU_AVX )
75
{
76
if( ecx&0x00000800 ) /* XOP */
77
78
x264_cpu_cpuid( 0x80000006, &eax, &ebx, &ecx, &edx );
79
cache = ecx&0xff; // cacheline size
80
}
81
- if( !cache )
82
+ if( !cache && max_basic_cap >= 2 )
83
{
84
// Cache and TLB Information
85
static const char cache32_ids[] = { 0x0a, 0x0c, 0x41, 0x42, 0x43, 0x44, 0x45, 0x82, 0x83, 0x84, 0x85, 0 };
86
87
x264_log( NULL, X264_LOG_WARNING, "unable to determine cacheline size\n" );
88
}
89
90
-#if BROKEN_STACK_ALIGNMENT
91
+#if STACK_ALIGNMENT < 16
92
cpu |= X264_CPU_STACK_MOD4;
93
#endif
94
95
96
return sysconf( _SC_NPROCESSORS_ONLN );
97
98
#elif SYS_LINUX
99
+#ifdef __ANDROID__
100
+ // Android NDK does not expose sched_getaffinity
101
+ return sysconf( _SC_NPROCESSORS_CONF );
102
+#else
103
cpu_set_t p_aff;
104
memset( &p_aff, 0, sizeof(p_aff) );
105
if( sched_getaffinity( 0, sizeof(p_aff), &p_aff ) )
106
107
np += (((uint8_t *)&p_aff)[bit / 8] >> (bit % 8)) & 1;
108
return np;
109
#endif
110
+#endif
111
112
#elif SYS_BEOS
113
system_info info;
114
x264-snapshot-20130723-2245.tar.bz2/common/cpu.h -> x264-snapshot-20140321-2245.tar.bz2/common/cpu.h
Changed
29
1
2
/*****************************************************************************
3
* cpu.h: cpu detection
4
*****************************************************************************
5
- * Copyright (C) 2004-2013 x264 project
6
+ * Copyright (C) 2004-2014 x264 project
7
*
8
* Authors: Loren Merritt <lorenm@u.washington.edu>
9
*
10
11
#define x264_emms()
12
#endif
13
#define x264_sfence x264_cpu_sfence
14
-void x264_cpu_mask_misalign_sse( void );
15
void x264_safe_intel_cpu_indicator_init( void );
16
17
/* kludge:
18
19
* alignment between functions (osdep.h handles manual alignment of arrays
20
* if it doesn't).
21
*/
22
-#if (ARCH_X86 || HAVE_32B_STACK_ALIGNMENT) && HAVE_MMX
23
-int x264_stack_align( void (*func)(), ... );
24
+#if (ARCH_X86 || STACK_ALIGNMENT > 16) && HAVE_MMX
25
+intptr_t x264_stack_align( void (*func)(), ... );
26
#define x264_stack_align(func,...) x264_stack_align((void (*)())func, __VA_ARGS__)
27
#else
28
#define x264_stack_align(func,...) func(__VA_ARGS__)
29
x264-snapshot-20130723-2245.tar.bz2/common/dct.c -> x264-snapshot-20140321-2245.tar.bz2/common/dct.c
Changed
15
1
2
/*****************************************************************************
3
* dct.c: transform and zigzag
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Loren Merritt <lorenm@u.washington.edu>
9
* Laurent Aimar <fenrir@via.ecp.fr>
10
- * Henrik Gramner <hengar-6@student.ltu.se>
11
+ * Henrik Gramner <henrik@gramner.com>
12
*
13
* This program is free software; you can redistribute it and/or modify
14
* it under the terms of the GNU General Public License as published by
15
x264-snapshot-20130723-2245.tar.bz2/common/dct.h -> x264-snapshot-20140321-2245.tar.bz2/common/dct.h
Changed
10
1
2
/*****************************************************************************
3
* dct.h: transform and zigzag
4
*****************************************************************************
5
- * Copyright (C) 2004-2013 x264 project
6
+ * Copyright (C) 2004-2014 x264 project
7
*
8
* Authors: Loren Merritt <lorenm@u.washington.edu>
9
*
10
x264-snapshot-20130723-2245.tar.bz2/common/deblock.c -> x264-snapshot-20140321-2245.tar.bz2/common/deblock.c
Changed
16
1
2
/*****************************************************************************
3
* deblock.c: deblocking
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
* Loren Merritt <lorenm@u.washington.edu>
10
* Jason Garrett-Glaser <darkshikari@gmail.com>
11
- * Henrik Gramner <hengar-6@student.ltu.se>
12
+ * Henrik Gramner <henrik@gramner.com>
13
*
14
* This program is free software; you can redistribute it and/or modify
15
* it under the terms of the GNU General Public License as published by
16
x264-snapshot-20130723-2245.tar.bz2/common/frame.c -> x264-snapshot-20140321-2245.tar.bz2/common/frame.c
Changed
304
1
2
/*****************************************************************************
3
* frame.c: frame handling
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
* Loren Merritt <lorenm@u.washington.edu>
10
11
case X264_CSP_NV16:
12
case X264_CSP_I422:
13
case X264_CSP_YV16:
14
+ case X264_CSP_V210:
15
return X264_CSP_NV16;
16
case X264_CSP_I444:
17
case X264_CSP_YV24:
18
19
#endif
20
21
CHECKED_MALLOCZERO( frame, sizeof(x264_frame_t) );
22
+ PREALLOC_INIT
23
24
/* allocate frame data (+64 for extra data for me) */
25
i_width = h->mb.i_mb_width*16;
26
27
28
for( int i = 0; i < h->param.i_bframe + 2; i++ )
29
for( int j = 0; j < h->param.i_bframe + 2; j++ )
30
- CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
31
+ PREALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
32
33
frame->i_poc = -1;
34
frame->i_type = X264_TYPE_AUTO;
35
36
{
37
int chroma_padv = i_padv >> (i_csp == X264_CSP_NV12);
38
int chroma_plane_size = (frame->i_stride[1] * (frame->i_lines[1] + 2*chroma_padv));
39
- CHECKED_MALLOC( frame->buffer[1], chroma_plane_size * sizeof(pixel) );
40
- frame->plane[1] = frame->buffer[1] + frame->i_stride[1] * chroma_padv + PADH;
41
+ PREALLOC( frame->buffer[1], chroma_plane_size * sizeof(pixel) );
42
if( PARAM_INTERLACED )
43
- {
44
- CHECKED_MALLOC( frame->buffer_fld[1], chroma_plane_size * sizeof(pixel) );
45
- frame->plane_fld[1] = frame->buffer_fld[1] + frame->i_stride[1] * chroma_padv + PADH;
46
- }
47
+ PREALLOC( frame->buffer_fld[1], chroma_plane_size * sizeof(pixel) );
48
}
49
50
/* all 4 luma planes allocated together, since the cacheline split code
51
52
if( h->param.analyse.i_subpel_refine && b_fdec )
53
{
54
/* FIXME: Don't allocate both buffers in non-adaptive MBAFF. */
55
- CHECKED_MALLOC( frame->buffer[p], 4*luma_plane_size * sizeof(pixel) );
56
+ PREALLOC( frame->buffer[p], 4*luma_plane_size * sizeof(pixel) );
57
if( PARAM_INTERLACED )
58
- CHECKED_MALLOC( frame->buffer_fld[p], 4*luma_plane_size * sizeof(pixel) );
59
- for( int i = 0; i < 4; i++ )
60
- {
61
- frame->filtered[p][i] = frame->buffer[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH;
62
- frame->filtered_fld[p][i] = frame->buffer_fld[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH;
63
- }
64
- frame->plane[p] = frame->filtered[p][0];
65
- frame->plane_fld[p] = frame->filtered_fld[p][0];
66
+ PREALLOC( frame->buffer_fld[p], 4*luma_plane_size * sizeof(pixel) );
67
}
68
else
69
{
70
- CHECKED_MALLOC( frame->buffer[p], luma_plane_size * sizeof(pixel) );
71
+ PREALLOC( frame->buffer[p], luma_plane_size * sizeof(pixel) );
72
if( PARAM_INTERLACED )
73
- CHECKED_MALLOC( frame->buffer_fld[p], luma_plane_size * sizeof(pixel) );
74
- frame->filtered[p][0] = frame->plane[p] = frame->buffer[p] + frame->i_stride[p] * i_padv + PADH;
75
- frame->filtered_fld[p][0] = frame->plane_fld[p] = frame->buffer_fld[p] + frame->i_stride[p] * i_padv + PADH;
76
+ PREALLOC( frame->buffer_fld[p], luma_plane_size * sizeof(pixel) );
77
}
78
}
79
80
81
82
if( b_fdec ) /* fdec frame */
83
{
84
- CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
85
- CHECKED_MALLOC( frame->mb_partition, i_mb_count * sizeof(uint8_t));
86
- CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
87
- CHECKED_MALLOC( frame->mv16x16, 2*(i_mb_count+1) * sizeof(int16_t) );
88
- M32( frame->mv16x16[0] ) = 0;
89
- frame->mv16x16++;
90
- CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
91
+ PREALLOC( frame->mb_type, i_mb_count * sizeof(int8_t) );
92
+ PREALLOC( frame->mb_partition, i_mb_count * sizeof(uint8_t) );
93
+ PREALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
94
+ PREALLOC( frame->mv16x16, 2*(i_mb_count+1) * sizeof(int16_t) );
95
+ PREALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
96
if( h->param.i_bframe )
97
{
98
- CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
99
- CHECKED_MALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) );
100
+ PREALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
101
+ PREALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) );
102
}
103
else
104
{
105
frame->mv[1] = NULL;
106
frame->ref[1] = NULL;
107
}
108
- CHECKED_MALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) );
109
- CHECKED_MALLOC( frame->f_row_qp, i_lines/16 * sizeof(float) );
110
- CHECKED_MALLOC( frame->f_row_qscale, i_lines/16 * sizeof(float) );
111
+ PREALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) );
112
+ PREALLOC( frame->f_row_qp, i_lines/16 * sizeof(float) );
113
+ PREALLOC( frame->f_row_qscale, i_lines/16 * sizeof(float) );
114
if( h->param.analyse.i_me_method >= X264_ME_ESA )
115
- {
116
- CHECKED_MALLOC( frame->buffer[3],
117
- frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) << h->frames.b_have_sub8x8_esa );
118
- frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
119
- }
120
+ PREALLOC( frame->buffer[3], frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) << h->frames.b_have_sub8x8_esa );
121
if( PARAM_INTERLACED )
122
- CHECKED_MALLOC( frame->field, i_mb_count * sizeof(uint8_t) );
123
+ PREALLOC( frame->field, i_mb_count * sizeof(uint8_t) );
124
if( h->param.analyse.b_mb_info )
125
- CHECKED_MALLOC( frame->effective_qp, i_mb_count * sizeof(uint8_t) );
126
+ PREALLOC( frame->effective_qp, i_mb_count * sizeof(uint8_t) );
127
}
128
else /* fenc frame */
129
{
130
131
{
132
int luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
133
134
- CHECKED_MALLOC( frame->buffer_lowres[0], 4 * luma_plane_size * sizeof(pixel) );
135
- for( int i = 0; i < 4; i++ )
136
- frame->lowres[i] = frame->buffer_lowres[0] + (frame->i_stride_lowres * PADV + PADH) + i * luma_plane_size;
137
+ PREALLOC( frame->buffer_lowres[0], 4 * luma_plane_size * sizeof(pixel) );
138
139
for( int j = 0; j <= !!h->param.i_bframe; j++ )
140
for( int i = 0; i <= h->param.i_bframe; i++ )
141
{
142
- CHECKED_MALLOCZERO( frame->lowres_mvs[j][i], 2*h->mb.i_mb_count*sizeof(int16_t) );
143
- CHECKED_MALLOC( frame->lowres_mv_costs[j][i], h->mb.i_mb_count*sizeof(int) );
144
+ PREALLOC( frame->lowres_mvs[j][i], 2*h->mb.i_mb_count*sizeof(int16_t) );
145
+ PREALLOC( frame->lowres_mv_costs[j][i], h->mb.i_mb_count*sizeof(int) );
146
}
147
- CHECKED_MALLOC( frame->i_propagate_cost, (i_mb_count+7) * sizeof(uint16_t) );
148
+ PREALLOC( frame->i_propagate_cost, (i_mb_count+7) * sizeof(uint16_t) );
149
for( int j = 0; j <= h->param.i_bframe+1; j++ )
150
for( int i = 0; i <= h->param.i_bframe+1; i++ )
151
- CHECKED_MALLOC( frame->lowres_costs[j][i], (i_mb_count+3) * sizeof(uint16_t) );
152
- frame->i_intra_cost = frame->lowres_costs[0][0];
153
- memset( frame->i_intra_cost, -1, (i_mb_count+3) * sizeof(uint16_t) );
154
+ PREALLOC( frame->lowres_costs[j][i], (i_mb_count+3) * sizeof(uint16_t) );
155
+
156
}
157
if( h->param.rc.i_aq_mode )
158
{
159
- CHECKED_MALLOC( frame->f_qp_offset, h->mb.i_mb_count * sizeof(float) );
160
- CHECKED_MALLOC( frame->f_qp_offset_aq, h->mb.i_mb_count * sizeof(float) );
161
+ PREALLOC( frame->f_qp_offset, h->mb.i_mb_count * sizeof(float) );
162
+ PREALLOC( frame->f_qp_offset_aq, h->mb.i_mb_count * sizeof(float) );
163
if( h->frames.b_have_lowres )
164
+ PREALLOC( frame->i_inv_qscale_factor, (h->mb.i_mb_count+3) * sizeof(uint16_t) );
165
+ }
166
+ }
167
+
168
+ PREALLOC_END( frame->base );
169
+
170
+ if( i_csp == X264_CSP_NV12 || i_csp == X264_CSP_NV16 )
171
+ {
172
+ int chroma_padv = i_padv >> (i_csp == X264_CSP_NV12);
173
+ frame->plane[1] = frame->buffer[1] + frame->i_stride[1] * chroma_padv + PADH;
174
+ if( PARAM_INTERLACED )
175
+ frame->plane_fld[1] = frame->buffer_fld[1] + frame->i_stride[1] * chroma_padv + PADH;
176
+ }
177
+
178
+ for( int p = 0; p < luma_plane_count; p++ )
179
+ {
180
+ int luma_plane_size = align_plane_size( frame->i_stride[p] * (frame->i_lines[p] + 2*i_padv), disalign );
181
+ if( h->param.analyse.i_subpel_refine && b_fdec )
182
+ {
183
+ for( int i = 0; i < 4; i++ )
184
+ {
185
+ frame->filtered[p][i] = frame->buffer[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH;
186
+ frame->filtered_fld[p][i] = frame->buffer_fld[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH;
187
+ }
188
+ frame->plane[p] = frame->filtered[p][0];
189
+ frame->plane_fld[p] = frame->filtered_fld[p][0];
190
+ }
191
+ else
192
+ {
193
+ frame->filtered[p][0] = frame->plane[p] = frame->buffer[p] + frame->i_stride[p] * i_padv + PADH;
194
+ frame->filtered_fld[p][0] = frame->plane_fld[p] = frame->buffer_fld[p] + frame->i_stride[p] * i_padv + PADH;
195
+ }
196
+ }
197
+
198
+ if( b_fdec )
199
+ {
200
+ M32( frame->mv16x16[0] ) = 0;
201
+ frame->mv16x16++;
202
+
203
+ if( h->param.analyse.i_me_method >= X264_ME_ESA )
204
+ frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
205
+ }
206
+ else
207
+ {
208
+ if( h->frames.b_have_lowres )
209
+ {
210
+ int luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );
211
+ for( int i = 0; i < 4; i++ )
212
+ frame->lowres[i] = frame->buffer_lowres[0] + (frame->i_stride_lowres * PADV + PADH) + i * luma_plane_size;
213
+
214
+ for( int j = 0; j <= !!h->param.i_bframe; j++ )
215
+ for( int i = 0; i <= h->param.i_bframe; i++ )
216
+ memset( frame->lowres_mvs[j][i], 0, 2*h->mb.i_mb_count*sizeof(int16_t) );
217
+
218
+ frame->i_intra_cost = frame->lowres_costs[0][0];
219
+ memset( frame->i_intra_cost, -1, (i_mb_count+3) * sizeof(uint16_t) );
220
+
221
+ if( h->param.rc.i_aq_mode )
222
/* shouldn't really be initialized, just silences a valgrind false-positive in x264_mbtree_propagate_cost_sse2 */
223
- CHECKED_MALLOCZERO( frame->i_inv_qscale_factor, (h->mb.i_mb_count+3) * sizeof(uint16_t) );
224
+ memset( frame->i_inv_qscale_factor, 0, (h->mb.i_mb_count+3) * sizeof(uint16_t) );
225
}
226
}
227
228
229
* so freeing those pointers would cause a double free later. */
230
if( !frame->b_duplicate )
231
{
232
- for( int i = 0; i < 4; i++ )
233
- {
234
- x264_free( frame->buffer[i] );
235
- x264_free( frame->buffer_fld[i] );
236
- }
237
- for( int i = 0; i < 4; i++ )
238
- x264_free( frame->buffer_lowres[i] );
239
- for( int i = 0; i < X264_BFRAME_MAX+2; i++ )
240
- for( int j = 0; j < X264_BFRAME_MAX+2; j++ )
241
- x264_free( frame->i_row_satds[i][j] );
242
- for( int j = 0; j < 2; j++ )
243
- for( int i = 0; i <= X264_BFRAME_MAX; i++ )
244
- {
245
- x264_free( frame->lowres_mvs[j][i] );
246
- x264_free( frame->lowres_mv_costs[j][i] );
247
- }
248
- x264_free( frame->i_propagate_cost );
249
- for( int j = 0; j <= X264_BFRAME_MAX+1; j++ )
250
- for( int i = 0; i <= X264_BFRAME_MAX+1; i++ )
251
- x264_free( frame->lowres_costs[j][i] );
252
- x264_free( frame->f_qp_offset );
253
- x264_free( frame->f_qp_offset_aq );
254
- x264_free( frame->i_inv_qscale_factor );
255
- x264_free( frame->i_row_bits );
256
- x264_free( frame->f_row_qp );
257
- x264_free( frame->f_row_qscale );
258
- x264_free( frame->field );
259
- x264_free( frame->effective_qp );
260
- x264_free( frame->mb_type );
261
- x264_free( frame->mb_partition );
262
- x264_free( frame->mv[0] );
263
- x264_free( frame->mv[1] );
264
- if( frame->mv16x16 )
265
- x264_free( frame->mv16x16-1 );
266
- x264_free( frame->ref[0] );
267
- x264_free( frame->ref[1] );
268
+ x264_free( frame->base );
269
+
270
if( frame->param && frame->param->param_free )
271
frame->param->param_free( frame->param );
272
if( frame->mb_info_free )
273
274
}
275
#endif
276
277
+ if( BIT_DEPTH != 10 && i_csp == X264_CSP_V210 )
278
+ {
279
+ x264_log( h, X264_LOG_ERROR, "v210 input is only compatible with bit-depth of 10 bits\n" );
280
+ return -1;
281
+ }
282
+
283
dst->i_type = src->i_type;
284
dst->i_qpplus1 = src->i_qpplus1;
285
dst->i_pts = dst->i_reordered_pts = src->i_pts;
286
287
288
uint8_t *pix[3];
289
int stride[3];
290
- if ( i_csp >= X264_CSP_BGR )
291
+ if( i_csp == X264_CSP_V210 )
292
+ {
293
+ stride[0] = src->img.i_stride[0];
294
+ pix[0] = src->img.plane[0];
295
+
296
+ h->mc.plane_copy_deinterleave_v210( dst->plane[0], dst->i_stride[0],
297
+ dst->plane[1], dst->i_stride[1],
298
+ (uint32_t *)pix[0], stride[0]/sizeof(uint32_t), h->param.i_width, h->param.i_height );
299
+ }
300
+ else if( i_csp >= X264_CSP_BGR )
301
{
302
stride[0] = src->img.i_stride[0];
303
pix[0] = src->img.plane[0];
304
x264-snapshot-20130723-2245.tar.bz2/common/frame.h -> x264-snapshot-20140321-2245.tar.bz2/common/frame.h
Changed
18
1
2
/*****************************************************************************
3
* frame.h: frame handling
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
* Loren Merritt <lorenm@u.washington.edu>
10
11
typedef struct x264_frame
12
{
13
/* */
14
+ uint8_t *base; /* Base pointer for all malloced data in this frame. */
15
int i_poc;
16
int i_delta_poc[2];
17
int i_type;
18
x264-snapshot-20130723-2245.tar.bz2/common/macroblock.c -> x264-snapshot-20140321-2245.tar.bz2/common/macroblock.c
Changed
185
1
2
/*****************************************************************************
3
* macroblock.c: macroblock common functions
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Jason Garrett-Glaser <darkshikari@gmail.com>
9
* Laurent Aimar <fenrir@via.ecp.fr>
10
* Loren Merritt <lorenm@u.washington.edu>
11
- * Henrik Gramner <hengar-6@student.ltu.se>
12
+ * Henrik Gramner <henrik@gramner.com>
13
*
14
* This program is free software; you can redistribute it and/or modify
15
* it under the terms of the GNU General Public License as published by
16
17
18
h->mb.b_interlaced = PARAM_INTERLACED;
19
20
- CHECKED_MALLOC( h->mb.qp, i_mb_count * sizeof(int8_t) );
21
- CHECKED_MALLOC( h->mb.cbp, i_mb_count * sizeof(int16_t) );
22
- CHECKED_MALLOC( h->mb.mb_transform_size, i_mb_count * sizeof(int8_t) );
23
- CHECKED_MALLOC( h->mb.slice_table, i_mb_count * sizeof(uint16_t) );
24
- memset( h->mb.slice_table, -1, i_mb_count * sizeof(uint16_t) );
25
+ PREALLOC_INIT
26
+
27
+ PREALLOC( h->mb.qp, i_mb_count * sizeof(int8_t) );
28
+ PREALLOC( h->mb.cbp, i_mb_count * sizeof(int16_t) );
29
+ PREALLOC( h->mb.mb_transform_size, i_mb_count * sizeof(int8_t) );
30
+ PREALLOC( h->mb.slice_table, i_mb_count * sizeof(uint16_t) );
31
32
/* 0 -> 3 top(4), 4 -> 6 : left(3) */
33
- CHECKED_MALLOC( h->mb.intra4x4_pred_mode, i_mb_count * 8 * sizeof(int8_t) );
34
+ PREALLOC( h->mb.intra4x4_pred_mode, i_mb_count * 8 * sizeof(int8_t) );
35
36
/* all coeffs */
37
- CHECKED_MALLOC( h->mb.non_zero_count, i_mb_count * 48 * sizeof(uint8_t) );
38
+ PREALLOC( h->mb.non_zero_count, i_mb_count * 48 * sizeof(uint8_t) );
39
40
if( h->param.b_cabac )
41
{
42
- CHECKED_MALLOC( h->mb.skipbp, i_mb_count * sizeof(int8_t) );
43
- CHECKED_MALLOC( h->mb.chroma_pred_mode, i_mb_count * sizeof(int8_t) );
44
- CHECKED_MALLOC( h->mb.mvd[0], i_mb_count * sizeof( **h->mb.mvd ) );
45
+ PREALLOC( h->mb.skipbp, i_mb_count * sizeof(int8_t) );
46
+ PREALLOC( h->mb.chroma_pred_mode, i_mb_count * sizeof(int8_t) );
47
+ PREALLOC( h->mb.mvd[0], i_mb_count * sizeof( **h->mb.mvd ) );
48
if( h->param.i_bframe )
49
- CHECKED_MALLOC( h->mb.mvd[1], i_mb_count * sizeof( **h->mb.mvd ) );
50
+ PREALLOC( h->mb.mvd[1], i_mb_count * sizeof( **h->mb.mvd ) );
51
}
52
53
for( int i = 0; i < 2; i++ )
54
55
i_refs = X264_MIN(X264_REF_MAX, i_refs + 1 + (BIT_DEPTH == 8)); //smart weights add two duplicate frames, one in >8-bit
56
57
for( int j = !i; j < i_refs; j++ )
58
- {
59
- CHECKED_MALLOC( h->mb.mvr[i][j], 2 * (i_mb_count + 1) * sizeof(int16_t) );
60
- M32( h->mb.mvr[i][j][0] ) = 0;
61
- h->mb.mvr[i][j]++;
62
- }
63
+ PREALLOC( h->mb.mvr[i][j], 2 * (i_mb_count + 1) * sizeof(int16_t) );
64
}
65
66
if( h->param.analyse.i_weighted_pred )
67
68
}
69
70
for( int i = 0; i < numweightbuf; i++ )
71
- CHECKED_MALLOC( h->mb.p_weight_buf[i], luma_plane_size * sizeof(pixel) );
72
+ PREALLOC( h->mb.p_weight_buf[i], luma_plane_size * sizeof(pixel) );
73
+ }
74
+
75
+ PREALLOC_END( h->mb.base );
76
+
77
+ memset( h->mb.slice_table, -1, i_mb_count * sizeof(uint16_t) );
78
+
79
+ for( int i = 0; i < 2; i++ )
80
+ {
81
+ int i_refs = X264_MIN(X264_REF_MAX, (i ? 1 + !!h->param.i_bframe_pyramid : h->param.i_frame_reference) ) << PARAM_INTERLACED;
82
+ if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
83
+ i_refs = X264_MIN(X264_REF_MAX, i_refs + 1 + (BIT_DEPTH == 8)); //smart weights add two duplicate frames, one in >8-bit
84
+
85
+ for( int j = !i; j < i_refs; j++ )
86
+ {
87
+ M32( h->mb.mvr[i][j][0] ) = 0;
88
+ h->mb.mvr[i][j]++;
89
+ }
90
}
91
92
return 0;
93
94
}
95
void x264_macroblock_cache_free( x264_t *h )
96
{
97
- for( int i = 0; i < 2; i++ )
98
- for( int j = !i; j < X264_REF_MAX*2; j++ )
99
- if( h->mb.mvr[i][j] )
100
- x264_free( h->mb.mvr[i][j]-1 );
101
- for( int i = 0; i < X264_REF_MAX; i++ )
102
- x264_free( h->mb.p_weight_buf[i] );
103
-
104
- if( h->param.b_cabac )
105
- {
106
- x264_free( h->mb.skipbp );
107
- x264_free( h->mb.chroma_pred_mode );
108
- x264_free( h->mb.mvd[0] );
109
- x264_free( h->mb.mvd[1] );
110
- }
111
- x264_free( h->mb.slice_table );
112
- x264_free( h->mb.intra4x4_pred_mode );
113
- x264_free( h->mb.non_zero_count );
114
- x264_free( h->mb.mb_transform_size );
115
- x264_free( h->mb.cbp );
116
- x264_free( h->mb.qp );
117
+ x264_free( h->mb.base );
118
}
119
120
int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
121
122
((me_range*2+24) * sizeof(int16_t) + (me_range+4) * (me_range+1) * 4 * sizeof(mvsad_t));
123
scratch_size = X264_MAX3( buf_hpel, buf_ssim, buf_tesa );
124
}
125
- int buf_mbtree = h->param.rc.b_mb_tree * ((h->mb.i_mb_width+7)&~7) * sizeof(int);
126
+ int buf_mbtree = h->param.rc.b_mb_tree * ((h->mb.i_mb_width+7)&~7) * sizeof(int16_t);
127
scratch_size = X264_MAX( scratch_size, buf_mbtree );
128
if( scratch_size )
129
CHECKED_MALLOC( h->scratch_buffer, scratch_size );
130
131
h->scratch_buffer = NULL;
132
133
int buf_lookahead_threads = (h->mb.i_mb_height + (4 + 32) * h->param.i_lookahead_threads) * sizeof(int) * 2;
134
- CHECKED_MALLOC( h->scratch_buffer2, buf_lookahead_threads );
135
+ int buf_mbtree2 = buf_mbtree * 12; /* size of the internal propagate_list asm buffer */
136
+ scratch_size = X264_MAX( buf_lookahead_threads, buf_mbtree2 );
137
+ CHECKED_MALLOC( h->scratch_buffer2, scratch_size );
138
139
return 0;
140
fail:
141
142
}
143
}
144
145
- if( b_mbaff && mb_x == 0 && !(mb_y&1) && mb_y > 0 )
146
- h->mb.field_decoding_flag = h->mb.field[h->mb.i_mb_xy - h->mb.i_mb_stride];
147
+ if( b_mbaff && mb_x == 0 && !(mb_y&1) )
148
+ {
149
+ if( h->mb.i_mb_top_xy >= h->sh.i_first_mb )
150
+ h->mb.field_decoding_flag = h->mb.field[h->mb.i_mb_top_xy];
151
+ else
152
+ h->mb.field_decoding_flag = 0;
153
+ }
154
155
/* Check whether skip here would cause decoder to predict interlace mode incorrectly.
156
* FIXME: It might be better to change the interlace type rather than forcing a skip to be non-skip. */
157
158
if( b_mbaff )
159
{
160
if( MB_INTERLACED != h->mb.field_decoding_flag &&
161
- h->mb.i_mb_prev_xy >= 0 && IS_SKIP(h->mb.type[h->mb.i_mb_prev_xy]) )
162
+ (mb_y&1) && IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride]) )
163
h->mb.b_allow_skip = 0;
164
- if( (mb_y&1) && IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride]) )
165
- {
166
- if( h->mb.i_neighbour & MB_LEFT )
167
- {
168
- if( h->mb.field[h->mb.i_mb_xy - 1] != MB_INTERLACED )
169
- h->mb.b_allow_skip = 0;
170
- }
171
- else if( h->mb.i_neighbour & MB_TOP )
172
- {
173
- if( h->mb.field[h->mb.i_mb_top_xy] != MB_INTERLACED )
174
- h->mb.b_allow_skip = 0;
175
- }
176
- else // Frame mb pair is predicted
177
- {
178
- if( MB_INTERLACED )
179
- h->mb.b_allow_skip = 0;
180
- }
181
- }
182
}
183
184
if( h->param.b_cabac )
185
x264-snapshot-20130723-2245.tar.bz2/common/macroblock.h -> x264-snapshot-20140321-2245.tar.bz2/common/macroblock.h
Changed
10
1
2
/*****************************************************************************
3
* macroblock.h: macroblock common functions
4
*****************************************************************************
5
- * Copyright (C) 2005-2013 x264 project
6
+ * Copyright (C) 2005-2014 x264 project
7
*
8
* Authors: Loren Merritt <lorenm@u.washington.edu>
9
* Laurent Aimar <fenrir@via.ecp.fr>
10
x264-snapshot-20130723-2245.tar.bz2/common/mc.c -> x264-snapshot-20140321-2245.tar.bz2/common/mc.c
Changed
177
1
2
/*****************************************************************************
3
* mc.c: motion compensation
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
* Loren Merritt <lorenm@u.washington.edu>
10
11
}
12
}
13
14
+void x264_plane_copy_deinterleave_v210_c( pixel *dsty, intptr_t i_dsty,
15
+ pixel *dstc, intptr_t i_dstc,
16
+ uint32_t *src, intptr_t i_src, int w, int h )
17
+{
18
+ for( int l = 0; l < h; l++ )
19
+ {
20
+ pixel *dsty0 = dsty;
21
+ pixel *dstc0 = dstc;
22
+ uint32_t *src0 = src;
23
+
24
+ for( int n = 0; n < w; n += 3 )
25
+ {
26
+ *(dstc0++) = *src0 & 0x03FF;
27
+ *(dsty0++) = ( *src0 >> 10 ) & 0x03FF;
28
+ *(dstc0++) = ( *src0 >> 20 ) & 0x03FF;
29
+ src0++;
30
+ *(dsty0++) = *src0 & 0x03FF;
31
+ *(dstc0++) = ( *src0 >> 10 ) & 0x03FF;
32
+ *(dsty0++) = ( *src0 >> 20 ) & 0x03FF;
33
+ src0++;
34
+ }
35
+
36
+ dsty += i_dsty;
37
+ dstc += i_dstc;
38
+ src += i_src;
39
+ }
40
+}
41
+
42
static void store_interleave_chroma( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height )
43
{
44
for( int y=0; y<height; y++, dst+=i_dst, srcu+=FDEC_STRIDE, srcv+=FDEC_STRIDE )
45
46
47
/* Estimate the total amount of influence on future quality that could be had if we
48
* were to improve the reference samples used to inter predict any given macroblock. */
49
-static void mbtree_propagate_cost( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
50
+static void mbtree_propagate_cost( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
51
uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len )
52
{
53
- float fps = *fps_factor / 256.f;
54
+ float fps = *fps_factor;
55
for( int i = 0; i < len; i++ )
56
{
57
- float intra_cost = intra_costs[i] * inv_qscales[i];
58
- float propagate_amount = propagate_in[i] + intra_cost*fps;
59
- float propagate_num = intra_costs[i] - (inter_costs[i] & LOWRES_COST_MASK);
60
- float propagate_denom = intra_costs[i];
61
- dst[i] = (int)(propagate_amount * propagate_num / propagate_denom + 0.5f);
62
+ int intra_cost = intra_costs[i];
63
+ int inter_cost = X264_MIN(intra_costs[i], inter_costs[i] & LOWRES_COST_MASK);
64
+ float propagate_intra = intra_cost * inv_qscales[i];
65
+ float propagate_amount = propagate_in[i] + propagate_intra*fps;
66
+ float propagate_num = intra_cost - inter_cost;
67
+ float propagate_denom = intra_cost;
68
+ dst[i] = X264_MIN((int)(propagate_amount * propagate_num / propagate_denom + 0.5f), 32767);
69
}
70
}
71
72
+static void mbtree_propagate_list( x264_t *h, uint16_t *ref_costs, int16_t (*mvs)[2],
73
+ int16_t *propagate_amount, uint16_t *lowres_costs,
74
+ int bipred_weight, int mb_y, int len, int list )
75
+{
76
+ unsigned stride = h->mb.i_mb_stride;
77
+ unsigned width = h->mb.i_mb_width;
78
+ unsigned height = h->mb.i_mb_height;
79
+
80
+ for( unsigned i = 0; i < len; i++ )
81
+ {
82
+#define CLIP_ADD(s,x) (s) = X264_MIN((s)+(x),(1<<15)-1)
83
+ int lists_used = lowres_costs[i]>>LOWRES_COST_SHIFT;
84
+
85
+ if( !(lists_used & (1 << list)) )
86
+ continue;
87
+
88
+ int listamount = propagate_amount[i];
89
+ /* Apply bipred weighting. */
90
+ if( lists_used == 3 )
91
+ listamount = (listamount * bipred_weight + 32) >> 6;
92
+
93
+ /* Early termination for simple case of mv0. */
94
+ if( !M32( mvs[i] ) )
95
+ {
96
+ CLIP_ADD( ref_costs[mb_y*stride + i], listamount );
97
+ continue;
98
+ }
99
+
100
+ int x = mvs[i][0];
101
+ int y = mvs[i][1];
102
+ unsigned mbx = (x>>5)+i;
103
+ unsigned mby = (y>>5)+mb_y;
104
+ unsigned idx0 = mbx + mby * stride;
105
+ unsigned idx2 = idx0 + stride;
106
+ x &= 31;
107
+ y &= 31;
108
+ int idx0weight = (32-y)*(32-x);
109
+ int idx1weight = (32-y)*x;
110
+ int idx2weight = y*(32-x);
111
+ int idx3weight = y*x;
112
+ idx0weight = (idx0weight * listamount + 512) >> 10;
113
+ idx1weight = (idx1weight * listamount + 512) >> 10;
114
+ idx2weight = (idx2weight * listamount + 512) >> 10;
115
+ idx3weight = (idx3weight * listamount + 512) >> 10;
116
+
117
+ if( mbx < width-1 && mby < height-1 )
118
+ {
119
+ CLIP_ADD( ref_costs[idx0+0], idx0weight );
120
+ CLIP_ADD( ref_costs[idx0+1], idx1weight );
121
+ CLIP_ADD( ref_costs[idx2+0], idx2weight );
122
+ CLIP_ADD( ref_costs[idx2+1], idx3weight );
123
+ }
124
+ else
125
+ {
126
+ /* Note: this takes advantage of unsigned representation to
127
+ * catch negative mbx/mby. */
128
+ if( mby < height )
129
+ {
130
+ if( mbx < width )
131
+ CLIP_ADD( ref_costs[idx0+0], idx0weight );
132
+ if( mbx+1 < width )
133
+ CLIP_ADD( ref_costs[idx0+1], idx1weight );
134
+ }
135
+ if( mby+1 < height )
136
+ {
137
+ if( mbx < width )
138
+ CLIP_ADD( ref_costs[idx2+0], idx2weight );
139
+ if( mbx+1 < width )
140
+ CLIP_ADD( ref_costs[idx2+1], idx3weight );
141
+ }
142
+ }
143
+ }
144
+#undef CLIP_ADD
145
+}
146
+
147
void x264_mc_init( int cpu, x264_mc_functions_t *pf, int cpu_independent )
148
{
149
pf->mc_luma = mc_luma;
150
151
pf->plane_copy_interleave = x264_plane_copy_interleave_c;
152
pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_c;
153
pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_c;
154
+ pf->plane_copy_deinterleave_v210 = x264_plane_copy_deinterleave_v210_c;
155
156
pf->hpel_filter = hpel_filter;
157
158
159
pf->integral_init8v = integral_init8v;
160
161
pf->mbtree_propagate_cost = mbtree_propagate_cost;
162
+ pf->mbtree_propagate_list = mbtree_propagate_list;
163
164
#if HAVE_MMX
165
x264_mc_init_mmx( cpu, pf );
166
167
#endif
168
169
if( cpu_independent )
170
+ {
171
pf->mbtree_propagate_cost = mbtree_propagate_cost;
172
+ pf->mbtree_propagate_list = mbtree_propagate_list;
173
+ }
174
}
175
176
void x264_frame_filter( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
177
x264-snapshot-20130723-2245.tar.bz2/common/mc.h -> x264-snapshot-20140321-2245.tar.bz2/common/mc.h
Changed
34
1
2
/*****************************************************************************
3
* mc.h: motion compensation
4
*****************************************************************************
5
- * Copyright (C) 2004-2013 x264 project
6
+ * Copyright (C) 2004-2014 x264 project
7
*
8
* Authors: Loren Merritt <lorenm@u.washington.edu>
9
*
10
11
pixel *src, intptr_t i_src, int w, int h );
12
void (*plane_copy_deinterleave_rgb)( pixel *dsta, intptr_t i_dsta, pixel *dstb, intptr_t i_dstb,
13
pixel *dstc, intptr_t i_dstc, pixel *src, intptr_t i_src, int pw, int w, int h );
14
+ void (*plane_copy_deinterleave_v210)( pixel *dsty, intptr_t i_dsty,
15
+ pixel *dstc, intptr_t i_dstc,
16
+ uint32_t *src, intptr_t i_src, int w, int h );
17
void (*hpel_filter)( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src,
18
intptr_t i_stride, int i_width, int i_height, int16_t *buf );
19
20
21
weight_fn_t *offsetsub;
22
void (*weight_cache)( x264_t *, x264_weight_t * );
23
24
- void (*mbtree_propagate_cost)( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
25
+ void (*mbtree_propagate_cost)( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
26
uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
27
+
28
+ void (*mbtree_propagate_list)( x264_t *h, uint16_t *ref_costs, int16_t (*mvs)[2],
29
+ int16_t *propagate_amount, uint16_t *lowres_costs,
30
+ int bipred_weight, int mb_y, int len, int list );
31
} x264_mc_functions_t;
32
33
void x264_mc_init( int cpu, x264_mc_functions_t *pf, int cpu_independent );
34
x264-snapshot-20130723-2245.tar.bz2/common/mvpred.c -> x264-snapshot-20140321-2245.tar.bz2/common/mvpred.c
Changed
10
1
2
/*****************************************************************************
3
* mvpred.c: motion vector prediction
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Loren Merritt <lorenm@u.washington.edu>
9
* Jason Garrett-Glaser <darkshikari@gmail.com>
10
x264-snapshot-20130723-2245.tar.bz2/common/opencl.c -> x264-snapshot-20140321-2245.tar.bz2/common/opencl.c
Changed
74
1
2
/*****************************************************************************
3
* opencl.c: OpenCL initialization and kernel compilation
4
*****************************************************************************
5
- * Copyright (C) 2012-2013 x264 project
6
+ * Copyright (C) 2012-2014 x264 project
7
*
8
* Authors: Steve Borho <sborho@multicorewareinc.com>
9
* Anton Mitrofanov <BugMaster@narod.ru>
10
11
12
#ifdef _WIN32
13
#include <windows.h>
14
-#define ocl_open LoadLibrary( "OpenCL" )
15
+#define ocl_open LoadLibraryW( L"OpenCL" )
16
#define ocl_close FreeLibrary
17
#define ocl_address GetProcAddress
18
#else
19
20
21
/* Try to load the cached compiled program binary, verify the device context is
22
* still valid before reuse */
23
-static cl_program x264_opencl_cache_load( x264_t *h, char *dev_name, char *dev_vendor, char *driver_version )
24
+static cl_program x264_opencl_cache_load( x264_t *h, const char *dev_name, const char *dev_vendor, const char *driver_version )
25
{
26
/* try to load cached program binary */
27
- FILE *fp = fopen( h->param.psz_clbin_file, "rb" );
28
+ FILE *fp = x264_fopen( h->param.psz_clbin_file, "rb" );
29
if( !fp )
30
return NULL;
31
32
33
34
/* Save the compiled program binary to a file for later reuse. Device context
35
* is also saved in the cache file so we do not reuse stale binaries */
36
-static void x264_opencl_cache_save( x264_t *h, cl_program program, char *dev_name, char *dev_vendor, char *driver_version )
37
+static void x264_opencl_cache_save( x264_t *h, cl_program program, const char *dev_name, const char *dev_vendor, const char *driver_version )
38
{
39
- FILE *fp = fopen( h->param.psz_clbin_file, "wb" );
40
+ FILE *fp = x264_fopen( h->param.psz_clbin_file, "wb" );
41
if( !fp )
42
{
43
x264_log( h, X264_LOG_INFO, "OpenCL: unable to open clbin file for write\n" );
44
45
goto fail;
46
}
47
48
- FILE *log_file = fopen( "x264_kernel_build_log.txt", "w" );
49
+ FILE *log_file = x264_fopen( "x264_kernel_build_log.txt", "w" );
50
if( !log_file )
51
{
52
x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to create file x264_kernel_build_log.txt\n" );
53
54
int ret = 0;
55
56
#ifdef _WIN32
57
- hDLL = LoadLibrary( "atiadlxx.dll" );
58
+ hDLL = LoadLibraryW( L"atiadlxx.dll" );
59
if( !hDLL )
60
- hDLL = LoadLibrary( "atiadlxy.dll" );
61
+ hDLL = LoadLibraryW( L"atiadlxy.dll" );
62
#else
63
hDLL = dlopen( "libatiadlxx.so", RTLD_LAZY|RTLD_GLOBAL );
64
#endif
65
66
ADL_Main_Control_Destroy = (ADL_MAIN_CONTROL_DESTROY)adl_address(hDLL, "ADL_Main_Control_Destroy");
67
ADL_Adapter_NumberOfAdapters_Get = (ADL_ADAPTER_NUMBEROFADAPTERS_GET)adl_address(hDLL, "ADL_Adapter_NumberOfAdapters_Get");
68
ADL_PowerXpress_Scheme_Get = (ADL_POWERXPRESS_SCHEME_GET)adl_address(hDLL, "ADL_PowerXpress_Scheme_Get");
69
- if( !ADL_Main_Control_Destroy || !ADL_Main_Control_Destroy || !ADL_Adapter_NumberOfAdapters_Get ||
70
+ if( !ADL_Main_Control_Create || !ADL_Main_Control_Destroy || !ADL_Adapter_NumberOfAdapters_Get ||
71
!ADL_PowerXpress_Scheme_Get )
72
goto fail1;
73
74
x264-snapshot-20130723-2245.tar.bz2/common/opencl.h -> x264-snapshot-20140321-2245.tar.bz2/common/opencl.h
Changed
10
1
2
/*****************************************************************************
3
* opencl.h: OpenCL structures and defines
4
*****************************************************************************
5
- * Copyright (C) 2012-2013 x264 project
6
+ * Copyright (C) 2012-2014 x264 project
7
*
8
* Authors: Steve Borho <sborho@multicorewareinc.com>
9
* Anton Mitrofanov <BugMaster@narod.ru>
10
x264-snapshot-20130723-2245.tar.bz2/common/osdep.c -> x264-snapshot-20140321-2245.tar.bz2/common/osdep.c
Changed
109
1
2
/*****************************************************************************
3
* osdep.c: platform-specific code
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Steven Walters <kemuri9@gmail.com>
9
* Laurent Aimar <fenrir@via.ecp.fr>
10
+ * Henrik Gramner <henrik@gramner.com>
11
*
12
* This program is free software; you can redistribute it and/or modify
13
* it under the terms of the GNU General Public License as published by
14
15
16
#include "common.h"
17
18
+#ifdef _WIN32
19
+#include <windows.h>
20
+#include <io.h>
21
+#endif
22
+
23
#if SYS_WINDOWS
24
#include <sys/types.h>
25
#include <sys/timeb.h>
26
27
#include <time.h>
28
29
#if PTW32_STATIC_LIB
30
-#define WIN32_LEAN_AND_MEAN
31
-#include <windows.h>
32
/* this is a global in pthread-win32 to indicate if it has been initialized or not */
33
extern int ptw32_processInitialized;
34
#endif
35
36
{}
37
#endif
38
#endif
39
+
40
+#ifdef _WIN32
41
+/* Functions for dealing with Unicode on Windows. */
42
+FILE *x264_fopen( const char *filename, const char *mode )
43
+{
44
+ wchar_t filename_utf16[MAX_PATH];
45
+ wchar_t mode_utf16[16];
46
+ if( utf8_to_utf16( filename, filename_utf16 ) && utf8_to_utf16( mode, mode_utf16 ) )
47
+ return _wfopen( filename_utf16, mode_utf16 );
48
+ return NULL;
49
+}
50
+
51
+int x264_rename( const char *oldname, const char *newname )
52
+{
53
+ wchar_t oldname_utf16[MAX_PATH];
54
+ wchar_t newname_utf16[MAX_PATH];
55
+ if( utf8_to_utf16( oldname, oldname_utf16 ) && utf8_to_utf16( newname, newname_utf16 ) )
56
+ {
57
+ /* POSIX says that rename() removes the destination, but Win32 doesn't. */
58
+ _wunlink( newname_utf16 );
59
+ return _wrename( oldname_utf16, newname_utf16 );
60
+ }
61
+ return -1;
62
+}
63
+
64
+int x264_stat( const char *path, x264_struct_stat *buf )
65
+{
66
+ wchar_t path_utf16[MAX_PATH];
67
+ if( utf8_to_utf16( path, path_utf16 ) )
68
+ return _wstati64( path_utf16, buf );
69
+ return -1;
70
+}
71
+
72
+int x264_vfprintf( FILE *stream, const char *format, va_list arg )
73
+{
74
+ HANDLE console = NULL;
75
+ DWORD mode;
76
+
77
+ if( stream == stdout )
78
+ console = GetStdHandle( STD_OUTPUT_HANDLE );
79
+ else if( stream == stderr )
80
+ console = GetStdHandle( STD_ERROR_HANDLE );
81
+
82
+ /* Only attempt to convert to UTF-16 when writing to a non-redirected console screen buffer. */
83
+ if( GetConsoleMode( console, &mode ) )
84
+ {
85
+ char buf[4096];
86
+ wchar_t buf_utf16[4096];
87
+
88
+ int length = vsnprintf( buf, sizeof(buf), format, arg );
89
+ if( length > 0 && length < sizeof(buf) )
90
+ {
91
+ /* WriteConsoleW is the most reliable way to output Unicode to a console. */
92
+ int length_utf16 = MultiByteToWideChar( CP_UTF8, 0, buf, length, buf_utf16, sizeof(buf_utf16)/sizeof(wchar_t) );
93
+ DWORD written;
94
+ WriteConsoleW( console, buf_utf16, length_utf16, &written, NULL );
95
+ return length;
96
+ }
97
+ }
98
+ return vfprintf( stream, format, arg );
99
+}
100
+
101
+int x264_is_pipe( const char *path )
102
+{
103
+ wchar_t path_utf16[MAX_PATH];
104
+ if( utf8_to_utf16( path, path_utf16 ) )
105
+ return WaitNamedPipeW( path_utf16, 0 );
106
+ return 0;
107
+}
108
+#endif
109
x264-snapshot-20130723-2245.tar.bz2/common/osdep.h -> x264-snapshot-20140321-2245.tar.bz2/common/osdep.h
Changed
122
1
2
/*****************************************************************************
3
* osdep.h: platform-specific code
4
*****************************************************************************
5
- * Copyright (C) 2007-2013 x264 project
6
+ * Copyright (C) 2007-2014 x264 project
7
*
8
* Authors: Loren Merritt <lorenm@u.washington.edu>
9
* Laurent Aimar <fenrir@via.ecp.fr>
10
+ * Henrik Gramner <henrik@gramner.com>
11
*
12
* This program is free software; you can redistribute it and/or modify
13
* it under the terms of the GNU General Public License as published by
14
15
#include <stdio.h>
16
#include <sys/stat.h>
17
#include <inttypes.h>
18
+#include <stdarg.h>
19
20
#include "config.h"
21
22
+#ifdef __INTEL_COMPILER
23
+#include <mathimf.h>
24
+#else
25
+#include <math.h>
26
+#endif
27
+
28
#if !HAVE_LOG2F
29
#define log2f(x) (logf(x)/0.693147180559945f)
30
#define log2(x) (log(x)/0.693147180559945)
31
#endif
32
33
-#ifdef _WIN32
34
-#include <io.h> // _setmode()
35
-#include <fcntl.h> // _O_BINARY
36
-#endif
37
-
38
#ifdef __ICL
39
#define inline __inline
40
#define strcasecmp _stricmp
41
42
#define S_ISREG(x) (((x) & S_IFMT) == S_IFREG)
43
#endif
44
45
-#ifdef __INTEL_COMPILER
46
-#include <mathimf.h>
47
-#else
48
-#include <math.h>
49
-#endif
50
-
51
#if (defined(__GNUC__) || defined(__INTEL_COMPILER)) && (ARCH_X86 || ARCH_X86_64)
52
#define HAVE_X86_INLINE_ASM 1
53
#endif
54
55
#if !defined(isfinite) && (SYS_OPENBSD || SYS_SunOS)
56
#define isfinite finite
57
#endif
58
+
59
#ifdef _WIN32
60
-#define rename(src,dst) (unlink(dst), rename(src,dst)) // POSIX says that rename() removes the destination, but win32 doesn't.
61
#ifndef strtok_r
62
#define strtok_r(str,delim,save) strtok(str,delim)
63
#endif
64
+
65
+#define utf8_to_utf16( utf8, utf16 )\
66
+ MultiByteToWideChar( CP_UTF8, MB_ERR_INVALID_CHARS, utf8, -1, utf16, sizeof(utf16)/sizeof(wchar_t) )
67
+FILE *x264_fopen( const char *filename, const char *mode );
68
+int x264_rename( const char *oldname, const char *newname );
69
+#define x264_struct_stat struct _stati64
70
+#define x264_fstat _fstati64
71
+int x264_stat( const char *path, x264_struct_stat *buf );
72
+int x264_vfprintf( FILE *stream, const char *format, va_list arg );
73
+int x264_is_pipe( const char *path );
74
+#else
75
+#define x264_fopen fopen
76
+#define x264_rename rename
77
+#define x264_struct_stat struct stat
78
+#define x264_fstat fstat
79
+#define x264_stat stat
80
+#define x264_vfprintf vfprintf
81
+#define x264_is_pipe(x) 0
82
#endif
83
84
#ifdef __ICL
85
86
87
#define EXPAND(x) x
88
89
-#if HAVE_32B_STACK_ALIGNMENT
90
+#if STACK_ALIGNMENT >= 32
91
#define ALIGNED_ARRAY_32( type, name, sub1, ... )\
92
ALIGNED_32( type name sub1 __VA_ARGS__ )
93
#else
94
95
#define x264_lower_thread_priority(p)
96
#endif
97
98
-static inline uint8_t x264_is_regular_file( FILE *filehandle )
99
+static inline int x264_is_regular_file( FILE *filehandle )
100
{
101
- struct stat file_stat;
102
- if( fstat( fileno( filehandle ), &file_stat ) )
103
- return -1;
104
+ x264_struct_stat file_stat;
105
+ if( x264_fstat( fileno( filehandle ), &file_stat ) )
106
+ return 1;
107
return S_ISREG( file_stat.st_mode );
108
}
109
110
-static inline uint8_t x264_is_regular_file_path( const char *filename )
111
+static inline int x264_is_regular_file_path( const char *filename )
112
{
113
- struct stat file_stat;
114
- if( stat( filename, &file_stat ) )
115
- return -1;
116
+ x264_struct_stat file_stat;
117
+ if( x264_stat( filename, &file_stat ) )
118
+ return !x264_is_pipe( filename );
119
return S_ISREG( file_stat.st_mode );
120
}
121
122
x264-snapshot-20130723-2245.tar.bz2/common/pixel.c -> x264-snapshot-20140321-2245.tar.bz2/common/pixel.c
Changed
173
1
2
/*****************************************************************************
3
* pixel.c: pixel metrics
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Loren Merritt <lorenm@u.washington.edu>
9
* Laurent Aimar <fenrir@via.ecp.fr>
10
11
#endif
12
#if ARCH_ARM
13
# include "arm/pixel.h"
14
+# include "arm/predict.h"
15
#endif
16
#if ARCH_UltraSPARC
17
# include "sparc/pixel.h"
18
19
INTRA_MBCMP_8x8( sad, _mmx2, _c )
20
INTRA_MBCMP_8x8(sa8d, _sse2, _sse2 )
21
#endif
22
+#if !HIGH_BIT_DEPTH && HAVE_ARMV6
23
+INTRA_MBCMP_8x8( sad, _neon, _neon )
24
+INTRA_MBCMP_8x8(sa8d, _neon, _neon )
25
+#endif
26
27
#define INTRA_MBCMP( mbcmp, size, pred1, pred2, pred3, chroma, cpu, cpu2 )\
28
void x264_intra_##mbcmp##_x3_##size##chroma##cpu( pixel *fenc, pixel *fdec, int res[3] )\
29
30
31
#if HAVE_MMX
32
#if HIGH_BIT_DEPTH
33
+#define x264_predict_8x8c_v_mmx2 x264_predict_8x8c_v_mmx
34
+#define x264_predict_8x16c_v_mmx2 x264_predict_8x16c_v_c
35
#define x264_predict_8x8c_v_sse2 x264_predict_8x8c_v_sse
36
#define x264_predict_8x16c_v_sse2 x264_predict_8x16c_v_sse
37
#define x264_predict_16x16_v_sse2 x264_predict_16x16_v_sse
38
INTRA_MBCMP( sad, 4x4, v, h, dc, , _mmx2, _c )
39
-INTRA_MBCMP( sad, 8x8, dc, h, v, c, _mmx2, _c )
40
+INTRA_MBCMP( sad, 8x8, dc, h, v, c, _mmx2, _mmx2 )
41
+INTRA_MBCMP( sad, 8x16, dc, h, v, c, _mmx2, _mmx2 )
42
+INTRA_MBCMP(satd, 8x16, dc, h, v, c, _mmx2, _mmx2 )
43
INTRA_MBCMP( sad, 16x16, v, h, dc, , _mmx2, _mmx2 )
44
INTRA_MBCMP( sad, 8x8, dc, h, v, c, _sse2, _sse2 )
45
+INTRA_MBCMP( sad, 8x16, dc, h, v, c, _sse2, _sse2 )
46
+INTRA_MBCMP(satd, 8x16, dc, h, v, c, _sse2, _sse2 )
47
INTRA_MBCMP( sad, 16x16, v, h, dc, , _sse2, _sse2 )
48
INTRA_MBCMP( sad, 8x8, dc, h, v, c, _ssse3, _sse2 )
49
+INTRA_MBCMP( sad, 8x16, dc, h, v, c, _ssse3, _sse2 )
50
+INTRA_MBCMP(satd, 8x16, dc, h, v, c, _ssse3, _sse2 )
51
INTRA_MBCMP( sad, 16x16, v, h, dc, , _ssse3, _sse2 )
52
+INTRA_MBCMP(satd, 8x16, dc, h, v, c, _sse4, _sse2 )
53
+INTRA_MBCMP(satd, 8x16, dc, h, v, c, _avx, _sse2 )
54
#else
55
#define x264_predict_8x16c_v_mmx2 x264_predict_8x16c_v_mmx
56
INTRA_MBCMP( sad, 8x16, dc, h, v, c, _mmx2, _mmx2 )
57
58
INTRA_MBCMP(satd, 8x16, dc, h, v, c, _xop, _mmx2 )
59
#endif
60
#endif
61
+#if !HIGH_BIT_DEPTH && HAVE_ARMV6
62
+INTRA_MBCMP( sad, 4x4, v, h, dc, , _neon, _c )
63
+INTRA_MBCMP(satd, 4x4, v, h, dc, , _neon, _c )
64
+INTRA_MBCMP( sad, 8x8, dc, h, v, c, _neon, _neon )
65
+INTRA_MBCMP(satd, 8x8, dc, h, v, c, _neon, _neon )
66
+INTRA_MBCMP( sad, 8x16, dc, h, v, c, _neon, _c )
67
+INTRA_MBCMP(satd, 8x16, dc, h, v, c, _neon, _c )
68
+INTRA_MBCMP( sad, 16x16, v, h, dc, , _neon, _neon )
69
+INTRA_MBCMP(satd, 16x16, v, h, dc, , _neon, _neon )
70
+#endif
71
72
// No C implementation of intra_satd_x9. See checkasm for its behavior,
73
// or see x264_mb_analyse_intra for the entirely different algorithm we
74
75
pixf->intra_sad_x3_8x8 = x264_intra_sad_x3_8x8_mmx2;
76
pixf->intra_sad_x3_8x8c = x264_intra_sad_x3_8x8c_mmx2;
77
pixf->intra_satd_x3_8x8c = x264_intra_satd_x3_8x8c_mmx2;
78
+ pixf->intra_sad_x3_8x16c = x264_intra_sad_x3_8x16c_mmx2;
79
+ pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_mmx2;
80
pixf->intra_sad_x3_16x16 = x264_intra_sad_x3_16x16_mmx2;
81
pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_mmx2;
82
}
83
84
pixf->asd8 = x264_pixel_asd8_sse2;
85
pixf->intra_sad_x3_8x8 = x264_intra_sad_x3_8x8_sse2;
86
pixf->intra_sad_x3_8x8c = x264_intra_sad_x3_8x8c_sse2;
87
+ pixf->intra_sad_x3_8x16c = x264_intra_sad_x3_8x16c_sse2;
88
+ pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_sse2;
89
pixf->intra_sad_x3_16x16 = x264_intra_sad_x3_16x16_sse2;
90
}
91
if( cpu&X264_CPU_SSE2_IS_FAST )
92
93
pixf->intra_sad_x3_4x4 = x264_intra_sad_x3_4x4_ssse3;
94
pixf->intra_sad_x3_8x8 = x264_intra_sad_x3_8x8_ssse3;
95
pixf->intra_sad_x3_8x8c = x264_intra_sad_x3_8x8c_ssse3;
96
+ pixf->intra_sad_x3_8x16c = x264_intra_sad_x3_8x16c_ssse3;
97
+ pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_ssse3;
98
pixf->intra_sad_x3_16x16 = x264_intra_sad_x3_16x16_ssse3;
99
}
100
if( cpu&X264_CPU_SSE4 )
101
102
#if ARCH_X86_64
103
pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_sse4;
104
#endif
105
+ pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_sse4;
106
}
107
if( cpu&X264_CPU_AVX )
108
{
109
110
#if ARCH_X86_64
111
pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_avx;
112
#endif
113
+ pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_avx;
114
}
115
if( cpu&X264_CPU_XOP )
116
{
117
118
pixf->sad_x4[PIXEL_8x16] = x264_pixel_sad_x4_8x16_cache64_sse2;
119
}
120
}
121
-
122
- if( cpu&X264_CPU_SSE_MISALIGN )
123
- {
124
- INIT2( sad_x3, _sse2_misalign );
125
- INIT2( sad_x4, _sse2_misalign );
126
- }
127
}
128
129
if( cpu&X264_CPU_SSE2_IS_FAST && !(cpu&X264_CPU_CACHELINE_64) )
130
131
}
132
else
133
{
134
- pixf->sad_x4[PIXEL_8x4] = x264_pixel_sad_x4_8x4_ssse3;
135
- pixf->sad_x4[PIXEL_8x8] = x264_pixel_sad_x4_8x8_ssse3;
136
- pixf->sad_x4[PIXEL_8x16] = x264_pixel_sad_x4_8x16_ssse3;
137
+ INIT2( sad_x3, _ssse3 );
138
+ INIT5( sad_x4, _ssse3 );
139
}
140
if( (cpu&X264_CPU_SLOW_ATOM) || (cpu&X264_CPU_SLOW_SHUFFLE) )
141
{
142
143
if( cpu&X264_CPU_AVX )
144
{
145
INIT2_NAME( sad_aligned, sad, _sse2 ); /* AVX-capable CPUs don't benefit from an aligned version */
146
+ INIT2( sad_x3, _avx );
147
+ INIT2( sad_x4, _avx );
148
INIT8( satd, _avx );
149
INIT7( satd_x3, _avx );
150
INIT7( satd_x4, _avx );
151
152
pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_neon;
153
pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_neon;
154
pixf->var[PIXEL_8x8] = x264_pixel_var_8x8_neon;
155
+ pixf->var[PIXEL_8x16] = x264_pixel_var_8x16_neon;
156
pixf->var[PIXEL_16x16] = x264_pixel_var_16x16_neon;
157
pixf->var2[PIXEL_8x8] = x264_pixel_var2_8x8_neon;
158
+ pixf->var2[PIXEL_8x16] = x264_pixel_var2_8x16_neon;
159
+
160
+ pixf->intra_sad_x3_4x4 = x264_intra_sad_x3_4x4_neon;
161
+ pixf->intra_satd_x3_4x4 = x264_intra_satd_x3_4x4_neon;
162
+ pixf->intra_sad_x3_8x8 = x264_intra_sad_x3_8x8_neon;
163
+ pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_neon;
164
+ pixf->intra_sad_x3_8x8c = x264_intra_sad_x3_8x8c_neon;
165
+ pixf->intra_satd_x3_8x8c = x264_intra_satd_x3_8x8c_neon;
166
+ pixf->intra_sad_x3_8x16c = x264_intra_sad_x3_8x16c_neon;
167
+ pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_neon;
168
+ pixf->intra_sad_x3_16x16 = x264_intra_sad_x3_16x16_neon;
169
+ pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_neon;
170
171
pixf->ssim_4x4x2_core = x264_pixel_ssim_4x4x2_core_neon;
172
pixf->ssim_end4 = x264_pixel_ssim_end4_neon;
173
x264-snapshot-20130723-2245.tar.bz2/common/pixel.h -> x264-snapshot-20140321-2245.tar.bz2/common/pixel.h
Changed
15
1
2
/*****************************************************************************
3
* pixel.h: pixel metrics
4
*****************************************************************************
5
- * Copyright (C) 2004-2013 x264 project
6
+ * Copyright (C) 2004-2014 x264 project
7
*
8
* Authors: Loren Merritt <lorenm@u.washington.edu>
9
* Jason Garrett-Glaser <darkshikari@gmail.com>
10
- Henrik Gramner <hengar-6@student.ltu.se>
11
+ Henrik Gramner <henrik@gramner.com>
12
*
13
* This program is free software; you can redistribute it and/or modify
14
* it under the terms of the GNU General Public License as published by
15
x264-snapshot-20130723-2245.tar.bz2/common/ppc/dct.c -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/dct.c
Changed
10
1
2
/*****************************************************************************
3
* dct.c: ppc transform and zigzag
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Guillaume Poirier <gpoirier@mplayerhq.hu>
9
* Eric Petit <eric.petit@lapsus.org>
10
x264-snapshot-20130723-2245.tar.bz2/common/ppc/dct.h -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/dct.h
Changed
10
1
2
/*****************************************************************************
3
* dct.h: ppc transform and zigzag
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Eric Petit <eric.petit@lapsus.org>
9
* Guillaume Poirier <gpoirier@mplayerhq.hu>
10
x264-snapshot-20130723-2245.tar.bz2/common/ppc/deblock.c -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/deblock.c
Changed
10
1
2
/*****************************************************************************
3
* deblock.c: ppc deblocking
4
*****************************************************************************
5
- * Copyright (C) 2007-2013 x264 project
6
+ * Copyright (C) 2007-2014 x264 project
7
*
8
* Authors: Guillaume Poirier <gpoirier@mplayerhq.hu>
9
*
10
x264-snapshot-20130723-2245.tar.bz2/common/ppc/mc.c -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/mc.c
Changed
10
1
2
/*****************************************************************************
3
* mc.c: ppc motion compensation
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Eric Petit <eric.petit@lapsus.org>
9
* Guillaume Poirier <gpoirier@mplayerhq.hu>
10
x264-snapshot-20130723-2245.tar.bz2/common/ppc/mc.h -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/mc.h
Changed
10
1
2
/*****************************************************************************
3
* mc.h: ppc motion compensation
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Eric Petit <eric.petit@lapsus.org>
9
*
10
x264-snapshot-20130723-2245.tar.bz2/common/ppc/pixel.c -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/pixel.c
Changed
10
1
2
/*****************************************************************************
3
* pixel.c: ppc pixel metrics
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Eric Petit <eric.petit@lapsus.org>
9
* Guillaume Poirier <gpoirier@mplayerhq.hu>
10
x264-snapshot-20130723-2245.tar.bz2/common/ppc/pixel.h -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/pixel.h
Changed
10
1
2
/*****************************************************************************
3
* pixel.h: ppc pixel metrics
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Eric Petit <eric.petit@lapsus.org>
9
*
10
x264-snapshot-20130723-2245.tar.bz2/common/ppc/ppccommon.h -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/ppccommon.h
Changed
10
1
2
/*****************************************************************************
3
* ppccommon.h: ppc utility macros
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Eric Petit <eric.petit@lapsus.org>
9
*
10
x264-snapshot-20130723-2245.tar.bz2/common/ppc/predict.c -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/predict.c
Changed
10
1
2
/*****************************************************************************
3
* predict.c: ppc intra prediction
4
*****************************************************************************
5
- * Copyright (C) 2007-2013 x264 project
6
+ * Copyright (C) 2007-2014 x264 project
7
*
8
* Authors: Guillaume Poirier <gpoirier@mplayerhq.hu>
9
*
10
x264-snapshot-20130723-2245.tar.bz2/common/ppc/predict.h -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/predict.h
Changed
10
1
2
/*****************************************************************************
3
* predict.h: ppc intra prediction
4
*****************************************************************************
5
- * Copyright (C) 2007-2013 x264 project
6
+ * Copyright (C) 2007-2014 x264 project
7
*
8
* Authors: Guillaume Poirier <gpoirier@mplayerhq.hu>
9
*
10
x264-snapshot-20130723-2245.tar.bz2/common/ppc/quant.c -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/quant.c
Changed
10
1
2
/*****************************************************************************
3
* quant.c: ppc quantization
4
*****************************************************************************
5
- * Copyright (C) 2007-2013 x264 project
6
+ * Copyright (C) 2007-2014 x264 project
7
*
8
* Authors: Guillaume Poirier <gpoirier@mplayerhq.hu>
9
*
10
x264-snapshot-20130723-2245.tar.bz2/common/ppc/quant.h -> x264-snapshot-20140321-2245.tar.bz2/common/ppc/quant.h
Changed
10
1
2
/*****************************************************************************
3
* quant.h: ppc quantization
4
*****************************************************************************
5
- * Copyright (C) 2007-2013 x264 project
6
+ * Copyright (C) 2007-2014 x264 project
7
*
8
* Authors: Guillaume Poirier <gpoirier@mplayerhq.hu>
9
*
10
x264-snapshot-20130723-2245.tar.bz2/common/predict.c -> x264-snapshot-20140321-2245.tar.bz2/common/predict.c
Changed
16
1
2
/*****************************************************************************
3
* predict.c: intra prediction
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
* Loren Merritt <lorenm@u.washington.edu>
10
* Jason Garrett-Glaser <darkshikari@gmail.com>
11
- * Henrik Gramner <hengar-6@student.ltu.se>
12
+ * Henrik Gramner <henrik@gramner.com>
13
*
14
* This program is free software; you can redistribute it and/or modify
15
* it under the terms of the GNU General Public License as published by
16
x264-snapshot-20130723-2245.tar.bz2/common/predict.h -> x264-snapshot-20140321-2245.tar.bz2/common/predict.h
Changed
10
1
2
/*****************************************************************************
3
* predict.h: intra prediction
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Loren Merritt <lorenm@u.washington.edu>
9
* Laurent Aimar <fenrir@via.ecp.fr>
10
x264-snapshot-20130723-2245.tar.bz2/common/quant.c -> x264-snapshot-20140321-2245.tar.bz2/common/quant.c
Changed
16
1
2
/*****************************************************************************
3
* quant.c: quantization and level-run
4
*****************************************************************************
5
- * Copyright (C) 2005-2013 x264 project
6
+ * Copyright (C) 2005-2014 x264 project
7
*
8
* Authors: Loren Merritt <lorenm@u.washington.edu>
9
* Jason Garrett-Glaser <darkshikari@gmail.com>
10
* Christian Heine <sennindemokrit@gmx.net>
11
- * Henrik Gramner <hengar-6@student.ltu.se>
12
+ * Henrik Gramner <henrik@gramner.com>
13
*
14
* This program is free software; you can redistribute it and/or modify
15
* it under the terms of the GNU General Public License as published by
16
x264-snapshot-20130723-2245.tar.bz2/common/quant.h -> x264-snapshot-20140321-2245.tar.bz2/common/quant.h
Changed
10
1
2
/*****************************************************************************
3
* quant.h: quantization and level-run
4
*****************************************************************************
5
- * Copyright (C) 2005-2013 x264 project
6
+ * Copyright (C) 2005-2014 x264 project
7
*
8
* Authors: Loren Merritt <lorenm@u.washington.edu>
9
* Jason Garrett-Glaser <darkshikari@gmail.com>
10
x264-snapshot-20130723-2245.tar.bz2/common/rectangle.c -> x264-snapshot-20140321-2245.tar.bz2/common/rectangle.c
Changed
10
1
2
/*****************************************************************************
3
* rectangle.c: rectangle filling
4
*****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
*
8
* Authors: Jason Garrett-Glaser <darkshikari@gmail.com>
9
*
10
x264-snapshot-20130723-2245.tar.bz2/common/rectangle.h -> x264-snapshot-20140321-2245.tar.bz2/common/rectangle.h
Changed
10
1
2
/*****************************************************************************
3
* rectangle.h: rectangle filling
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Jason Garrett-Glaser <darkshikari@gmail.com>
9
* Loren Merritt <lorenm@u.washington.edu>
10
x264-snapshot-20130723-2245.tar.bz2/common/set.c -> x264-snapshot-20140321-2245.tar.bz2/common/set.c
Changed
42
1
2
/*****************************************************************************
3
* set.c: quantization init
4
*****************************************************************************
5
- * Copyright (C) 2005-2013 x264 project
6
+ * Copyright (C) 2005-2014 x264 project
7
*
8
* Authors: Loren Merritt <lorenm@u.washington.edu>
9
*
10
11
}\
12
else\
13
{\
14
- CHECKED_MALLOC( h-> quant##w##_mf[i], (QP_MAX+1)*size*sizeof(udctcoef) );\
15
+ CHECKED_MALLOC( h-> quant##w##_mf[i], (QP_MAX_SPEC+1)*size*sizeof(udctcoef) );\
16
CHECKED_MALLOC( h->dequant##w##_mf[i], 6*size*sizeof(int) );\
17
- CHECKED_MALLOC( h->unquant##w##_mf[i], (QP_MAX+1)*size*sizeof(int) );\
18
+ CHECKED_MALLOC( h->unquant##w##_mf[i], (QP_MAX_SPEC+1)*size*sizeof(int) );\
19
}\
20
for( j = 0; j < i; j++ )\
21
if( deadzone[j] == deadzone[i] &&\
22
23
}\
24
else\
25
{\
26
- CHECKED_MALLOC( h->quant##w##_bias[i], (QP_MAX+1)*size*sizeof(udctcoef) );\
27
- CHECKED_MALLOC( h->quant##w##_bias0[i], (QP_MAX+1)*size*sizeof(udctcoef) );\
28
+ CHECKED_MALLOC( h->quant##w##_bias[i], (QP_MAX_SPEC+1)*size*sizeof(udctcoef) );\
29
+ CHECKED_MALLOC( h->quant##w##_bias0[i], (QP_MAX_SPEC+1)*size*sizeof(udctcoef) );\
30
}\
31
}
32
33
34
quant8_mf[i_list][q][i] = DIV(def_quant8[q][i] * 16, h->pps->scaling_list[4+i_list][i]);
35
}
36
}
37
- for( int q = 0; q < QP_MAX+1; q++ )
38
+ for( int q = 0; q <= QP_MAX_SPEC; q++ )
39
{
40
int j;
41
for( int i_list = 0; i_list < 4; i_list++ )
42
x264-snapshot-20130723-2245.tar.bz2/common/set.h -> x264-snapshot-20140321-2245.tar.bz2/common/set.h
Changed
109
1
2
/*****************************************************************************
3
* set.h: quantization init
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Loren Merritt <lorenm@u.washington.edu>
9
* Laurent Aimar <fenrir@via.ecp.fr>
10
11
x264_cqm_jvt8i, x264_cqm_jvt8p
12
};
13
14
+// 1080i25_avci50, 1080p25_avci50
15
+static const uint8_t x264_cqm_avci50_4ic[16] =
16
+{
17
+ 16,22,28,40,
18
+ 22,28,40,44,
19
+ 28,40,44,48,
20
+ 40,44,48,60
21
+};
22
+
23
+// 1080i25_avci50,
24
+static const uint8_t x264_cqm_avci50_1080i_8iy[64] =
25
+{
26
+ 16,18,19,21,27,33,81,87,
27
+ 18,19,21,24,30,33,81,87,
28
+ 19,21,24,27,30,78,84,90,
29
+ 21,24,27,30,33,78,84,90,
30
+ 24,27,30,33,78,81,84,90,
31
+ 24,27,30,33,78,81,84,93,
32
+ 27,30,33,78,78,81,87,93,
33
+ 30,33,33,78,81,84,87,96
34
+};
35
+
36
+// 1080p25_avci50, 720p25_avci50, 720p50_avci50
37
+static const uint8_t x264_cqm_avci50_p_8iy[64] =
38
+{
39
+ 16,18,19,21,24,27,30,33,
40
+ 18,19,21,24,27,30,33,78,
41
+ 19,21,24,27,30,33,78,81,
42
+ 21,24,27,30,33,78,81,84,
43
+ 24,27,30,33,78,81,84,87,
44
+ 27,30,33,78,81,84,87,90,
45
+ 30,33,78,81,84,87,90,93,
46
+ 33,78,81,84,87,90,93,96
47
+};
48
+
49
+// 1080i25_avci100, 1080p25_avci100
50
+static const uint8_t x264_cqm_avci100_1080_4ic[16] =
51
+{
52
+ 16,20,26,32,
53
+ 20,26,32,38,
54
+ 26,32,38,44,
55
+ 32,38,44,50
56
+};
57
+
58
+// 720p25_avci100, 720p50_avci100
59
+static const uint8_t x264_cqm_avci100_720p_4ic[16] =
60
+{
61
+ 16,21,27,34,
62
+ 21,27,34,41,
63
+ 27,34,41,46,
64
+ 34,41,46,54
65
+};
66
+
67
+// 1080i25_avci100,
68
+static const uint8_t x264_cqm_avci100_1080i_8iy[64] =
69
+{
70
+ 16,19,20,23,24,26,32,42,
71
+ 18,19,22,24,26,32,36,42,
72
+ 18,20,23,24,26,32,36,63,
73
+ 19,20,23,26,32,36,42,63,
74
+ 20,22,24,26,32,36,59,63,
75
+ 22,23,24,26,32,36,59,68,
76
+ 22,23,24,26,32,42,59,68,
77
+ 22,23,24,26,36,42,59,72
78
+};
79
+
80
+// 1080p25_avci100,
81
+static const uint8_t x264_cqm_avci100_1080p_8iy[64] =
82
+{
83
+ 16,18,19,20,22,23,24,26,
84
+ 18,19,20,22,23,24,26,32,
85
+ 19,20,22,23,24,26,32,36,
86
+ 20,22,23,24,26,32,36,42,
87
+ 22,23,24,26,32,36,42,59,
88
+ 23,24,26,32,36,42,59,63,
89
+ 24,26,32,36,42,59,63,68,
90
+ 26,32,36,42,59,63,68,72
91
+};
92
+
93
+// 720p25_avci100, 720p50_avci100
94
+static const uint8_t x264_cqm_avci100_720p_8iy[64] =
95
+{
96
+ 16,18,19,21,22,24,26,32,
97
+ 18,19,19,21,22,24,26,32,
98
+ 19,19,21,22,22,24,26,32,
99
+ 21,21,22,22,23,24,26,34,
100
+ 22,22,22,23,24,25,26,34,
101
+ 24,24,24,24,25,26,34,36,
102
+ 26,26,26,26,26,34,36,38,
103
+ 32,32,32,34,34,36,38,42
104
+};
105
+
106
int x264_cqm_init( x264_t *h );
107
void x264_cqm_delete( x264_t *h );
108
int x264_cqm_parse_file( x264_t *h, const char *filename );
109
x264-snapshot-20130723-2245.tar.bz2/common/sparc/pixel.asm -> x264-snapshot-20140321-2245.tar.bz2/common/sparc/pixel.asm
Changed
10
1
2
/*****************************************************************************
3
* pixel.asm: sparc pixel metrics
4
*****************************************************************************
5
- * Copyright (C) 2005-2013 x264 project
6
+ * Copyright (C) 2005-2014 x264 project
7
*
8
* Authors: Phil Jensen <philj@csufresno.edu>
9
*
10
x264-snapshot-20130723-2245.tar.bz2/common/sparc/pixel.h -> x264-snapshot-20140321-2245.tar.bz2/common/sparc/pixel.h
Changed
10
1
2
/*****************************************************************************
3
* pixel.h: sparc pixel metrics
4
*****************************************************************************
5
- * Copyright (C) 2005-2013 x264 project
6
+ * Copyright (C) 2005-2014 x264 project
7
*
8
* Authors: Phil Jensen <philj@csufresno.edu>
9
*
10
x264-snapshot-20130723-2245.tar.bz2/common/threadpool.c -> x264-snapshot-20140321-2245.tar.bz2/common/threadpool.c
Changed
10
1
2
/*****************************************************************************
3
* threadpool.c: thread pooling
4
*****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
*
8
* Authors: Steven Walters <kemuri9@gmail.com>
9
*
10
x264-snapshot-20130723-2245.tar.bz2/common/threadpool.h -> x264-snapshot-20140321-2245.tar.bz2/common/threadpool.h
Changed
10
1
2
/*****************************************************************************
3
* threadpool.h: thread pooling
4
*****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
*
8
* Authors: Steven Walters <kemuri9@gmail.com>
9
*
10
x264-snapshot-20130723-2245.tar.bz2/common/vlc.c -> x264-snapshot-20140321-2245.tar.bz2/common/vlc.c
Changed
15
1
2
/*****************************************************************************
3
* vlc.c : vlc tables
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
* Jason Garrett-Glaser <darkshikari@gmail.com>
10
- * Henrik Gramner <hengar-6@student.ltu.se>
11
+ * Henrik Gramner <henrik@gramner.com>
12
*
13
* This program is free software; you can redistribute it and/or modify
14
* it under the terms of the GNU General Public License as published by
15
x264-snapshot-20130723-2245.tar.bz2/common/win32thread.c -> x264-snapshot-20140321-2245.tar.bz2/common/win32thread.c
Changed
28
1
2
/*****************************************************************************
3
* win32thread.c: windows threading
4
*****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
*
8
* Authors: Steven Walters <kemuri9@gmail.com>
9
* Pegasys Inc. <http://www.pegasys-inc.com>
10
11
int x264_win32_threading_init( void )
12
{
13
/* find function pointers to API functions, if they exist */
14
- HANDLE kernel_dll = GetModuleHandle( TEXT( "kernel32.dll" ) );
15
+ HANDLE kernel_dll = GetModuleHandleW( L"kernel32.dll" );
16
thread_control.cond_init = (void*)GetProcAddress( kernel_dll, "InitializeConditionVariable" );
17
if( thread_control.cond_init )
18
{
19
20
* On platforms that support processor grouping, use GetThreadGroupAffinity to get the current thread's affinity instead. */
21
#if ARCH_X86_64
22
/* find function pointers to API functions specific to x86_64 platforms, if they exist */
23
- HANDLE kernel_dll = GetModuleHandle( TEXT( "kernel32.dll" ) );
24
+ HANDLE kernel_dll = GetModuleHandleW( L"kernel32.dll" );
25
BOOL (*get_thread_affinity)( HANDLE thread, x264_group_affinity_t *group_affinity ) = (void*)GetProcAddress( kernel_dll, "GetThreadGroupAffinity" );
26
if( get_thread_affinity )
27
{
28
x264-snapshot-20130723-2245.tar.bz2/common/win32thread.h -> x264-snapshot-20140321-2245.tar.bz2/common/win32thread.h
Changed
18
1
2
/*****************************************************************************
3
* win32thread.h: windows threading
4
*****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
*
8
* Authors: Steven Walters <kemuri9@gmail.com>
9
*
10
11
#ifndef X264_WIN32THREAD_H
12
#define X264_WIN32THREAD_H
13
14
-#define WIN32_LEAN_AND_MEAN
15
#include <windows.h>
16
/* the following macro is used within x264 */
17
#undef ERROR
18
x264-snapshot-20130723-2245.tar.bz2/common/x86/bitstream-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/bitstream-a.asm
Changed
10
1
2
;*****************************************************************************
3
;* bitstream-a.asm: x86 bitstream functions
4
;*****************************************************************************
5
-;* Copyright (C) 2010-2013 x264 project
6
+;* Copyright (C) 2010-2014 x264 project
7
;*
8
;* Authors: Jason Garrett-Glaser <darkshikari@gmail.com>
9
;* Henrik Gramner <henrik@gramner.com>
10
x264-snapshot-20130723-2245.tar.bz2/common/x86/cabac-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/cabac-a.asm
Changed
10
1
2
;*****************************************************************************
3
;* cabac-a.asm: x86 cabac
4
;*****************************************************************************
5
-;* Copyright (C) 2008-2013 x264 project
6
+;* Copyright (C) 2008-2014 x264 project
7
;*
8
;* Authors: Loren Merritt <lorenm@u.washington.edu>
9
;* Jason Garrett-Glaser <darkshikari@gmail.com>
10
x264-snapshot-20130723-2245.tar.bz2/common/x86/const-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/const-a.asm
Changed
18
1
2
;*****************************************************************************
3
;* const-a.asm: x86 global constants
4
;*****************************************************************************
5
-;* Copyright (C) 2010-2013 x264 project
6
+;* Copyright (C) 2010-2014 x264 project
7
;*
8
;* Authors: Loren Merritt <lorenm@u.washington.edu>
9
;* Jason Garrett-Glaser <darkshikari@gmail.com>
10
11
const pw_512, times 16 dw 512
12
const pw_00ff, times 16 dw 0x00ff
13
const pw_pixel_max,times 16 dw ((1 << BIT_DEPTH)-1)
14
+const pw_0to15, dw 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
15
const pd_1, times 8 dd 1
16
const deinterleave_shufd, dd 0,4,1,5,2,6,3,7
17
const pb_unpackbd1, times 2 db 0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3
18
x264-snapshot-20130723-2245.tar.bz2/common/x86/cpu-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/cpu-a.asm
Changed
28
1
2
;*****************************************************************************
3
;* cpu-a.asm: x86 cpu utilities
4
;*****************************************************************************
5
-;* Copyright (C) 2003-2013 x264 project
6
+;* Copyright (C) 2003-2014 x264 project
7
;*
8
;* Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
;* Loren Merritt <lorenm@u.washington.edu>
10
11
sfence
12
ret
13
14
-;-----------------------------------------------------------------------------
15
-; void cpu_mask_misalign_sse( void )
16
-;-----------------------------------------------------------------------------
17
-cglobal cpu_mask_misalign_sse
18
- sub rsp, 4
19
- stmxcsr [rsp]
20
- or dword [rsp], 1<<17
21
- ldmxcsr [rsp]
22
- add rsp, 4
23
- ret
24
-
25
cextern intel_cpu_indicator_init
26
27
;-----------------------------------------------------------------------------
28
x264-snapshot-20130723-2245.tar.bz2/common/x86/dct-32.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/dct-32.asm
Changed
10
1
2
;*****************************************************************************
3
;* dct-32.asm: x86_32 transform and zigzag
4
;*****************************************************************************
5
-;* Copyright (C) 2003-2013 x264 project
6
+;* Copyright (C) 2003-2014 x264 project
7
;*
8
;* Authors: Loren Merritt <lorenm@u.washington.edu>
9
;* Holger Lubitz <holger@lubitz.org>
10
x264-snapshot-20130723-2245.tar.bz2/common/x86/dct-64.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/dct-64.asm
Changed
10
1
2
;*****************************************************************************
3
;* dct-64.asm: x86_64 transform and zigzag
4
;*****************************************************************************
5
-;* Copyright (C) 2003-2013 x264 project
6
+;* Copyright (C) 2003-2014 x264 project
7
;*
8
;* Authors: Loren Merritt <lorenm@u.washington.edu>
9
;* Holger Lubitz <holger@lubitz.org>
10
x264-snapshot-20130723-2245.tar.bz2/common/x86/dct-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/dct-a.asm
Changed
19
1
2
;*****************************************************************************
3
;* dct-a.asm: x86 transform and zigzag
4
;*****************************************************************************
5
-;* Copyright (C) 2003-2013 x264 project
6
+;* Copyright (C) 2003-2014 x264 project
7
;*
8
;* Authors: Holger Lubitz <holger@lubitz.org>
9
;* Loren Merritt <lorenm@u.washington.edu>
10
11
mova m6, [pw_pixel_max]
12
mova m7, [pd_32]
13
pxor m5, m5
14
-.loop
15
+.loop:
16
mova m3, [r1]
17
paddd m3, m7
18
psrad m3, 6 ; dc0 0 dc1 0 dc2 0 dc3 0
19
x264-snapshot-20130723-2245.tar.bz2/common/x86/dct.h -> x264-snapshot-20140321-2245.tar.bz2/common/x86/dct.h
Changed
10
1
2
/*****************************************************************************
3
* dct.h: x86 transform and zigzag
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Loren Merritt <lorenm@u.washington.edu>
9
* Laurent Aimar <fenrir@via.ecp.fr>
10
x264-snapshot-20130723-2245.tar.bz2/common/x86/deblock-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/deblock-a.asm
Changed
271
1
2
;*****************************************************************************
3
;* deblock-a.asm: x86 deblocking
4
;*****************************************************************************
5
-;* Copyright (C) 2005-2013 x264 project
6
+;* Copyright (C) 2005-2014 x264 project
7
;*
8
;* Authors: Loren Merritt <lorenm@u.washington.edu>
9
;* Jason Garrett-Glaser <darkshikari@gmail.com>
10
11
mov r6, 2
12
mova m0, [pw_2]
13
LOAD_AB aa, bb, r2d, r3d
14
-.loop
15
+.loop:
16
mova p2, [r4+r1]
17
mova p1, [r4+2*r1]
18
mova p0, [r4+r5]
19
20
add r4, r0 ; pix+4*stride
21
mov r6, 2
22
mova m0, [pw_2]
23
-.loop
24
+.loop:
25
movu q3, [r0-8]
26
movu q2, [r0+r1-8]
27
movu q1, [r0+r1*2-8]
28
29
%define PASS8ROWS(base, base3, stride, stride3, offset) \
30
PASS8ROWS(base+offset, base3+offset, stride, stride3)
31
32
-; in: 8 rows of 4 bytes in %4..%11
33
-; out: 4 rows of 8 bytes in m0..m3
34
-%macro TRANSPOSE4x8_LOAD 11
35
- movh m0, %4
36
- movh m2, %5
37
- movh m1, %6
38
- movh m3, %7
39
- punpckl%1 m0, m2
40
- punpckl%1 m1, m3
41
- mova m2, m0
42
- punpckl%2 m0, m1
43
- punpckh%2 m2, m1
44
-
45
- movh m4, %8
46
- movh m6, %9
47
- movh m5, %10
48
- movh m7, %11
49
- punpckl%1 m4, m6
50
- punpckl%1 m5, m7
51
- mova m6, m4
52
- punpckl%2 m4, m5
53
- punpckh%2 m6, m5
54
-
55
- punpckh%3 m1, m0, m4
56
- punpckh%3 m3, m2, m6
57
- punpckl%3 m0, m4
58
- punpckl%3 m2, m6
59
-%endmacro
60
-
61
; in: 4 rows of 8 bytes in m0..m3
62
; out: 8 rows of 4 bytes in %1..%8
63
%macro TRANSPOSE8x4B_STORE 8
64
65
punpcklbw m2, m3
66
punpcklwd m1, m0, m2
67
punpckhwd m0, m2
68
- movh %1, m1
69
+ movd %1, m1
70
punpckhdq m1, m1
71
- movh %2, m1
72
- movh %3, m0
73
+ movd %2, m1
74
+ movd %3, m0
75
punpckhdq m0, m0
76
- movh %4, m0
77
+ movd %4, m0
78
79
punpckhdq m3, m3
80
punpcklbw m4, m5
81
punpcklbw m6, m3
82
punpcklwd m5, m4, m6
83
punpckhwd m4, m6
84
- movh %5, m5
85
+ movd %5, m5
86
punpckhdq m5, m5
87
- movh %6, m5
88
- movh %7, m4
89
+ movd %6, m5
90
+ movd %7, m4
91
punpckhdq m4, m4
92
- movh %8, m4
93
+ movd %8, m4
94
%endmacro
95
96
; in: 8 rows of 4 bytes in %9..%10
97
98
pextrd %8, %10, 3
99
%endmacro
100
101
-%macro TRANSPOSE4x8B_LOAD 8
102
- TRANSPOSE4x8_LOAD bw, wd, dq, %1, %2, %3, %4, %5, %6, %7, %8
103
-%endmacro
104
-
105
-%macro TRANSPOSE4x8W_LOAD 8
106
-%if mmsize==16
107
- TRANSPOSE4x8_LOAD wd, dq, qdq, %1, %2, %3, %4, %5, %6, %7, %8
108
-%else
109
+; in: 4 rows of 4 words in %1..%4
110
+; out: 4 rows of 4 words in m0..m3
111
+; clobbers: m4
112
+%macro TRANSPOSE4x4W_LOAD 4-8
113
+%if mmsize==8
114
SWAP 1, 4, 2, 3
115
- mova m0, [t5]
116
- mova m1, [t5+r1]
117
- mova m2, [t5+r1*2]
118
- mova m3, [t5+t6]
119
+ movq m0, %1
120
+ movq m1, %2
121
+ movq m2, %3
122
+ movq m3, %4
123
TRANSPOSE4x4W 0, 1, 2, 3, 4
124
+%else
125
+ movq m0, %1
126
+ movq m2, %2
127
+ movq m1, %3
128
+ movq m3, %4
129
+ punpcklwd m0, m2
130
+ punpcklwd m1, m3
131
+ mova m2, m0
132
+ punpckldq m0, m1
133
+ punpckhdq m2, m1
134
+ movhlps m1, m0
135
+ movhlps m3, m2
136
%endif
137
%endmacro
138
139
-%macro TRANSPOSE8x2W_STORE 8
140
+; in: 2 rows of 4 words in m1..m2
141
+; out: 4 rows of 2 words in %1..%4
142
+; clobbers: m0, m1
143
+%macro TRANSPOSE4x2W_STORE 4-8
144
+%if mmsize==8
145
punpckhwd m0, m1, m2
146
punpcklwd m1, m2
147
-%if mmsize==8
148
+%else
149
+ punpcklwd m1, m2
150
+ movhlps m0, m1
151
+%endif
152
movd %3, m0
153
movd %1, m1
154
psrlq m1, 32
155
psrlq m0, 32
156
movd %2, m1
157
movd %4, m0
158
+%endmacro
159
+
160
+; in: 4/8 rows of 4 words in %1..%8
161
+; out: 4 rows of 4/8 word in m0..m3
162
+; clobbers: m4, m5, m6, m7
163
+%macro TRANSPOSE4x8W_LOAD 8
164
+%if mmsize==8
165
+ TRANSPOSE4x4W_LOAD %1, %2, %3, %4
166
+%else
167
+ movq m0, %1
168
+ movq m2, %2
169
+ movq m1, %3
170
+ movq m3, %4
171
+ punpcklwd m0, m2
172
+ punpcklwd m1, m3
173
+ mova m2, m0
174
+ punpckldq m0, m1
175
+ punpckhdq m2, m1
176
+
177
+ movq m4, %5
178
+ movq m6, %6
179
+ movq m5, %7
180
+ movq m7, %8
181
+ punpcklwd m4, m6
182
+ punpcklwd m5, m7
183
+ mova m6, m4
184
+ punpckldq m4, m5
185
+ punpckhdq m6, m5
186
+
187
+ punpckhqdq m1, m0, m4
188
+ punpckhqdq m3, m2, m6
189
+ punpcklqdq m0, m4
190
+ punpcklqdq m2, m6
191
+%endif
192
+%endmacro
193
+
194
+; in: 2 rows of 4/8 words in m1..m2
195
+; out: 4/8 rows of 2 words in %1..%8
196
+; clobbers: m0, m1
197
+%macro TRANSPOSE8x2W_STORE 8
198
+%if mmsize==8
199
+ TRANSPOSE4x2W_STORE %1, %2, %3, %4
200
%else
201
+ punpckhwd m0, m1, m2
202
+ punpcklwd m1, m2
203
movd %5, m0
204
movd %1, m1
205
psrldq m1, 4
206
207
%endif
208
mova m6, [pb_1]
209
psubusb m4, m6 ; alpha - 1
210
- psubusb m5, m6 ; alpha - 2
211
+ psubusb m5, m6 ; beta - 1
212
%if %0>2
213
mova %3, m4
214
%endif
215
216
;-----------------------------------------------------------------------------
217
; void deblock_h_luma( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
218
;-----------------------------------------------------------------------------
219
-
220
%if cpuflag(avx)
221
INIT_XMM cpuname
222
%else
223
INIT_MMX cpuname
224
%endif
225
-cglobal deblock_h_luma, 0,5,8,0x60+HAVE_ALIGNED_STACK*12
226
- mov r0, r0mp
227
+cglobal deblock_h_luma, 1,5,8,0x60+12
228
mov r3, r1m
229
lea r4, [r3*3]
230
sub r0, 4
231
lea r1, [r0+r4]
232
- %define pix_tmp esp+12*HAVE_ALIGNED_STACK
233
+ %define pix_tmp esp+12
234
+ ; esp is intentionally misaligned to make it aligned after pushing the arguments for deblock_%1_luma.
235
236
; transpose 6x16 -> tmp space
237
TRANSPOSE6x8_MEM PASS8ROWS(r0, r1, r3, r4), pix_tmp
238
239
;-----------------------------------------------------------------------------
240
%macro DEBLOCK_H_CHROMA_420_MBAFF 0
241
cglobal deblock_h_chroma_mbaff, 5,7,8
242
- sub r0, 4
243
- lea t6, [r1*3]
244
- mov t5, r0
245
- add r0, t6
246
- TRANSPOSE4x8W_LOAD PASS8ROWS(t5, r0, r1, t6)
247
+ CHROMA_H_START
248
+ TRANSPOSE4x4W_LOAD PASS8ROWS(t5, r0, r1, t6)
249
LOAD_MASK r2d, r3d
250
movd m6, [r4] ; tc0
251
punpcklbw m6, m6
252
pand m7, m6
253
DEBLOCK_P0_Q0
254
- TRANSPOSE8x2W_STORE PASS8ROWS(t5, r0, r1, t6, 2)
255
+ TRANSPOSE4x2W_STORE PASS8ROWS(t5, r0, r1, t6, 2)
256
RET
257
%endmacro
258
259
260
INIT_MMX mmx2
261
cglobal deblock_h_chroma_intra_mbaff, 4,6,8
262
CHROMA_H_START
263
- TRANSPOSE4x8W_LOAD PASS8ROWS(t5, r0, r1, t6)
264
+ TRANSPOSE4x4W_LOAD PASS8ROWS(t5, r0, r1, t6)
265
call chroma_intra_body
266
- TRANSPOSE8x2W_STORE PASS8ROWS(t5, r0, r1, t6, 2)
267
+ TRANSPOSE4x2W_STORE PASS8ROWS(t5, r0, r1, t6, 2)
268
RET
269
%endif ; !HIGH_BIT_DEPTH
270
271
x264-snapshot-20130723-2245.tar.bz2/common/x86/mc-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/mc-a.asm
Changed
116
1
2
;*****************************************************************************
3
;* mc-a.asm: x86 motion compensation
4
;*****************************************************************************
5
-;* Copyright (C) 2003-2013 x264 project
6
+;* Copyright (C) 2003-2014 x264 project
7
;*
8
;* Authors: Loren Merritt <lorenm@u.washington.edu>
9
;* Jason Garrett-Glaser <darkshikari@gmail.com>
10
11
jg .height_loop
12
RET
13
14
+INIT_XMM
15
cglobal pixel_avg2_w16_sse2, 6,7
16
sub r4, r2
17
lea r6, [r4+r3]
18
.height_loop:
19
- movdqu xmm0, [r2]
20
- movdqu xmm2, [r2+r3]
21
- movdqu xmm1, [r2+r4]
22
- movdqu xmm3, [r2+r6]
23
+ movu m0, [r2]
24
+ movu m2, [r2+r3]
25
+ movu m1, [r2+r4]
26
+ movu m3, [r2+r6]
27
lea r2, [r2+r3*2]
28
- pavgb xmm0, xmm1
29
- pavgb xmm2, xmm3
30
- movdqa [r0], xmm0
31
- movdqa [r0+r1], xmm2
32
+ pavgb m0, m1
33
+ pavgb m2, m3
34
+ mova [r0], m0
35
+ mova [r0+r1], m2
36
lea r0, [r0+r1*2]
37
- sub r5d, 2
38
- jg .height_loop
39
+ sub r5d, 2
40
+ jg .height_loop
41
RET
42
43
-%macro AVG2_W20 1
44
-cglobal pixel_avg2_w20_%1, 6,7
45
+cglobal pixel_avg2_w20_sse2, 6,7
46
sub r2, r4
47
lea r6, [r2+r3]
48
.height_loop:
49
- movdqu xmm0, [r4]
50
- movdqu xmm2, [r4+r3]
51
-%ifidn %1, sse2_misalign
52
- movd mm4, [r4+16]
53
- movd mm5, [r4+r3+16]
54
- pavgb xmm0, [r4+r2]
55
- pavgb xmm2, [r4+r6]
56
-%else
57
- movdqu xmm1, [r4+r2]
58
- movdqu xmm3, [r4+r6]
59
- movd mm4, [r4+16]
60
- movd mm5, [r4+r3+16]
61
- pavgb xmm0, xmm1
62
- pavgb xmm2, xmm3
63
-%endif
64
- pavgb mm4, [r4+r2+16]
65
- pavgb mm5, [r4+r6+16]
66
+ movu m0, [r4]
67
+ movu m2, [r4+r3]
68
+ movu m1, [r4+r2]
69
+ movu m3, [r4+r6]
70
+ movd mm4, [r4+16]
71
+ movd mm5, [r4+r3+16]
72
+ pavgb m0, m1
73
+ pavgb m2, m3
74
+ pavgb mm4, [r4+r2+16]
75
+ pavgb mm5, [r4+r6+16]
76
lea r4, [r4+r3*2]
77
- movdqa [r0], xmm0
78
- movd [r0+16], mm4
79
- movdqa [r0+r1], xmm2
80
- movd [r0+r1+16], mm5
81
+ mova [r0], m0
82
+ mova [r0+r1], m2
83
+ movd [r0+16], mm4
84
+ movd [r0+r1+16], mm5
85
lea r0, [r0+r1*2]
86
- sub r5d, 2
87
- jg .height_loop
88
+ sub r5d, 2
89
+ jg .height_loop
90
RET
91
-%endmacro
92
-
93
-AVG2_W20 sse2
94
-AVG2_W20 sse2_misalign
95
96
INIT_YMM avx2
97
cglobal pixel_avg2_w20, 6,7
98
99
%endmacro
100
%else ; !HIGH_BIT_DEPTH
101
%macro UNPACK_UNALIGNED 3
102
-%if mmsize == 8 || cpuflag(misalign)
103
+%if mmsize == 8
104
punpcklwd %1, %3
105
%else
106
movh %2, %3
107
108
%else ; !HIGH_BIT_DEPTH
109
INIT_MMX mmx2
110
MC_CHROMA
111
-INIT_XMM sse2, misalign
112
-MC_CHROMA
113
INIT_XMM sse2
114
MC_CHROMA
115
INIT_XMM ssse3
116
x264-snapshot-20130723-2245.tar.bz2/common/x86/mc-a2.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/mc-a2.asm
Changed
570
1
2
;*****************************************************************************
3
;* mc-a2.asm: x86 motion compensation
4
;*****************************************************************************
5
-;* Copyright (C) 2005-2013 x264 project
6
+;* Copyright (C) 2005-2014 x264 project
7
;*
8
;* Authors: Loren Merritt <lorenm@u.washington.edu>
9
;* Jason Garrett-Glaser <darkshikari@gmail.com>
10
11
12
SECTION_RODATA 32
13
14
+pw_1024: times 16 dw 1024
15
filt_mul20: times 32 db 20
16
filt_mul15: times 16 db 1, -5
17
filt_mul51: times 16 db -5, 1
18
19
deinterleave_shuf: times 2 db 0,2,4,6,8,10,12,14,1,3,5,7,9,11,13,15
20
21
%if HIGH_BIT_DEPTH
22
+v210_mask: times 4 dq 0xc00ffc003ff003ff
23
+v210_luma_shuf: times 2 db 1,2,4,5,6,7,9,10,12,13,14,15,12,13,14,15
24
+v210_chroma_shuf: times 2 db 0,1,2,3,5,6,8,9,10,11,13,14,10,11,13,14
25
+; vpermd indices {0,1,2,4,5,7,_,_} merged in the 3 lsb of each dword to save a register
26
+v210_mult: dw 0x2000,0x7fff,0x0801,0x2000,0x7ffa,0x0800,0x7ffc,0x0800
27
+ dw 0x1ffd,0x7fff,0x07ff,0x2000,0x7fff,0x0800,0x7fff,0x0800
28
+
29
deinterleave_shuf32a: SHUFFLE_MASK_W 0,2,4,6,8,10,12,14
30
deinterleave_shuf32b: SHUFFLE_MASK_W 1,3,5,7,9,11,13,15
31
%else
32
+deinterleave_rgb_shuf: db 0,3,6,9,1,4,7,10,2,5,8,11,-1,-1,-1,-1
33
+ db 0,4,8,12,1,5,9,13,2,6,10,14,-1,-1,-1,-1
34
+
35
deinterleave_shuf32a: db 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30
36
deinterleave_shuf32b: db 1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31
37
-%endif
38
-pw_1024: times 16 dw 1024
39
+%endif ; !HIGH_BIT_DEPTH
40
41
pd_16: times 4 dd 16
42
pd_0f: times 4 dd 0xffff
43
-pf_inv256: times 8 dd 0.00390625
44
45
pad10: times 8 dw 10*PIXEL_MAX
46
pad20: times 8 dw 20*PIXEL_MAX
47
48
tap2: times 4 dw 20, 20
49
tap3: times 4 dw -5, 1
50
51
+pw_0xc000: times 8 dw 0xc000
52
+pw_31: times 8 dw 31
53
+pd_4: times 4 dd 4
54
+
55
SECTION .text
56
57
cextern pb_0
58
cextern pw_1
59
+cextern pw_8
60
cextern pw_16
61
cextern pw_32
62
cextern pw_512
63
cextern pw_00ff
64
cextern pw_3fff
65
cextern pw_pixel_max
66
+cextern pw_0to15
67
cextern pd_ffff
68
69
%macro LOAD_ADD 4
70
71
%define pw_rnd [pw_32]
72
%endif
73
; This doesn't seem to be faster (with AVX) on Sandy Bridge or Bulldozer...
74
-%if cpuflag(misalign) || mmsize==32
75
+%if mmsize==32
76
.loop:
77
movu m4, [src-4]
78
movu m5, [src-2]
79
80
HPEL_V 0
81
INIT_XMM sse2
82
HPEL_V 8
83
-INIT_XMM sse2, misalign
84
-HPEL_C
85
%if ARCH_X86_64 == 0
86
INIT_XMM sse2
87
HPEL_C
88
89
RET
90
%endmacro ; PLANE_DEINTERLEAVE
91
92
+%macro PLANE_DEINTERLEAVE_RGB_CORE 9 ; pw, i_dsta, i_dstb, i_dstc, i_src, w, h, tmp1, tmp2
93
+%if cpuflag(ssse3)
94
+ mova m3, [deinterleave_rgb_shuf+(%1-3)*16]
95
+%endif
96
+%%loopy:
97
+ mov %8, r6
98
+ mov %9, %6
99
+%%loopx:
100
+ movu m0, [%8]
101
+ movu m1, [%8+%1*mmsize/4]
102
+%if cpuflag(ssse3)
103
+ pshufb m0, m3 ; b0 b1 b2 b3 g0 g1 g2 g3 r0 r1 r2 r3
104
+ pshufb m1, m3 ; b4 b5 b6 b7 g4 g5 g6 g7 r4 r5 r6 r7
105
+%elif %1 == 3
106
+ psrldq m2, m0, 6
107
+ punpcklqdq m0, m1 ; b0 g0 r0 b1 g1 r1 __ __ b4 g4 r4 b5 g5 r5
108
+ psrldq m1, 6
109
+ punpcklqdq m2, m1 ; b2 g2 r2 b3 g3 r3 __ __ b6 g6 r6 b7 g7 r7
110
+ psrlq m3, m0, 24
111
+ psrlq m4, m2, 24
112
+ punpckhbw m1, m0, m3 ; b4 b5 g4 g5 r4 r5
113
+ punpcklbw m0, m3 ; b0 b1 g0 g1 r0 r1
114
+ punpckhbw m3, m2, m4 ; b6 b7 g6 g7 r6 r7
115
+ punpcklbw m2, m4 ; b2 b3 g2 g3 r2 r3
116
+ punpcklwd m0, m2 ; b0 b1 b2 b3 g0 g1 g2 g3 r0 r1 r2 r3
117
+ punpcklwd m1, m3 ; b4 b5 b6 b7 g4 g5 g6 g7 r4 r5 r6 r7
118
+%else
119
+ pshufd m3, m0, q2301
120
+ pshufd m4, m1, q2301
121
+ punpckhbw m2, m0, m3 ; b2 b3 g2 g3 r2 r3
122
+ punpcklbw m0, m3 ; b0 b1 g0 g1 r0 r1
123
+ punpckhbw m3, m1, m4 ; b6 b7 g6 g7 r6 r7
124
+ punpcklbw m1, m4 ; b4 b5 g4 g5 r4 r5
125
+ punpcklwd m0, m2 ; b0 b1 b2 b3 g0 g1 g2 g3 r0 r1 r2 r3
126
+ punpcklwd m1, m3 ; b4 b5 b6 b7 g4 g5 g6 g7 r4 r5 r6 r7
127
+%endif
128
+ punpckldq m2, m0, m1 ; b0 b1 b2 b3 b4 b5 b6 b7 g0 g1 g2 g3 g4 g5 g6 g7
129
+ punpckhdq m0, m1 ; r0 r1 r2 r3 r4 r5 r6 r7
130
+ movh [r0+%9], m2
131
+ movhps [r2+%9], m2
132
+ movh [r4+%9], m0
133
+ add %8, %1*mmsize/2
134
+ add %9, mmsize/2
135
+ jl %%loopx
136
+ add r0, %2
137
+ add r2, %3
138
+ add r4, %4
139
+ add r6, %5
140
+ dec %7d
141
+ jg %%loopy
142
+%endmacro
143
+
144
+%macro PLANE_DEINTERLEAVE_RGB 0
145
+;-----------------------------------------------------------------------------
146
+; void x264_plane_copy_deinterleave_rgb( pixel *dsta, intptr_t i_dsta,
147
+; pixel *dstb, intptr_t i_dstb,
148
+; pixel *dstc, intptr_t i_dstc,
149
+; pixel *src, intptr_t i_src, int pw, int w, int h )
150
+;-----------------------------------------------------------------------------
151
+%if ARCH_X86_64
152
+cglobal plane_copy_deinterleave_rgb, 8,12
153
+ %define %%args r1, r3, r5, r7, r8, r9, r10, r11
154
+ mov r8d, r9m
155
+ mov r9d, r10m
156
+ add r0, r8
157
+ add r2, r8
158
+ add r4, r8
159
+ neg r8
160
+%else
161
+cglobal plane_copy_deinterleave_rgb, 1,7
162
+ %define %%args r1m, r3m, r5m, r7m, r9m, r1, r3, r5
163
+ mov r1, r9m
164
+ mov r2, r2m
165
+ mov r4, r4m
166
+ mov r6, r6m
167
+ add r0, r1
168
+ add r2, r1
169
+ add r4, r1
170
+ neg r1
171
+ mov r9m, r1
172
+ mov r1, r10m
173
+%endif
174
+ cmp dword r8m, 4
175
+ je .pw4
176
+ PLANE_DEINTERLEAVE_RGB_CORE 3, %%args ; BGR
177
+ jmp .ret
178
+.pw4:
179
+ PLANE_DEINTERLEAVE_RGB_CORE 4, %%args ; BGRA
180
+.ret:
181
+ REP_RET
182
+%endmacro
183
+
184
+%if HIGH_BIT_DEPTH == 0
185
+INIT_XMM sse2
186
+PLANE_DEINTERLEAVE_RGB
187
+INIT_XMM ssse3
188
+PLANE_DEINTERLEAVE_RGB
189
+%endif ; !HIGH_BIT_DEPTH
190
+
191
+%macro PLANE_DEINTERLEAVE_V210 0
192
+;-----------------------------------------------------------------------------
193
+; void x264_plane_copy_deinterleave_v210( uint16_t *dsty, intptr_t i_dsty,
194
+; uint16_t *dstc, intptr_t i_dstc,
195
+; uint32_t *src, intptr_t i_src, int w, int h )
196
+;-----------------------------------------------------------------------------
197
+%if ARCH_X86_64
198
+cglobal plane_copy_deinterleave_v210, 8,10,7
199
+%define src r8
200
+%define org_w r9
201
+%define h r7d
202
+%else
203
+cglobal plane_copy_deinterleave_v210, 7,7,7
204
+%define src r4m
205
+%define org_w r6m
206
+%define h dword r7m
207
+%endif
208
+ FIX_STRIDES r1, r3, r6d
209
+ shl r5, 2
210
+ add r0, r6
211
+ add r2, r6
212
+ neg r6
213
+ mov src, r4
214
+ mov org_w, r6
215
+ mova m2, [v210_mask]
216
+ mova m3, [v210_luma_shuf]
217
+ mova m4, [v210_chroma_shuf]
218
+ mova m5, [v210_mult] ; also functions as vpermd index for avx2
219
+ pshufd m6, m5, q1102
220
+
221
+ALIGN 16
222
+.loop:
223
+ movu m1, [r4]
224
+ pandn m0, m2, m1
225
+ pand m1, m2
226
+ pshufb m0, m3
227
+ pshufb m1, m4
228
+ pmulhrsw m0, m5 ; y0 y1 y2 y3 y4 y5 __ __
229
+ pmulhrsw m1, m6 ; u0 v0 u1 v1 u2 v2 __ __
230
+%if mmsize == 32
231
+ vpermd m0, m5, m0
232
+ vpermd m1, m5, m1
233
+%endif
234
+ movu [r0+r6], m0
235
+ movu [r2+r6], m1
236
+ add r4, mmsize
237
+ add r6, 3*mmsize/4
238
+ jl .loop
239
+ add r0, r1
240
+ add r2, r3
241
+ add src, r5
242
+ mov r4, src
243
+ mov r6, org_w
244
+ dec h
245
+ jg .loop
246
+ RET
247
+%endmacro ; PLANE_DEINTERLEAVE_V210
248
+
249
%if HIGH_BIT_DEPTH
250
INIT_MMX mmx2
251
PLANE_INTERLEAVE
252
253
INIT_XMM sse2
254
PLANE_INTERLEAVE
255
PLANE_DEINTERLEAVE
256
+INIT_XMM ssse3
257
+PLANE_DEINTERLEAVE_V210
258
INIT_XMM avx
259
PLANE_INTERLEAVE
260
PLANE_DEINTERLEAVE
261
+PLANE_DEINTERLEAVE_V210
262
+INIT_YMM avx2
263
+PLANE_DEINTERLEAVE_V210
264
%else
265
INIT_MMX mmx2
266
PLANE_INTERLEAVE
267
268
; uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len )
269
;-----------------------------------------------------------------------------
270
%macro MBTREE 0
271
-cglobal mbtree_propagate_cost, 7,7,7
272
- add r6d, r6d
273
- lea r0, [r0+r6*2]
274
- add r1, r6
275
- add r2, r6
276
- add r3, r6
277
- add r4, r6
278
- neg r6
279
- pxor xmm4, xmm4
280
- movss xmm6, [r5]
281
- shufps xmm6, xmm6, 0
282
- mulps xmm6, [pf_inv256]
283
- movdqa xmm5, [pw_3fff]
284
+cglobal mbtree_propagate_cost, 6,6,7
285
+ movss m6, [r5]
286
+ mov r5d, r6m
287
+ lea r0, [r0+r5*2]
288
+ add r5d, r5d
289
+ add r1, r5
290
+ add r2, r5
291
+ add r3, r5
292
+ add r4, r5
293
+ neg r5
294
+ pxor m4, m4
295
+ shufps m6, m6, 0
296
+ mova m5, [pw_3fff]
297
.loop:
298
- movq xmm2, [r2+r6] ; intra
299
- movq xmm0, [r4+r6] ; invq
300
- movq xmm3, [r3+r6] ; inter
301
- movq xmm1, [r1+r6] ; prop
302
- punpcklwd xmm2, xmm4
303
- punpcklwd xmm0, xmm4
304
- pmaddwd xmm0, xmm2
305
- pand xmm3, xmm5
306
- punpcklwd xmm1, xmm4
307
- punpcklwd xmm3, xmm4
308
+ movq m2, [r2+r5] ; intra
309
+ movq m0, [r4+r5] ; invq
310
+ movq m3, [r3+r5] ; inter
311
+ movq m1, [r1+r5] ; prop
312
+ pand m3, m5
313
+ pminsw m3, m2
314
+ punpcklwd m2, m4
315
+ punpcklwd m0, m4
316
+ pmaddwd m0, m2
317
+ punpcklwd m1, m4
318
+ punpcklwd m3, m4
319
%if cpuflag(fma4)
320
- cvtdq2ps xmm0, xmm0
321
- cvtdq2ps xmm1, xmm1
322
- fmaddps xmm0, xmm0, xmm6, xmm1
323
- cvtdq2ps xmm1, xmm2
324
- psubd xmm2, xmm3
325
- cvtdq2ps xmm2, xmm2
326
- rcpps xmm3, xmm1
327
- mulps xmm1, xmm3
328
- mulps xmm0, xmm2
329
- addps xmm2, xmm3, xmm3
330
- fnmaddps xmm3, xmm1, xmm3, xmm2
331
- mulps xmm0, xmm3
332
+ cvtdq2ps m0, m0
333
+ cvtdq2ps m1, m1
334
+ fmaddps m0, m0, m6, m1
335
+ cvtdq2ps m1, m2
336
+ psubd m2, m3
337
+ cvtdq2ps m2, m2
338
+ rcpps m3, m1
339
+ mulps m1, m3
340
+ mulps m0, m2
341
+ addps m2, m3, m3
342
+ fnmaddps m3, m1, m3, m2
343
+ mulps m0, m3
344
%else
345
- cvtdq2ps xmm0, xmm0
346
- mulps xmm0, xmm6 ; intra*invq*fps_factor>>8
347
- cvtdq2ps xmm1, xmm1 ; prop
348
- addps xmm0, xmm1 ; prop + (intra*invq*fps_factor>>8)
349
- cvtdq2ps xmm1, xmm2 ; intra
350
- psubd xmm2, xmm3 ; intra - inter
351
- cvtdq2ps xmm2, xmm2 ; intra - inter
352
- rcpps xmm3, xmm1 ; 1 / intra 1st approximation
353
- mulps xmm1, xmm3 ; intra * (1/intra 1st approx)
354
- mulps xmm1, xmm3 ; intra * (1/intra 1st approx)^2
355
- mulps xmm0, xmm2 ; (prop + (intra*invq*fps_factor>>8)) * (intra - inter)
356
- addps xmm3, xmm3 ; 2 * (1/intra 1st approx)
357
- subps xmm3, xmm1 ; 2nd approximation for 1/intra
358
- mulps xmm0, xmm3 ; / intra
359
-%endif
360
- cvtps2dq xmm0, xmm0
361
- movdqa [r0+r6*2], xmm0
362
- add r6, 8
363
+ cvtdq2ps m0, m0
364
+ mulps m0, m6 ; intra*invq*fps_factor>>8
365
+ cvtdq2ps m1, m1 ; prop
366
+ addps m0, m1 ; prop + (intra*invq*fps_factor>>8)
367
+ cvtdq2ps m1, m2 ; intra
368
+ psubd m2, m3 ; intra - inter
369
+ cvtdq2ps m2, m2 ; intra - inter
370
+ rcpps m3, m1 ; 1 / intra 1st approximation
371
+ mulps m1, m3 ; intra * (1/intra 1st approx)
372
+ mulps m1, m3 ; intra * (1/intra 1st approx)^2
373
+ mulps m0, m2 ; (prop + (intra*invq*fps_factor>>8)) * (intra - inter)
374
+ addps m3, m3 ; 2 * (1/intra 1st approx)
375
+ subps m3, m1 ; 2nd approximation for 1/intra
376
+ mulps m0, m3 ; / intra
377
+%endif
378
+ cvtps2dq m0, m0
379
+ packssdw m0, m0
380
+ movh [r0+r5], m0
381
+ add r5, 8
382
jl .loop
383
RET
384
%endmacro
385
386
MBTREE
387
388
%macro INT16_UNPACK 1
389
- vpunpckhwd xm4, xm%1, xm7
390
- vpunpcklwd xm%1, xm7
391
- vinsertf128 m%1, m%1, xm4, 1
392
+ punpckhwd xm4, xm%1, xm7
393
+ punpcklwd xm%1, xm7
394
+ vinsertf128 m%1, m%1, xm4, 1
395
%endmacro
396
397
-; FIXME: align loads/stores to 16 bytes
398
-%macro MBTREE_AVX 0
399
-cglobal mbtree_propagate_cost, 7,7,8
400
- add r6d, r6d
401
- lea r0, [r0+r6*2]
402
- add r1, r6
403
- add r2, r6
404
- add r3, r6
405
- add r4, r6
406
- neg r6
407
- mova xm5, [pw_3fff]
408
- vbroadcastss m6, [r5]
409
- mulps m6, [pf_inv256]
410
+; FIXME: align loads to 16 bytes
411
+%macro MBTREE_AVX 1
412
+cglobal mbtree_propagate_cost, 6,6,%1
413
+ vbroadcastss m6, [r5]
414
+ mov r5d, r6m
415
+ lea r0, [r0+r5*2]
416
+ add r5d, r5d
417
+ add r1, r5
418
+ add r2, r5
419
+ add r3, r5
420
+ add r4, r5
421
+ neg r5
422
+ mova xm5, [pw_3fff]
423
%if notcpuflag(avx2)
424
- pxor xm7, xm7
425
+ pxor xm7, xm7
426
%endif
427
.loop:
428
%if cpuflag(avx2)
429
- pmovzxwd m0, [r2+r6] ; intra
430
- pmovzxwd m1, [r4+r6] ; invq
431
- pmovzxwd m2, [r1+r6] ; prop
432
- pand xm3, xm5, [r3+r6] ; inter
433
+ pmovzxwd m0, [r2+r5] ; intra
434
+ pmovzxwd m1, [r4+r5] ; invq
435
+ pmovzxwd m2, [r1+r5] ; prop
436
+ pand xm3, xm5, [r3+r5] ; inter
437
pmovzxwd m3, xm3
438
+ pminsd m3, m0
439
pmaddwd m1, m0
440
psubd m4, m0, m3
441
cvtdq2ps m0, m0
442
443
fnmaddps m4, m2, m3, m4
444
mulps m1, m4
445
%else
446
- movu xm0, [r2+r6]
447
- movu xm1, [r4+r6]
448
- movu xm2, [r1+r6]
449
- pand xm3, xm5, [r3+r6]
450
+ movu xm0, [r2+r5]
451
+ movu xm1, [r4+r5]
452
+ movu xm2, [r1+r5]
453
+ pand xm3, xm5, [r3+r5]
454
+ pminsw xm3, xm0
455
INT16_UNPACK 0
456
INT16_UNPACK 1
457
INT16_UNPACK 2
458
459
mulps m1, m3 ; / intra
460
%endif
461
vcvtps2dq m1, m1
462
- movu [r0+r6*2], m1
463
- add r6, 16
464
+ vextractf128 xm2, m1, 1
465
+ packssdw xm1, xm2
466
+ mova [r0+r5], xm1
467
+ add r5, 16
468
jl .loop
469
RET
470
%endmacro
471
472
INIT_YMM avx
473
-MBTREE_AVX
474
+MBTREE_AVX 8
475
INIT_YMM avx2,fma3
476
-MBTREE_AVX
477
+MBTREE_AVX 7
478
+
479
+%macro MBTREE_PROPAGATE_LIST 0
480
+;-----------------------------------------------------------------------------
481
+; void mbtree_propagate_list_internal( int16_t (*mvs)[2], int16_t *propagate_amount, uint16_t *lowres_costs,
482
+; int16_t *output, int bipred_weight, int mb_y, int len )
483
+;-----------------------------------------------------------------------------
484
+cglobal mbtree_propagate_list_internal, 4,6,8
485
+ movh m6, [pw_0to15] ; mb_x
486
+ movd m7, r5m
487
+ pshuflw m7, m7, 0
488
+ punpcklwd m6, m7 ; 0 y 1 y 2 y 3 y
489
+ movd m7, r4m
490
+ SPLATW m7, m7 ; bipred_weight
491
+ psllw m7, 9 ; bipred_weight << 9
492
+
493
+ mov r5d, r6m
494
+ xor r4d, r4d
495
+.loop:
496
+ mova m3, [r1+r4*2]
497
+ movu m4, [r2+r4*2]
498
+ mova m5, [pw_0xc000]
499
+ pand m4, m5
500
+ pcmpeqw m4, m5
501
+ pmulhrsw m5, m3, m7 ; propagate_amount = (propagate_amount * bipred_weight + 32) >> 6
502
+%if cpuflag(avx)
503
+ pblendvb m5, m3, m5, m4
504
+%else
505
+ pand m5, m4
506
+ pandn m4, m3
507
+ por m5, m4 ; if( lists_used == 3 )
508
+ ; propagate_amount = (propagate_amount * bipred_weight + 32) >> 6
509
+%endif
510
+
511
+ movu m0, [r0+r4*4] ; x,y
512
+ movu m1, [r0+r4*4+mmsize]
513
+
514
+ psraw m2, m0, 5
515
+ psraw m3, m1, 5
516
+ mova m4, [pd_4]
517
+ paddw m2, m6 ; {mbx, mby} = ({x,y}>>5)+{h->mb.i_mb_x,h->mb.i_mb_y}
518
+ paddw m6, m4 ; {mbx, mby} += {4, 0}
519
+ paddw m3, m6 ; {mbx, mby} = ({x,y}>>5)+{h->mb.i_mb_x,h->mb.i_mb_y}
520
+ paddw m6, m4 ; {mbx, mby} += {4, 0}
521
+
522
+ mova [r3+mmsize*0], m2
523
+ mova [r3+mmsize*1], m3
524
+
525
+ mova m3, [pw_31]
526
+ pand m0, m3 ; x &= 31
527
+ pand m1, m3 ; y &= 31
528
+ packuswb m0, m1
529
+ psrlw m1, m0, 3
530
+ pand m0, m3 ; x
531
+ SWAP 1, 3
532
+ pandn m1, m3 ; y premultiplied by (1<<5) for later use of pmulhrsw
533
+
534
+ mova m3, [pw_32]
535
+ psubw m3, m0 ; 32 - x
536
+ mova m4, [pw_1024]
537
+ psubw m4, m1 ; (32 - y) << 5
538
+
539
+ pmullw m2, m3, m4 ; idx0weight = (32-y)*(32-x) << 5
540
+ pmullw m4, m0 ; idx1weight = (32-y)*x << 5
541
+ pmullw m0, m1 ; idx3weight = y*x << 5
542
+ pmullw m1, m3 ; idx2weight = y*(32-x) << 5
543
+
544
+ ; avoid overflow in the input to pmulhrsw
545
+ psrlw m3, m2, 15
546
+ psubw m2, m3 ; idx0weight -= (idx0weight == 32768)
547
+
548
+ pmulhrsw m2, m5 ; idx0weight * propagate_amount + 512 >> 10
549
+ pmulhrsw m4, m5 ; idx1weight * propagate_amount + 512 >> 10
550
+ pmulhrsw m1, m5 ; idx2weight * propagate_amount + 512 >> 10
551
+ pmulhrsw m0, m5 ; idx3weight * propagate_amount + 512 >> 10
552
+
553
+ SBUTTERFLY wd, 2, 4, 3
554
+ SBUTTERFLY wd, 1, 0, 3
555
+ mova [r3+mmsize*2], m2
556
+ mova [r3+mmsize*3], m4
557
+ mova [r3+mmsize*4], m1
558
+ mova [r3+mmsize*5], m0
559
+ add r4d, mmsize/2
560
+ add r3, mmsize*6
561
+ cmp r4d, r5d
562
+ jl .loop
563
+ REP_RET
564
+%endmacro
565
+
566
+INIT_XMM ssse3
567
+MBTREE_PROPAGATE_LIST
568
+INIT_XMM avx
569
+MBTREE_PROPAGATE_LIST
570
x264-snapshot-20130723-2245.tar.bz2/common/x86/mc-c.c -> x264-snapshot-20140321-2245.tar.bz2/common/x86/mc-c.c
Changed
281
1
2
/*****************************************************************************
3
* mc-c.c: x86 motion compensation
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
* Loren Merritt <lorenm@u.washington.edu>
10
11
void x264_plane_copy_deinterleave_avx( uint16_t *dstu, intptr_t i_dstu,
12
uint16_t *dstv, intptr_t i_dstv,
13
uint16_t *src, intptr_t i_src, int w, int h );
14
+void x264_plane_copy_deinterleave_rgb_sse2 ( pixel *dsta, intptr_t i_dsta,
15
+ pixel *dstb, intptr_t i_dstb,
16
+ pixel *dstc, intptr_t i_dstc,
17
+ pixel *src, intptr_t i_src, int pw, int w, int h );
18
+void x264_plane_copy_deinterleave_rgb_ssse3( pixel *dsta, intptr_t i_dsta,
19
+ pixel *dstb, intptr_t i_dstb,
20
+ pixel *dstc, intptr_t i_dstc,
21
+ pixel *src, intptr_t i_src, int pw, int w, int h );
22
+void x264_plane_copy_deinterleave_v210_ssse3( uint16_t *dstu, intptr_t i_dstu,
23
+ uint16_t *dstv, intptr_t i_dstv,
24
+ uint32_t *src, intptr_t i_src, int w, int h );
25
+void x264_plane_copy_deinterleave_v210_avx ( uint16_t *dstu, intptr_t i_dstu,
26
+ uint16_t *dstv, intptr_t i_dstv,
27
+ uint32_t *src, intptr_t i_src, int w, int h );
28
+void x264_plane_copy_deinterleave_v210_avx2 ( uint16_t *dstu, intptr_t i_dstu,
29
+ uint16_t *dstv, intptr_t i_dstv,
30
+ uint32_t *src, intptr_t i_src, int w, int h );
31
void x264_store_interleave_chroma_mmx2( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
32
void x264_store_interleave_chroma_sse2( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
33
void x264_store_interleave_chroma_avx ( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
34
35
void x264_integral_init8v_mmx ( uint16_t *sum8, intptr_t stride );
36
void x264_integral_init8v_sse2( uint16_t *sum8, intptr_t stride );
37
void x264_integral_init8v_avx2( uint16_t *sum8, intptr_t stride );
38
-void x264_mbtree_propagate_cost_sse2( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
39
+void x264_mbtree_propagate_cost_sse2( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
40
uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
41
-void x264_mbtree_propagate_cost_avx ( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
42
+void x264_mbtree_propagate_cost_avx ( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
43
uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
44
-void x264_mbtree_propagate_cost_fma4( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
45
+void x264_mbtree_propagate_cost_fma4( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
46
uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
47
-void x264_mbtree_propagate_cost_avx2_fma3( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
48
+void x264_mbtree_propagate_cost_avx2_fma3( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
49
uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
50
51
#define MC_CHROMA(cpu)\
52
53
int dx, int dy, int i_width, int i_height );
54
MC_CHROMA(mmx2)
55
MC_CHROMA(sse2)
56
-MC_CHROMA(sse2_misalign)
57
MC_CHROMA(ssse3)
58
MC_CHROMA(ssse3_cache64)
59
MC_CHROMA(avx)
60
61
PIXEL_AVG_WALL(cache64_mmx2)
62
PIXEL_AVG_WALL(cache64_sse2)
63
PIXEL_AVG_WALL(sse2)
64
-PIXEL_AVG_WALL(sse2_misalign)
65
PIXEL_AVG_WALL(cache64_ssse3)
66
PIXEL_AVG_WALL(avx2)
67
68
69
PIXEL_AVG_WTAB(cache64_mmx2, mmx2, cache64_mmx2, cache64_mmx2, cache64_mmx2, cache64_mmx2)
70
#endif
71
PIXEL_AVG_WTAB(sse2, mmx2, mmx2, sse2, sse2, sse2)
72
-PIXEL_AVG_WTAB(sse2_misalign, mmx2, mmx2, sse2, sse2, sse2_misalign)
73
PIXEL_AVG_WTAB(cache64_sse2, mmx2, cache64_mmx2, cache64_sse2, cache64_sse2, cache64_sse2)
74
PIXEL_AVG_WTAB(cache64_ssse3, mmx2, cache64_mmx2, cache64_ssse3, cache64_ssse3, cache64_sse2)
75
PIXEL_AVG_WTAB(cache64_ssse3_atom, mmx2, mmx2, cache64_ssse3, cache64_ssse3, sse2)
76
77
GET_REF(cache32_mmx2)
78
GET_REF(cache64_mmx2)
79
#endif
80
-GET_REF(sse2_misalign)
81
GET_REF(cache64_sse2)
82
GET_REF(cache64_ssse3)
83
GET_REF(cache64_ssse3_atom)
84
85
HPEL(16, avx, avx, avx, avx)
86
HPEL(32, avx2, avx2, avx2, avx2)
87
#endif
88
-HPEL(16, sse2_misalign, sse2, sse2_misalign, sse2)
89
#endif // HIGH_BIT_DEPTH
90
91
static void x264_plane_copy_mmx2( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h )
92
93
PLANE_INTERLEAVE(avx)
94
#endif
95
96
+#if HAVE_X86_INLINE_ASM
97
+#define CLIP_ADD(s,x)\
98
+do\
99
+{\
100
+ int temp;\
101
+ asm("movd %0, %%xmm0 \n"\
102
+ "movd %2, %%xmm1 \n"\
103
+ "paddsw %%xmm1, %%xmm0 \n"\
104
+ "movd %%xmm0, %1 \n"\
105
+ :"+m"(s), "=&r"(temp)\
106
+ :"m"(x)\
107
+ );\
108
+ s = temp;\
109
+} while(0)
110
+
111
+#define CLIP_ADD2(s,x)\
112
+do\
113
+{\
114
+ asm("movd %0, %%xmm0 \n"\
115
+ "movd %1, %%xmm1 \n"\
116
+ "paddsw %%xmm1, %%xmm0 \n"\
117
+ "movd %%xmm0, %0 \n"\
118
+ :"+m"(M32(s))\
119
+ :"m"(M32(x))\
120
+ );\
121
+} while(0)
122
+#else
123
+#define CLIP_ADD(s,x) (s) = X264_MIN((s)+(x),(1<<15)-1)
124
+#define CLIP_ADD2(s,x)\
125
+do\
126
+{\
127
+ CLIP_ADD((s)[0], (x)[0]);\
128
+ CLIP_ADD((s)[1], (x)[1]);\
129
+} while(0)
130
+#endif
131
+
132
+#define PROPAGATE_LIST(cpu)\
133
+void x264_mbtree_propagate_list_internal_##cpu( int16_t (*mvs)[2], int16_t *propagate_amount,\
134
+ uint16_t *lowres_costs, int16_t *output,\
135
+ int bipred_weight, int mb_y, int len );\
136
+\
137
+static void x264_mbtree_propagate_list_##cpu( x264_t *h, uint16_t *ref_costs, int16_t (*mvs)[2],\
138
+ int16_t *propagate_amount, uint16_t *lowres_costs,\
139
+ int bipred_weight, int mb_y, int len, int list )\
140
+{\
141
+ int16_t *current = h->scratch_buffer2;\
142
+\
143
+ x264_mbtree_propagate_list_internal_##cpu( mvs, propagate_amount, lowres_costs,\
144
+ current, bipred_weight, mb_y, len );\
145
+\
146
+ unsigned stride = h->mb.i_mb_stride;\
147
+ unsigned width = h->mb.i_mb_width;\
148
+ unsigned height = h->mb.i_mb_height;\
149
+\
150
+ for( unsigned i = 0; i < len; current += 32 )\
151
+ {\
152
+ int end = X264_MIN( i+8, len );\
153
+ for( ; i < end; i++, current += 2 )\
154
+ {\
155
+ if( !(lowres_costs[i] & (1 << (list+LOWRES_COST_SHIFT))) )\
156
+ continue;\
157
+\
158
+ unsigned mbx = current[0];\
159
+ unsigned mby = current[1];\
160
+ unsigned idx0 = mbx + mby * stride;\
161
+ unsigned idx2 = idx0 + stride;\
162
+\
163
+ /* Shortcut for the simple/common case of zero MV */\
164
+ if( !M32( mvs[i] ) )\
165
+ {\
166
+ CLIP_ADD( ref_costs[idx0], current[16] );\
167
+ continue;\
168
+ }\
169
+\
170
+ if( mbx < width-1 && mby < height-1 )\
171
+ {\
172
+ CLIP_ADD2( ref_costs+idx0, current+16 );\
173
+ CLIP_ADD2( ref_costs+idx2, current+32 );\
174
+ }\
175
+ else\
176
+ {\
177
+ /* Note: this takes advantage of unsigned representation to\
178
+ * catch negative mbx/mby. */\
179
+ if( mby < height )\
180
+ {\
181
+ if( mbx < width )\
182
+ CLIP_ADD( ref_costs[idx0+0], current[16] );\
183
+ if( mbx+1 < width )\
184
+ CLIP_ADD( ref_costs[idx0+1], current[17] );\
185
+ }\
186
+ if( mby+1 < height )\
187
+ {\
188
+ if( mbx < width )\
189
+ CLIP_ADD( ref_costs[idx2+0], current[32] );\
190
+ if( mbx+1 < width )\
191
+ CLIP_ADD( ref_costs[idx2+1], current[33] );\
192
+ }\
193
+ }\
194
+ }\
195
+ }\
196
+}
197
+
198
+PROPAGATE_LIST(ssse3)
199
+PROPAGATE_LIST(avx)
200
+#undef CLIP_ADD
201
+#undef CLIP_ADD2
202
+
203
void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
204
{
205
if( !(cpu&X264_CPU_MMX) )
206
207
return;
208
209
pf->frame_init_lowres_core = x264_frame_init_lowres_core_ssse3;
210
+ pf->plane_copy_deinterleave_v210 = x264_plane_copy_deinterleave_v210_ssse3;
211
+ pf->mbtree_propagate_list = x264_mbtree_propagate_list_ssse3;
212
213
if( !(cpu&(X264_CPU_SLOW_SHUFFLE|X264_CPU_SLOW_ATOM|X264_CPU_SLOW_PALIGNR)) )
214
pf->integral_init4v = x264_integral_init4v_ssse3;
215
216
pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_avx;
217
pf->plane_copy_interleave = x264_plane_copy_interleave_avx;
218
pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_avx;
219
+ pf->plane_copy_deinterleave_v210 = x264_plane_copy_deinterleave_v210_avx;
220
pf->store_interleave_chroma = x264_store_interleave_chroma_avx;
221
pf->copy[PIXEL_16x16] = x264_mc_copy_w16_aligned_avx;
222
223
224
pf->frame_init_lowres_core = x264_frame_init_lowres_core_xop;
225
226
if( cpu&X264_CPU_AVX2 )
227
+ {
228
pf->mc_luma = mc_luma_avx2;
229
+ pf->plane_copy_deinterleave_v210 = x264_plane_copy_deinterleave_v210_avx2;
230
+ }
231
#else // !HIGH_BIT_DEPTH
232
233
#if ARCH_X86 // all x86_64 cpus with cacheline split issues use sse2 instead
234
235
pf->integral_init8v = x264_integral_init8v_sse2;
236
pf->hpel_filter = x264_hpel_filter_sse2_amd;
237
pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_sse2;
238
+ pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_sse2;
239
240
if( !(cpu&X264_CPU_SSE2_IS_SLOW) )
241
{
242
243
pf->avg[PIXEL_8x8] = x264_pixel_avg_8x8_sse2;
244
pf->avg[PIXEL_8x4] = x264_pixel_avg_8x4_sse2;
245
pf->hpel_filter = x264_hpel_filter_sse2;
246
- if( cpu&X264_CPU_SSE_MISALIGN )
247
- pf->hpel_filter = x264_hpel_filter_sse2_misalign;
248
pf->frame_init_lowres_core = x264_frame_init_lowres_core_sse2;
249
if( !(cpu&X264_CPU_STACK_MOD4) )
250
pf->mc_chroma = x264_mc_chroma_sse2;
251
252
pf->mc_luma = mc_luma_cache64_sse2;
253
pf->get_ref = get_ref_cache64_sse2;
254
}
255
- if( cpu&X264_CPU_SSE_MISALIGN )
256
- {
257
- pf->get_ref = get_ref_sse2_misalign;
258
- if( !(cpu&X264_CPU_STACK_MOD4) )
259
- pf->mc_chroma = x264_mc_chroma_sse2_misalign;
260
- }
261
}
262
}
263
264
265
pf->avg[PIXEL_4x8] = x264_pixel_avg_4x8_ssse3;
266
pf->avg[PIXEL_4x4] = x264_pixel_avg_4x4_ssse3;
267
pf->avg[PIXEL_4x2] = x264_pixel_avg_4x2_ssse3;
268
+ pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_ssse3;
269
+ pf->mbtree_propagate_list = x264_mbtree_propagate_list_ssse3;
270
271
if( !(cpu&X264_CPU_SLOW_PSHUFB) )
272
{
273
274
return;
275
pf->memzero_aligned = x264_memzero_aligned_avx;
276
pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_avx;
277
+ pf->mbtree_propagate_list = x264_mbtree_propagate_list_avx;
278
279
if( cpu&X264_CPU_FMA4 )
280
pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_fma4;
281
x264-snapshot-20130723-2245.tar.bz2/common/x86/mc.h -> x264-snapshot-20140321-2245.tar.bz2/common/x86/mc.h
Changed
10
1
2
/*****************************************************************************
3
* mc.h: x86 motion compensation
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Loren Merritt <lorenm@u.washington.edu>
9
* Laurent Aimar <fenrir@via.ecp.fr>
10
x264-snapshot-20130723-2245.tar.bz2/common/x86/pixel-32.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/pixel-32.asm
Changed
10
1
2
;*****************************************************************************
3
;* pixel-32.asm: x86_32 pixel metrics
4
;*****************************************************************************
5
-;* Copyright (C) 2003-2013 x264 project
6
+;* Copyright (C) 2003-2014 x264 project
7
;*
8
;* Authors: Loren Merritt <lorenm@u.washington.edu>
9
;* Laurent Aimar <fenrir@via.ecp.fr>
10
x264-snapshot-20130723-2245.tar.bz2/common/x86/pixel-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/pixel-a.asm
Changed
28
1
2
;*****************************************************************************
3
;* pixel.asm: x86 pixel metrics
4
;*****************************************************************************
5
-;* Copyright (C) 2003-2013 x264 project
6
+;* Copyright (C) 2003-2014 x264 project
7
;*
8
;* Authors: Loren Merritt <lorenm@u.washington.edu>
9
;* Holger Lubitz <holger@lubitz.org>
10
11
mov r4d, %%n
12
%endif
13
pxor m0, m0
14
-.loop
15
+.loop:
16
mova m1, [r0]
17
mova m2, [r0+offset0_1]
18
mova m3, [r0+offset0_2]
19
20
; clobber: m3..m7
21
; out: %1 = satd
22
%macro SATD_4x4_MMX 3
23
- %xdefine %%n n%1
24
+ %xdefine %%n nn%1
25
%assign offset %2*SIZEOF_PIXEL
26
LOAD_DIFF m4, m3, none, [r0+ offset], [r2+ offset]
27
LOAD_DIFF m5, m3, none, [r0+ r1+offset], [r2+ r3+offset]
28
x264-snapshot-20130723-2245.tar.bz2/common/x86/pixel.h -> x264-snapshot-20140321-2245.tar.bz2/common/x86/pixel.h
Changed
26
1
2
/*****************************************************************************
3
* pixel.h: x86 pixel metrics
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
* Loren Merritt <lorenm@u.washington.edu>
10
11
12
DECL_X1( sad, mmx2 )
13
DECL_X1( sad, sse2 )
14
-DECL_X4( sad, sse2_misalign )
15
DECL_X1( sad, sse3 )
16
DECL_X1( sad, sse2_aligned )
17
DECL_X1( sad, ssse3 )
18
19
DECL_X4( sad, sse2 )
20
DECL_X4( sad, sse3 )
21
DECL_X4( sad, ssse3 )
22
+DECL_X4( sad, avx )
23
DECL_X4( sad, avx2 )
24
DECL_X1( ssd, mmx )
25
DECL_X1( ssd, mmx2 )
26
x264-snapshot-20130723-2245.tar.bz2/common/x86/predict-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/predict-a.asm
Changed
26
1
2
;*****************************************************************************
3
;* predict-a.asm: x86 intra prediction
4
;*****************************************************************************
5
-;* Copyright (C) 2005-2013 x264 project
6
+;* Copyright (C) 2005-2014 x264 project
7
;*
8
;* Authors: Loren Merritt <lorenm@u.washington.edu>
9
;* Holger Lubitz <holger@lubitz.org>
10
11
12
SECTION_RODATA 32
13
14
-pw_0to15: dw 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
15
pw_43210123: times 2 dw -3, -2, -1, 0, 1, 2, 3, 4
16
pw_m3: times 16 dw -3
17
pw_m7: times 16 dw -7
18
19
cextern pw_16
20
cextern pw_00ff
21
cextern pw_pixel_max
22
+cextern pw_0to15
23
24
%macro STORE8 1
25
mova [r0+0*FDEC_STRIDEB], %1
26
x264-snapshot-20130723-2245.tar.bz2/common/x86/predict-c.c -> x264-snapshot-20140321-2245.tar.bz2/common/x86/predict-c.c
Changed
10
1
2
/*****************************************************************************
3
* predict-c.c: intra prediction
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
* Loren Merritt <lorenm@u.washington.edu>
10
x264-snapshot-20130723-2245.tar.bz2/common/x86/predict.h -> x264-snapshot-20140321-2245.tar.bz2/common/x86/predict.h
Changed
10
1
2
/*****************************************************************************
3
* predict.h: x86 intra prediction
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
* Loren Merritt <lorenm@u.washington.edu>
10
x264-snapshot-20130723-2245.tar.bz2/common/x86/quant-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/quant-a.asm
Changed
10
1
2
;*****************************************************************************
3
;* quant-a.asm: x86 quantization and level-run
4
;*****************************************************************************
5
-;* Copyright (C) 2005-2013 x264 project
6
+;* Copyright (C) 2005-2014 x264 project
7
;*
8
;* Authors: Loren Merritt <lorenm@u.washington.edu>
9
;* Jason Garrett-Glaser <darkshikari@gmail.com>
10
x264-snapshot-20130723-2245.tar.bz2/common/x86/quant.h -> x264-snapshot-20140321-2245.tar.bz2/common/x86/quant.h
Changed
10
1
2
/*****************************************************************************
3
* quant.h: x86 quantization and level-run
4
*****************************************************************************
5
- * Copyright (C) 2005-2013 x264 project
6
+ * Copyright (C) 2005-2014 x264 project
7
*
8
* Authors: Loren Merritt <lorenm@u.washington.edu>
9
* Jason Garrett-Glaser <darkshikari@gmail.com>
10
x264-snapshot-20130723-2245.tar.bz2/common/x86/sad-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/sad-a.asm
Changed
722
1
2
;*****************************************************************************
3
;* sad-a.asm: x86 sad functions
4
;*****************************************************************************
5
-;* Copyright (C) 2003-2013 x264 project
6
+;* Copyright (C) 2003-2014 x264 project
7
;*
8
;* Authors: Loren Merritt <lorenm@u.washington.edu>
9
;* Jason Garrett-Glaser <darkshikari@gmail.com>
10
11
SECTION_RODATA 32
12
13
pb_shuf8x8c2: times 2 db 0,0,0,0,8,8,8,8,-1,-1,-1,-1,-1,-1,-1,-1
14
-deinterleave_sadx4: dd 0,4,2,6
15
hpred_shuf: db 0,0,2,2,8,8,10,10,1,1,3,3,9,9,11,11
16
17
SECTION .text
18
19
;=============================================================================
20
21
%macro SAD_X3_START_1x16P_SSE2 0
22
-%if cpuflag(misalign)
23
- mova xmm2, [r0]
24
- movu xmm0, [r1]
25
- movu xmm1, [r2]
26
- psadbw xmm0, xmm2
27
- psadbw xmm1, xmm2
28
- psadbw xmm2, [r3]
29
+ mova m2, [r0]
30
+%if cpuflag(avx)
31
+ psadbw m0, m2, [r1]
32
+ psadbw m1, m2, [r2]
33
+ psadbw m2, [r3]
34
%else
35
- mova xmm3, [r0]
36
- movu xmm0, [r1]
37
- movu xmm1, [r2]
38
- movu xmm2, [r3]
39
- psadbw xmm0, xmm3
40
- psadbw xmm1, xmm3
41
- psadbw xmm2, xmm3
42
+ movu m0, [r1]
43
+ movu m1, [r2]
44
+ movu m3, [r3]
45
+ psadbw m0, m2
46
+ psadbw m1, m2
47
+ psadbw m2, m3
48
%endif
49
%endmacro
50
51
%macro SAD_X3_1x16P_SSE2 2
52
-%if cpuflag(misalign)
53
- mova xmm3, [r0+%1]
54
- movu xmm4, [r1+%2]
55
- movu xmm5, [r2+%2]
56
- psadbw xmm4, xmm3
57
- psadbw xmm5, xmm3
58
- psadbw xmm3, [r3+%2]
59
- paddw xmm0, xmm4
60
- paddw xmm1, xmm5
61
- paddw xmm2, xmm3
62
+ mova m3, [r0+%1]
63
+%if cpuflag(avx)
64
+ psadbw m4, m3, [r1+%2]
65
+ psadbw m5, m3, [r2+%2]
66
+ psadbw m3, [r3+%2]
67
%else
68
- mova xmm3, [r0+%1]
69
- movu xmm4, [r1+%2]
70
- movu xmm5, [r2+%2]
71
- movu xmm6, [r3+%2]
72
- psadbw xmm4, xmm3
73
- psadbw xmm5, xmm3
74
- psadbw xmm6, xmm3
75
- paddw xmm0, xmm4
76
- paddw xmm1, xmm5
77
- paddw xmm2, xmm6
78
+ movu m4, [r1+%2]
79
+ movu m5, [r2+%2]
80
+ movu m6, [r3+%2]
81
+ psadbw m4, m3
82
+ psadbw m5, m3
83
+ psadbw m3, m6
84
%endif
85
+ paddw m0, m4
86
+ paddw m1, m5
87
+ paddw m2, m3
88
%endmacro
89
90
+%if ARCH_X86_64
91
+ DECLARE_REG_TMP 6
92
+%else
93
+ DECLARE_REG_TMP 5
94
+%endif
95
+
96
%macro SAD_X3_4x16P_SSE2 2
97
%if %1==0
98
-%if UNIX64
99
- mov r6, r5
100
-%endif
101
- lea r5, [r4*3]
102
+ lea t0, [r4*3]
103
SAD_X3_START_1x16P_SSE2
104
%else
105
SAD_X3_1x16P_SSE2 FENC_STRIDE*(0+(%1&1)*4), r4*0
106
%endif
107
SAD_X3_1x16P_SSE2 FENC_STRIDE*(1+(%1&1)*4), r4*1
108
SAD_X3_1x16P_SSE2 FENC_STRIDE*(2+(%1&1)*4), r4*2
109
- SAD_X3_1x16P_SSE2 FENC_STRIDE*(3+(%1&1)*4), r5
110
+ SAD_X3_1x16P_SSE2 FENC_STRIDE*(3+(%1&1)*4), t0
111
%if %1 != %2-1
112
%if (%1&1) != 0
113
add r0, 8*FENC_STRIDE
114
115
%endmacro
116
117
%macro SAD_X3_START_2x8P_SSE2 0
118
- movq xmm7, [r0]
119
- movq xmm0, [r1]
120
- movq xmm1, [r2]
121
- movq xmm2, [r3]
122
- movhps xmm7, [r0+FENC_STRIDE]
123
- movhps xmm0, [r1+r4]
124
- movhps xmm1, [r2+r4]
125
- movhps xmm2, [r3+r4]
126
- psadbw xmm0, xmm7
127
- psadbw xmm1, xmm7
128
- psadbw xmm2, xmm7
129
+ movq m3, [r0]
130
+ movq m0, [r1]
131
+ movq m1, [r2]
132
+ movq m2, [r3]
133
+ movhps m3, [r0+FENC_STRIDE]
134
+ movhps m0, [r1+r4]
135
+ movhps m1, [r2+r4]
136
+ movhps m2, [r3+r4]
137
+ psadbw m0, m3
138
+ psadbw m1, m3
139
+ psadbw m2, m3
140
%endmacro
141
142
%macro SAD_X3_2x8P_SSE2 4
143
- movq xmm7, [r0+%1]
144
- movq xmm3, [r1+%2]
145
- movq xmm4, [r2+%2]
146
- movq xmm5, [r3+%2]
147
- movhps xmm7, [r0+%3]
148
- movhps xmm3, [r1+%4]
149
- movhps xmm4, [r2+%4]
150
- movhps xmm5, [r3+%4]
151
- psadbw xmm3, xmm7
152
- psadbw xmm4, xmm7
153
- psadbw xmm5, xmm7
154
- paddw xmm0, xmm3
155
- paddw xmm1, xmm4
156
- paddw xmm2, xmm5
157
+ movq m6, [r0+%1]
158
+ movq m3, [r1+%2]
159
+ movq m4, [r2+%2]
160
+ movq m5, [r3+%2]
161
+ movhps m6, [r0+%3]
162
+ movhps m3, [r1+%4]
163
+ movhps m4, [r2+%4]
164
+ movhps m5, [r3+%4]
165
+ psadbw m3, m6
166
+ psadbw m4, m6
167
+ psadbw m5, m6
168
+ paddw m0, m3
169
+ paddw m1, m4
170
+ paddw m2, m5
171
%endmacro
172
173
%macro SAD_X4_START_2x8P_SSE2 0
174
- movq xmm7, [r0]
175
- movq xmm0, [r1]
176
- movq xmm1, [r2]
177
- movq xmm2, [r3]
178
- movq xmm3, [r4]
179
- movhps xmm7, [r0+FENC_STRIDE]
180
- movhps xmm0, [r1+r5]
181
- movhps xmm1, [r2+r5]
182
- movhps xmm2, [r3+r5]
183
- movhps xmm3, [r4+r5]
184
- psadbw xmm0, xmm7
185
- psadbw xmm1, xmm7
186
- psadbw xmm2, xmm7
187
- psadbw xmm3, xmm7
188
+ movq m4, [r0]
189
+ movq m0, [r1]
190
+ movq m1, [r2]
191
+ movq m2, [r3]
192
+ movq m3, [r4]
193
+ movhps m4, [r0+FENC_STRIDE]
194
+ movhps m0, [r1+r5]
195
+ movhps m1, [r2+r5]
196
+ movhps m2, [r3+r5]
197
+ movhps m3, [r4+r5]
198
+ psadbw m0, m4
199
+ psadbw m1, m4
200
+ psadbw m2, m4
201
+ psadbw m3, m4
202
%endmacro
203
204
%macro SAD_X4_2x8P_SSE2 4
205
- movq xmm7, [r0+%1]
206
- movq xmm4, [r1+%2]
207
- movq xmm5, [r2+%2]
208
-%if ARCH_X86_64
209
- movq xmm6, [r3+%2]
210
- movq xmm8, [r4+%2]
211
- movhps xmm7, [r0+%3]
212
- movhps xmm4, [r1+%4]
213
- movhps xmm5, [r2+%4]
214
- movhps xmm6, [r3+%4]
215
- movhps xmm8, [r4+%4]
216
- psadbw xmm4, xmm7
217
- psadbw xmm5, xmm7
218
- psadbw xmm6, xmm7
219
- psadbw xmm8, xmm7
220
- paddw xmm0, xmm4
221
- paddw xmm1, xmm5
222
- paddw xmm2, xmm6
223
- paddw xmm3, xmm8
224
-%else
225
- movhps xmm7, [r0+%3]
226
- movhps xmm4, [r1+%4]
227
- movhps xmm5, [r2+%4]
228
- psadbw xmm4, xmm7
229
- psadbw xmm5, xmm7
230
- paddw xmm0, xmm4
231
- paddw xmm1, xmm5
232
- movq xmm6, [r3+%2]
233
- movq xmm4, [r4+%2]
234
- movhps xmm6, [r3+%4]
235
- movhps xmm4, [r4+%4]
236
- psadbw xmm6, xmm7
237
- psadbw xmm4, xmm7
238
- paddw xmm2, xmm6
239
- paddw xmm3, xmm4
240
-%endif
241
+ movq m6, [r0+%1]
242
+ movq m4, [r1+%2]
243
+ movq m5, [r2+%2]
244
+ movhps m6, [r0+%3]
245
+ movhps m4, [r1+%4]
246
+ movhps m5, [r2+%4]
247
+ psadbw m4, m6
248
+ psadbw m5, m6
249
+ paddw m0, m4
250
+ paddw m1, m5
251
+ movq m4, [r3+%2]
252
+ movq m5, [r4+%2]
253
+ movhps m4, [r3+%4]
254
+ movhps m5, [r4+%4]
255
+ psadbw m4, m6
256
+ psadbw m5, m6
257
+ paddw m2, m4
258
+ paddw m3, m5
259
%endmacro
260
261
%macro SAD_X4_START_1x16P_SSE2 0
262
-%if cpuflag(misalign)
263
- mova xmm3, [r0]
264
- movu xmm0, [r1]
265
- movu xmm1, [r2]
266
- movu xmm2, [r3]
267
- psadbw xmm0, xmm3
268
- psadbw xmm1, xmm3
269
- psadbw xmm2, xmm3
270
- psadbw xmm3, [r4]
271
+ mova m3, [r0]
272
+%if cpuflag(avx)
273
+ psadbw m0, m3, [r1]
274
+ psadbw m1, m3, [r2]
275
+ psadbw m2, m3, [r3]
276
+ psadbw m3, [r4]
277
%else
278
- mova xmm7, [r0]
279
- movu xmm0, [r1]
280
- movu xmm1, [r2]
281
- movu xmm2, [r3]
282
- movu xmm3, [r4]
283
- psadbw xmm0, xmm7
284
- psadbw xmm1, xmm7
285
- psadbw xmm2, xmm7
286
- psadbw xmm3, xmm7
287
+ movu m0, [r1]
288
+ movu m1, [r2]
289
+ movu m2, [r3]
290
+ movu m4, [r4]
291
+ psadbw m0, m3
292
+ psadbw m1, m3
293
+ psadbw m2, m3
294
+ psadbw m3, m4
295
%endif
296
%endmacro
297
298
%macro SAD_X4_1x16P_SSE2 2
299
-%if cpuflag(misalign)
300
- mova xmm7, [r0+%1]
301
- movu xmm4, [r1+%2]
302
- movu xmm5, [r2+%2]
303
- movu xmm6, [r3+%2]
304
- psadbw xmm4, xmm7
305
- psadbw xmm5, xmm7
306
- psadbw xmm6, xmm7
307
- psadbw xmm7, [r4+%2]
308
- paddw xmm0, xmm4
309
- paddw xmm1, xmm5
310
- paddw xmm2, xmm6
311
- paddw xmm3, xmm7
312
+ mova m6, [r0+%1]
313
+%if cpuflag(avx)
314
+ psadbw m4, m6, [r1+%2]
315
+ psadbw m5, m6, [r2+%2]
316
%else
317
- mova xmm7, [r0+%1]
318
- movu xmm4, [r1+%2]
319
- movu xmm5, [r2+%2]
320
- movu xmm6, [r3+%2]
321
-%if ARCH_X86_64
322
- movu xmm8, [r4+%2]
323
- psadbw xmm4, xmm7
324
- psadbw xmm5, xmm7
325
- psadbw xmm6, xmm7
326
- psadbw xmm8, xmm7
327
- paddw xmm0, xmm4
328
- paddw xmm1, xmm5
329
- paddw xmm2, xmm6
330
- paddw xmm3, xmm8
331
-%else
332
- psadbw xmm4, xmm7
333
- psadbw xmm5, xmm7
334
- paddw xmm0, xmm4
335
- psadbw xmm6, xmm7
336
- movu xmm4, [r4+%2]
337
- paddw xmm1, xmm5
338
- psadbw xmm4, xmm7
339
- paddw xmm2, xmm6
340
- paddw xmm3, xmm4
341
+ movu m4, [r1+%2]
342
+ movu m5, [r2+%2]
343
+ psadbw m4, m6
344
+ psadbw m5, m6
345
%endif
346
+ paddw m0, m4
347
+ paddw m1, m5
348
+%if cpuflag(avx)
349
+ psadbw m4, m6, [r3+%2]
350
+ psadbw m5, m6, [r4+%2]
351
+%else
352
+ movu m4, [r3+%2]
353
+ movu m5, [r4+%2]
354
+ psadbw m4, m6
355
+ psadbw m5, m6
356
%endif
357
+ paddw m2, m4
358
+ paddw m3, m5
359
%endmacro
360
361
%macro SAD_X4_4x16P_SSE2 2
362
363
364
%macro SAD_X3_4x8P_SSE2 2
365
%if %1==0
366
-%if UNIX64
367
- mov r6, r5
368
-%endif
369
- lea r5, [r4*3]
370
+ lea t0, [r4*3]
371
SAD_X3_START_2x8P_SSE2
372
%else
373
SAD_X3_2x8P_SSE2 FENC_STRIDE*(0+(%1&1)*4), r4*0, FENC_STRIDE*(1+(%1&1)*4), r4*1
374
%endif
375
- SAD_X3_2x8P_SSE2 FENC_STRIDE*(2+(%1&1)*4), r4*2, FENC_STRIDE*(3+(%1&1)*4), r5
376
+ SAD_X3_2x8P_SSE2 FENC_STRIDE*(2+(%1&1)*4), r4*2, FENC_STRIDE*(3+(%1&1)*4), t0
377
%if %1 != %2-1
378
%if (%1&1) != 0
379
add r0, 8*FENC_STRIDE
380
381
%endmacro
382
383
%macro SAD_X3_END_SSE2 0
384
- movhlps xmm4, xmm0
385
- movhlps xmm5, xmm1
386
- movhlps xmm6, xmm2
387
- paddw xmm0, xmm4
388
- paddw xmm1, xmm5
389
- paddw xmm2, xmm6
390
-%if UNIX64
391
- movd [r6+0], xmm0
392
- movd [r6+4], xmm1
393
- movd [r6+8], xmm2
394
+ movifnidn r5, r5mp
395
+%if cpuflag(ssse3)
396
+ packssdw m0, m1
397
+ packssdw m2, m2
398
+ phaddd m0, m2
399
+ mova [r5], m0
400
%else
401
- mov r0, r5mp
402
- movd [r0+0], xmm0
403
- movd [r0+4], xmm1
404
- movd [r0+8], xmm2
405
+ movhlps m3, m0
406
+ movhlps m4, m1
407
+ movhlps m5, m2
408
+ paddw m0, m3
409
+ paddw m1, m4
410
+ paddw m2, m5
411
+ movd [r5+0], m0
412
+ movd [r5+4], m1
413
+ movd [r5+8], m2
414
%endif
415
RET
416
%endmacro
417
418
%macro SAD_X4_END_SSE2 0
419
- mov r0, r6mp
420
- psllq xmm1, 32
421
- psllq xmm3, 32
422
- paddw xmm0, xmm1
423
- paddw xmm2, xmm3
424
- movhlps xmm1, xmm0
425
- movhlps xmm3, xmm2
426
- paddw xmm0, xmm1
427
- paddw xmm2, xmm3
428
- movq [r0+0], xmm0
429
- movq [r0+8], xmm2
430
+ mov r0, r6mp
431
+%if cpuflag(ssse3)
432
+ packssdw m0, m1
433
+ packssdw m2, m3
434
+ phaddd m0, m2
435
+ mova [r0], m0
436
+%else
437
+ psllq m1, 32
438
+ psllq m3, 32
439
+ paddw m0, m1
440
+ paddw m2, m3
441
+ movhlps m1, m0
442
+ movhlps m3, m2
443
+ paddw m0, m1
444
+ paddw m2, m3
445
+ movq [r0+0], m0
446
+ movq [r0+8], m2
447
+%endif
448
RET
449
%endmacro
450
451
%macro SAD_X4_START_2x8P_SSSE3 0
452
- movddup xmm4, [r0]
453
- movq xmm0, [r1]
454
- movq xmm1, [r3]
455
- movhps xmm0, [r2]
456
- movhps xmm1, [r4]
457
- movddup xmm5, [r0+FENC_STRIDE]
458
- movq xmm2, [r1+r5]
459
- movq xmm3, [r3+r5]
460
- movhps xmm2, [r2+r5]
461
- movhps xmm3, [r4+r5]
462
- psadbw xmm0, xmm4
463
- psadbw xmm1, xmm4
464
- psadbw xmm2, xmm5
465
- psadbw xmm3, xmm5
466
- paddw xmm0, xmm2
467
- paddw xmm1, xmm3
468
+ movddup m4, [r0]
469
+ movq m0, [r1]
470
+ movq m1, [r3]
471
+ movhps m0, [r2]
472
+ movhps m1, [r4]
473
+ movddup m5, [r0+FENC_STRIDE]
474
+ movq m2, [r1+r5]
475
+ movq m3, [r3+r5]
476
+ movhps m2, [r2+r5]
477
+ movhps m3, [r4+r5]
478
+ psadbw m0, m4
479
+ psadbw m1, m4
480
+ psadbw m2, m5
481
+ psadbw m3, m5
482
+ paddw m0, m2
483
+ paddw m1, m3
484
%endmacro
485
486
%macro SAD_X4_2x8P_SSSE3 4
487
- movddup xmm6, [r0+%1]
488
- movq xmm2, [r1+%2]
489
- movq xmm3, [r3+%2]
490
- movhps xmm2, [r2+%2]
491
- movhps xmm3, [r4+%2]
492
- movddup xmm7, [r0+%3]
493
- movq xmm4, [r1+%4]
494
- movq xmm5, [r3+%4]
495
- movhps xmm4, [r2+%4]
496
- movhps xmm5, [r4+%4]
497
- psadbw xmm2, xmm6
498
- psadbw xmm3, xmm6
499
- psadbw xmm4, xmm7
500
- psadbw xmm5, xmm7
501
- paddw xmm0, xmm2
502
- paddw xmm1, xmm3
503
- paddw xmm0, xmm4
504
- paddw xmm1, xmm5
505
+ movddup m6, [r0+%1]
506
+ movq m2, [r1+%2]
507
+ movq m3, [r3+%2]
508
+ movhps m2, [r2+%2]
509
+ movhps m3, [r4+%2]
510
+ movddup m7, [r0+%3]
511
+ movq m4, [r1+%4]
512
+ movq m5, [r3+%4]
513
+ movhps m4, [r2+%4]
514
+ movhps m5, [r4+%4]
515
+ psadbw m2, m6
516
+ psadbw m3, m6
517
+ psadbw m4, m7
518
+ psadbw m5, m7
519
+ paddw m0, m2
520
+ paddw m1, m3
521
+ paddw m0, m4
522
+ paddw m1, m5
523
%endmacro
524
525
%macro SAD_X4_4x8P_SSSE3 2
526
527
%endmacro
528
529
%macro SAD_X4_END_SSSE3 0
530
- mov r0, r6mp
531
- packssdw xmm0, xmm1
532
- movdqa [r0], xmm0
533
+ mov r0, r6mp
534
+ packssdw m0, m1
535
+ mova [r0], m0
536
RET
537
%endmacro
538
539
540
541
%macro SAD_X3_4x16P_AVX2 2
542
%if %1==0
543
-%if UNIX64
544
- mov r6, r5
545
-%endif
546
- lea r5, [r4*3]
547
+ lea t0, [r4*3]
548
SAD_X3_START_2x16P_AVX2
549
%else
550
SAD_X3_2x16P_AVX2 FENC_STRIDE*(0+(%1&1)*4), r4*0, r4*1
551
%endif
552
- SAD_X3_2x16P_AVX2 FENC_STRIDE*(2+(%1&1)*4), r4*2, r5
553
+ SAD_X3_2x16P_AVX2 FENC_STRIDE*(2+(%1&1)*4), r4*2, t0
554
%if %1 != %2-1
555
%if (%1&1) != 0
556
add r0, 8*FENC_STRIDE
557
558
vbroadcasti128 m4, [r0]
559
vbroadcasti128 m5, [r0+FENC_STRIDE]
560
movu xm0, [r1]
561
- movu xm1, [r3]
562
+ movu xm1, [r2]
563
movu xm2, [r1+r5]
564
- movu xm3, [r3+r5]
565
- vinserti128 m0, m0, [r2], 1
566
+ movu xm3, [r2+r5]
567
+ vinserti128 m0, m0, [r3], 1
568
vinserti128 m1, m1, [r4], 1
569
- vinserti128 m2, m2, [r2+r5], 1
570
+ vinserti128 m2, m2, [r3+r5], 1
571
vinserti128 m3, m3, [r4+r5], 1
572
psadbw m0, m4
573
psadbw m1, m4
574
575
vbroadcasti128 m6, [r0+%1]
576
vbroadcasti128 m7, [r0+%3]
577
movu xm2, [r1+%2]
578
- movu xm3, [r3+%2]
579
+ movu xm3, [r2+%2]
580
movu xm4, [r1+%4]
581
- movu xm5, [r3+%4]
582
- vinserti128 m2, m2, [r2+%2], 1
583
+ movu xm5, [r2+%4]
584
+ vinserti128 m2, m2, [r3+%2], 1
585
vinserti128 m3, m3, [r4+%2], 1
586
- vinserti128 m4, m4, [r2+%4], 1
587
+ vinserti128 m4, m4, [r3+%4], 1
588
vinserti128 m5, m5, [r4+%4], 1
589
psadbw m2, m6
590
psadbw m3, m6
591
592
%endmacro
593
594
%macro SAD_X3_END_AVX2 0
595
- vextracti128 xm4, m0, 1
596
- vextracti128 xm5, m1, 1
597
- vextracti128 xm6, m2, 1
598
- paddw xm0, xm4
599
- paddw xm1, xm5
600
- paddw xm2, xm6
601
- movhlps xm4, xm0
602
- movhlps xm5, xm1
603
- movhlps xm6, xm2
604
- paddw xm0, xm4
605
- paddw xm1, xm5
606
- paddw xm2, xm6
607
-%if UNIX64
608
- movd [r6+0], xm0
609
- movd [r6+4], xm1
610
- movd [r6+8], xm2
611
-%else
612
- mov r0, r5mp
613
- movd [r0+0], xm0
614
- movd [r0+4], xm1
615
- movd [r0+8], xm2
616
-%endif
617
+ movifnidn r5, r5mp
618
+ packssdw m0, m1 ; 0 0 1 1 0 0 1 1
619
+ packssdw m2, m2 ; 2 2 _ _ 2 2 _ _
620
+ phaddd m0, m2 ; 0 1 2 _ 0 1 2 _
621
+ vextracti128 xm1, m0, 1
622
+ paddd xm0, xm1 ; 0 1 2 _
623
+ mova [r5], xm0
624
RET
625
%endmacro
626
627
%macro SAD_X4_END_AVX2 0
628
- mov r0, r6mp
629
- punpckhqdq m2, m0, m0
630
- punpckhqdq m3, m1, m1
631
- paddw m0, m2
632
- paddw m1, m3
633
- packssdw m0, m1
634
- mova xm2, [deinterleave_sadx4]
635
- vpermd m0, m2, m0
636
- mova [r0], xm0
637
+ mov r0, r6mp
638
+ packssdw m0, m1 ; 0 0 1 1 2 2 3 3
639
+ vextracti128 xm1, m0, 1
640
+ phaddd xm0, xm1 ; 0 1 2 3
641
+ mova [r0], xm0
642
RET
643
%endmacro
644
645
646
; void pixel_sad_x3_16x16( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1,
647
; uint8_t *pix2, intptr_t i_stride, int scores[3] )
648
;-----------------------------------------------------------------------------
649
-%macro SAD_X_SSE2 3
650
-cglobal pixel_sad_x%1_%2x%3, 2+%1,3+%1,9
651
+%macro SAD_X_SSE2 4
652
+cglobal pixel_sad_x%1_%2x%3, 2+%1,3+%1,%4
653
%assign x 0
654
%rep %3/4
655
SAD_X%1_4x%2P_SSE2 x, %3/4
656
657
%endmacro
658
659
INIT_XMM sse2
660
-SAD_X_SSE2 3, 16, 16
661
-SAD_X_SSE2 3, 16, 8
662
-SAD_X_SSE2 3, 8, 16
663
-SAD_X_SSE2 3, 8, 8
664
-SAD_X_SSE2 3, 8, 4
665
-SAD_X_SSE2 4, 16, 16
666
-SAD_X_SSE2 4, 16, 8
667
-SAD_X_SSE2 4, 8, 16
668
-SAD_X_SSE2 4, 8, 8
669
-SAD_X_SSE2 4, 8, 4
670
-
671
-INIT_XMM sse2, misalign
672
-SAD_X_SSE2 3, 16, 16
673
-SAD_X_SSE2 3, 16, 8
674
-SAD_X_SSE2 4, 16, 16
675
-SAD_X_SSE2 4, 16, 8
676
+SAD_X_SSE2 3, 16, 16, 7
677
+SAD_X_SSE2 3, 16, 8, 7
678
+SAD_X_SSE2 3, 8, 16, 7
679
+SAD_X_SSE2 3, 8, 8, 7
680
+SAD_X_SSE2 3, 8, 4, 7
681
+SAD_X_SSE2 4, 16, 16, 7
682
+SAD_X_SSE2 4, 16, 8, 7
683
+SAD_X_SSE2 4, 8, 16, 7
684
+SAD_X_SSE2 4, 8, 8, 7
685
+SAD_X_SSE2 4, 8, 4, 7
686
687
INIT_XMM sse3
688
-SAD_X_SSE2 3, 16, 16
689
-SAD_X_SSE2 3, 16, 8
690
-SAD_X_SSE2 4, 16, 16
691
-SAD_X_SSE2 4, 16, 8
692
+SAD_X_SSE2 3, 16, 16, 7
693
+SAD_X_SSE2 3, 16, 8, 7
694
+SAD_X_SSE2 4, 16, 16, 7
695
+SAD_X_SSE2 4, 16, 8, 7
696
697
%macro SAD_X_SSSE3 3
698
cglobal pixel_sad_x%1_%2x%3, 2+%1,3+%1,8
699
700
%endmacro
701
702
INIT_XMM ssse3
703
-SAD_X_SSSE3 4, 8, 16
704
-SAD_X_SSSE3 4, 8, 8
705
-SAD_X_SSSE3 4, 8, 4
706
+SAD_X_SSE2 3, 16, 16, 7
707
+SAD_X_SSE2 3, 16, 8, 7
708
+SAD_X_SSE2 4, 16, 16, 7
709
+SAD_X_SSE2 4, 16, 8, 7
710
+SAD_X_SSSE3 4, 8, 16
711
+SAD_X_SSSE3 4, 8, 8
712
+SAD_X_SSSE3 4, 8, 4
713
+
714
+INIT_XMM avx
715
+SAD_X_SSE2 3, 16, 16, 6
716
+SAD_X_SSE2 3, 16, 8, 6
717
+SAD_X_SSE2 4, 16, 16, 7
718
+SAD_X_SSE2 4, 16, 8, 7
719
720
%macro SAD_X_AVX2 4
721
cglobal pixel_sad_x%1_%2x%3, 2+%1,3+%1,%4
722
x264-snapshot-20130723-2245.tar.bz2/common/x86/sad16-a.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/sad16-a.asm
Changed
10
1
2
;*****************************************************************************
3
;* sad16-a.asm: x86 high depth sad functions
4
;*****************************************************************************
5
-;* Copyright (C) 2010-2013 x264 project
6
+;* Copyright (C) 2010-2014 x264 project
7
;*
8
;* Authors: Oskar Arvidsson <oskar@irock.se>
9
;* Henrik Gramner <henrik@gramner.com>
10
x264-snapshot-20130723-2245.tar.bz2/common/x86/trellis-64.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/trellis-64.asm
Changed
10
1
2
;*****************************************************************************
3
;* trellis-64.asm: x86_64 trellis quantization
4
;*****************************************************************************
5
-;* Copyright (C) 2012-2013 x264 project
6
+;* Copyright (C) 2012-2014 x264 project
7
;*
8
;* Authors: Loren Merritt <lorenm@u.washington.edu>
9
;*
10
x264-snapshot-20130723-2245.tar.bz2/common/x86/util.h -> x264-snapshot-20140321-2245.tar.bz2/common/x86/util.h
Changed
10
1
2
/*****************************************************************************
3
* util.h: x86 inline asm
4
*****************************************************************************
5
- * Copyright (C) 2008-2013 x264 project
6
+ * Copyright (C) 2008-2014 x264 project
7
*
8
* Authors: Jason Garrett-Glaser <darkshikari@gmail.com>
9
* Loren Merritt <lorenm@u.washington.edu>
10
x264-snapshot-20130723-2245.tar.bz2/common/x86/x86inc.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/x86inc.asm
Changed
369
1
2
;*****************************************************************************
3
;* x86inc.asm: x264asm abstraction layer
4
;*****************************************************************************
5
-;* Copyright (C) 2005-2013 x264 project
6
+;* Copyright (C) 2005-2014 x264 project
7
;*
8
;* Authors: Loren Merritt <lorenm@u.washington.edu>
9
;* Anton Mitrofanov <BugMaster@narod.ru>
10
11
%define public_prefix private_prefix
12
%endif
13
14
+%ifndef STACK_ALIGNMENT
15
+ %if ARCH_X86_64
16
+ %define STACK_ALIGNMENT 16
17
+ %else
18
+ %define STACK_ALIGNMENT 4
19
+ %endif
20
+%endif
21
+
22
%define WIN64 0
23
%define UNIX64 0
24
%if ARCH_X86_64
25
26
%define WIN64 1
27
%elifidn __OUTPUT_FORMAT__,win64
28
%define WIN64 1
29
+ %elifidn __OUTPUT_FORMAT__,x64
30
+ %define WIN64 1
31
%else
32
%define UNIX64 1
33
%endif
34
35
; %1 = number of arguments. loads them from stack if needed.
36
; %2 = number of registers used. pushes callee-saved regs if needed.
37
; %3 = number of xmm registers used. pushes callee-saved xmm regs if needed.
38
-; %4 = (optional) stack size to be allocated. If not aligned (x86-32 ICC 10.x,
39
-; MSVC or YMM), the stack will be manually aligned (to 16 or 32 bytes),
40
+; %4 = (optional) stack size to be allocated. The stack will be aligned before
41
+; allocating the specified stack size. If the required stack alignment is
42
+; larger than the known stack alignment the stack will be manually aligned
43
; and an extra register will be allocated to hold the original stack
44
; pointer (to not invalidate r0m etc.). To prevent the use of an extra
45
; register as stack pointer, request a negative stack size.
46
47
; PROLOGUE can also be invoked by adding the same options to cglobal
48
49
; e.g.
50
-; cglobal foo, 2,3,0, dst, src, tmp
51
-; declares a function (foo), taking two args (dst and src) and one local variable (tmp)
52
+; cglobal foo, 2,3,7,0x40, dst, src, tmp
53
+; declares a function (foo) that automatically loads two arguments (dst and
54
+; src) into registers, uses one additional register (tmp) plus 7 vector
55
+; registers (m0-m6) and allocates 0x40 bytes of stack space.
56
57
; TODO Some functions can use some args directly from the stack. If they're the
58
; last args then you can just not declare them, but if they're in the middle
59
60
%assign n_arg_names %0
61
%endmacro
62
63
+%define required_stack_alignment ((mmsize + 15) & ~15)
64
+
65
%macro ALLOC_STACK 1-2 0 ; stack_size, n_xmm_regs (for win64 only)
66
%ifnum %1
67
%if %1 != 0
68
- %assign %%stack_alignment ((mmsize + 15) & ~15)
69
+ %assign %%pad 0
70
%assign stack_size %1
71
%if stack_size < 0
72
%assign stack_size -stack_size
73
%endif
74
- %assign stack_size_padded stack_size
75
%if WIN64
76
- %assign stack_size_padded stack_size_padded + 32 ; reserve 32 bytes for shadow space
77
+ %assign %%pad %%pad + 32 ; shadow space
78
%if mmsize != 8
79
%assign xmm_regs_used %2
80
%if xmm_regs_used > 8
81
- %assign stack_size_padded stack_size_padded + (xmm_regs_used-8)*16
82
+ %assign %%pad %%pad + (xmm_regs_used-8)*16 ; callee-saved xmm registers
83
%endif
84
%endif
85
%endif
86
- %if mmsize <= 16 && HAVE_ALIGNED_STACK
87
- %assign stack_size_padded stack_size_padded + %%stack_alignment - gprsize - (stack_offset & (%%stack_alignment - 1))
88
+ %if required_stack_alignment <= STACK_ALIGNMENT
89
+ ; maintain the current stack alignment
90
+ %assign stack_size_padded stack_size + %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1))
91
SUB rsp, stack_size_padded
92
%else
93
%assign %%reg_num (regs_used - 1)
94
95
; it, i.e. in [rsp+stack_size_padded], so we can restore the
96
; stack in a single instruction (i.e. mov rsp, rstk or mov
97
; rsp, [rsp+stack_size_padded])
98
- mov rstk, rsp
99
%if %1 < 0 ; need to store rsp on stack
100
- sub rsp, gprsize+stack_size_padded
101
- and rsp, ~(%%stack_alignment-1)
102
- %xdefine rstkm [rsp+stack_size_padded]
103
- mov rstkm, rstk
104
+ %xdefine rstkm [rsp + stack_size + %%pad]
105
+ %assign %%pad %%pad + gprsize
106
%else ; can keep rsp in rstk during whole function
107
- sub rsp, stack_size_padded
108
- and rsp, ~(%%stack_alignment-1)
109
%xdefine rstkm rstk
110
%endif
111
+ %assign stack_size_padded stack_size + ((%%pad + required_stack_alignment-1) & ~(required_stack_alignment-1))
112
+ mov rstk, rsp
113
+ and rsp, ~(required_stack_alignment-1)
114
+ sub rsp, stack_size_padded
115
+ movifnidn rstkm, rstk
116
%endif
117
WIN64_PUSH_XMM
118
%endif
119
120
121
%macro SETUP_STACK_POINTER 1
122
%ifnum %1
123
- %if %1 != 0 && (HAVE_ALIGNED_STACK == 0 || mmsize == 32)
124
+ %if %1 != 0 && required_stack_alignment > STACK_ALIGNMENT
125
%if %1 > 0
126
%assign regs_used (regs_used + 1)
127
%elif ARCH_X86_64 && regs_used == num_args && num_args <= 4 + UNIX64 * 2
128
129
%assign xmm_regs_used %1
130
ASSERT xmm_regs_used <= 16
131
%if xmm_regs_used > 8
132
- %assign stack_size_padded (xmm_regs_used-8)*16 + (~stack_offset&8) + 32
133
+ ; Allocate stack space for callee-saved xmm registers plus shadow space and align the stack.
134
+ %assign %%pad (xmm_regs_used-8)*16 + 32
135
+ %assign stack_size_padded %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1))
136
SUB rsp, stack_size_padded
137
%endif
138
WIN64_PUSH_XMM
139
140
%endrep
141
%endif
142
%if stack_size_padded > 0
143
- %if stack_size > 0 && (mmsize == 32 || HAVE_ALIGNED_STACK == 0)
144
+ %if stack_size > 0 && required_stack_alignment > STACK_ALIGNMENT
145
mov rsp, rstkm
146
%else
147
add %1, stack_size_padded
148
149
150
%macro RET 0
151
%if stack_size_padded > 0
152
-%if mmsize == 32 || HAVE_ALIGNED_STACK == 0
153
+%if required_stack_alignment > STACK_ALIGNMENT
154
mov rsp, rstkm
155
%else
156
add rsp, stack_size_padded
157
158
159
%macro RET 0
160
%if stack_size_padded > 0
161
-%if mmsize == 32 || HAVE_ALIGNED_STACK == 0
162
+%if required_stack_alignment > STACK_ALIGNMENT
163
mov rsp, rstkm
164
%else
165
add rsp, stack_size_padded
166
167
%assign cpuflags_cache64 (1<<17)
168
%assign cpuflags_slowctz (1<<18)
169
%assign cpuflags_lzcnt (1<<19)
170
-%assign cpuflags_misalign (1<<20)
171
-%assign cpuflags_aligned (1<<21) ; not a cpu feature, but a function variant
172
-%assign cpuflags_atom (1<<22)
173
-%assign cpuflags_bmi1 (1<<23)|cpuflags_lzcnt
174
-%assign cpuflags_bmi2 (1<<24)|cpuflags_bmi1
175
+%assign cpuflags_aligned (1<<20) ; not a cpu feature, but a function variant
176
+%assign cpuflags_atom (1<<21)
177
+%assign cpuflags_bmi1 (1<<22)|cpuflags_lzcnt
178
+%assign cpuflags_bmi2 (1<<23)|cpuflags_bmi1
179
180
%define cpuflag(x) ((cpuflags & (cpuflags_ %+ x)) == (cpuflags_ %+ x))
181
%define notcpuflag(x) ((cpuflags & (cpuflags_ %+ x)) != (cpuflags_ %+ x))
182
183
%endmacro
184
185
; Merge mmx and sse*
186
-; m# is a simd regsiter of the currently selected size
187
-; xm# is the corresponding xmmreg (if selcted xmm or ymm size), or mmreg (if selected mmx)
188
-; ym# is the corresponding ymmreg (if selcted xmm or ymm size), or mmreg (if selected mmx)
189
+; m# is a simd register of the currently selected size
190
+; xm# is the corresponding xmm register if mmsize >= 16, otherwise the same as m#
191
+; ym# is the corresponding ymm register if mmsize >= 32, otherwise the same as m#
192
; (All 3 remain in sync through SWAP.)
193
194
%macro CAT_XDEFINE 3
195
196
%assign %%i 0
197
%rep 8
198
CAT_XDEFINE m, %%i, mm %+ %%i
199
- CAT_XDEFINE nmm, %%i, %%i
200
+ CAT_XDEFINE nnmm, %%i, %%i
201
%assign %%i %%i+1
202
%endrep
203
%rep 8
204
CAT_UNDEF m, %%i
205
- CAT_UNDEF nmm, %%i
206
+ CAT_UNDEF nnmm, %%i
207
%assign %%i %%i+1
208
%endrep
209
INIT_CPUFLAGS %1
210
211
%assign %%i 0
212
%rep num_mmregs
213
CAT_XDEFINE m, %%i, xmm %+ %%i
214
- CAT_XDEFINE nxmm, %%i, %%i
215
+ CAT_XDEFINE nnxmm, %%i, %%i
216
%assign %%i %%i+1
217
%endrep
218
INIT_CPUFLAGS %1
219
220
%define xmmxmm%1 xmm%1
221
%define xmmymm%1 xmm%1
222
%define ymmmm%1 mm%1
223
- %define ymmxmm%1 ymm%1
224
+ %define ymmxmm%1 xmm%1
225
%define ymmymm%1 ymm%1
226
%define xm%1 xmm %+ m%1
227
%define ym%1 ymm %+ m%1
228
229
%endrep
230
%rep %0/2
231
%xdefine m%1 %%tmp%2
232
- CAT_XDEFINE n, m%1, %1
233
+ CAT_XDEFINE nn, m%1, %1
234
%rotate 2
235
%endrep
236
%endmacro
237
238
%xdefine %%tmp m%1
239
%xdefine m%1 m%2
240
%xdefine m%2 %%tmp
241
- CAT_XDEFINE n, m%1, %1
242
- CAT_XDEFINE n, m%2, %2
243
+ CAT_XDEFINE nn, m%1, %1
244
+ CAT_XDEFINE nn, m%2, %2
245
%rotate 1
246
%endrep
247
%endmacro
248
249
%macro SWAP_INTERNAL_NAME 2-*
250
- %xdefine %%args n %+ %1
251
+ %xdefine %%args nn %+ %1
252
%rep %0-1
253
- %xdefine %%args %%args, n %+ %2
254
+ %xdefine %%args %%args, nn %+ %2
255
%rotate 1
256
%endrep
257
SWAP_INTERNAL_NUM %%args
258
259
%assign %%i 0
260
%rep num_mmregs
261
CAT_XDEFINE m, %%i, %1_m %+ %%i
262
- CAT_XDEFINE n, m %+ %%i, %%i
263
+ CAT_XDEFINE nn, m %+ %%i, %%i
264
%assign %%i %%i+1
265
%endrep
266
%endif
267
268
;%5+: operands
269
%macro RUN_AVX_INSTR 5-8+
270
%ifnum sizeof%6
271
- %assign %%sizeofreg sizeof%6
272
+ %assign __sizeofreg sizeof%6
273
%elifnum sizeof%5
274
- %assign %%sizeofreg sizeof%5
275
+ %assign __sizeofreg sizeof%5
276
%else
277
- %assign %%sizeofreg mmsize
278
+ %assign __sizeofreg mmsize
279
%endif
280
- %assign %%emulate_avx 0
281
- %if avx_enabled && %%sizeofreg >= 16
282
- %xdefine %%instr v%1
283
+ %assign __emulate_avx 0
284
+ %if avx_enabled && __sizeofreg >= 16
285
+ %xdefine __instr v%1
286
%else
287
- %xdefine %%instr %1
288
+ %xdefine __instr %1
289
%if %0 >= 7+%3
290
- %assign %%emulate_avx 1
291
+ %assign __emulate_avx 1
292
%endif
293
%endif
294
295
- %if %%emulate_avx
296
- %xdefine %%src1 %6
297
- %xdefine %%src2 %7
298
+ %if __emulate_avx
299
+ %xdefine __src1 %6
300
+ %xdefine __src2 %7
301
%ifnidn %5, %6
302
%if %0 >= 8
303
CHECK_AVX_INSTR_EMU {%1 %5, %6, %7, %8}, %5, %7, %8
304
305
; 3-operand AVX instructions with a memory arg can only have it in src2,
306
; whereas SSE emulation prefers to have it in src1 (i.e. the mov).
307
; So, if the instruction is commutative with a memory arg, swap them.
308
- %xdefine %%src1 %7
309
- %xdefine %%src2 %6
310
+ %xdefine __src1 %7
311
+ %xdefine __src2 %6
312
%endif
313
%endif
314
- %if %%sizeofreg == 8
315
- MOVQ %5, %%src1
316
+ %if __sizeofreg == 8
317
+ MOVQ %5, __src1
318
%elif %2
319
- MOVAPS %5, %%src1
320
+ MOVAPS %5, __src1
321
%else
322
- MOVDQA %5, %%src1
323
+ MOVDQA %5, __src1
324
%endif
325
%endif
326
%if %0 >= 8
327
- %1 %5, %%src2, %8
328
+ %1 %5, __src2, %8
329
%else
330
- %1 %5, %%src2
331
+ %1 %5, __src2
332
%endif
333
%elif %0 >= 8
334
- %%instr %5, %6, %7, %8
335
+ __instr %5, %6, %7, %8
336
%elif %0 == 7
337
- %%instr %5, %6, %7
338
+ __instr %5, %6, %7
339
%elif %0 == 6
340
- %%instr %5, %6
341
+ __instr %5, %6
342
%else
343
- %%instr %5
344
+ __instr %5
345
%endif
346
%endmacro
347
348
349
%macro %1 4-7 %1, %2, %3
350
%if cpuflag(xop)
351
v%5 %1, %2, %3, %4
352
- %else
353
+ %elifnidn %1, %4
354
%6 %1, %2, %3
355
%7 %1, %4
356
+ %else
357
+ %error non-xop emulation of ``%5 %1, %2, %3, %4'' is not supported
358
%endif
359
%endmacro
360
%endmacro
361
362
-FMA_INSTR pmacsdd, pmulld, paddd
363
FMA_INSTR pmacsww, pmullw, paddw
364
+FMA_INSTR pmacsdd, pmulld, paddd ; sse4 emulation
365
+FMA_INSTR pmacsdql, pmuldq, paddq ; sse4 emulation
366
FMA_INSTR pmadcswd, pmaddwd, paddd
367
368
; convert FMA4 to FMA3 if possible
369
x264-snapshot-20130723-2245.tar.bz2/common/x86/x86util.asm -> x264-snapshot-20140321-2245.tar.bz2/common/x86/x86util.asm
Changed
10
1
2
;*****************************************************************************
3
;* x86util.asm: x86 utility macros
4
;*****************************************************************************
5
-;* Copyright (C) 2008-2013 x264 project
6
+;* Copyright (C) 2008-2014 x264 project
7
;*
8
;* Authors: Holger Lubitz <holger@lubitz.org>
9
;* Loren Merritt <lorenm@u.washington.edu>
10
x264-snapshot-20130723-2245.tar.bz2/configure -> x264-snapshot-20140321-2245.tar.bz2/configure
Changed
346
1
2
--disable-thread disable multithreaded encoding
3
--enable-win32thread use win32threads (windows only)
4
--disable-interlaced disable interlaced encoding support
5
- --enable-visualize enable visualization (X11 only)
6
--bit-depth=BIT_DEPTH set output bit depth (8-10) [8]
7
--chroma-format=FORMAT output chroma format (420, 422, 444, all) [all]
8
9
10
--disable-lavf disable libavformat support
11
--disable-ffms disable ffmpegsource support
12
--disable-gpac disable gpac support
13
+ --disable-lsmash disable lsmash support
14
15
EOF
16
exit 1
17
18
lavf="auto"
19
ffms="auto"
20
gpac="auto"
21
+lsmash="auto"
22
+mp4="no"
23
gpl="yes"
24
thread="auto"
25
swscale="auto"
26
27
gprof="no"
28
strip="no"
29
pic="no"
30
-vis="no"
31
bit_depth="8"
32
chroma_format="all"
33
compiler="GNU"
34
35
EXE=""
36
37
# list of all preprocessor HAVE values we can define
38
-CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F VISUALIZE SWSCALE LAVF FFMS GPAC GF_MALLOC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL"
39
+CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F SWSCALE \
40
+ LAVF FFMS GPAC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL THP LSMASH"
41
42
# parse options
43
44
45
--disable-gpac)
46
gpac="no"
47
;;
48
+ --disable-lsmash)
49
+ lsmash="no"
50
+ ;;
51
--disable-gpl)
52
gpl="no"
53
;;
54
55
--enable-pic)
56
pic="yes"
57
;;
58
- --enable-visualize)
59
- vis="yes"
60
- ;;
61
--host=*)
62
host="$optarg"
63
;;
64
65
AR="${AR-${cross_prefix}ar}"
66
RANLIB="${RANLIB-${cross_prefix}ranlib}"
67
STRIP="${STRIP-${cross_prefix}strip}"
68
+INSTALL="${INSTALL-install}"
69
70
if [ "x$host" = x ]; then
71
host=`${SRCPATH}/config.guess`
72
73
CFLAGS="$CFLAGS -mno-cygwin"
74
LDFLAGS="$LDFLAGS -mno-cygwin"
75
fi
76
- if cpp_check "" "" "defined(__CYGWIN32__)" ; then
77
+ if cpp_check "" "" "defined(__CYGWIN__)" ; then
78
define HAVE_MALLOC_H
79
SYS="CYGWIN"
80
else
81
SYS="WINDOWS"
82
DEVNULL="NUL"
83
+ LDFLAGSCLI="$LDFLAGSCLI -lshell32"
84
RC="${RC-${cross_prefix}windres}"
85
fi
86
;;
87
88
SYS="WINDOWS"
89
EXE=".exe"
90
DEVNULL="NUL"
91
+ LDFLAGSCLI="$LDFLAGSCLI -lshell32"
92
[ $compiler = ICL ] && RC="${RC-rc}" || RC="${RC-${cross_prefix}windres}"
93
;;
94
sunos*|solaris*)
95
96
else
97
LDFLAGS="$LDFLAGS /usr/lib/values-xpg6.o"
98
fi
99
+ if test -x /usr/ucb/install ; then
100
+ INSTALL=/usr/ucb/install
101
+ elif test -x /usr/bin/ginstall ; then
102
+ # OpenSolaris
103
+ INSTALL=/usr/bin/ginstall
104
+ elif test -x /usr/gnu/bin/install ; then
105
+ # OpenSolaris
106
+ INSTALL=/usr/gnu/bin/install
107
+ fi
108
HAVE_GETOPT_LONG=0
109
;;
110
*qnx*)
111
112
113
LDFLAGS="$LDFLAGS $libm"
114
115
-aligned_stack=1
116
+stack_alignment=16
117
case $host_cpu in
118
i*86)
119
ARCH="X86"
120
121
if [ $SYS = LINUX ]; then
122
# < 11 is completely incapable of keeping a mod16 stack
123
if cpp_check "" "" "__INTEL_COMPILER < 1100" ; then
124
- define BROKEN_STACK_ALIGNMENT
125
- aligned_stack=0
126
+ stack_alignment=4
127
# 11 <= x < 12 is capable of keeping a mod16 stack, but defaults to not doing so.
128
elif cpp_check "" "" "__INTEL_COMPILER < 1200" ; then
129
CFLAGS="$CFLAGS -falign-stack=assume-16-byte"
130
131
# >= 12 defaults to a mod16 stack
132
fi
133
# icl on windows has no mod16 stack support
134
- [ $SYS = WINDOWS ] && define BROKEN_STACK_ALIGNMENT && aligned_stack=0
135
+ [ $SYS = WINDOWS ] && stack_alignment=4
136
fi
137
if [ "$SYS" = MACOSX ]; then
138
ASFLAGS="$ASFLAGS -f macho -DPREFIX"
139
140
CFLAGS="$CFLAGS -arch x86_64"
141
LDFLAGS="$LDFLAGS -arch x86_64"
142
fi
143
- elif [ "$SYS" = WINDOWS ]; then
144
+ elif [ "$SYS" = WINDOWS -o "$SYS" = CYGWIN ]; then
145
ASFLAGS="$ASFLAGS -f win32 -m amd64"
146
# only the GNU toolchain is inconsistent in prefixing function names with _
147
[ $compiler = GNU ] && cc_check "" "-S" && grep -q "_main:" conftest && ASFLAGS="$ASFLAGS -DPREFIX"
148
149
ARCH="$(echo $host_cpu | tr a-z A-Z)"
150
;;
151
esac
152
-ASFLAGS="$ASFLAGS -DHAVE_ALIGNED_STACK=${aligned_stack}"
153
154
if [ $SYS = WINDOWS ]; then
155
if ! rc_check "0 RCDATA {0}" ; then
156
157
echo "If you really want to compile without asm, configure with --disable-asm."
158
exit 1
159
fi
160
+ ASFLAGS="$ASFLAGS -Worphan-labels"
161
define HAVE_MMX
162
- if cc_check '' -mpreferred-stack-boundary=5 ; then
163
+ if [ $compiler = GNU ] && cc_check '' -mpreferred-stack-boundary=5 ; then
164
CFLAGS="$CFLAGS -mpreferred-stack-boundary=5"
165
- define HAVE_32B_STACK_ALIGNMENT
166
+ stack_alignment=32
167
fi
168
fi
169
170
171
define ARCH_$ARCH
172
define SYS_$SYS
173
174
+define STACK_ALIGNMENT $stack_alignment
175
+ASFLAGS="$ASFLAGS -DSTACK_ALIGNMENT=$stack_alignment"
176
+
177
# skip endianness check for Intel Compiler, as all supported platforms are little. the -ipo flag will also cause the check to fail
178
if [ $compiler = GNU ]; then
179
echo "int i[2] = {0x42494745,0}; double f[2] = {0x1.0656e6469616ep+102,0};" > conftest.c
180
181
fi
182
;;
183
QNX)
184
- cc_check pthread.h -lc && thread="posix" && libpthread="-lc"
185
+ cc_check pthread.h -lc "pthread_create(0,0,0,0);" && thread="posix" && libpthread="-lc"
186
;;
187
*)
188
- cc_check pthread.h -lpthread && thread="posix" && libpthread="-lpthread"
189
+ if cc_check pthread.h -lpthread "pthread_create(0,0,0,0);" ; then
190
+ thread="posix"
191
+ libpthread="-lpthread"
192
+ else
193
+ cc_check pthread.h "" "pthread_create(0,0,0,0);" && thread="posix" && libpthread=""
194
+ fi
195
;;
196
esac
197
fi
198
199
define HAVE_LOG2F
200
fi
201
202
-if [ "$vis" = "yes" ] ; then
203
- save_CFLAGS="$CFLAGS"
204
- CFLAGS="$CFLAGS -I/usr/X11R6/include"
205
- if cc_check "X11/Xlib.h" "-L/usr/X11R6/lib -lX11" "XOpenDisplay(0);" ; then
206
- LDFLAGS="-L/usr/X11R6/lib -lX11 $LDFLAGS"
207
- define HAVE_VISUALIZE
208
- else
209
- vis="no"
210
- CFLAGS="$save_CFLAGS"
211
- fi
212
+if [ "$SYS" = "LINUX" -a \( "$ARCH" = "X86" -o "$ARCH" = "X86_64" \) ] && cc_check "sys/mman.h" "" "MADV_HUGEPAGE;" ; then
213
+ define HAVE_THP
214
fi
215
216
if [ "$swscale" = "auto" ] ; then
217
218
[ -z "$SWSCALE_LIBS" ] && SWSCALE_LIBS="-lswscale -lavutil"
219
220
if cc_check "libswscale/swscale.h" "$SWSCALE_CFLAGS $SWSCALE_LIBS" "sws_init_context(0,0,0);" ; then
221
- if cpp_check "libavutil/pixdesc.h" "$SWSCALE_CFLAGS $SWSCALE_LIBS" "defined(PIX_FMT_RGB)" ; then
222
+ if cpp_check "libavutil/pixdesc.h" "$SWSCALE_CFLAGS $SWSCALE_LIBS" "defined(AV_PIX_FMT_FLAG_RGB)" ; then
223
swscale="yes"
224
else
225
- echo "Warning: PIX_FMT_RGB is missing from libavutil, update for swscale support"
226
+ echo "Warning: AV_PIX_FMT_FLAG_RGB is missing from libavutil, update for swscale support"
227
fi
228
fi
229
fi
230
231
fi
232
if [ -z "$LAVF_LIBS" -a -z "$LAVF_CFLAGS" ]; then
233
LAVF_LIBS="-lavformat"
234
- for lib in -lpostproc -lavcodec -lavcore -lswscale -lavutil -lm -lz -lbz2 $libpthread -lavifil32; do
235
+ for lib in -lpostproc -lavcodec -lavcore -lswscale -lavutil -lm -lz -lbz2 $libpthread -lavifil32 -lws2_32; do
236
cc_check "" $lib && LAVF_LIBS="$LAVF_LIBS $lib"
237
done
238
fi
239
240
fi
241
fi
242
243
-if [ "$gpac" = "auto" ] ; then
244
+if [ "$lsmash" = "auto" ] ; then
245
+ lsmash="no"
246
+ if ${cross_prefix}pkg-config --exists liblsmash 2>/dev/null; then
247
+ LSMASH_LIBS="$LSMASH_LIBS $(${cross_prefix}pkg-config --libs liblsmash)"
248
+ LSMASH_CFLAGS="$LSMASH_CFLAGS $(${cross_prefix}pkg-config --cflags liblsmash)"
249
+ fi
250
+ [ -z "$LSMASH_LIBS" ] && LSMASH_LIBS="-llsmash"
251
+
252
+ if cc_check lsmash.h "$LSMASH_CFLAGS $LSMASH_LIBS" ; then
253
+ if cpp_check lsmash.h "$LSMASH_CFLAGS" "LSMASH_VERSION_MAJOR > 0 || (LSMASH_VERSION_MAJOR == 0 && LSMASH_VERSION_MINOR >= 1)" ; then
254
+ lsmash="yes"
255
+ else
256
+ echo "Warning: lsmash is too old, update to rev.751 or later"
257
+ fi
258
+ fi
259
+fi
260
+
261
+if [ "$gpac" = "auto" -a "$lsmash" != "yes" ] ; then
262
gpac="no"
263
- cc_check "" -lz && GPAC_LIBS="-lgpac_static -lz" || GPAC_LIBS="-lgpac_static"
264
+ GPAC_LIBS="-lgpac_static"
265
+ cc_check "" -lz && GPAC_LIBS="$GPAC_LIBS -lz"
266
if [ "$SYS" = "WINDOWS" ] ; then
267
- GPAC_LIBS="$GPAC_LIBS -lwinmm"
268
+ cc_check "" -lws2_32 && GPAC_LIBS="$GPAC_LIBS -lws2_32"
269
+ cc_check "" -lwinmm && GPAC_LIBS="$GPAC_LIBS -lwinmm"
270
fi
271
if cc_check gpac/isomedia.h "$GPAC_LIBS" ; then
272
if cc_check gpac/isomedia.h "$GPAC_LIBS" "gf_isom_set_pixel_aspect_ratio(0,0,0,0,0);" ; then
273
274
fi
275
fi
276
fi
277
-if [ "$gpac" = "yes" ] ; then
278
+
279
+if [ "$lsmash" = "yes" ] ; then
280
+ mp4="lsmash"
281
+ LDFLAGSCLI="$LSMASH_LIBS $LDFLAGSCLI"
282
+ CFLAGS="$CFLAGS $LSMASH_CFLAGS"
283
+ define HAVE_LSMASH
284
+elif [ "$gpac" = "yes" ] ; then
285
+ mp4="gpac"
286
define HAVE_GPAC
287
- if cc_check gpac/isomedia.h "-Werror $GPAC_LIBS" "void *p; p = gf_malloc(1); gf_free(p);" ; then
288
- define HAVE_GF_MALLOC
289
- fi
290
LDFLAGSCLI="$GPAC_LIBS $LDFLAGSCLI"
291
fi
292
293
if [ "$avs" = "auto" ] ; then
294
avs="no"
295
# cygwin can use avisynth if it can use LoadLibrary
296
- if [ $SYS = WINDOWS ] || ([ $SYS = CYGWIN ] && cc_check windows.h "" "LoadLibrary(0);") ; then
297
+ if [ $SYS = WINDOWS ] || ([ $SYS = CYGWIN ] && cc_check windows.h "" "LoadLibraryW(0);") ; then
298
avs="avisynth"
299
define HAVE_AVS
300
define USE_AVXSYNTH 0
301
302
fi
303
log_ok
304
# cygwin can use opencl if it can use LoadLibrary
305
- if [ $SYS = WINDOWS ] || ([ $SYS = CYGWIN ] && cc_check windows.h "" "LoadLibrary(0);") ; then
306
+ if [ $SYS = WINDOWS ] || ([ $SYS = CYGWIN ] && cc_check windows.h "" "LoadLibraryW(0);") ; then
307
opencl="yes"
308
define HAVE_OPENCL
309
elif [ "$SYS" = "LINUX" -o "$SYS" = "MACOSX" ] ; then
310
311
AR=$AR
312
RANLIB=$RANLIB
313
STRIP=$STRIP
314
+INSTALL=$INSTALL
315
AS=$AS
316
ASFLAGS=$ASFLAGS
317
RC=$RC
318
319
Name: x264
320
Description: H.264 (MPEG4 AVC) encoder library
321
Version: $(grep POINTVER < x264_config.h | sed -e 's/.* "//; s/".*//')
322
-Libs: -L$libdir -lx264
323
-Libs.private: $libpthread $libm $libdl
324
+Libs: -L$libdir -lx264 $([ "$shared" = "yes" ] || echo $libpthread $libm $libdl)
325
+Libs.private: $([ "$shared" = "yes" ] && echo $libpthread $libm $libdl)
326
Cflags: -I$includedir
327
EOF
328
329
330
avs: $avs
331
lavf: $lavf
332
ffms: $ffms
333
-gpac: $gpac
334
+mp4: $mp4
335
gpl: $gpl
336
thread: $thread
337
opencl: $opencl
338
339
gprof: $gprof
340
strip: $strip
341
PIC: $pic
342
-visualize: $vis
343
bit depth: $bit_depth
344
chroma format: $chroma_format
345
EOF
346
x264-snapshot-20130723-2245.tar.bz2/encoder/analyse.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/analyse.c
Changed
193
1
2
/*****************************************************************************
3
* analyse.c: macroblock analysis
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
* Loren Merritt <lorenm@u.washington.edu>
10
11
/* non-RD PCM decision is inaccurate (as is psy-rd), so don't do it.
12
* PCM cost can overflow with high lambda2, so cap it at COST_MAX. */
13
uint64_t pcm_cost = ((uint64_t)X264_PCM_COST*a->i_lambda2 + 128) >> 8;
14
- a->i_satd_pcm = !h->mb.i_psy_rd && a->i_mbrd && pcm_cost < COST_MAX ? pcm_cost : COST_MAX;
15
+ a->i_satd_pcm = !h->param.i_avcintra_class && !h->mb.i_psy_rd && a->i_mbrd && pcm_cost < COST_MAX ? pcm_cost : COST_MAX;
16
17
a->b_fast_intra = 0;
18
a->b_avoid_topright = 0;
19
20
{I_PRED_CHROMA_V, I_PRED_CHROMA_H, I_PRED_CHROMA_DC, I_PRED_CHROMA_P, -1},
21
};
22
23
+static const int8_t i8x8_mode_available[2][5][10] =
24
+{
25
+ {
26
+ {I_PRED_4x4_DC_128, -1, -1, -1, -1, -1, -1, -1, -1, -1},
27
+ {I_PRED_4x4_DC_LEFT, I_PRED_4x4_H, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1},
28
+ {I_PRED_4x4_DC_TOP, I_PRED_4x4_V, I_PRED_4x4_DDL, I_PRED_4x4_VL, -1, -1, -1, -1, -1, -1},
29
+ {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDL, I_PRED_4x4_VL, I_PRED_4x4_HU, -1, -1, -1, -1},
30
+ {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDL, I_PRED_4x4_DDR, I_PRED_4x4_VR, I_PRED_4x4_HD, I_PRED_4x4_VL, I_PRED_4x4_HU, -1},
31
+ },
32
+ {
33
+ {I_PRED_4x4_DC_128, -1, -1, -1, -1, -1, -1, -1, -1, -1},
34
+ {I_PRED_4x4_DC_LEFT, I_PRED_4x4_H, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1},
35
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
36
+ {I_PRED_4x4_H, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1, -1},
37
+ {I_PRED_4x4_H, I_PRED_4x4_HD, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1},
38
+ }
39
+};
40
+
41
static const int8_t i4x4_mode_available[2][5][10] =
42
{
43
{
44
45
{I_PRED_4x4_DC_LEFT, I_PRED_4x4_H, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1},
46
{I_PRED_4x4_DC_TOP, I_PRED_4x4_V, -1, -1, -1, -1, -1, -1, -1, -1},
47
{I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1},
48
- {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDR, I_PRED_4x4_VR, I_PRED_4x4_HD, I_PRED_4x4_HU, -1},
49
+ {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDR, I_PRED_4x4_VR, I_PRED_4x4_HD, I_PRED_4x4_HU, -1, -1, -1},
50
}
51
};
52
53
54
int avoid_topright = force_intra && (i&1);
55
int idx = i_neighbour & (MB_TOP|MB_LEFT|MB_TOPLEFT);
56
idx = (idx == (MB_TOP|MB_LEFT|MB_TOPLEFT)) ? 4 : idx & (MB_TOP|MB_LEFT);
57
- return i4x4_mode_available[avoid_topright][idx];
58
+ return i8x8_mode_available[avoid_topright][idx];
59
}
60
61
static ALWAYS_INLINE const int8_t *predict_4x4_mode_available( int force_intra, int i_neighbour, int i )
62
63
int lambda = a->i_lambda;
64
65
/*---------------- Try all mode and calculate their score ---------------*/
66
+ /* Disabled i16x16 for AVC-Intra compat */
67
+ if( !h->param.i_avcintra_class )
68
+ {
69
+ const int8_t *predict_mode = predict_16x16_mode_available( h->mb.i_neighbour_intra );
70
71
- /* 16x16 prediction selection */
72
- const int8_t *predict_mode = predict_16x16_mode_available( h->mb.i_neighbour_intra );
73
+ /* Not heavily tuned */
74
+ static const uint8_t i16x16_thresh_lut[11] = { 2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4 };
75
+ int i16x16_thresh = a->b_fast_intra ? (i16x16_thresh_lut[h->mb.i_subpel_refine]*i_satd_inter)>>1 : COST_MAX;
76
77
- /* Not heavily tuned */
78
- static const uint8_t i16x16_thresh_lut[11] = { 2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4 };
79
- int i16x16_thresh = a->b_fast_intra ? (i16x16_thresh_lut[h->mb.i_subpel_refine]*i_satd_inter)>>1 : COST_MAX;
80
-
81
- if( !h->mb.b_lossless && predict_mode[3] >= 0 )
82
- {
83
- h->pixf.intra_mbcmp_x3_16x16( p_src, p_dst, a->i_satd_i16x16_dir );
84
- a->i_satd_i16x16_dir[0] += lambda * bs_size_ue(0);
85
- a->i_satd_i16x16_dir[1] += lambda * bs_size_ue(1);
86
- a->i_satd_i16x16_dir[2] += lambda * bs_size_ue(2);
87
- COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[0], a->i_predict16x16, 0 );
88
- COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[1], a->i_predict16x16, 1 );
89
- COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[2], a->i_predict16x16, 2 );
90
-
91
- /* Plane is expensive, so don't check it unless one of the previous modes was useful. */
92
- if( a->i_satd_i16x16 <= i16x16_thresh )
93
- {
94
- h->predict_16x16[I_PRED_16x16_P]( p_dst );
95
- a->i_satd_i16x16_dir[I_PRED_16x16_P] = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE );
96
- a->i_satd_i16x16_dir[I_PRED_16x16_P] += lambda * bs_size_ue(3);
97
- COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[I_PRED_16x16_P], a->i_predict16x16, 3 );
98
+ if( !h->mb.b_lossless && predict_mode[3] >= 0 )
99
+ {
100
+ h->pixf.intra_mbcmp_x3_16x16( p_src, p_dst, a->i_satd_i16x16_dir );
101
+ a->i_satd_i16x16_dir[0] += lambda * bs_size_ue(0);
102
+ a->i_satd_i16x16_dir[1] += lambda * bs_size_ue(1);
103
+ a->i_satd_i16x16_dir[2] += lambda * bs_size_ue(2);
104
+ COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[0], a->i_predict16x16, 0 );
105
+ COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[1], a->i_predict16x16, 1 );
106
+ COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[2], a->i_predict16x16, 2 );
107
+
108
+ /* Plane is expensive, so don't check it unless one of the previous modes was useful. */
109
+ if( a->i_satd_i16x16 <= i16x16_thresh )
110
+ {
111
+ h->predict_16x16[I_PRED_16x16_P]( p_dst );
112
+ a->i_satd_i16x16_dir[I_PRED_16x16_P] = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE );
113
+ a->i_satd_i16x16_dir[I_PRED_16x16_P] += lambda * bs_size_ue(3);
114
+ COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[I_PRED_16x16_P], a->i_predict16x16, 3 );
115
+ }
116
}
117
- }
118
- else
119
- {
120
- for( ; *predict_mode >= 0; predict_mode++ )
121
+ else
122
{
123
- int i_satd;
124
- int i_mode = *predict_mode;
125
+ for( ; *predict_mode >= 0; predict_mode++ )
126
+ {
127
+ int i_satd;
128
+ int i_mode = *predict_mode;
129
130
- if( h->mb.b_lossless )
131
- x264_predict_lossless_16x16( h, 0, i_mode );
132
- else
133
- h->predict_16x16[i_mode]( p_dst );
134
+ if( h->mb.b_lossless )
135
+ x264_predict_lossless_16x16( h, 0, i_mode );
136
+ else
137
+ h->predict_16x16[i_mode]( p_dst );
138
139
- i_satd = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE ) +
140
- lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] );
141
- COPY2_IF_LT( a->i_satd_i16x16, i_satd, a->i_predict16x16, i_mode );
142
- a->i_satd_i16x16_dir[i_mode] = i_satd;
143
+ i_satd = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE ) +
144
+ lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] );
145
+ COPY2_IF_LT( a->i_satd_i16x16, i_satd, a->i_predict16x16, i_mode );
146
+ a->i_satd_i16x16_dir[i_mode] = i_satd;
147
+ }
148
}
149
- }
150
151
- if( h->sh.i_type == SLICE_TYPE_B )
152
- /* cavlc mb type prefix */
153
- a->i_satd_i16x16 += lambda * i_mb_b_cost_table[I_16x16];
154
+ if( h->sh.i_type == SLICE_TYPE_B )
155
+ /* cavlc mb type prefix */
156
+ a->i_satd_i16x16 += lambda * i_mb_b_cost_table[I_16x16];
157
158
- if( a->i_satd_i16x16 > i16x16_thresh )
159
- return;
160
+ if( a->i_satd_i16x16 > i16x16_thresh )
161
+ return;
162
+ }
163
164
uint16_t *cost_i4x4_mode = (uint16_t*)ALIGN((intptr_t)x264_cost_i4x4_mode,64) + a->i_qp*32 + 8;
165
/* 8x8 prediction selection */
166
167
int i_best = COST_MAX;
168
int i_pred_mode = x264_mb_predict_intra4x4_mode( h, 4*idx );
169
170
- predict_mode = predict_8x8_mode_available( a->b_avoid_topright, h->mb.i_neighbour8[idx], idx );
171
+ const int8_t *predict_mode = predict_8x8_mode_available( a->b_avoid_topright, h->mb.i_neighbour8[idx], idx );
172
h->predict_8x8_filter( p_dst_by, edge, h->mb.i_neighbour8[idx], ALL_NEIGHBORS );
173
174
if( h->pixf.intra_mbcmp_x9_8x8 && predict_mode[8] >= 0 )
175
176
int i_best = COST_MAX;
177
int i_pred_mode = x264_mb_predict_intra4x4_mode( h, idx );
178
179
- predict_mode = predict_4x4_mode_available( a->b_avoid_topright, h->mb.i_neighbour4[idx], idx );
180
+ const int8_t *predict_mode = predict_4x4_mode_available( a->b_avoid_topright, h->mb.i_neighbour4[idx], idx );
181
182
if( (h->mb.i_neighbour4[idx] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )
183
/* emulate missing topright samples */
184
185
int cost00 = h->pixf.mbcmp[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE, pix0, 16 )
186
+ ref_costs + l0_mv_cost + l1_mv_cost;
187
188
- if( h->mb.b_chroma_me )
189
+ if( h->mb.b_chroma_me && cost00 < a->i_cost16x16bi )
190
{
191
ALIGNED_ARRAY_16( pixel, bi, [16*FENC_STRIDE] );
192
193
x264-snapshot-20130723-2245.tar.bz2/encoder/analyse.h -> x264-snapshot-20140321-2245.tar.bz2/encoder/analyse.h
Changed
10
1
2
/*****************************************************************************
3
* analyse.h: macroblock analysis
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
* Loren Merritt <lorenm@u.washington.edu>
10
x264-snapshot-20130723-2245.tar.bz2/encoder/cabac.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/cabac.c
Changed
10
1
2
/*****************************************************************************
3
* cabac.c: cabac bitstream writing
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
* Loren Merritt <lorenm@u.washington.edu>
10
x264-snapshot-20130723-2245.tar.bz2/encoder/cavlc.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/cavlc.c
Changed
20
1
2
/*****************************************************************************
3
* cavlc.c: cavlc bitstream writing
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
* Loren Merritt <lorenm@u.washington.edu>
10
11
&& (!(h->mb.i_mb_y & 1) || IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride])) )
12
{
13
bs_write1( s, MB_INTERLACED );
14
+#if !RDO_SKIP_BS
15
+ h->mb.field_decoding_flag = MB_INTERLACED;
16
+#endif
17
}
18
19
#if !RDO_SKIP_BS
20
x264-snapshot-20130723-2245.tar.bz2/encoder/encoder.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/encoder.c
Changed
923
1
2
/*****************************************************************************
3
* encoder.c: top-level encoder functions
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
* Loren Merritt <lorenm@u.washington.edu>
10
11
#include "macroblock.h"
12
#include "me.h"
13
14
-#if HAVE_VISUALIZE
15
-#include "common/visualize.h"
16
-#endif
17
-
18
//#define DEBUG_MB_TYPE
19
20
#define bs_write_ue bs_write_ue_big
21
22
23
static void x264_frame_dump( x264_t *h )
24
{
25
- FILE *f = fopen( h->param.psz_dump_yuv, "r+b" );
26
+ FILE *f = x264_fopen( h->param.psz_dump_yuv, "r+b" );
27
if( !f )
28
return;
29
30
31
{
32
if( h->param.i_sync_lookahead )
33
x264_lower_thread_priority( 10 );
34
-
35
-#if HAVE_MMX
36
- /* Misalign mask has to be set separately for each thread. */
37
- if( h->param.cpu&X264_CPU_SSE_MISALIGN )
38
- x264_cpu_mask_misalign_sse();
39
-#endif
40
-}
41
-
42
-static void x264_lookahead_thread_init( x264_t *h )
43
-{
44
-#if HAVE_MMX
45
- /* Misalign mask has to be set separately for each thread. */
46
- if( h->param.cpu&X264_CPU_SSE_MISALIGN )
47
- x264_cpu_mask_misalign_sse();
48
-#endif
49
}
50
#endif
51
52
53
x264_log( h, X264_LOG_ERROR, "not compiled with 4:2:0 support\n" );
54
return -1;
55
}
56
- else if( CHROMA_FORMAT != CHROMA_422 && i_csp >= X264_CSP_I422 && i_csp <= X264_CSP_NV16 )
57
+ else if( CHROMA_FORMAT != CHROMA_422 && i_csp >= X264_CSP_I422 && i_csp <= X264_CSP_V210 )
58
{
59
x264_log( h, X264_LOG_ERROR, "not compiled with 4:2:2 support\n" );
60
return -1;
61
62
return -1;
63
}
64
65
+ if( h->param.vui.i_sar_width <= 0 || h->param.vui.i_sar_height <= 0 )
66
+ {
67
+ h->param.vui.i_sar_width = 0;
68
+ h->param.vui.i_sar_height = 0;
69
+ }
70
+
71
if( h->param.i_threads == X264_THREADS_AUTO )
72
h->param.i_threads = x264_cpu_num_processors() * (h->param.b_sliced_threads?2:3)/2;
73
int max_sliced_threads = X264_MAX( 1, (h->param.i_height+15)/16 / 4 );
74
75
{
76
h->param.b_intra_refresh = 0;
77
h->param.analyse.i_weighted_pred = 0;
78
+ h->param.i_frame_reference = 1;
79
+ h->param.i_dpb_size = 1;
80
}
81
82
h->param.i_frame_packing = x264_clip3( h->param.i_frame_packing, -1, 5 );
83
84
x264_log( h, X264_LOG_ERROR, "no ratecontrol method specified\n" );
85
return -1;
86
}
87
+
88
+ if( PARAM_INTERLACED )
89
+ h->param.b_pic_struct = 1;
90
+
91
+ if( h->param.i_avcintra_class )
92
+ {
93
+ if( BIT_DEPTH != 10 )
94
+ {
95
+ x264_log( h, X264_LOG_ERROR, "%2d-bit AVC-Intra is not widely compatible\n", BIT_DEPTH );
96
+ x264_log( h, X264_LOG_ERROR, "10-bit x264 is required to encode AVC-Intra\n" );
97
+ return -1;
98
+ }
99
+
100
+ int type = h->param.i_avcintra_class == 200 ? 2 :
101
+ h->param.i_avcintra_class == 100 ? 1 :
102
+ h->param.i_avcintra_class == 50 ? 0 : -1;
103
+ if( type < 0 )
104
+ {
105
+ x264_log( h, X264_LOG_ERROR, "Invalid AVC-Intra class\n" );
106
+ return -1;
107
+ }
108
+
109
+ /* [50/100/200][res][fps] */
110
+ static const struct
111
+ {
112
+ uint16_t fps_num;
113
+ uint16_t fps_den;
114
+ uint8_t interlaced;
115
+ uint16_t frame_size;
116
+ const uint8_t *cqm_4ic;
117
+ const uint8_t *cqm_8iy;
118
+ } avcintra_lut[3][2][7] =
119
+ {
120
+ {{{ 60000, 1001, 0, 912, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
121
+ { 50, 1, 0, 1100, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
122
+ { 30000, 1001, 0, 912, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
123
+ { 25, 1, 0, 1100, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
124
+ { 24000, 1001, 0, 912, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy }},
125
+ {{ 30000, 1001, 1, 1820, x264_cqm_avci50_4ic, x264_cqm_avci50_1080i_8iy },
126
+ { 25, 1, 1, 2196, x264_cqm_avci50_4ic, x264_cqm_avci50_1080i_8iy },
127
+ { 60000, 1001, 0, 1820, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
128
+ { 30000, 1001, 0, 1820, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
129
+ { 50, 1, 0, 2196, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
130
+ { 25, 1, 0, 2196, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy },
131
+ { 24000, 1001, 0, 1820, x264_cqm_avci50_4ic, x264_cqm_avci50_p_8iy }}},
132
+ {{{ 60000, 1001, 0, 1848, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy },
133
+ { 50, 1, 0, 2224, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy },
134
+ { 30000, 1001, 0, 1848, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy },
135
+ { 25, 1, 0, 2224, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy },
136
+ { 24000, 1001, 0, 1848, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy }},
137
+ {{ 30000, 1001, 1, 3692, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080i_8iy },
138
+ { 25, 1, 1, 4444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080i_8iy },
139
+ { 60000, 1001, 0, 3692, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
140
+ { 30000, 1001, 0, 3692, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
141
+ { 50, 1, 0, 4444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
142
+ { 25, 1, 0, 4444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
143
+ { 24000, 1001, 0, 3692, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy }}},
144
+ {{{ 60000, 1001, 0, 3724, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy },
145
+ { 50, 1, 0, 4472, x264_cqm_avci100_720p_4ic, x264_cqm_avci100_720p_8iy }},
146
+ {{ 30000, 1001, 1, 7444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080i_8iy },
147
+ { 25, 1, 1, 8940, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080i_8iy },
148
+ { 60000, 1001, 0, 7444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
149
+ { 30000, 1001, 0, 7444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
150
+ { 50, 1, 0, 8940, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
151
+ { 25, 1, 0, 8940, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy },
152
+ { 24000, 1001, 0, 7444, x264_cqm_avci100_1080_4ic, x264_cqm_avci100_1080p_8iy }}}
153
+ };
154
+
155
+ int res = -1;
156
+ if( i_csp >= X264_CSP_I420 && i_csp < X264_CSP_I422 && !type )
157
+ {
158
+ if( h->param.i_width == 1440 && h->param.i_height == 1080 ) res = 1;
159
+ else if( h->param.i_width == 960 && h->param.i_height == 720 ) res = 0;
160
+ }
161
+ else if( i_csp >= X264_CSP_I422 && i_csp < X264_CSP_I444 && type )
162
+ {
163
+ if( h->param.i_width == 1920 && h->param.i_height == 1080 ) res = 1;
164
+ else if( h->param.i_width == 1280 && h->param.i_height == 720 ) res = 0;
165
+ }
166
+ else
167
+ {
168
+ x264_log( h, X264_LOG_ERROR, "Invalid colorspace for AVC-Intra %d\n", h->param.i_avcintra_class );
169
+ return -1;
170
+ }
171
+
172
+ if( res < 0 )
173
+ {
174
+ x264_log( h, X264_LOG_ERROR, "Resolution %dx%d invalid for AVC-Intra %d\n",
175
+ h->param.i_width, h->param.i_height, h->param.i_avcintra_class );
176
+ return -1;
177
+ }
178
+
179
+ if( h->param.nalu_process )
180
+ {
181
+ x264_log( h, X264_LOG_ERROR, "nalu_process is not supported in AVC-Intra mode\n" );
182
+ return -1;
183
+ }
184
+
185
+ if( !h->param.b_repeat_headers )
186
+ {
187
+ x264_log( h, X264_LOG_ERROR, "Separate headers not supported in AVC-Intra mode\n" );
188
+ return -1;
189
+ }
190
+
191
+ int i;
192
+ uint32_t fps_num = h->param.i_fps_num, fps_den = h->param.i_fps_den;
193
+ x264_reduce_fraction( &fps_num, &fps_den );
194
+ for( i = 0; i < 7; i++ )
195
+ {
196
+ if( avcintra_lut[type][res][i].fps_num == fps_num &&
197
+ avcintra_lut[type][res][i].fps_den == fps_den &&
198
+ avcintra_lut[type][res][i].interlaced == PARAM_INTERLACED )
199
+ {
200
+ break;
201
+ }
202
+ }
203
+ if( i == 7 )
204
+ {
205
+ x264_log( h, X264_LOG_ERROR, "FPS %d/%d%c not compatible with AVC-Intra\n",
206
+ h->param.i_fps_num, h->param.i_fps_den, PARAM_INTERLACED ? 'i' : 'p' );
207
+ return -1;
208
+ }
209
+
210
+ h->param.i_keyint_max = 1;
211
+ h->param.b_intra_refresh = 0;
212
+ h->param.analyse.i_weighted_pred = 0;
213
+ h->param.i_frame_reference = 1;
214
+ h->param.i_dpb_size = 1;
215
+
216
+ h->param.b_bluray_compat = 0;
217
+ h->param.b_vfr_input = 0;
218
+ h->param.b_aud = 1;
219
+ h->param.vui.i_chroma_loc = 0;
220
+ h->param.i_nal_hrd = X264_NAL_HRD_NONE;
221
+ h->param.b_deblocking_filter = 0;
222
+ h->param.b_stitchable = 1;
223
+ h->param.b_pic_struct = 0;
224
+ h->param.analyse.b_transform_8x8 = 1;
225
+ h->param.analyse.intra = X264_ANALYSE_I8x8;
226
+ h->param.analyse.i_chroma_qp_offset = res && type ? 3 : 4;
227
+ h->param.b_cabac = !type;
228
+ h->param.rc.i_vbv_buffer_size = avcintra_lut[type][res][i].frame_size;
229
+ h->param.rc.i_vbv_max_bitrate =
230
+ h->param.rc.i_bitrate = h->param.rc.i_vbv_buffer_size * fps_num / fps_den;
231
+ h->param.rc.i_rc_method = X264_RC_ABR;
232
+ h->param.rc.f_vbv_buffer_init = 1.0;
233
+ h->param.rc.b_filler = 1;
234
+ h->param.i_cqm_preset = X264_CQM_CUSTOM;
235
+ memcpy( h->param.cqm_4iy, x264_cqm_jvt4i, sizeof(h->param.cqm_4iy) );
236
+ memcpy( h->param.cqm_4ic, avcintra_lut[type][res][i].cqm_4ic, sizeof(h->param.cqm_4ic) );
237
+ memcpy( h->param.cqm_8iy, avcintra_lut[type][res][i].cqm_8iy, sizeof(h->param.cqm_8iy) );
238
+
239
+ /* Need exactly 10 slices of equal MB count... why? $deity knows... */
240
+ h->param.i_slice_max_mbs = ((h->param.i_width + 15) / 16) * ((h->param.i_height + 15) / 16) / 10;
241
+ h->param.i_slice_max_size = 0;
242
+ /* The slice structure only allows a maximum of 2 threads for 1080i/p
243
+ * and 1 or 5 threads for 720p */
244
+ if( h->param.b_sliced_threads )
245
+ {
246
+ if( res )
247
+ h->param.i_threads = X264_MIN( 2, h->param.i_threads );
248
+ else
249
+ {
250
+ h->param.i_threads = X264_MIN( 5, h->param.i_threads );
251
+ if( h->param.i_threads < 5 )
252
+ h->param.i_threads = 1;
253
+ }
254
+ }
255
+
256
+ if( type )
257
+ h->param.vui.i_sar_width = h->param.vui.i_sar_height = 1;
258
+ else
259
+ {
260
+ h->param.vui.i_sar_width = 4;
261
+ h->param.vui.i_sar_height = 3;
262
+ }
263
+
264
+ /* Official encoder doesn't appear to go under 13
265
+ * and Avid cannot handle negative QPs */
266
+ h->param.rc.i_qp_min = X264_MAX( h->param.rc.i_qp_min, QP_BD_OFFSET + 1 );
267
+ }
268
+
269
h->param.rc.f_rf_constant = x264_clip3f( h->param.rc.f_rf_constant, -QP_BD_OFFSET, 51 );
270
h->param.rc.f_rf_constant_max = x264_clip3f( h->param.rc.f_rf_constant_max, -QP_BD_OFFSET, 51 );
271
h->param.rc.i_qp_constant = x264_clip3( h->param.rc.i_qp_constant, 0, QP_MAX );
272
273
h->param.analyse.i_chroma_qp_offset += 6;
274
/* Psy RDO increases overall quantizers to improve the quality of luma--this indirectly hurts chroma quality */
275
/* so we lower the chroma QP offset to compensate */
276
- if( b_open && h->mb.i_psy_rd )
277
+ if( b_open && h->mb.i_psy_rd && !h->param.i_avcintra_class )
278
h->param.analyse.i_chroma_qp_offset -= h->param.analyse.f_psy_rd < 0.25 ? 1 : 2;
279
/* Psy trellis has a similar effect. */
280
- if( b_open && h->mb.i_psy_trellis )
281
+ if( b_open && h->mb.i_psy_trellis && !h->param.i_avcintra_class )
282
h->param.analyse.i_chroma_qp_offset -= h->param.analyse.f_psy_trellis < 0.25 ? 1 : 2;
283
h->param.analyse.i_chroma_qp_offset = x264_clip3(h->param.analyse.i_chroma_qp_offset, -12, 12);
284
/* MB-tree requires AQ to be on, even if the strength is zero. */
285
286
287
h->param.i_sps_id &= 31;
288
289
- if( PARAM_INTERLACED )
290
- h->param.b_pic_struct = 1;
291
-
292
h->param.i_nal_hrd = x264_clip3( h->param.i_nal_hrd, X264_NAL_HRD_NONE, X264_NAL_HRD_CBR );
293
294
if( h->param.i_nal_hrd && !h->param.rc.i_vbv_buffer_size )
295
296
h->param.i_nal_hrd = X264_NAL_HRD_VBR;
297
}
298
299
+ if( h->param.i_nal_hrd == X264_NAL_HRD_CBR )
300
+ h->param.rc.b_filler = 1;
301
+
302
/* ensure the booleans are 0 or 1 so they can be used in math */
303
#define BOOLIFY(x) h->param.x = !!h->param.x
304
BOOLIFY( b_cabac );
305
306
BOOLIFY( b_sliced_threads );
307
BOOLIFY( b_interlaced );
308
BOOLIFY( b_intra_refresh );
309
- BOOLIFY( b_visualize );
310
BOOLIFY( b_aud );
311
BOOLIFY( b_repeat_headers );
312
BOOLIFY( b_annexb );
313
314
BOOLIFY( rc.b_stat_write );
315
BOOLIFY( rc.b_stat_read );
316
BOOLIFY( rc.b_mb_tree );
317
+ BOOLIFY( rc.b_filler );
318
#undef BOOLIFY
319
320
return 0;
321
322
h->param.vui.i_sar_width = i_w;
323
h->param.vui.i_sar_height = i_h;
324
}
325
- x264_sps_init( h->sps, h->param.i_sps_id, &h->param );
326
}
327
}
328
}
329
330
goto fail;
331
}
332
333
+ x264_set_aspect_ratio( h, &h->param, 1 );
334
+
335
x264_sps_init( h->sps, h->param.i_sps_id, &h->param );
336
x264_pps_init( h->pps, h->param.i_sps_id, &h->param, h->sps );
337
338
- x264_set_aspect_ratio( h, &h->param, 1 );
339
-
340
x264_validate_levels( h, 1 );
341
342
h->chroma_qp_table = i_chroma_qp_table + 12 + h->pps->i_chroma_qp_index_offset;
343
344
h->nal_buffer_size = h->out.i_bitstream * 3/2 + 4 + 64; /* +4 for startcode, +64 for nal_escape assembly padding */
345
CHECKED_MALLOC( h->nal_buffer, h->nal_buffer_size );
346
347
+ CHECKED_MALLOC( h->reconfig_h, sizeof(x264_t) );
348
+
349
if( h->param.i_threads > 1 &&
350
x264_threadpool_init( &h->threadpool, h->param.i_threads, (void*)x264_encoder_thread_init, h ) )
351
goto fail;
352
if( h->param.i_lookahead_threads > 1 &&
353
- x264_threadpool_init( &h->lookaheadpool, h->param.i_lookahead_threads, (void*)x264_lookahead_thread_init, h ) )
354
+ x264_threadpool_init( &h->lookaheadpool, h->param.i_lookahead_threads, NULL, NULL ) )
355
goto fail;
356
357
#if HAVE_OPENCL
358
359
CHECKED_MALLOC( h->lookahead_thread[i], sizeof(x264_t) );
360
*h->lookahead_thread[i] = *h;
361
}
362
+ *h->reconfig_h = *h;
363
364
for( int i = 0; i < h->param.i_threads; i++ )
365
{
366
367
if( h->param.psz_dump_yuv )
368
{
369
/* create or truncate the reconstructed video file */
370
- FILE *f = fopen( h->param.psz_dump_yuv, "w" );
371
+ FILE *f = x264_fopen( h->param.psz_dump_yuv, "w" );
372
if( !f )
373
{
374
x264_log( h, X264_LOG_ERROR, "dump_yuv: can't write to %s\n", h->param.psz_dump_yuv );
375
376
return NULL;
377
}
378
379
-/****************************************************************************
380
- * x264_encoder_reconfig:
381
- ****************************************************************************/
382
-int x264_encoder_reconfig( x264_t *h, x264_param_t *param )
383
+/****************************************************************************/
384
+static int x264_encoder_try_reconfig( x264_t *h, x264_param_t *param, int *rc_reconfig )
385
{
386
- /* If the previous frame isn't done encoding, reconfiguring is probably dangerous. */
387
- if( h->param.b_sliced_threads )
388
- if( x264_threadpool_wait_all( h ) < 0 )
389
- return -1;
390
-
391
- int rc_reconfig = 0;
392
- h = h->thread[h->thread[0]->i_thread_phase];
393
+ *rc_reconfig = 0;
394
x264_set_aspect_ratio( h, param, 0 );
395
#define COPY(var) h->param.var = param->var
396
COPY( i_frame_reference ); // but never uses more refs than initially specified
397
398
if( h->param.rc.i_vbv_max_bitrate > 0 && h->param.rc.i_vbv_buffer_size > 0 &&
399
param->rc.i_vbv_max_bitrate > 0 && param->rc.i_vbv_buffer_size > 0 )
400
{
401
- rc_reconfig |= h->param.rc.i_vbv_max_bitrate != param->rc.i_vbv_max_bitrate;
402
- rc_reconfig |= h->param.rc.i_vbv_buffer_size != param->rc.i_vbv_buffer_size;
403
- rc_reconfig |= h->param.rc.i_bitrate != param->rc.i_bitrate;
404
+ *rc_reconfig |= h->param.rc.i_vbv_max_bitrate != param->rc.i_vbv_max_bitrate;
405
+ *rc_reconfig |= h->param.rc.i_vbv_buffer_size != param->rc.i_vbv_buffer_size;
406
+ *rc_reconfig |= h->param.rc.i_bitrate != param->rc.i_bitrate;
407
COPY( rc.i_vbv_max_bitrate );
408
COPY( rc.i_vbv_buffer_size );
409
COPY( rc.i_bitrate );
410
}
411
- rc_reconfig |= h->param.rc.f_rf_constant != param->rc.f_rf_constant;
412
- rc_reconfig |= h->param.rc.f_rf_constant_max != param->rc.f_rf_constant_max;
413
+ *rc_reconfig |= h->param.rc.f_rf_constant != param->rc.f_rf_constant;
414
+ *rc_reconfig |= h->param.rc.f_rf_constant_max != param->rc.f_rf_constant_max;
415
COPY( rc.f_rf_constant );
416
COPY( rc.f_rf_constant_max );
417
#undef COPY
418
419
- mbcmp_init( h );
420
+ return x264_validate_parameters( h, 0 );
421
+}
422
423
- int ret = x264_validate_parameters( h, 0 );
424
+int x264_encoder_reconfig_apply( x264_t *h, x264_param_t *param )
425
+{
426
+ int rc_reconfig;
427
+ int ret = x264_encoder_try_reconfig( h, param, &rc_reconfig );
428
+
429
+ mbcmp_init( h );
430
+ if( !ret )
431
+ x264_sps_init( h->sps, h->param.i_sps_id, &h->param );
432
433
/* Supported reconfiguration options (1-pass only):
434
* vbv-maxrate
435
436
}
437
438
/****************************************************************************
439
+ * x264_encoder_reconfig:
440
+ ****************************************************************************/
441
+int x264_encoder_reconfig( x264_t *h, x264_param_t *param )
442
+{
443
+ h = h->thread[h->thread[0]->i_thread_phase];
444
+ x264_param_t param_save = h->reconfig_h->param;
445
+ h->reconfig_h->param = h->param;
446
+
447
+ int rc_reconfig;
448
+ int ret = x264_encoder_try_reconfig( h->reconfig_h, param, &rc_reconfig );
449
+ if( !ret )
450
+ h->reconfig = 1;
451
+ else
452
+ h->reconfig_h->param = param_save;
453
+
454
+ return ret;
455
+}
456
+
457
+/****************************************************************************
458
* x264_encoder_parameters:
459
****************************************************************************/
460
void x264_encoder_parameters( x264_t *h, x264_param_t *param )
461
462
463
nal->i_payload= 0;
464
nal->p_payload= &h->out.p_bitstream[bs_pos( &h->out.bs ) / 8];
465
+ nal->i_padding= 0;
466
}
467
468
/* if number of allocated nals is not enough, re-allocate a larger one. */
469
470
return x264_nal_check_buffer( h );
471
}
472
473
+static int x264_check_encapsulated_buffer( x264_t *h, x264_t *h0, int start,
474
+ int previous_nal_size, int necessary_size )
475
+{
476
+ if( h0->nal_buffer_size < necessary_size )
477
+ {
478
+ necessary_size *= 2;
479
+ uint8_t *buf = x264_malloc( necessary_size );
480
+ if( !buf )
481
+ return -1;
482
+ if( previous_nal_size )
483
+ memcpy( buf, h0->nal_buffer, previous_nal_size );
484
+
485
+ intptr_t delta = buf - h0->nal_buffer;
486
+ for( int i = 0; i < start; i++ )
487
+ h->out.nal[i].p_payload += delta;
488
+
489
+ x264_free( h0->nal_buffer );
490
+ h0->nal_buffer = buf;
491
+ h0->nal_buffer_size = necessary_size;
492
+ }
493
+
494
+ return 0;
495
+}
496
+
497
static int x264_encoder_encapsulate_nals( x264_t *h, int start )
498
{
499
x264_t *h0 = h->thread[0];
500
501
502
/* Worst-case NAL unit escaping: reallocate the buffer if it's too small. */
503
int necessary_size = previous_nal_size + nal_size * 3/2 + h->out.i_nal * 4 + 4 + 64;
504
- if( h0->nal_buffer_size < necessary_size )
505
- {
506
- necessary_size *= 2;
507
- uint8_t *buf = x264_malloc( necessary_size );
508
- if( !buf )
509
- return -1;
510
- if( previous_nal_size )
511
- memcpy( buf, h0->nal_buffer, previous_nal_size );
512
-
513
- intptr_t delta = buf - h0->nal_buffer;
514
- for( int i = 0; i < start; i++ )
515
- h->out.nal[i].p_payload += delta;
516
-
517
- x264_free( h0->nal_buffer );
518
- h0->nal_buffer = buf;
519
- h0->nal_buffer_size = necessary_size;
520
- }
521
+ for( int i = start; i < h->out.i_nal; i++ )
522
+ necessary_size += h->out.nal[i].i_padding;
523
+ if( x264_check_encapsulated_buffer( h, h0, start, previous_nal_size, necessary_size ) )
524
+ return -1;
525
526
uint8_t *nal_buffer = h0->nal_buffer + previous_nal_size;
527
528
for( int i = start; i < h->out.i_nal; i++ )
529
{
530
- h->out.nal[i].b_long_startcode = !i || h->out.nal[i].i_type == NAL_SPS || h->out.nal[i].i_type == NAL_PPS;
531
+ int old_payload_len = h->out.nal[i].i_payload;
532
+ h->out.nal[i].b_long_startcode = !i || h->out.nal[i].i_type == NAL_SPS || h->out.nal[i].i_type == NAL_PPS ||
533
+ h->param.i_avcintra_class;
534
x264_nal_encode( h, nal_buffer, &h->out.nal[i] );
535
nal_buffer += h->out.nal[i].i_payload;
536
+ if( h->param.i_avcintra_class )
537
+ {
538
+ h->out.nal[i].i_padding -= h->out.nal[i].i_payload - (old_payload_len + NALU_OVERHEAD);
539
+ if( h->out.nal[i].i_padding > 0 )
540
+ {
541
+ memset( nal_buffer, 0, h->out.nal[i].i_padding );
542
+ nal_buffer += h->out.nal[i].i_padding;
543
+ h->out.nal[i].i_payload += h->out.nal[i].i_padding;
544
+ }
545
+ h->out.nal[i].i_padding = X264_MAX( h->out.nal[i].i_padding, 0 );
546
+ }
547
}
548
549
x264_emms();
550
551
}
552
}
553
554
-static int x264_slice_write( x264_t *h )
555
+static intptr_t x264_slice_write( x264_t *h )
556
{
557
int i_skip;
558
int mb_xy, i_mb_x, i_mb_y;
559
560
* other inaccuracies. */
561
int overhead_guess = (NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal)) + 1 + h->param.b_cabac + 5;
562
int slice_max_size = h->param.i_slice_max_size > 0 ? (h->param.i_slice_max_size-overhead_guess)*8 : 0;
563
- int back_up_bitstream = slice_max_size || (!h->param.b_cabac && h->sps->i_profile_idc < PROFILE_HIGH);
564
+ int back_up_bitstream_cavlc = !h->param.b_cabac && h->sps->i_profile_idc < PROFILE_HIGH;
565
+ int back_up_bitstream = slice_max_size || back_up_bitstream_cavlc;
566
int starting_bits = bs_pos(&h->out.bs);
567
int b_deblock = h->sh.i_disable_deblocking_filter_idc != 1;
568
int b_hpel = h->fdec->b_kept_as_ref;
569
570
int thread_last_mb = h->i_threadslice_end * h->mb.i_mb_width - 1;
571
uint8_t *last_emu_check;
572
#define BS_BAK_SLICE_MAX_SIZE 0
573
-#define BS_BAK_SLICE_MIN_MBS 1
574
-#define BS_BAK_ROW_VBV 2
575
- x264_bs_bak_t bs_bak[3];
576
+#define BS_BAK_CAVLC_OVERFLOW 1
577
+#define BS_BAK_SLICE_MIN_MBS 2
578
+#define BS_BAK_ROW_VBV 3
579
+ x264_bs_bak_t bs_bak[4];
580
b_deblock &= b_hpel || h->param.b_full_recon || h->param.psz_dump_yuv;
581
bs_realign( &h->out.bs );
582
583
584
x264_fdec_filter_row( h, i_mb_y, 0 );
585
}
586
587
- if( !(i_mb_y & SLICE_MBAFF) && back_up_bitstream )
588
+ if( back_up_bitstream )
589
{
590
- x264_bitstream_backup( h, &bs_bak[BS_BAK_SLICE_MAX_SIZE], i_skip, 0 );
591
- if( slice_max_size && (thread_last_mb+1-mb_xy) == h->param.i_slice_min_mbs )
592
- x264_bitstream_backup( h, &bs_bak[BS_BAK_SLICE_MIN_MBS], i_skip, 0 );
593
+ if( back_up_bitstream_cavlc )
594
+ x264_bitstream_backup( h, &bs_bak[BS_BAK_CAVLC_OVERFLOW], i_skip, 0 );
595
+ if( slice_max_size && !(i_mb_y & SLICE_MBAFF) )
596
+ {
597
+ x264_bitstream_backup( h, &bs_bak[BS_BAK_SLICE_MAX_SIZE], i_skip, 0 );
598
+ if( (thread_last_mb+1-mb_xy) == h->param.i_slice_min_mbs )
599
+ x264_bitstream_backup( h, &bs_bak[BS_BAK_SLICE_MIN_MBS], i_skip, 0 );
600
+ }
601
}
602
603
if( PARAM_INTERLACED )
604
605
h->mb.i_skip_intra = 0;
606
h->mb.b_skip_mc = 0;
607
h->mb.b_overflow = 0;
608
- x264_bitstream_restore( h, &bs_bak[BS_BAK_SLICE_MAX_SIZE], &i_skip, 0 );
609
+ x264_bitstream_restore( h, &bs_bak[BS_BAK_CAVLC_OVERFLOW], &i_skip, 0 );
610
goto reencode;
611
}
612
}
613
614
cont:
615
h->mb.b_reencode_mb = 0;
616
617
-#if HAVE_VISUALIZE
618
- if( h->param.b_visualize )
619
- x264_visualize_mb( h );
620
-#endif
621
-
622
/* save cache */
623
x264_macroblock_cache_save( h );
624
625
626
x264_frame_push_unused( src, dst->fdec );
627
628
// copy everything except the per-thread pointers and the constants.
629
- memcpy( &dst->i_frame, &src->i_frame, offsetof(x264_t, mb.type) - offsetof(x264_t, i_frame) );
630
+ memcpy( &dst->i_frame, &src->i_frame, offsetof(x264_t, mb.base) - offsetof(x264_t, i_frame) );
631
dst->param = src->param;
632
dst->stat = src->stat;
633
dst->pixf = src->pixf;
634
+ dst->reconfig = src->reconfig;
635
}
636
637
static void x264_thread_sync_stat( x264_t *dst, x264_t *src )
638
639
int i_slice_num = 0;
640
int last_thread_mb = h->sh.i_last_mb;
641
642
-#if HAVE_VISUALIZE
643
- if( h->param.b_visualize )
644
- if( x264_visualize_init( h ) )
645
- goto fail;
646
-#endif
647
-
648
/* init stats */
649
memset( &h->stat.frame, 0, sizeof(h->stat.frame) );
650
h->mb.b_reencode_mb = 0;
651
652
h->sh.i_first_mb -= h->mb.i_mb_stride;
653
}
654
655
-#if HAVE_VISUALIZE
656
- if( h->param.b_visualize )
657
- {
658
- x264_visualize_show( h );
659
- x264_visualize_close( h );
660
- }
661
-#endif
662
-
663
return (void *)0;
664
665
fail:
666
667
thread_current =
668
thread_oldest = h;
669
}
670
-#if HAVE_MMX
671
- if( h->param.cpu&X264_CPU_SSE_MISALIGN )
672
- x264_cpu_mask_misalign_sse();
673
-#endif
674
h->i_cpb_delay_pir_offset = h->i_cpb_delay_pir_offset_next;
675
676
/* no data out */
677
678
679
if( h->i_frame == h->i_thread_frames - 1 )
680
h->i_reordered_pts_delay = h->fenc->i_reordered_pts;
681
+ if( h->reconfig )
682
+ {
683
+ x264_encoder_reconfig_apply( h, &h->reconfig_h->param );
684
+ h->reconfig = 0;
685
+ }
686
if( h->fenc->param )
687
{
688
- x264_encoder_reconfig( h, h->fenc->param );
689
+ x264_encoder_reconfig_apply( h, h->fenc->param );
690
if( h->fenc->param->param_free )
691
{
692
h->fenc->param->param_free( h->fenc->param );
693
694
bs_rbsp_trailing( &h->out.bs );
695
if( x264_nal_end( h ) )
696
return -1;
697
- overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal-1);
698
+ overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD;
699
}
700
701
h->i_nal_type = i_nal_type;
702
703
x264_sps_write( &h->out.bs, h->sps );
704
if( x264_nal_end( h ) )
705
return -1;
706
- overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD;
707
+ /* Pad AUD/SPS to 256 bytes like Panasonic */
708
+ if( h->param.i_avcintra_class )
709
+ h->out.nal[h->out.i_nal-1].i_padding = 256 - bs_pos( &h->out.bs ) / 8 - 2*NALU_OVERHEAD;
710
+ overhead += h->out.nal[h->out.i_nal-1].i_payload + h->out.nal[h->out.i_nal-1].i_padding + NALU_OVERHEAD;
711
712
/* generate picture parameters */
713
x264_nal_start( h, NAL_PPS, NAL_PRIORITY_HIGHEST );
714
x264_pps_write( &h->out.bs, h->sps, h->pps );
715
if( x264_nal_end( h ) )
716
return -1;
717
- overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD;
718
+ if( h->param.i_avcintra_class )
719
+ h->out.nal[h->out.i_nal-1].i_padding = 256 - h->out.nal[h->out.i_nal-1].i_payload - NALU_OVERHEAD;
720
+ overhead += h->out.nal[h->out.i_nal-1].i_payload + h->out.nal[h->out.i_nal-1].i_padding + NALU_OVERHEAD;
721
}
722
723
/* when frame threading is used, buffering period sei is written in x264_encoder_frame_end */
724
725
x264_sei_buffering_period_write( h, &h->out.bs );
726
if( x264_nal_end( h ) )
727
return -1;
728
- overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD;
729
+ overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
730
}
731
}
732
733
734
h->fenc->extra_sei.payloads[i].payload_type );
735
if( x264_nal_end( h ) )
736
return -1;
737
- overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal-1);
738
+ overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
739
if( h->fenc->extra_sei.sei_free )
740
{
741
h->fenc->extra_sei.sei_free( h->fenc->extra_sei.payloads[i].payload );
742
743
744
if( h->fenc->b_keyframe )
745
{
746
- if( h->param.b_repeat_headers && h->fenc->i_frame == 0 )
747
+ /* Avid's decoder strictly wants two SEIs for AVC-Intra so we can't insert the x264 SEI */
748
+ if( h->param.b_repeat_headers && h->fenc->i_frame == 0 && !h->param.i_avcintra_class )
749
{
750
/* identify ourself */
751
x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
752
753
return -1;
754
if( x264_nal_end( h ) )
755
return -1;
756
- overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal-1);
757
+ overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
758
}
759
760
if( h->fenc->i_type != X264_TYPE_IDR )
761
762
x264_sei_recovery_point_write( h, &h->out.bs, time_to_recovery );
763
if( x264_nal_end( h ) )
764
return -1;
765
- overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal-1);
766
+ overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
767
}
768
769
- if ( h->param.i_frame_packing >= 0 )
770
+ if( h->param.i_frame_packing >= 0 )
771
{
772
x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
773
x264_sei_frame_packing_write( h, &h->out.bs );
774
if( x264_nal_end( h ) )
775
return -1;
776
- overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal-1);
777
+ overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
778
}
779
}
780
781
782
x264_sei_pic_timing_write( h, &h->out.bs );
783
if( x264_nal_end( h ) )
784
return -1;
785
- overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal-1);
786
+ overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
787
}
788
789
/* As required by Blu-ray. */
790
791
x264_sei_dec_ref_pic_marking_write( h, &h->out.bs );
792
if( x264_nal_end( h ) )
793
return -1;
794
- overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal-1);
795
+ overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
796
}
797
798
if( h->fenc->b_keyframe && h->param.b_intra_refresh )
799
h->i_cpb_delay_pir_offset_next = h->fenc->i_cpb_delay;
800
801
+ /* Filler space: 10 or 18 SEIs' worth of space, depending on resolution */
802
+ if( h->param.i_avcintra_class )
803
+ {
804
+ /* Write an empty filler NAL to mimic the AUD in the P2 format*/
805
+ x264_nal_start( h, NAL_FILLER, NAL_PRIORITY_DISPOSABLE );
806
+ x264_filler_write( h, &h->out.bs, 0 );
807
+ if( x264_nal_end( h ) )
808
+ return -1;
809
+ overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD;
810
+
811
+ /* All lengths are magic lengths that decoders expect to see */
812
+ /* "UMID" SEI */
813
+ x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
814
+ if( x264_sei_avcintra_umid_write( h, &h->out.bs ) < 0 )
815
+ return -1;
816
+ if( x264_nal_end( h ) )
817
+ return -1;
818
+ overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD;
819
+
820
+ int unpadded_len;
821
+ int total_len;
822
+ if( h->param.i_height == 1080 )
823
+ {
824
+ unpadded_len = 5780;
825
+ total_len = 17*512;
826
+ }
827
+ else
828
+ {
829
+ unpadded_len = 2900;
830
+ total_len = 9*512;
831
+ }
832
+ /* "VANC" SEI */
833
+ x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
834
+ if( x264_sei_avcintra_vanc_write( h, &h->out.bs, unpadded_len ) < 0 )
835
+ return -1;
836
+ if( x264_nal_end( h ) )
837
+ return -1;
838
+
839
+ h->out.nal[h->out.i_nal-1].i_padding = total_len - h->out.nal[h->out.i_nal-1].i_payload - SEI_OVERHEAD;
840
+ overhead += h->out.nal[h->out.i_nal-1].i_payload + h->out.nal[h->out.i_nal-1].i_padding + SEI_OVERHEAD;
841
+ }
842
+
843
/* Init the rate control */
844
/* FIXME: Include slice header bit cost. */
845
x264_ratecontrol_start( h, h->fenc->i_qpplus1, overhead*8 );
846
847
pic_out->hrd_timing = h->fenc->hrd_timing;
848
pic_out->prop.f_crf_avg = h->fdec->f_crf_avg;
849
850
- while( filler > 0 )
851
+ /* Filler in AVC-Intra mode is written as zero bytes to the last slice
852
+ * We don't know the size of the last slice until encapsulation so we add filler to the encapsulated NAL */
853
+ if( h->param.i_avcintra_class )
854
+ {
855
+ x264_t *h0 = h->thread[0];
856
+ int ret = x264_check_encapsulated_buffer( h, h0, h->out.i_nal, frame_size, frame_size + filler );
857
+ if( ret < 0 )
858
+ return -1;
859
+ memset( h->out.nal[0].p_payload + frame_size, 0, filler );
860
+ h->out.nal[h->out.i_nal-1].i_payload += filler;
861
+ h->out.nal[h->out.i_nal-1].i_padding = filler;
862
+ frame_size += filler;
863
+ }
864
+ else
865
{
866
- int f, overhead;
867
- overhead = (FILLER_OVERHEAD - h->param.b_annexb);
868
- if( h->param.i_slice_max_size && filler > h->param.i_slice_max_size )
869
- {
870
- int next_size = filler - h->param.i_slice_max_size;
871
- int overflow = X264_MAX( overhead - next_size, 0 );
872
- f = h->param.i_slice_max_size - overhead - overflow;
873
- }
874
- else
875
- f = X264_MAX( 0, filler - overhead );
876
+ while( filler > 0 )
877
+ {
878
+ int f, overhead;
879
+ overhead = (FILLER_OVERHEAD - h->param.b_annexb);
880
+ if( h->param.i_slice_max_size && filler > h->param.i_slice_max_size )
881
+ {
882
+ int next_size = filler - h->param.i_slice_max_size;
883
+ int overflow = X264_MAX( overhead - next_size, 0 );
884
+ f = h->param.i_slice_max_size - overhead - overflow;
885
+ }
886
+ else
887
+ f = X264_MAX( 0, filler - overhead );
888
889
- if( x264_bitstream_check_buffer_filler( h, f ) )
890
- return -1;
891
- x264_nal_start( h, NAL_FILLER, NAL_PRIORITY_DISPOSABLE );
892
- x264_filler_write( h, &h->out.bs, f );
893
- if( x264_nal_end( h ) )
894
- return -1;
895
- int total_size = x264_encoder_encapsulate_nals( h, h->out.i_nal-1 );
896
- if( total_size < 0 )
897
- return -1;
898
- frame_size += total_size;
899
- filler -= total_size;
900
+ if( x264_bitstream_check_buffer_filler( h, f ) )
901
+ return -1;
902
+ x264_nal_start( h, NAL_FILLER, NAL_PRIORITY_DISPOSABLE );
903
+ x264_filler_write( h, &h->out.bs, f );
904
+ if( x264_nal_end( h ) )
905
+ return -1;
906
+ int total_size = x264_encoder_encapsulate_nals( h, h->out.i_nal-1 );
907
+ if( total_size < 0 )
908
+ return -1;
909
+ frame_size += total_size;
910
+ filler -= total_size;
911
+ }
912
}
913
914
/* End bitstream, set output */
915
916
917
x264_cqm_delete( h );
918
x264_free( h->nal_buffer );
919
+ x264_free( h->reconfig_h );
920
x264_analyse_free_costs( h );
921
922
if( h->i_thread_frames > 1 )
923
x264-snapshot-20130723-2245.tar.bz2/encoder/lookahead.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/lookahead.c
Changed
28
1
2
/*****************************************************************************
3
* lookahead.c: high-level lookahead functions
4
*****************************************************************************
5
- * Copyright (C) 2010-2013 Avail Media and x264 project
6
+ * Copyright (C) 2010-2014 Avail Media and x264 project
7
*
8
* Authors: Michael Kazmier <mkazmier@availmedia.com>
9
* Alex Giladi <agiladi@availmedia.com>
10
11
12
static void *x264_lookahead_thread( x264_t *h )
13
{
14
- int shift;
15
-#if HAVE_MMX
16
- if( h->param.cpu&X264_CPU_SSE_MISALIGN )
17
- x264_cpu_mask_misalign_sse();
18
-#endif
19
while( !h->lookahead->b_exit_thread )
20
{
21
x264_pthread_mutex_lock( &h->lookahead->ifbuf.mutex );
22
x264_pthread_mutex_lock( &h->lookahead->next.mutex );
23
- shift = X264_MIN( h->lookahead->next.i_max_size - h->lookahead->next.i_size, h->lookahead->ifbuf.i_size );
24
+ int shift = X264_MIN( h->lookahead->next.i_max_size - h->lookahead->next.i_size, h->lookahead->ifbuf.i_size );
25
x264_lookahead_shift( &h->lookahead->next, &h->lookahead->ifbuf, shift );
26
x264_pthread_mutex_unlock( &h->lookahead->next.mutex );
27
if( h->lookahead->next.i_size <= h->lookahead->i_slicetype_length + h->param.b_vfr_input )
28
x264-snapshot-20130723-2245.tar.bz2/encoder/macroblock.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/macroblock.c
Changed
28
1
2
/*****************************************************************************
3
* macroblock.c: macroblock encoding
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
* Loren Merritt <lorenm@u.washington.edu>
10
* Jason Garrett-Glaser <darkshikari@gmail.com>
11
- * Henrik Gramner <hengar-6@student.ltu.se>
12
+ * Henrik Gramner <henrik@gramner.com>
13
*
14
* This program is free software; you can redistribute it and/or modify
15
* it under the terms of the GNU General Public License as published by
16
17
return;
18
}
19
20
- M32( &h->mb.cache.non_zero_count[x264_scan8[ 0+p*16]] ) = 0;
21
- M32( &h->mb.cache.non_zero_count[x264_scan8[ 2+p*16]] ) = 0;
22
- M32( &h->mb.cache.non_zero_count[x264_scan8[ 8+p*16]] ) = 0;
23
- M32( &h->mb.cache.non_zero_count[x264_scan8[10+p*16]] ) = 0;
24
+ CLEAR_16x16_NNZ( p );
25
26
h->dctf.sub16x16_dct( dct4x4, p_src, p_dst );
27
28
x264-snapshot-20130723-2245.tar.bz2/encoder/macroblock.h -> x264-snapshot-20140321-2245.tar.bz2/encoder/macroblock.h
Changed
25
1
2
/*****************************************************************************
3
* macroblock.h: macroblock encoding
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Loren Merritt <lorenm@u.washington.edu>
9
* Laurent Aimar <fenrir@via.ecp.fr>
10
11
#define CLEAR_16x16_NNZ( p ) \
12
do\
13
{\
14
- M32( &h->mb.cache.non_zero_count[x264_scan8[16*p+ 0]] ) = 0;\
15
- M32( &h->mb.cache.non_zero_count[x264_scan8[16*p+ 2]] ) = 0;\
16
- M32( &h->mb.cache.non_zero_count[x264_scan8[16*p+ 8]] ) = 0;\
17
- M32( &h->mb.cache.non_zero_count[x264_scan8[16*p+10]] ) = 0;\
18
+ M32( &h->mb.cache.non_zero_count[x264_scan8[16*p] + 0*8] ) = 0;\
19
+ M32( &h->mb.cache.non_zero_count[x264_scan8[16*p] + 1*8] ) = 0;\
20
+ M32( &h->mb.cache.non_zero_count[x264_scan8[16*p] + 2*8] ) = 0;\
21
+ M32( &h->mb.cache.non_zero_count[x264_scan8[16*p] + 3*8] ) = 0;\
22
} while(0)
23
24
/* A special for loop that iterates branchlessly over each set
25
x264-snapshot-20130723-2245.tar.bz2/encoder/me.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/me.c
Changed
38
1
2
/*****************************************************************************
3
* me.c: motion estimation
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Loren Merritt <lorenm@u.washington.edu>
9
* Laurent Aimar <fenrir@via.ecp.fr>
10
11
12
/* hexagon */
13
COST_MV_X3_DIR( -2,0, -1, 2, 1, 2, costs );
14
- COST_MV_X3_DIR( 2,0, 1,-2, -1,-2, costs+3 );
15
+ COST_MV_X3_DIR( 2,0, 1,-2, -1,-2, costs+4 ); /* +4 for 16-byte alignment */
16
bcost <<= 3;
17
COPY1_IF_LT( bcost, (costs[0]<<3)+2 );
18
COPY1_IF_LT( bcost, (costs[1]<<3)+3 );
19
COPY1_IF_LT( bcost, (costs[2]<<3)+4 );
20
- COPY1_IF_LT( bcost, (costs[3]<<3)+5 );
21
- COPY1_IF_LT( bcost, (costs[4]<<3)+6 );
22
- COPY1_IF_LT( bcost, (costs[5]<<3)+7 );
23
+ COPY1_IF_LT( bcost, (costs[4]<<3)+5 );
24
+ COPY1_IF_LT( bcost, (costs[5]<<3)+6 );
25
+ COPY1_IF_LT( bcost, (costs[6]<<3)+7 );
26
27
if( bcost&7 )
28
{
29
30
for( i = 0; i < xn-2; i += 3 )
31
{
32
pixel *ref = p_fref_w+min_x+my*stride;
33
- int sads[3];
34
+ ALIGNED_ARRAY_16( int, sads,[4] ); /* padded to [4] for asm */
35
h->pixf.sad_x3[i_pixel]( p_fenc, ref+xs[i], ref+xs[i+1], ref+xs[i+2], stride, sads );
36
for( int j = 0; j < 3; j++ )
37
{
38
x264-snapshot-20130723-2245.tar.bz2/encoder/me.h -> x264-snapshot-20140321-2245.tar.bz2/encoder/me.h
Changed
10
1
2
/*****************************************************************************
3
* me.h: motion estimation
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Loren Merritt <lorenm@u.washington.edu>
9
* Laurent Aimar <fenrir@via.ecp.fr>
10
x264-snapshot-20130723-2245.tar.bz2/encoder/ratecontrol.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/ratecontrol.c
Changed
171
1
2
/*****************************************************************************
3
* ratecontrol.c: ratecontrol
4
*****************************************************************************
5
- * Copyright (C) 2005-2013 x264 project
6
+ * Copyright (C) 2005-2014 x264 project
7
*
8
* Authors: Loren Merritt <lorenm@u.washington.edu>
9
* Michael Niedermayer <michaelni@gmx.at>
10
11
double vbv_max_rate; /* # of bits added to buffer_fill per second */
12
predictor_t *pred; /* predict frame size from satd */
13
int single_frame_vbv;
14
- double rate_factor_max_increment; /* Don't allow RF above (CRF + this value). */
15
+ float rate_factor_max_increment; /* Don't allow RF above (CRF + this value). */
16
17
/* ABR stuff */
18
int last_satd;
19
20
h->param.rc.i_vbv_buffer_size );
21
}
22
23
- int vbv_buffer_size = h->param.rc.i_vbv_buffer_size * 1000;
24
- int vbv_max_bitrate = h->param.rc.i_vbv_max_bitrate * 1000;
25
+ int kilobit_size = h->param.i_avcintra_class ? 1024 : 1000;
26
+ int vbv_buffer_size = h->param.rc.i_vbv_buffer_size * kilobit_size;
27
+ int vbv_max_bitrate = h->param.rc.i_vbv_max_bitrate * kilobit_size;
28
29
/* Init HRD */
30
if( h->param.i_nal_hrd && b_init )
31
32
#define BR_SHIFT 6
33
#define CPB_SHIFT 4
34
35
- int bitrate = 1000*h->param.rc.i_vbv_max_bitrate;
36
- int bufsize = 1000*h->param.rc.i_vbv_buffer_size;
37
-
38
// normalize HRD size and rate to the value / scale notation
39
- h->sps->vui.hrd.i_bit_rate_scale = x264_clip3( x264_ctz( bitrate ) - BR_SHIFT, 0, 15 );
40
- h->sps->vui.hrd.i_bit_rate_value = bitrate >> ( h->sps->vui.hrd.i_bit_rate_scale + BR_SHIFT );
41
+ h->sps->vui.hrd.i_bit_rate_scale = x264_clip3( x264_ctz( vbv_max_bitrate ) - BR_SHIFT, 0, 15 );
42
+ h->sps->vui.hrd.i_bit_rate_value = vbv_max_bitrate >> ( h->sps->vui.hrd.i_bit_rate_scale + BR_SHIFT );
43
h->sps->vui.hrd.i_bit_rate_unscaled = h->sps->vui.hrd.i_bit_rate_value << ( h->sps->vui.hrd.i_bit_rate_scale + BR_SHIFT );
44
- h->sps->vui.hrd.i_cpb_size_scale = x264_clip3( x264_ctz( bufsize ) - CPB_SHIFT, 0, 15 );
45
- h->sps->vui.hrd.i_cpb_size_value = bufsize >> ( h->sps->vui.hrd.i_cpb_size_scale + CPB_SHIFT );
46
+ h->sps->vui.hrd.i_cpb_size_scale = x264_clip3( x264_ctz( vbv_buffer_size ) - CPB_SHIFT, 0, 15 );
47
+ h->sps->vui.hrd.i_cpb_size_value = vbv_buffer_size >> ( h->sps->vui.hrd.i_cpb_size_scale + CPB_SHIFT );
48
h->sps->vui.hrd.i_cpb_size_unscaled = h->sps->vui.hrd.i_cpb_size_value << ( h->sps->vui.hrd.i_cpb_size_scale + CPB_SHIFT );
49
50
#undef CPB_SHIFT
51
52
h->sps->vui.hrd.i_cpb_size_unscaled = vbv_buffer_size;
53
54
if( rc->b_vbv_min_rate )
55
- rc->bitrate = h->param.rc.i_bitrate * 1000.;
56
+ rc->bitrate = (double)h->param.rc.i_bitrate * kilobit_size;
57
rc->buffer_rate = vbv_max_bitrate / rc->fps;
58
rc->vbv_max_rate = vbv_max_bitrate;
59
rc->buffer_size = vbv_buffer_size;
60
61
else
62
rc->qcompress = h->param.rc.f_qcompress;
63
64
- rc->bitrate = h->param.rc.i_bitrate * 1000.;
65
+ rc->bitrate = h->param.rc.i_bitrate * (h->param.i_avcintra_class ? 1024. : 1000.);
66
rc->rate_tolerance = h->param.rc.f_rate_tolerance;
67
rc->nmb = h->mb.i_mb_count;
68
rc->last_non_b_pict_type = -1;
69
70
char *mbtree_stats_in = x264_strcat_filename( h->param.rc.psz_stat_in, ".mbtree" );
71
if( !mbtree_stats_in )
72
return -1;
73
- rc->p_mbtree_stat_file_in = fopen( mbtree_stats_in, "rb" );
74
+ rc->p_mbtree_stat_file_in = x264_fopen( mbtree_stats_in, "rb" );
75
x264_free( mbtree_stats_in );
76
if( !rc->p_mbtree_stat_file_in )
77
{
78
79
* so we'll at least try to roughly approximate this effect. */
80
res_factor_bits = powf( res_factor, 0.7 );
81
82
- if( ( p = strstr( opts, "timebase=" ) ) && sscanf( p, "timebase=%u/%u", &k, &l ) != 2 )
83
+ if( !( p = strstr( opts, "timebase=" ) ) || sscanf( p, "timebase=%u/%u", &k, &l ) != 2 )
84
{
85
x264_log( h, X264_LOG_ERROR, "timebase specified in stats file not valid\n" );
86
return -1;
87
88
if( !rc->psz_stat_file_tmpname )
89
return -1;
90
91
- rc->p_stat_file_out = fopen( rc->psz_stat_file_tmpname, "wb" );
92
+ rc->p_stat_file_out = x264_fopen( rc->psz_stat_file_tmpname, "wb" );
93
if( rc->p_stat_file_out == NULL )
94
{
95
x264_log( h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n" );
96
97
if( !rc->psz_mbtree_stat_file_tmpname || !rc->psz_mbtree_stat_file_name )
98
return -1;
99
100
- rc->p_mbtree_stat_file_out = fopen( rc->psz_mbtree_stat_file_tmpname, "wb" );
101
+ rc->p_mbtree_stat_file_out = x264_fopen( rc->psz_mbtree_stat_file_tmpname, "wb" );
102
if( rc->p_mbtree_stat_file_out == NULL )
103
{
104
x264_log( h, X264_LOG_ERROR, "ratecontrol_init: can't open mbtree stats file\n" );
105
106
b_regular_file = x264_is_regular_file( rc->p_stat_file_out );
107
fclose( rc->p_stat_file_out );
108
if( h->i_frame >= rc->num_entries && b_regular_file )
109
- if( rename( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out ) != 0 )
110
+ if( x264_rename( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out ) != 0 )
111
{
112
x264_log( h, X264_LOG_ERROR, "failed to rename \"%s\" to \"%s\"\n",
113
rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out );
114
115
b_regular_file = x264_is_regular_file( rc->p_mbtree_stat_file_out );
116
fclose( rc->p_mbtree_stat_file_out );
117
if( h->i_frame >= rc->num_entries && b_regular_file )
118
- if( rename( rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name ) != 0 )
119
+ if( x264_rename( rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name ) != 0 )
120
{
121
x264_log( h, X264_LOG_ERROR, "failed to rename \"%s\" to \"%s\"\n",
122
rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name );
123
124
x264_emms();
125
126
if( zone && (!rc->prev_zone || zone->param != rc->prev_zone->param) )
127
- x264_encoder_reconfig( h, zone->param );
128
+ x264_encoder_reconfig_apply( h, zone->param );
129
rc->prev_zone = zone;
130
131
if( h->param.rc.b_stat_read )
132
133
rct->buffer_fill_final -= (uint64_t)bits * h->sps->vui.i_time_scale;
134
135
if( rct->buffer_fill_final < 0 )
136
- x264_log( h, X264_LOG_WARNING, "VBV underflow (frame %d, %.0f bits)\n", h->i_frame, (double)rct->buffer_fill_final / h->sps->vui.i_time_scale );
137
+ {
138
+ double underflow = (double)rct->buffer_fill_final / h->sps->vui.i_time_scale;
139
+ if( rcc->rate_factor_max_increment && rcc->qpm >= rcc->qp_novbv + rcc->rate_factor_max_increment )
140
+ x264_log( h, X264_LOG_DEBUG, "VBV underflow due to CRF-max (frame %d, %.0f bits)\n", h->i_frame, underflow );
141
+ else
142
+ x264_log( h, X264_LOG_WARNING, "VBV underflow (frame %d, %.0f bits)\n", h->i_frame, underflow );
143
+ }
144
rct->buffer_fill_final = X264_MAX( rct->buffer_fill_final, 0 );
145
- rct->buffer_fill_final += (uint64_t)bitrate * h->sps->vui.i_num_units_in_tick * h->fenc->i_cpb_duration;
146
147
- if( h->sps->vui.hrd.b_cbr_hrd && rct->buffer_fill_final > buffer_size )
148
+ if( h->param.i_avcintra_class )
149
+ rct->buffer_fill_final += buffer_size;
150
+ else
151
+ rct->buffer_fill_final += (uint64_t)bitrate * h->sps->vui.i_num_units_in_tick * h->fenc->i_cpb_duration;
152
+
153
+ if( h->param.rc.b_filler && rct->buffer_fill_final > buffer_size )
154
{
155
int64_t scale = (int64_t)h->sps->vui.i_time_scale * 8;
156
filler = (rct->buffer_fill_final - buffer_size + scale - 1) / scale;
157
- bits = X264_MAX( (FILLER_OVERHEAD - h->param.b_annexb), filler ) * 8;
158
+ bits = h->param.i_avcintra_class ? filler * 8 : X264_MAX( (FILLER_OVERHEAD - h->param.b_annexb), filler ) * 8;
159
rct->buffer_fill_final -= (uint64_t)bits * h->sps->vui.i_time_scale;
160
}
161
else
162
163
* we're adding or removing bits), and starting on the earliest frame that
164
* can influence the buffer fill of that end frame. */
165
x264_ratecontrol_t *rcc = h->rc;
166
- const double buffer_min = (over ? .1 : .1) * rcc->buffer_size;
167
+ const double buffer_min = .1 * rcc->buffer_size;
168
const double buffer_max = .9 * rcc->buffer_size;
169
double fill = fills[*t0-1];
170
double parity = over ? 1. : -1.;
171
x264-snapshot-20130723-2245.tar.bz2/encoder/ratecontrol.h -> x264-snapshot-20140321-2245.tar.bz2/encoder/ratecontrol.h
Changed
18
1
2
/*****************************************************************************
3
* ratecontrol.h: ratecontrol
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Loren Merritt <lorenm@u.washington.edu>
9
* Laurent Aimar <fenrir@via.ecp.fr>
10
11
void x264_ratecontrol_delete( x264_t * );
12
13
void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init );
14
+int x264_encoder_reconfig_apply( x264_t *h, x264_param_t *param );
15
16
void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame, float *quant_offsets );
17
int x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame, float *quant_offsets );
18
x264-snapshot-20130723-2245.tar.bz2/encoder/rdo.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/rdo.c
Changed
10
1
2
/*****************************************************************************
3
* rdo.c: rate-distortion optimization
4
*****************************************************************************
5
- * Copyright (C) 2005-2013 x264 project
6
+ * Copyright (C) 2005-2014 x264 project
7
*
8
* Authors: Loren Merritt <lorenm@u.washington.edu>
9
* Jason Garrett-Glaser <darkshikari@gmail.com>
10
x264-snapshot-20130723-2245.tar.bz2/encoder/set.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/set.c
Changed
114
1
2
/*****************************************************************************
3
* set: header writing
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
* Loren Merritt <lorenm@u.washington.edu>
10
11
12
// Indexed by pic_struct values
13
static const uint8_t num_clock_ts[10] = { 0, 1, 1, 1, 2, 2, 3, 3, 2, 3 };
14
+const static uint8_t avcintra_uuid[] = {0xF7, 0x49, 0x3E, 0xB3, 0xD4, 0x00, 0x47, 0x96, 0x86, 0x86, 0xC9, 0x70, 0x7B, 0x64, 0x37, 0x2A};
15
16
static void transpose( uint8_t *buf, int w )
17
{
18
19
bs_write( s, 8, payload_size-i );
20
21
for( i = 0; i < payload_size; i++ )
22
- bs_write(s, 8, payload[i] );
23
+ bs_write( s, 8, payload[i] );
24
25
bs_rbsp_trailing( s );
26
bs_flush( s );
27
28
}
29
30
/* FIXME: not sufficient for interlaced video */
31
- sps->vui.b_chroma_loc_info_present = param->vui.i_chroma_loc > 0 && param->vui.i_chroma_loc <= 5;
32
+ sps->vui.b_chroma_loc_info_present = param->vui.i_chroma_loc > 0 && param->vui.i_chroma_loc <= 5 &&
33
+ sps->i_chroma_format_idc == CHROMA_420;
34
if( sps->vui.b_chroma_loc_info_present )
35
{
36
sps->vui.i_chroma_loc_top = param->vui.i_chroma_loc;
37
38
39
// NOTE: HRD related parts of the SPS are initialised in x264_ratecontrol_init_reconfigurable
40
41
- sps->vui.b_bitstream_restriction = 1;
42
+ sps->vui.b_bitstream_restriction = param->i_keyint_max > 1;
43
if( sps->vui.b_bitstream_restriction )
44
{
45
sps->vui.b_motion_vectors_over_pic_boundaries = 1;
46
47
pps->i_sps_id = sps->i_id;
48
pps->b_cabac = param->b_cabac;
49
50
- pps->b_pic_order = param->b_interlaced;
51
+ pps->b_pic_order = !param->i_avcintra_class && param->b_interlaced;
52
pps->i_num_slice_groups = 1;
53
54
pps->i_num_ref_idx_l0_default_active = param->i_frame_reference;
55
56
57
memcpy( payload, uuid, 16 );
58
sprintf( payload+16, "x264 - core %d%s - H.264/MPEG-4 AVC codec - "
59
- "Copy%s 2003-2013 - http://www.videolan.org/x264.html - options: %s",
60
+ "Copy%s 2003-2014 - http://www.videolan.org/x264.html - options: %s",
61
X264_BUILD, X264_VERSION, HAVE_GPL?"left":"right", opts );
62
length = strlen(payload)+1;
63
64
65
x264_sei_write( s, tmp_buf, bs_pos( &q ) / 8, SEI_DEC_REF_PIC_MARKING );
66
}
67
68
+int x264_sei_avcintra_umid_write( x264_t *h, bs_t *s )
69
+{
70
+ uint8_t data[512];
71
+ const char *msg = "UMID";
72
+ const int len = 497;
73
+
74
+ memset( data, 0xff, len );
75
+ memcpy( data, avcintra_uuid, sizeof(avcintra_uuid) );
76
+ memcpy( data+16, msg, strlen(msg) );
77
+
78
+ data[20] = 0x13;
79
+ /* These bytes appear to be some sort of frame/seconds counter in certain applications,
80
+ * but others jump around, so leave them as zero for now */
81
+ data[21] = data[22] = 0;
82
+
83
+ data[28] = 0x14;
84
+ data[36] = 0x60;
85
+ data[41] = 0x22; /* Believed to be some sort of end of basic UMID identifier */
86
+
87
+ x264_sei_write( &h->out.bs, data, len, SEI_USER_DATA_UNREGISTERED );
88
+
89
+ return 0;
90
+}
91
+
92
+int x264_sei_avcintra_vanc_write( x264_t *h, bs_t *s, int len )
93
+{
94
+ uint8_t data[6000];
95
+ const char *msg = "VANC";
96
+ if( len > sizeof(data) )
97
+ {
98
+ x264_log( h, X264_LOG_ERROR, "AVC-Intra SEI is too large (%d)\n", len );
99
+ return -1;
100
+ }
101
+
102
+ memset( data, 0xff, len );
103
+ memcpy( data, avcintra_uuid, sizeof(avcintra_uuid) );
104
+ memcpy( data+16, msg, strlen(msg) );
105
+
106
+ x264_sei_write( &h->out.bs, data, len, SEI_USER_DATA_UNREGISTERED );
107
+
108
+ return 0;
109
+}
110
+
111
const x264_level_t x264_levels[] =
112
{
113
{ 10, 1485, 99, 396, 64, 175, 64, 64, 0, 2, 0, 0, 1 },
114
x264-snapshot-20130723-2245.tar.bz2/encoder/set.h -> x264-snapshot-20140321-2245.tar.bz2/encoder/set.h
Changed
19
1
2
/*****************************************************************************
3
* set.h: header writing
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
* Loren Merritt <lorenm@u.washington.edu>
10
11
void x264_sei_pic_timing_write( x264_t *h, bs_t *s );
12
void x264_sei_dec_ref_pic_marking_write( x264_t *h, bs_t *s );
13
void x264_sei_frame_packing_write( x264_t *h, bs_t *s );
14
+int x264_sei_avcintra_umid_write( x264_t *h, bs_t *s );
15
+int x264_sei_avcintra_vanc_write( x264_t *h, bs_t *s, int len );
16
void x264_sei_write( bs_t *s, uint8_t *payload, int payload_size, int payload_type );
17
void x264_filler_write( x264_t *h, bs_t *s, int filler );
18
19
x264-snapshot-20130723-2245.tar.bz2/encoder/slicetype-cl.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/slicetype-cl.c
Changed
10
1
2
/*****************************************************************************
3
* slicetype-cl.c: OpenCL slicetype decision code (lowres lookahead)
4
*****************************************************************************
5
- * Copyright (C) 2012-2013 x264 project
6
+ * Copyright (C) 2012-2014 x264 project
7
*
8
* Authors: Steve Borho <sborho@multicorewareinc.com>
9
*
10
x264-snapshot-20130723-2245.tar.bz2/encoder/slicetype.c -> x264-snapshot-20140321-2245.tar.bz2/encoder/slicetype.c
Changed
119
1
2
/*****************************************************************************
3
* slicetype.c: lookahead analysis
4
*****************************************************************************
5
- * Copyright (C) 2005-2013 x264 project
6
+ * Copyright (C) 2005-2014 x264 project
7
*
8
* Authors: Jason Garrett-Glaser <darkshikari@gmail.com>
9
* Loren Merritt <lorenm@u.washington.edu>
10
11
return i_score;
12
}
13
14
+/* Trade off precision in mbtree for increased range */
15
+#define MBTREE_PRECISION 0.5f
16
+
17
static void x264_macroblock_tree_finish( x264_t *h, x264_frame_t *frame, float average_duration, int ref0_distance )
18
{
19
- int fps_factor = round( CLIP_DURATION(average_duration) / CLIP_DURATION(frame->f_duration) * 256 );
20
+ int fps_factor = round( CLIP_DURATION(average_duration) / CLIP_DURATION(frame->f_duration) * 256 / MBTREE_PRECISION );
21
float weightdelta = 0.0;
22
if( ref0_distance && frame->f_weighted_cost_delta[ref0_distance-1] > 0 )
23
weightdelta = (1.0 - frame->f_weighted_cost_delta[ref0_distance-1]);
24
25
int i_bipred_weight = h->param.analyse.b_weighted_bipred ? 64 - (dist_scale_factor>>2) : 32;
26
int16_t (*mvs[2])[2] = { frames[b]->lowres_mvs[0][b-p0-1], frames[b]->lowres_mvs[1][p1-b-1] };
27
int bipred_weights[2] = {i_bipred_weight, 64 - i_bipred_weight};
28
- int *buf = h->scratch_buffer;
29
+ int16_t *buf = h->scratch_buffer;
30
uint16_t *propagate_cost = frames[b]->i_propagate_cost;
31
+ uint16_t *lowres_costs = frames[b]->lowres_costs[b-p0][p1-b];
32
33
x264_emms();
34
- float fps_factor = CLIP_DURATION(frames[b]->f_duration) / CLIP_DURATION(average_duration);
35
+ float fps_factor = CLIP_DURATION(frames[b]->f_duration) / (CLIP_DURATION(average_duration) * 256.0f) * MBTREE_PRECISION;
36
37
/* For non-reffed frames the source costs are always zero, so just memset one row and re-use it. */
38
if( !referenced )
39
40
{
41
int mb_index = h->mb.i_mb_y*h->mb.i_mb_stride;
42
h->mc.mbtree_propagate_cost( buf, propagate_cost,
43
- frames[b]->i_intra_cost+mb_index, frames[b]->lowres_costs[b-p0][p1-b]+mb_index,
44
+ frames[b]->i_intra_cost+mb_index, lowres_costs+mb_index,
45
frames[b]->i_inv_qscale_factor+mb_index, &fps_factor, h->mb.i_mb_width );
46
if( referenced )
47
propagate_cost += h->mb.i_mb_width;
48
- for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->mb.i_mb_width; h->mb.i_mb_x++, mb_index++ )
49
+
50
+ h->mc.mbtree_propagate_list( h, ref_costs[0], &mvs[0][mb_index], buf, &lowres_costs[mb_index],
51
+ bipred_weights[0], h->mb.i_mb_y, h->mb.i_mb_width, 0 );
52
+ if( b != p1 )
53
{
54
- int propagate_amount = buf[h->mb.i_mb_x];
55
- /* Don't propagate for an intra block. */
56
- if( propagate_amount > 0 )
57
- {
58
- /* Access width-2 bitfield. */
59
- int lists_used = frames[b]->lowres_costs[b-p0][p1-b][mb_index] >> LOWRES_COST_SHIFT;
60
- /* Follow the MVs to the previous frame(s). */
61
- for( int list = 0; list < 2; list++ )
62
- if( (lists_used >> list)&1 )
63
- {
64
-#define CLIP_ADD(s,x) (s) = X264_MIN((s)+(x),(1<<16)-1)
65
- int listamount = propagate_amount;
66
- /* Apply bipred weighting. */
67
- if( lists_used == 3 )
68
- listamount = (listamount * bipred_weights[list] + 32) >> 6;
69
-
70
- /* Early termination for simple case of mv0. */
71
- if( !M32( mvs[list][mb_index] ) )
72
- {
73
- CLIP_ADD( ref_costs[list][mb_index], listamount );
74
- continue;
75
- }
76
-
77
- int x = mvs[list][mb_index][0];
78
- int y = mvs[list][mb_index][1];
79
- int mbx = (x>>5)+h->mb.i_mb_x;
80
- int mby = (y>>5)+h->mb.i_mb_y;
81
- int idx0 = mbx + mby * h->mb.i_mb_stride;
82
- int idx1 = idx0 + 1;
83
- int idx2 = idx0 + h->mb.i_mb_stride;
84
- int idx3 = idx0 + h->mb.i_mb_stride + 1;
85
- x &= 31;
86
- y &= 31;
87
- int idx0weight = (32-y)*(32-x);
88
- int idx1weight = (32-y)*x;
89
- int idx2weight = y*(32-x);
90
- int idx3weight = y*x;
91
-
92
- /* We could just clip the MVs, but pixels that lie outside the frame probably shouldn't
93
- * be counted. */
94
- if( mbx < h->mb.i_mb_width-1 && mby < h->mb.i_mb_height-1 && mbx >= 0 && mby >= 0 )
95
- {
96
- CLIP_ADD( ref_costs[list][idx0], (listamount*idx0weight+512)>>10 );
97
- CLIP_ADD( ref_costs[list][idx1], (listamount*idx1weight+512)>>10 );
98
- CLIP_ADD( ref_costs[list][idx2], (listamount*idx2weight+512)>>10 );
99
- CLIP_ADD( ref_costs[list][idx3], (listamount*idx3weight+512)>>10 );
100
- }
101
- else /* Check offsets individually */
102
- {
103
- if( mbx < h->mb.i_mb_width && mby < h->mb.i_mb_height && mbx >= 0 && mby >= 0 )
104
- CLIP_ADD( ref_costs[list][idx0], (listamount*idx0weight+512)>>10 );
105
- if( mbx+1 < h->mb.i_mb_width && mby < h->mb.i_mb_height && mbx+1 >= 0 && mby >= 0 )
106
- CLIP_ADD( ref_costs[list][idx1], (listamount*idx1weight+512)>>10 );
107
- if( mbx < h->mb.i_mb_width && mby+1 < h->mb.i_mb_height && mbx >= 0 && mby+1 >= 0 )
108
- CLIP_ADD( ref_costs[list][idx2], (listamount*idx2weight+512)>>10 );
109
- if( mbx+1 < h->mb.i_mb_width && mby+1 < h->mb.i_mb_height && mbx+1 >= 0 && mby+1 >= 0 )
110
- CLIP_ADD( ref_costs[list][idx3], (listamount*idx3weight+512)>>10 );
111
- }
112
- }
113
- }
114
+ h->mc.mbtree_propagate_list( h, ref_costs[1], &mvs[1][mb_index], buf, &lowres_costs[mb_index],
115
+ bipred_weights[1], h->mb.i_mb_y, h->mb.i_mb_width, 1 );
116
}
117
}
118
119
x264-snapshot-20130723-2245.tar.bz2/filters/filters.c -> x264-snapshot-20140321-2245.tar.bz2/filters/filters.c
Changed
10
1
2
/*****************************************************************************
3
* filters.c: common filter functions
4
*****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
*
8
* Authors: Diogo Franco <diogomfranco@gmail.com>
9
* Steven Walters <kemuri9@gmail.com>
10
x264-snapshot-20130723-2245.tar.bz2/filters/filters.h -> x264-snapshot-20140321-2245.tar.bz2/filters/filters.h
Changed
10
1
2
/*****************************************************************************
3
* filters.h: common filter functions
4
*****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
*
8
* Authors: Diogo Franco <diogomfranco@gmail.com>
9
* Steven Walters <kemuri9@gmail.com>
10
x264-snapshot-20130723-2245.tar.bz2/filters/video/cache.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/cache.c
Changed
10
1
2
/*****************************************************************************
3
* cache.c: cache video filter
4
*****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
*
8
* Authors: Steven Walters <kemuri9@gmail.com>
9
*
10
x264-snapshot-20130723-2245.tar.bz2/filters/video/crop.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/crop.c
Changed
20
1
2
/*****************************************************************************
3
* crop.c: crop video filter
4
*****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
*
8
* Authors: Steven Walters <kemuri9@gmail.com>
9
* James Darnley <james.darnley@gmail.com>
10
11
for( int i = 0; i < output->img.planes; i++ )
12
{
13
intptr_t offset = output->img.stride[i] * h->dims[1] * h->csp->height[i];
14
- offset += h->dims[0] * h->csp->width[i];
15
- offset *= x264_cli_csp_depth_factor( output->img.csp );
16
+ offset += h->dims[0] * h->csp->width[i] * x264_cli_csp_depth_factor( output->img.csp );
17
output->img.plane[i] += offset;
18
}
19
return 0;
20
x264-snapshot-20130723-2245.tar.bz2/filters/video/depth.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/depth.c
Changed
10
1
2
/*****************************************************************************
3
* depth.c: bit-depth conversion video filter
4
*****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
*
8
* Authors: Oskar Arvidsson <oskar@irock.se>
9
*
10
x264-snapshot-20130723-2245.tar.bz2/filters/video/fix_vfr_pts.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/fix_vfr_pts.c
Changed
10
1
2
/*****************************************************************************
3
* fix_vfr_pts.c: vfr pts fixing video filter
4
*****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
*
8
* Authors: Steven Walters <kemuri9@gmail.com>
9
*
10
x264-snapshot-20130723-2245.tar.bz2/filters/video/internal.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/internal.c
Changed
10
1
2
/*****************************************************************************
3
* internal.c: video filter utilities
4
*****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
*
8
* Authors: Steven Walters <kemuri9@gmail.com>
9
*
10
x264-snapshot-20130723-2245.tar.bz2/filters/video/internal.h -> x264-snapshot-20140321-2245.tar.bz2/filters/video/internal.h
Changed
10
1
2
/*****************************************************************************
3
* internal.h: video filter utilities
4
*****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
*
8
* Authors: Steven Walters <kemuri9@gmail.com>
9
*
10
x264-snapshot-20130723-2245.tar.bz2/filters/video/resize.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/resize.c
Changed
118
1
2
/*****************************************************************************
3
* resize.c: resize video filter
4
*****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
*
8
* Authors: Steven Walters <kemuri9@gmail.com>
9
*
10
11
#include <libavutil/opt.h>
12
#include <libavutil/pixdesc.h>
13
14
-#ifndef PIX_FMT_BGRA64
15
-#define PIX_FMT_BGRA64 PIX_FMT_NONE
16
+#ifndef AV_PIX_FMT_BGRA64
17
+#define AV_PIX_FMT_BGRA64 AV_PIX_FMT_NONE
18
#endif
19
20
typedef struct
21
22
23
for( int i = X264_CSP_NONE+1; i < X264_CSP_CLI_MAX; i++ )
24
{
25
- printf( "%s", x264_cli_csps[i].name );
26
- if( i+1 < X264_CSP_CLI_MAX )
27
- printf( ", " );
28
+ if( x264_cli_csps[i].name )
29
+ {
30
+ printf( "%s", x264_cli_csps[i].name );
31
+ if( i+1 < X264_CSP_CLI_MAX )
32
+ printf( ", " );
33
+ }
34
}
35
printf( "\n"
36
" - depth: 8 or 16 bits per pixel [keep current]\n"
37
38
switch( csp&X264_CSP_MASK )
39
{
40
case X264_CSP_YV12: /* specially handled via swapping chroma */
41
- case X264_CSP_I420: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV420P16 : PIX_FMT_YUV420P;
42
+ case X264_CSP_I420: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_YUV420P16 : AV_PIX_FMT_YUV420P;
43
case X264_CSP_YV16: /* specially handled via swapping chroma */
44
- case X264_CSP_I422: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV422P16 : PIX_FMT_YUV422P;
45
+ case X264_CSP_I422: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_YUV422P16 : AV_PIX_FMT_YUV422P;
46
case X264_CSP_YV24: /* specially handled via swapping chroma */
47
- case X264_CSP_I444: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV444P16 : PIX_FMT_YUV444P;
48
- case X264_CSP_RGB: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_RGB48 : PIX_FMT_RGB24;
49
- case X264_CSP_BGR: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_BGR48 : PIX_FMT_BGR24;
50
- case X264_CSP_BGRA: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_BGRA64 : PIX_FMT_BGRA;
51
+ case X264_CSP_I444: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_YUV444P;
52
+ case X264_CSP_RGB: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_RGB48 : AV_PIX_FMT_RGB24;
53
+ case X264_CSP_BGR: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_BGR48 : AV_PIX_FMT_BGR24;
54
+ case X264_CSP_BGRA: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_BGRA64 : AV_PIX_FMT_BGRA;
55
/* the next csp has no equivalent 16bit depth in swscale */
56
- case X264_CSP_NV12: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_NONE : PIX_FMT_NV12;
57
+ case X264_CSP_NV12: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_NONE : AV_PIX_FMT_NV12;
58
/* the next csp is no supported by swscale at all */
59
case X264_CSP_NV16:
60
- default: return PIX_FMT_NONE;
61
+ default: return AV_PIX_FMT_NONE;
62
}
63
}
64
65
66
int pix_fmt = convert_csp_to_pix_fmt( csp );
67
// first determine the base csp
68
int ret = X264_CSP_NONE;
69
- const AVPixFmtDescriptor *pix_desc = av_pix_fmt_descriptors+pix_fmt;
70
- if( (unsigned)pix_fmt >= PIX_FMT_NB || !pix_desc->name )
71
+ const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get( pix_fmt );
72
+ if( !pix_desc || !pix_desc->name )
73
return ret;
74
75
const char *pix_fmt_name = pix_desc->name;
76
- int is_rgb = pix_desc->flags & (PIX_FMT_RGB | PIX_FMT_PAL);
77
+ int is_rgb = pix_desc->flags & (AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_PAL);
78
int is_bgr = !!strstr( pix_fmt_name, "bgr" );
79
if( is_bgr || is_rgb )
80
{
81
82
if( strlen( str_csp ) == 0 )
83
csp = info->csp & X264_CSP_MASK;
84
else
85
- for( csp = X264_CSP_CLI_MAX-1; x264_cli_csps[csp].name && strcasecmp( x264_cli_csps[csp].name, str_csp ); )
86
- csp--;
87
+ for( csp = X264_CSP_CLI_MAX-1; csp > X264_CSP_NONE; csp-- )
88
+ {
89
+ if( x264_cli_csps[csp].name && !strcasecmp( x264_cli_csps[csp].name, str_csp ) )
90
+ break;
91
+ }
92
FAIL_IF_ERROR( csp == X264_CSP_NONE, "unsupported colorspace `%s'\n", str_csp );
93
h->dst_csp = csp;
94
if( depth == 16 )
95
96
h->scale = input_prop;
97
if( !h->buffer_allocated )
98
{
99
- if( x264_cli_pic_alloc( &h->buffer, h->dst_csp, h->dst.width, h->dst.height ) )
100
+ if( x264_cli_pic_alloc_aligned( &h->buffer, h->dst_csp, h->dst.width, h->dst.height ) )
101
return -1;
102
h->buffer_allocated = 1;
103
}
104
105
int dst_pix_fmt_inv = convert_csp_to_pix_fmt( h->dst_csp ^ X264_CSP_HIGH_DEPTH );
106
107
/* confirm swscale can support this conversion */
108
- FAIL_IF_ERROR( src_pix_fmt == PIX_FMT_NONE && src_pix_fmt_inv != PIX_FMT_NONE,
109
+ FAIL_IF_ERROR( src_pix_fmt == AV_PIX_FMT_NONE && src_pix_fmt_inv != AV_PIX_FMT_NONE,
110
"input colorspace %s with bit depth %d is not supported\n", av_get_pix_fmt_name( src_pix_fmt_inv ),
111
info->csp & X264_CSP_HIGH_DEPTH ? 16 : 8 );
112
FAIL_IF_ERROR( !sws_isSupportedInput( src_pix_fmt ), "input colorspace %s is not supported\n", av_get_pix_fmt_name( src_pix_fmt ) )
113
- FAIL_IF_ERROR( h->dst.pix_fmt == PIX_FMT_NONE && dst_pix_fmt_inv != PIX_FMT_NONE,
114
+ FAIL_IF_ERROR( h->dst.pix_fmt == AV_PIX_FMT_NONE && dst_pix_fmt_inv != AV_PIX_FMT_NONE,
115
"input colorspace %s with bit depth %d is not supported\n", av_get_pix_fmt_name( dst_pix_fmt_inv ),
116
h->dst_csp & X264_CSP_HIGH_DEPTH ? 16 : 8 );
117
FAIL_IF_ERROR( !sws_isSupportedOutput( h->dst.pix_fmt ), "output colorspace %s is not supported\n", av_get_pix_fmt_name( h->dst.pix_fmt ) )
118
x264-snapshot-20130723-2245.tar.bz2/filters/video/select_every.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/select_every.c
Changed
10
1
2
/*****************************************************************************
3
* select_every.c: select-every video filter
4
*****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
*
8
* Authors: Steven Walters <kemuri9@gmail.com>
9
*
10
x264-snapshot-20130723-2245.tar.bz2/filters/video/source.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/source.c
Changed
10
1
2
/*****************************************************************************
3
* source.c: source video filter
4
*****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
*
8
* Authors: Steven Walters <kemuri9@gmail.com>
9
*
10
x264-snapshot-20130723-2245.tar.bz2/filters/video/video.c -> x264-snapshot-20140321-2245.tar.bz2/filters/video/video.c
Changed
10
1
2
/*****************************************************************************
3
* video.c: video filters
4
*****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
*
8
* Authors: Steven Walters <kemuri9@gmail.com>
9
*
10
x264-snapshot-20130723-2245.tar.bz2/filters/video/video.h -> x264-snapshot-20140321-2245.tar.bz2/filters/video/video.h
Changed
10
1
2
/*****************************************************************************
3
* video.h: video filters
4
*****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
*
8
* Authors: Steven Walters <kemuri9@gmail.com>
9
*
10
x264-snapshot-20130723-2245.tar.bz2/input/avs.c -> x264-snapshot-20140321-2245.tar.bz2/input/avs.c
Changed
69
1
2
/*****************************************************************************
3
* avs.c: avisynth input
4
*****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
*
8
* Authors: Steven Walters <kemuri9@gmail.com>
9
*
10
11
#define avs_address dlsym
12
#else
13
#include <windows.h>
14
-#define avs_open LoadLibrary( "avisynth" )
15
+#define avs_open LoadLibraryW( L"avisynth" )
16
#define avs_close FreeLibrary
17
#define avs_address GetProcAddress
18
#endif
19
20
21
static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, cli_input_opt_t *opt )
22
{
23
- FILE *fh = fopen( psz_filename, "r" );
24
+ FILE *fh = x264_fopen( psz_filename, "r" );
25
if( !fh )
26
return -1;
27
FAIL_IF_ERROR( !x264_is_regular_file( fh ), "AVS input is incompatible with non-regular file `%s'\n", psz_filename );
28
29
if( avs_version <= 0 )
30
return -1;
31
x264_cli_log( "avs", X264_LOG_DEBUG, "using avisynth version %.2f\n", avs_version );
32
+
33
+#ifdef _WIN32
34
+ /* Avisynth doesn't support Unicode filenames. */
35
+ char ansi_filename[MAX_PATH];
36
+ FAIL_IF_ERROR( !x264_ansi_filename( psz_filename, ansi_filename, MAX_PATH, 0 ), "invalid ansi filename\n" );
37
+ AVS_Value arg = avs_new_value_string( ansi_filename );
38
+#else
39
AVS_Value arg = avs_new_value_string( psz_filename );
40
+#endif
41
+
42
AVS_Value res;
43
char *filename_ext = get_filename_extension( psz_filename );
44
45
46
info->csp = X264_CSP_I420;
47
#if HAVE_SWSCALE
48
else if( avs_is_yuy2( vi ) )
49
- info->csp = PIX_FMT_YUYV422 | X264_CSP_OTHER;
50
+ info->csp = AV_PIX_FMT_YUYV422 | X264_CSP_OTHER;
51
else if( avs_is_yv411( vi ) )
52
- info->csp = PIX_FMT_YUV411P | X264_CSP_OTHER;
53
+ info->csp = AV_PIX_FMT_YUV411P | X264_CSP_OTHER;
54
else if( avs_is_y8( vi ) )
55
- info->csp = PIX_FMT_GRAY8 | X264_CSP_OTHER;
56
+ info->csp = AV_PIX_FMT_GRAY8 | X264_CSP_OTHER;
57
#endif
58
else
59
info->csp = X264_CSP_NONE;
60
61
if( cli_csp )
62
pic->img.planes = cli_csp->planes;
63
#if HAVE_SWSCALE
64
- else if( csp == (PIX_FMT_YUV411P | X264_CSP_OTHER) )
65
+ else if( csp == (AV_PIX_FMT_YUV411P | X264_CSP_OTHER) )
66
pic->img.planes = 3;
67
else
68
pic->img.planes = 1; //y8 and yuy2 are one plane
69
x264-snapshot-20130723-2245.tar.bz2/input/ffms.c -> x264-snapshot-20140321-2245.tar.bz2/input/ffms.c
Changed
105
1
2
/*****************************************************************************
3
* ffms.c: ffmpegsource input
4
*****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
*
8
* Authors: Mike Gurlitz <mike.gurlitz@gmail.com>
9
* Steven Walters <kemuri9@gmail.com>
10
+ * Henrik Gramner <henrik@gramner.com>
11
*
12
* This program is free software; you can redistribute it and/or modify
13
* it under the terms of the GNU General Public License as published by
14
15
16
#ifdef _WIN32
17
#include <windows.h>
18
-#else
19
-#define SetConsoleTitle(t)
20
#endif
21
22
typedef struct
23
24
char buf[200];
25
sprintf( buf, "ffms [info]: indexing input file [%.1f%%]", 100.0 * current / total );
26
fprintf( stderr, "%s \r", buf+5 );
27
- SetConsoleTitle( buf );
28
+ x264_cli_set_console_title( buf );
29
fflush( stderr );
30
return 0;
31
}
32
33
{
34
switch( csp )
35
{
36
- case PIX_FMT_YUVJ420P: *fullrange = 1; return PIX_FMT_YUV420P;
37
- case PIX_FMT_YUVJ422P: *fullrange = 1; return PIX_FMT_YUV422P;
38
- case PIX_FMT_YUVJ444P: *fullrange = 1; return PIX_FMT_YUV444P;
39
+ case AV_PIX_FMT_YUVJ420P: *fullrange = 1; return AV_PIX_FMT_YUV420P;
40
+ case AV_PIX_FMT_YUVJ422P: *fullrange = 1; return AV_PIX_FMT_YUV422P;
41
+ case AV_PIX_FMT_YUVJ444P: *fullrange = 1; return AV_PIX_FMT_YUV444P;
42
default: return csp;
43
}
44
}
45
46
ffms_hnd_t *h = calloc( 1, sizeof(ffms_hnd_t) );
47
if( !h )
48
return -1;
49
+
50
+#ifdef __MINGW32__
51
+ /* FFMS supports UTF-8 filenames, but it uses std::fstream internally which is broken with Unicode in MinGW. */
52
FFMS_Init( 0, 0 );
53
+ char src_filename[MAX_PATH];
54
+ char idx_filename[MAX_PATH];
55
+ FAIL_IF_ERROR( !x264_ansi_filename( psz_filename, src_filename, MAX_PATH, 0 ), "invalid ansi filename\n" );
56
+ if( opt->index_file )
57
+ FAIL_IF_ERROR( !x264_ansi_filename( opt->index_file, idx_filename, MAX_PATH, 1 ), "invalid ansi filename\n" );
58
+#else
59
+ FFMS_Init( 0, 1 );
60
+ char *src_filename = psz_filename;
61
+ char *idx_filename = opt->index_file;
62
+#endif
63
+
64
FFMS_ErrorInfo e;
65
e.BufferSize = 0;
66
int seekmode = opt->seek ? FFMS_SEEK_NORMAL : FFMS_SEEK_LINEAR_NO_RW;
67
68
FFMS_Index *idx = NULL;
69
if( opt->index_file )
70
{
71
- struct stat index_s, input_s;
72
- if( !stat( opt->index_file, &index_s ) && !stat( psz_filename, &input_s ) &&
73
- input_s.st_mtime < index_s.st_mtime )
74
- idx = FFMS_ReadIndex( opt->index_file, &e );
75
+ x264_struct_stat index_s, input_s;
76
+ if( !x264_stat( opt->index_file, &index_s ) && !x264_stat( psz_filename, &input_s ) &&
77
+ input_s.st_mtime < index_s.st_mtime && index_s.st_size )
78
+ idx = FFMS_ReadIndex( idx_filename, &e );
79
}
80
if( !idx )
81
{
82
if( opt->progress )
83
{
84
- idx = FFMS_MakeIndex( psz_filename, 0, 0, NULL, NULL, 0, update_progress, &h->time, &e );
85
+ idx = FFMS_MakeIndex( src_filename, 0, 0, NULL, NULL, 0, update_progress, &h->time, &e );
86
fprintf( stderr, " \r" );
87
}
88
else
89
- idx = FFMS_MakeIndex( psz_filename, 0, 0, NULL, NULL, 0, NULL, NULL, &e );
90
+ idx = FFMS_MakeIndex( src_filename, 0, 0, NULL, NULL, 0, NULL, NULL, &e );
91
FAIL_IF_ERROR( !idx, "could not create index\n" )
92
- if( opt->index_file && FFMS_WriteIndex( opt->index_file, idx, &e ) )
93
+ if( opt->index_file && FFMS_WriteIndex( idx_filename, idx, &e ) )
94
x264_cli_log( "ffms", X264_LOG_WARNING, "could not write index file\n" );
95
}
96
97
int trackno = FFMS_GetFirstTrackOfType( idx, FFMS_TYPE_VIDEO, &e );
98
FAIL_IF_ERROR( trackno < 0, "could not find video track\n" )
99
100
- h->video_source = FFMS_CreateVideoSource( psz_filename, trackno, idx, 1, seekmode, &e );
101
+ h->video_source = FFMS_CreateVideoSource( src_filename, trackno, idx, 1, seekmode, &e );
102
FAIL_IF_ERROR( !h->video_source, "could not create video source\n" )
103
104
h->track = FFMS_GetTrackFromVideo( h->video_source );
105
x264-snapshot-20130723-2245.tar.bz2/input/input.c -> x264-snapshot-20140321-2245.tar.bz2/input/input.c
Changed
63
1
2
/*****************************************************************************
3
* input.c: common input functions
4
*****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
*
8
* Authors: Steven Walters <kemuri9@gmail.com>
9
*
10
11
int x264_cli_csp_is_invalid( int csp )
12
{
13
int csp_mask = csp & X264_CSP_MASK;
14
- return csp_mask <= X264_CSP_NONE || csp_mask >= X264_CSP_CLI_MAX || csp & X264_CSP_OTHER;
15
+ return csp_mask <= X264_CSP_NONE || csp_mask >= X264_CSP_CLI_MAX ||
16
+ csp_mask == X264_CSP_V210 || csp & X264_CSP_OTHER;
17
}
18
19
int x264_cli_csp_depth_factor( int csp )
20
21
return size;
22
}
23
24
-int x264_cli_pic_alloc( cli_pic_t *pic, int csp, int width, int height )
25
+static int x264_cli_pic_alloc_internal( cli_pic_t *pic, int csp, int width, int height, int align )
26
{
27
memset( pic, 0, sizeof(cli_pic_t) );
28
int csp_mask = csp & X264_CSP_MASK;
29
30
pic->img.height = height;
31
for( int i = 0; i < pic->img.planes; i++ )
32
{
33
- pic->img.plane[i] = x264_malloc( x264_cli_pic_plane_size( csp, width, height, i ) );
34
- if( !pic->img.plane[i] )
35
- return -1;
36
- pic->img.stride[i] = width * x264_cli_csps[csp_mask].width[i] * x264_cli_csp_depth_factor( csp );
37
+ int stride = width * x264_cli_csps[csp_mask].width[i];
38
+ stride *= x264_cli_csp_depth_factor( csp );
39
+ stride = ALIGN( stride, align );
40
+ uint64_t size = (uint64_t)(height * x264_cli_csps[csp_mask].height[i]) * stride;
41
+ pic->img.plane[i] = x264_malloc( size );
42
+ if( !pic->img.plane[i] )
43
+ return -1;
44
+ pic->img.stride[i] = stride;
45
}
46
47
return 0;
48
}
49
50
+int x264_cli_pic_alloc( cli_pic_t *pic, int csp, int width, int height )
51
+{
52
+ return x264_cli_pic_alloc_internal( pic, csp, width, height, 1 );
53
+}
54
+
55
+int x264_cli_pic_alloc_aligned( cli_pic_t *pic, int csp, int width, int height )
56
+{
57
+ return x264_cli_pic_alloc_internal( pic, csp, width, height, NATIVE_ALIGN );
58
+}
59
+
60
void x264_cli_pic_clean( cli_pic_t *pic )
61
{
62
for( int i = 0; i < pic->img.planes; i++ )
63
x264-snapshot-20130723-2245.tar.bz2/input/input.h -> x264-snapshot-20140321-2245.tar.bz2/input/input.h
Changed
18
1
2
/*****************************************************************************
3
* input.h: file input
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
* Loren Merritt <lorenm@u.washington.edu>
10
11
int x264_cli_csp_is_invalid( int csp );
12
int x264_cli_csp_depth_factor( int csp );
13
int x264_cli_pic_alloc( cli_pic_t *pic, int csp, int width, int height );
14
+int x264_cli_pic_alloc_aligned( cli_pic_t *pic, int csp, int width, int height );
15
void x264_cli_pic_clean( cli_pic_t *pic );
16
uint64_t x264_cli_pic_plane_size( int csp, int width, int height, int plane );
17
uint64_t x264_cli_pic_size( int csp, int width, int height );
18
x264-snapshot-20130723-2245.tar.bz2/input/lavf.c -> x264-snapshot-20140321-2245.tar.bz2/input/lavf.c
Changed
41
1
2
/*****************************************************************************
3
* lavf.c: libavformat input
4
*****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
*
8
* Authors: Mike Gurlitz <mike.gurlitz@gmail.com>
9
* Steven Walters <kemuri9@gmail.com>
10
11
{
12
switch( csp )
13
{
14
- case PIX_FMT_YUVJ420P: *fullrange = 1; return PIX_FMT_YUV420P;
15
- case PIX_FMT_YUVJ422P: *fullrange = 1; return PIX_FMT_YUV422P;
16
- case PIX_FMT_YUVJ444P: *fullrange = 1; return PIX_FMT_YUV444P;
17
+ case AV_PIX_FMT_YUVJ420P: *fullrange = 1; return AV_PIX_FMT_YUV420P;
18
+ case AV_PIX_FMT_YUVJ422P: *fullrange = 1; return AV_PIX_FMT_YUV422P;
19
+ case AV_PIX_FMT_YUVJ444P: *fullrange = 1; return AV_PIX_FMT_YUV444P;
20
default: return csp;
21
}
22
}
23
24
if( opt->resolution )
25
{
26
av_dict_set( &options, "video_size", opt->resolution, 0 );
27
- const char *csp = opt->colorspace ? opt->colorspace : av_get_pix_fmt_name( PIX_FMT_YUV420P );
28
+ const char *csp = opt->colorspace ? opt->colorspace : av_get_pix_fmt_name( AV_PIX_FMT_YUV420P );
29
av_dict_set( &options, "pixel_format", csp, 0 );
30
}
31
32
33
34
/* avisynth stores rgb data vertically flipped. */
35
if( !strcasecmp( get_filename_extension( psz_filename ), "avs" ) &&
36
- (c->pix_fmt == PIX_FMT_BGRA || c->pix_fmt == PIX_FMT_BGR24) )
37
+ (c->pix_fmt == AV_PIX_FMT_BGRA || c->pix_fmt == AV_PIX_FMT_BGR24) )
38
info->csp |= X264_CSP_VFLIP;
39
40
*p_handle = h;
41
x264-snapshot-20130723-2245.tar.bz2/input/raw.c -> x264-snapshot-20140321-2245.tar.bz2/input/raw.c
Changed
66
1
2
/*****************************************************************************
3
* raw.c: raw input
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
* Loren Merritt <lorenm@u.washington.edu>
10
11
FAIL_IF_ERROR( !info->width || !info->height, "raw input requires a resolution.\n" )
12
if( opt->colorspace )
13
{
14
- for( info->csp = X264_CSP_CLI_MAX-1; x264_cli_csps[info->csp].name && strcasecmp( x264_cli_csps[info->csp].name, opt->colorspace ); )
15
- info->csp--;
16
+ for( info->csp = X264_CSP_CLI_MAX-1; info->csp > X264_CSP_NONE; info->csp-- )
17
+ {
18
+ if( x264_cli_csps[info->csp].name && !strcasecmp( x264_cli_csps[info->csp].name, opt->colorspace ) )
19
+ break;
20
+ }
21
FAIL_IF_ERROR( info->csp == X264_CSP_NONE, "unsupported colorspace `%s'\n", opt->colorspace );
22
}
23
else /* default */
24
25
if( !strcmp( psz_filename, "-" ) )
26
h->fh = stdin;
27
else
28
- h->fh = fopen( psz_filename, "rb" );
29
+ h->fh = x264_fopen( psz_filename, "rb" );
30
if( h->fh == NULL )
31
return -1;
32
33
34
return 0;
35
}
36
37
-static int read_frame_internal( cli_pic_t *pic, raw_hnd_t *h )
38
+static int read_frame_internal( cli_pic_t *pic, raw_hnd_t *h, int bit_depth_uc )
39
{
40
int error = 0;
41
int pixel_depth = x264_cli_csp_depth_factor( pic->img.csp );
42
for( int i = 0; i < pic->img.planes && !error; i++ )
43
{
44
error |= fread( pic->img.plane[i], pixel_depth, h->plane_size[i], h->fh ) != h->plane_size[i];
45
- if( h->bit_depth & 7 )
46
+ if( bit_depth_uc )
47
{
48
/* upconvert non 16bit high depth planes to 16bit using the same
49
* algorithm as used in the depth filter. */
50
51
else
52
while( i_frame > h->next_frame )
53
{
54
- if( read_frame_internal( pic, h ) )
55
+ if( read_frame_internal( pic, h, 0 ) )
56
return -1;
57
h->next_frame++;
58
}
59
}
60
61
- if( read_frame_internal( pic, h ) )
62
+ if( read_frame_internal( pic, h, h->bit_depth & 7 ) )
63
return -1;
64
65
h->next_frame = i_frame+1;
66
x264-snapshot-20130723-2245.tar.bz2/input/thread.c -> x264-snapshot-20140321-2245.tar.bz2/input/thread.c
Changed
10
1
2
/*****************************************************************************
3
* thread.c: threaded input
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
* Loren Merritt <lorenm@u.washington.edu>
10
x264-snapshot-20130723-2245.tar.bz2/input/timecode.c -> x264-snapshot-20140321-2245.tar.bz2/input/timecode.c
Changed
19
1
2
/*****************************************************************************
3
* timecode.c: timecode file input
4
*****************************************************************************
5
- * Copyright (C) 2010-2013 x264 project
6
+ * Copyright (C) 2010-2014 x264 project
7
*
8
* Authors: Yusuke Nakamura <muken.the.vfrmaniac@gmail.com>
9
*
10
11
timecode_input.picture_alloc = h->input.picture_alloc;
12
timecode_input.picture_clean = h->input.picture_clean;
13
14
- tcfile_in = fopen( psz_filename, "rb" );
15
+ tcfile_in = x264_fopen( psz_filename, "rb" );
16
FAIL_IF_ERROR( !tcfile_in, "can't open `%s'\n", psz_filename )
17
else if( !x264_is_regular_file( tcfile_in ) )
18
{
19
x264-snapshot-20130723-2245.tar.bz2/input/y4m.c -> x264-snapshot-20140321-2245.tar.bz2/input/y4m.c
Changed
53
1
2
/*****************************************************************************
3
* y4m.c: y4m input
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
* Loren Merritt <lorenm@u.washington.edu>
10
11
if( !strcmp( psz_filename, "-" ) )
12
h->fh = stdin;
13
else
14
- h->fh = fopen(psz_filename, "rb");
15
+ h->fh = x264_fopen(psz_filename, "rb");
16
if( h->fh == NULL )
17
return -1;
18
19
20
return 0;
21
}
22
23
-static int read_frame_internal( cli_pic_t *pic, y4m_hnd_t *h )
24
+static int read_frame_internal( cli_pic_t *pic, y4m_hnd_t *h, int bit_depth_uc )
25
{
26
size_t slen = strlen( Y4M_FRAME_MAGIC );
27
int pixel_depth = x264_cli_csp_depth_factor( pic->img.csp );
28
29
for( i = 0; i < pic->img.planes && !error; i++ )
30
{
31
error |= fread( pic->img.plane[i], pixel_depth, h->plane_size[i], h->fh ) != h->plane_size[i];
32
- if( h->bit_depth & 7 )
33
+ if( bit_depth_uc )
34
{
35
/* upconvert non 16bit high depth planes to 16bit using the same
36
* algorithm as used in the depth filter. */
37
38
else
39
while( i_frame > h->next_frame )
40
{
41
- if( read_frame_internal( pic, h ) )
42
+ if( read_frame_internal( pic, h, 0 ) )
43
return -1;
44
h->next_frame++;
45
}
46
}
47
48
- if( read_frame_internal( pic, h ) )
49
+ if( read_frame_internal( pic, h, h->bit_depth & 7 ) )
50
return -1;
51
52
h->next_frame = i_frame+1;
53
x264-snapshot-20130723-2245.tar.bz2/output/flv.c -> x264-snapshot-20140321-2245.tar.bz2/output/flv.c
Changed
23
1
2
/*****************************************************************************
3
* flv.c: flv muxer
4
*****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
*
8
* Authors: Kieran Kunhya <kieran@kunhya.com>
9
*
10
11
12
static int open_file( char *psz_filename, hnd_t *p_handle, cli_output_opt_t *opt )
13
{
14
- flv_hnd_t *p_flv = malloc( sizeof(*p_flv) );
15
*p_handle = NULL;
16
+ flv_hnd_t *p_flv = calloc( 1, sizeof(flv_hnd_t) );
17
if( !p_flv )
18
return -1;
19
- memset( p_flv, 0, sizeof(*p_flv) );
20
21
p_flv->b_dts_compress = opt->use_dts_compress;
22
23
x264-snapshot-20130723-2245.tar.bz2/output/flv_bytestream.c -> x264-snapshot-20140321-2245.tar.bz2/output/flv_bytestream.c
Changed
29
1
2
/*****************************************************************************
3
* flv_bytestream.c: flv muxer utilities
4
*****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
*
8
* Authors: Kieran Kunhya <kieran@kunhya.com>
9
*
10
11
12
flv_buffer *flv_create_writer( const char *filename )
13
{
14
- flv_buffer *c = malloc( sizeof(*c) );
15
-
16
+ flv_buffer *c = calloc( 1, sizeof(flv_buffer) );
17
if( !c )
18
return NULL;
19
- memset( c, 0, sizeof(*c) );
20
21
if( !strcmp( filename, "-" ) )
22
c->fp = stdout;
23
else
24
- c->fp = fopen( filename, "wb" );
25
+ c->fp = x264_fopen( filename, "wb" );
26
if( !c->fp )
27
{
28
free( c );
29
x264-snapshot-20130723-2245.tar.bz2/output/flv_bytestream.h -> x264-snapshot-20140321-2245.tar.bz2/output/flv_bytestream.h
Changed
10
1
2
/*****************************************************************************
3
* flv_bytestream.h: flv muxer utilities
4
*****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
*
8
* Authors: Kieran Kunhya <kieran@kunhya.com>
9
*
10
x264-snapshot-20130723-2245.tar.bz2/output/matroska.c -> x264-snapshot-20140321-2245.tar.bz2/output/matroska.c
Changed
53
1
2
/*****************************************************************************
3
* matroska.c: matroska muxer
4
*****************************************************************************
5
- * Copyright (C) 2005-2013 x264 project
6
+ * Copyright (C) 2005-2014 x264 project
7
*
8
* Authors: Mike Matsnev <mike@haali.su>
9
*
10
11
int width, height, d_width, d_height;
12
13
int display_size_units;
14
+ int stereo_mode;
15
16
int64_t frame_duration;
17
18
19
20
static int open_file( char *psz_filename, hnd_t *p_handle, cli_output_opt_t *opt )
21
{
22
- mkv_hnd_t *p_mkv;
23
-
24
*p_handle = NULL;
25
-
26
- p_mkv = malloc( sizeof(*p_mkv) );
27
+ mkv_hnd_t *p_mkv = calloc( 1, sizeof(mkv_hnd_t) );
28
if( !p_mkv )
29
return -1;
30
31
- memset( p_mkv, 0, sizeof(*p_mkv) );
32
-
33
p_mkv->w = mk_create_writer( psz_filename );
34
if( !p_mkv->w )
35
{
36
37
p_mkv->width = p_mkv->d_width = p_param->i_width;
38
p_mkv->height = p_mkv->d_height = p_param->i_height;
39
p_mkv->display_size_units = DS_PIXELS;
40
+ p_mkv->stereo_mode = p_param->i_frame_packing;
41
42
if( p_param->vui.i_sar_width && p_param->vui.i_sar_height
43
&& p_param->vui.i_sar_width != p_param->vui.i_sar_height )
44
45
ret = mk_write_header( p_mkv->w, "x264" X264_VERSION, "V_MPEG4/ISO/AVC",
46
avcC, avcC_len, p_mkv->frame_duration, 50000,
47
p_mkv->width, p_mkv->height,
48
- p_mkv->d_width, p_mkv->d_height, p_mkv->display_size_units );
49
+ p_mkv->d_width, p_mkv->d_height, p_mkv->display_size_units, p_mkv->stereo_mode );
50
if( ret < 0 )
51
return ret;
52
53
x264-snapshot-20130723-2245.tar.bz2/output/matroska_ebml.c -> x264-snapshot-20140321-2245.tar.bz2/output/matroska_ebml.c
Changed
71
1
2
/*****************************************************************************
3
* matroska_ebml.c: matroska muxer utilities
4
*****************************************************************************
5
- * Copyright (C) 2005-2013 x264 project
6
+ * Copyright (C) 2005-2014 x264 project
7
*
8
* Authors: Mike Matsnev <mike@haali.su>
9
*
10
11
}
12
else
13
{
14
- c = malloc( sizeof(*c) );
15
+ c = calloc( 1, sizeof(mk_context) );
16
if( !c )
17
return NULL;
18
- memset( c, 0, sizeof(*c) );
19
}
20
21
c->parent = parent;
22
23
24
mk_writer *mk_create_writer( const char *filename )
25
{
26
- mk_writer *w = malloc( sizeof(*w) );
27
+ mk_writer *w = calloc( 1, sizeof(mk_writer) );
28
if( !w )
29
return NULL;
30
31
- memset( w, 0, sizeof(*w) );
32
-
33
w->root = mk_create_context( w, NULL, 0 );
34
if( !w->root )
35
{
36
37
if( !strcmp( filename, "-" ) )
38
w->fp = stdout;
39
else
40
- w->fp = fopen( filename, "wb" );
41
+ w->fp = x264_fopen( filename, "wb" );
42
if( !w->fp )
43
{
44
mk_destroy_contexts( w );
45
46
return w;
47
}
48
49
+static const uint8_t mk_stereo_modes[6] = {5,9,7,1,3,13};
50
+
51
int mk_write_header( mk_writer *w, const char *writing_app,
52
const char *codec_id,
53
const void *codec_private, unsigned codec_private_size,
54
int64_t default_frame_duration,
55
int64_t timescale,
56
unsigned width, unsigned height,
57
- unsigned d_width, unsigned d_height, int display_size_units )
58
+ unsigned d_width, unsigned d_height, int display_size_units, int stereo_mode )
59
{
60
mk_context *c, *ti, *v;
61
62
63
CHECK( mk_write_uint( v, 0x54b2, display_size_units ) );
64
CHECK( mk_write_uint( v, 0x54b0, d_width ) );
65
CHECK( mk_write_uint( v, 0x54ba, d_height ) );
66
+ if( stereo_mode >= 0 && stereo_mode <= 5 )
67
+ CHECK( mk_write_uint( v, 0x53b8, mk_stereo_modes[stereo_mode] ) );
68
CHECK( mk_close_context( v, 0 ) );
69
70
CHECK( mk_close_context( ti, 0 ) );
71
x264-snapshot-20130723-2245.tar.bz2/output/matroska_ebml.h -> x264-snapshot-20140321-2245.tar.bz2/output/matroska_ebml.h
Changed
19
1
2
/*****************************************************************************
3
* matroska_ebml.h: matroska muxer utilities
4
*****************************************************************************
5
- * Copyright (C) 2005-2013 x264 project
6
+ * Copyright (C) 2005-2014 x264 project
7
*
8
* Authors: Mike Matsnev <mike@haali.su>
9
*
10
11
int64_t default_frame_duration,
12
int64_t timescale,
13
unsigned width, unsigned height,
14
- unsigned d_width, unsigned d_height, int display_size_units );
15
+ unsigned d_width, unsigned d_height, int display_size_units, int stereo_mode );
16
17
int mk_start_frame( mk_writer *w );
18
int mk_add_frame_data( mk_writer *w, const void *data, unsigned size );
19
x264-snapshot-20130723-2245.tar.bz2/output/mp4.c -> x264-snapshot-20140321-2245.tar.bz2/output/mp4.c
Changed
57
1
2
/*****************************************************************************
3
* mp4.c: mp4 muxer
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
* Loren Merritt <lorenm@u.washington.edu>
10
11
#include "output.h"
12
#include <gpac/isomedia.h>
13
14
-#if HAVE_GF_MALLOC
15
-#undef malloc
16
-#undef free
17
-#undef realloc
18
-#define malloc gf_malloc
19
-#define free gf_free
20
-#define realloc gf_realloc
21
+#ifdef _WIN32
22
+#include <windows.h>
23
#endif
24
25
typedef struct
26
27
28
static int open_file( char *psz_filename, hnd_t *p_handle, cli_output_opt_t *opt )
29
{
30
- mp4_hnd_t *p_mp4;
31
-
32
*p_handle = NULL;
33
- FILE *fh = fopen( psz_filename, "w" );
34
+ FILE *fh = x264_fopen( psz_filename, "w" );
35
if( !fh )
36
return -1;
37
FAIL_IF_ERR( !x264_is_regular_file( fh ), "mp4", "MP4 output is incompatible with non-regular file `%s'\n", psz_filename )
38
fclose( fh );
39
40
- if( !(p_mp4 = malloc( sizeof(mp4_hnd_t) )) )
41
+ mp4_hnd_t *p_mp4 = calloc( 1, sizeof(mp4_hnd_t) );
42
+ if( !p_mp4 )
43
return -1;
44
45
- memset( p_mp4, 0, sizeof(mp4_hnd_t) );
46
+#ifdef _WIN32
47
+ /* GPAC doesn't support Unicode filenames. */
48
+ char ansi_filename[MAX_PATH];
49
+ FAIL_IF_ERR( !x264_ansi_filename( psz_filename, ansi_filename, MAX_PATH, 1 ), "mp4", "invalid ansi filename\n" )
50
+ p_mp4->p_file = gf_isom_open( ansi_filename, GF_ISOM_OPEN_WRITE, NULL );
51
+#else
52
p_mp4->p_file = gf_isom_open( psz_filename, GF_ISOM_OPEN_WRITE, NULL );
53
+#endif
54
55
p_mp4->b_dts_compress = opt->use_dts_compress;
56
57
x264-snapshot-20140321-2245.tar.bz2/output/mp4_lsmash.c
Added
421
1
2
+/*****************************************************************************
3
+ * mp4_lsmash.c: mp4 muxer using L-SMASH
4
+ *****************************************************************************
5
+ * Copyright (C) 2003-2014 x264 project
6
+ *
7
+ * Authors: Laurent Aimar <fenrir@via.ecp.fr>
8
+ * Loren Merritt <lorenm@u.washington.edu>
9
+ * Yusuke Nakamura <muken.the.vfrmaniac@gmail.com>
10
+ * Takashi Hirata <silverfilain@gmail.com>
11
+ * golgol7777 <golgol7777@gmail.com>
12
+ *
13
+ * This program is free software; you can redistribute it and/or modify
14
+ * it under the terms of the GNU General Public License as published by
15
+ * the Free Software Foundation; either version 2 of the License, or
16
+ * (at your option) any later version.
17
+ *
18
+ * This program is distributed in the hope that it will be useful,
19
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
20
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21
+ * GNU General Public License for more details.
22
+ *
23
+ * You should have received a copy of the GNU General Public License
24
+ * along with this program; if not, write to the Free Software
25
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
26
+ *
27
+ * This program is also available under a commercial proprietary license.
28
+ * For more information, contact us at licensing@x264.com.
29
+ *****************************************************************************/
30
+
31
+#include "output.h"
32
+#include <lsmash.h>
33
+
34
+#define H264_NALU_LENGTH_SIZE 4
35
+
36
+/*******************/
37
+
38
+#define MP4_LOG_ERROR( ... ) x264_cli_log( "mp4", X264_LOG_ERROR, __VA_ARGS__ )
39
+#define MP4_LOG_WARNING( ... ) x264_cli_log( "mp4", X264_LOG_WARNING, __VA_ARGS__ )
40
+#define MP4_LOG_INFO( ... ) x264_cli_log( "mp4", X264_LOG_INFO, __VA_ARGS__ )
41
+#define MP4_FAIL_IF_ERR( cond, ... ) FAIL_IF_ERR( cond, "mp4", __VA_ARGS__ )
42
+
43
+/* For close_file() */
44
+#define MP4_LOG_IF_ERR( cond, ... )\
45
+if( cond )\
46
+{\
47
+ MP4_LOG_ERROR( __VA_ARGS__ );\
48
+}
49
+
50
+/* For open_file() */
51
+#define MP4_FAIL_IF_ERR_EX( cond, ... )\
52
+if( cond )\
53
+{\
54
+ remove_mp4_hnd( p_mp4 );\
55
+ MP4_LOG_ERROR( __VA_ARGS__ );\
56
+ return -1;\
57
+}
58
+
59
+/*******************/
60
+
61
+typedef struct
62
+{
63
+ lsmash_root_t *p_root;
64
+ lsmash_video_summary_t *summary;
65
+ int b_stdout;
66
+ uint32_t i_movie_timescale;
67
+ uint32_t i_video_timescale;
68
+ uint32_t i_track;
69
+ uint32_t i_sample_entry;
70
+ uint64_t i_time_inc;
71
+ int64_t i_start_offset;
72
+ uint64_t i_first_cts;
73
+ uint64_t i_prev_dts;
74
+ uint32_t i_sei_size;
75
+ uint8_t *p_sei_buffer;
76
+ int i_numframe;
77
+ int64_t i_init_delta;
78
+ int i_delay_frames;
79
+ int b_dts_compress;
80
+ int i_dts_compress_multiplier;
81
+ int b_use_recovery;
82
+ int b_fragments;
83
+} mp4_hnd_t;
84
+
85
+/*******************/
86
+
87
+static void remove_mp4_hnd( hnd_t handle )
88
+{
89
+ mp4_hnd_t *p_mp4 = handle;
90
+ if( !p_mp4 )
91
+ return;
92
+ if( p_mp4->p_sei_buffer )
93
+ {
94
+ free( p_mp4->p_sei_buffer );
95
+ p_mp4->p_sei_buffer = NULL;
96
+ }
97
+ if( p_mp4->p_root )
98
+ {
99
+ lsmash_destroy_root( p_mp4->p_root );
100
+ p_mp4->p_root = NULL;
101
+ }
102
+ free( p_mp4 );
103
+}
104
+
105
+/*******************/
106
+
107
+static int close_file( hnd_t handle, int64_t largest_pts, int64_t second_largest_pts )
108
+{
109
+ mp4_hnd_t *p_mp4 = handle;
110
+
111
+ if( !p_mp4 )
112
+ return 0;
113
+
114
+ if( p_mp4->p_root )
115
+ {
116
+ double actual_duration = 0;
117
+ if( p_mp4->i_track )
118
+ {
119
+ /* Flush the rest of samples and add the last sample_delta. */
120
+ uint32_t last_delta = largest_pts - second_largest_pts;
121
+ MP4_LOG_IF_ERR( lsmash_flush_pooled_samples( p_mp4->p_root, p_mp4->i_track, (last_delta ? last_delta : 1) * p_mp4->i_time_inc ),
122
+ "failed to flush the rest of samples.\n" );
123
+
124
+ if( p_mp4->i_movie_timescale != 0 && p_mp4->i_video_timescale != 0 ) /* avoid zero division */
125
+ actual_duration = ((double)((largest_pts + last_delta) * p_mp4->i_time_inc) / p_mp4->i_video_timescale) * p_mp4->i_movie_timescale;
126
+ else
127
+ MP4_LOG_ERROR( "timescale is broken.\n" );
128
+
129
+ /*
130
+ * Declare the explicit time-line mapping.
131
+ * A segment_duration is given by movie timescale, while a media_time that is the start time of this segment
132
+ * is given by not the movie timescale but rather the media timescale.
133
+ * The reason is that ISO media have two time-lines, presentation and media time-line,
134
+ * and an edit maps the presentation time-line to the media time-line.
135
+ * According to QuickTime file format specification and the actual playback in QuickTime Player,
136
+ * if the Edit Box doesn't exist in the track, the ratio of the summation of sample durations and track's duration becomes
137
+ * the track's media_rate so that the entire media can be used by the track.
138
+ * So, we add Edit Box here to avoid this implicit media_rate could distort track's presentation timestamps slightly.
139
+ * Note: Any demuxers should follow the Edit List Box if it exists.
140
+ */
141
+ lsmash_edit_t edit;
142
+ edit.duration = actual_duration;
143
+ edit.start_time = p_mp4->i_first_cts;
144
+ edit.rate = ISOM_EDIT_MODE_NORMAL;
145
+ if( !p_mp4->b_fragments )
146
+ {
147
+ MP4_LOG_IF_ERR( lsmash_create_explicit_timeline_map( p_mp4->p_root, p_mp4->i_track, edit ),
148
+ "failed to set timeline map for video.\n" );
149
+ }
150
+ else if( !p_mp4->b_stdout )
151
+ MP4_LOG_IF_ERR( lsmash_modify_explicit_timeline_map( p_mp4->p_root, p_mp4->i_track, 1, edit ),
152
+ "failed to update timeline map for video.\n" );
153
+ }
154
+
155
+ MP4_LOG_IF_ERR( lsmash_finish_movie( p_mp4->p_root, NULL ), "failed to finish movie.\n" );
156
+ }
157
+
158
+ remove_mp4_hnd( p_mp4 ); /* including lsmash_destroy_root( p_mp4->p_root ); */
159
+
160
+ return 0;
161
+}
162
+
163
+static int open_file( char *psz_filename, hnd_t *p_handle, cli_output_opt_t *opt )
164
+{
165
+ *p_handle = NULL;
166
+
167
+ int b_regular = strcmp( psz_filename, "-" );
168
+ b_regular = b_regular && x264_is_regular_file_path( psz_filename );
169
+ if( b_regular )
170
+ {
171
+ FILE *fh = x264_fopen( psz_filename, "wb" );
172
+ MP4_FAIL_IF_ERR( !fh, "cannot open output file `%s'.\n", psz_filename );
173
+ b_regular = x264_is_regular_file( fh );
174
+ fclose( fh );
175
+ }
176
+
177
+ mp4_hnd_t *p_mp4 = calloc( 1, sizeof(mp4_hnd_t) );
178
+ MP4_FAIL_IF_ERR( !p_mp4, "failed to allocate memory for muxer information.\n" );
179
+
180
+ p_mp4->b_dts_compress = opt->use_dts_compress;
181
+ p_mp4->b_use_recovery = 0; // we don't really support recovery
182
+ p_mp4->b_fragments = !b_regular;
183
+ p_mp4->b_stdout = !strcmp( psz_filename, "-" );
184
+
185
+ p_mp4->p_root = lsmash_open_movie( psz_filename, p_mp4->b_fragments ? LSMASH_FILE_MODE_WRITE_FRAGMENTED : LSMASH_FILE_MODE_WRITE );
186
+ MP4_FAIL_IF_ERR_EX( !p_mp4->p_root, "failed to create root.\n" );
187
+
188
+ p_mp4->summary = (lsmash_video_summary_t *)lsmash_create_summary( LSMASH_SUMMARY_TYPE_VIDEO );
189
+ MP4_FAIL_IF_ERR_EX( !p_mp4->summary,
190
+ "failed to allocate memory for summary information of video.\n" );
191
+ p_mp4->summary->sample_type = ISOM_CODEC_TYPE_AVC1_VIDEO;
192
+
193
+ *p_handle = p_mp4;
194
+
195
+ return 0;
196
+}
197
+
198
+static int set_param( hnd_t handle, x264_param_t *p_param )
199
+{
200
+ mp4_hnd_t *p_mp4 = handle;
201
+ uint64_t i_media_timescale;
202
+
203
+ p_mp4->i_delay_frames = p_param->i_bframe ? (p_param->i_bframe_pyramid ? 2 : 1) : 0;
204
+ p_mp4->i_dts_compress_multiplier = p_mp4->b_dts_compress * p_mp4->i_delay_frames + 1;
205
+
206
+ i_media_timescale = (uint64_t)p_param->i_timebase_den * p_mp4->i_dts_compress_multiplier;
207
+ p_mp4->i_time_inc = (uint64_t)p_param->i_timebase_num * p_mp4->i_dts_compress_multiplier;
208
+ MP4_FAIL_IF_ERR( i_media_timescale > UINT32_MAX, "MP4 media timescale %"PRIu64" exceeds maximum\n", i_media_timescale );
209
+
210
+ /* Select brands. */
211
+ lsmash_brand_type brands[6] = { 0 };
212
+ uint32_t brand_count = 0;
213
+ brands[brand_count++] = ISOM_BRAND_TYPE_MP42;
214
+ brands[brand_count++] = ISOM_BRAND_TYPE_MP41;
215
+ brands[brand_count++] = ISOM_BRAND_TYPE_ISOM;
216
+ if( p_mp4->b_use_recovery )
217
+ {
218
+ brands[brand_count++] = ISOM_BRAND_TYPE_AVC1; /* sdtp, sgpd, sbgp and visual roll recovery grouping */
219
+ if( p_param->b_open_gop )
220
+ brands[brand_count++] = ISOM_BRAND_TYPE_ISO6; /* cslg and visual random access grouping */
221
+ }
222
+
223
+ /* Set movie parameters. */
224
+ lsmash_movie_parameters_t movie_param;
225
+ lsmash_initialize_movie_parameters( &movie_param );
226
+ movie_param.major_brand = ISOM_BRAND_TYPE_MP42;
227
+ movie_param.brands = brands;
228
+ movie_param.number_of_brands = brand_count;
229
+ MP4_FAIL_IF_ERR( lsmash_set_movie_parameters( p_mp4->p_root, &movie_param ),
230
+ "failed to set movie parameters.\n" );
231
+ p_mp4->i_movie_timescale = lsmash_get_movie_timescale( p_mp4->p_root );
232
+ MP4_FAIL_IF_ERR( !p_mp4->i_movie_timescale, "movie timescale is broken.\n" );
233
+
234
+ /* Create a video track. */
235
+ p_mp4->i_track = lsmash_create_track( p_mp4->p_root, ISOM_MEDIA_HANDLER_TYPE_VIDEO_TRACK );
236
+ MP4_FAIL_IF_ERR( !p_mp4->i_track, "failed to create a video track.\n" );
237
+
238
+ p_mp4->summary->width = p_param->i_width;
239
+ p_mp4->summary->height = p_param->i_height;
240
+ uint32_t i_display_width = p_param->i_width << 16;
241
+ uint32_t i_display_height = p_param->i_height << 16;
242
+ if( p_param->vui.i_sar_width && p_param->vui.i_sar_height )
243
+ {
244
+ double sar = (double)p_param->vui.i_sar_width / p_param->vui.i_sar_height;
245
+ if( sar > 1.0 )
246
+ i_display_width *= sar;
247
+ else
248
+ i_display_height /= sar;
249
+ p_mp4->summary->par_h = p_param->vui.i_sar_width;
250
+ p_mp4->summary->par_v = p_param->vui.i_sar_height;
251
+ }
252
+ p_mp4->summary->color.primaries_index = p_param->vui.i_colorprim;
253
+ p_mp4->summary->color.transfer_index = p_param->vui.i_transfer;
254
+ p_mp4->summary->color.matrix_index = p_param->vui.i_colmatrix >= 0 ? p_param->vui.i_colmatrix : ISOM_MATRIX_INDEX_UNSPECIFIED;
255
+ p_mp4->summary->color.full_range = p_param->vui.b_fullrange >= 0 ? p_param->vui.b_fullrange : 0;
256
+
257
+ /* Set video track parameters. */
258
+ lsmash_track_parameters_t track_param;
259
+ lsmash_initialize_track_parameters( &track_param );
260
+ lsmash_track_mode track_mode = ISOM_TRACK_ENABLED | ISOM_TRACK_IN_MOVIE | ISOM_TRACK_IN_PREVIEW;
261
+ track_param.mode = track_mode;
262
+ track_param.display_width = i_display_width;
263
+ track_param.display_height = i_display_height;
264
+ MP4_FAIL_IF_ERR( lsmash_set_track_parameters( p_mp4->p_root, p_mp4->i_track, &track_param ),
265
+ "failed to set track parameters for video.\n" );
266
+
267
+ /* Set video media parameters. */
268
+ lsmash_media_parameters_t media_param;
269
+ lsmash_initialize_media_parameters( &media_param );
270
+ media_param.timescale = i_media_timescale;
271
+ media_param.media_handler_name = "L-SMASH Video Media Handler";
272
+ if( p_mp4->b_use_recovery )
273
+ {
274
+ media_param.roll_grouping = p_param->b_intra_refresh;
275
+ media_param.rap_grouping = p_param->b_open_gop;
276
+ }
277
+ MP4_FAIL_IF_ERR( lsmash_set_media_parameters( p_mp4->p_root, p_mp4->i_track, &media_param ),
278
+ "failed to set media parameters for video.\n" );
279
+ p_mp4->i_video_timescale = lsmash_get_media_timescale( p_mp4->p_root, p_mp4->i_track );
280
+ MP4_FAIL_IF_ERR( !p_mp4->i_video_timescale, "media timescale for video is broken.\n" );
281
+
282
+ return 0;
283
+}
284
+
285
+static int write_headers( hnd_t handle, x264_nal_t *p_nal )
286
+{
287
+ mp4_hnd_t *p_mp4 = handle;
288
+
289
+ uint32_t sps_size = p_nal[0].i_payload - H264_NALU_LENGTH_SIZE;
290
+ uint32_t pps_size = p_nal[1].i_payload - H264_NALU_LENGTH_SIZE;
291
+ uint32_t sei_size = p_nal[2].i_payload;
292
+
293
+ uint8_t *sps = p_nal[0].p_payload + H264_NALU_LENGTH_SIZE;
294
+ uint8_t *pps = p_nal[1].p_payload + H264_NALU_LENGTH_SIZE;
295
+ uint8_t *sei = p_nal[2].p_payload;
296
+
297
+ lsmash_codec_specific_t *cs = lsmash_create_codec_specific_data( LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_VIDEO_H264,
298
+ LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED );
299
+
300
+ lsmash_h264_specific_parameters_t *param = (lsmash_h264_specific_parameters_t *)cs->data.structured;
301
+ param->lengthSizeMinusOne = H264_NALU_LENGTH_SIZE - 1;
302
+
303
+ /* SPS
304
+ * The remaining parameters are automatically set by SPS. */
305
+ if( lsmash_append_h264_parameter_set( param, H264_PARAMETER_SET_TYPE_SPS, sps, sps_size ) )
306
+ {
307
+ MP4_LOG_ERROR( "failed to append SPS.\n" );
308
+ return -1;
309
+ }
310
+
311
+ /* PPS */
312
+ if( lsmash_append_h264_parameter_set( param, H264_PARAMETER_SET_TYPE_PPS, pps, pps_size ) )
313
+ {
314
+ MP4_LOG_ERROR( "failed to append PPS.\n" );
315
+ return -1;
316
+ }
317
+
318
+ if( lsmash_add_codec_specific_data( (lsmash_summary_t *)p_mp4->summary, cs ) )
319
+ {
320
+ MP4_LOG_ERROR( "failed to add H.264 specific info.\n" );
321
+ return -1;
322
+ }
323
+
324
+ lsmash_destroy_codec_specific_data( cs );
325
+
326
+ /* Additional extensions */
327
+ /* Bitrate info */
328
+ cs = lsmash_create_codec_specific_data( LSMASH_CODEC_SPECIFIC_DATA_TYPE_ISOM_VIDEO_H264_BITRATE,
329
+ LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED );
330
+ if( cs )
331
+ lsmash_add_codec_specific_data( (lsmash_summary_t *)p_mp4->summary, cs );
332
+ lsmash_destroy_codec_specific_data( cs );
333
+
334
+ p_mp4->i_sample_entry = lsmash_add_sample_entry( p_mp4->p_root, p_mp4->i_track, p_mp4->summary );
335
+ MP4_FAIL_IF_ERR( !p_mp4->i_sample_entry,
336
+ "failed to add sample entry for video.\n" );
337
+
338
+ /* SEI */
339
+ p_mp4->p_sei_buffer = malloc( sei_size );
340
+ MP4_FAIL_IF_ERR( !p_mp4->p_sei_buffer,
341
+ "failed to allocate sei transition buffer.\n" );
342
+ memcpy( p_mp4->p_sei_buffer, sei, sei_size );
343
+ p_mp4->i_sei_size = sei_size;
344
+
345
+ return sei_size + sps_size + pps_size;
346
+}
347
+
348
+static int write_frame( hnd_t handle, uint8_t *p_nalu, int i_size, x264_picture_t *p_picture )
349
+{
350
+ mp4_hnd_t *p_mp4 = handle;
351
+ uint64_t dts, cts;
352
+
353
+ if( !p_mp4->i_numframe )
354
+ {
355
+ p_mp4->i_start_offset = p_picture->i_dts * -1;
356
+ p_mp4->i_first_cts = p_mp4->b_dts_compress ? 0 : p_mp4->i_start_offset * p_mp4->i_time_inc;
357
+ if( p_mp4->b_fragments )
358
+ {
359
+ lsmash_edit_t edit;
360
+ edit.duration = ISOM_EDIT_DURATION_UNKNOWN32; /* QuickTime doesn't support 64bit duration. */
361
+ edit.start_time = p_mp4->i_first_cts;
362
+ edit.rate = ISOM_EDIT_MODE_NORMAL;
363
+ MP4_LOG_IF_ERR( lsmash_create_explicit_timeline_map( p_mp4->p_root, p_mp4->i_track, edit ),
364
+ "failed to set timeline map for video.\n" );
365
+ }
366
+ }
367
+
368
+ lsmash_sample_t *p_sample = lsmash_create_sample( i_size + p_mp4->i_sei_size );
369
+ MP4_FAIL_IF_ERR( !p_sample,
370
+ "failed to create a video sample data.\n" );
371
+
372
+ if( p_mp4->p_sei_buffer )
373
+ {
374
+ memcpy( p_sample->data, p_mp4->p_sei_buffer, p_mp4->i_sei_size );
375
+ free( p_mp4->p_sei_buffer );
376
+ p_mp4->p_sei_buffer = NULL;
377
+ }
378
+
379
+ memcpy( p_sample->data + p_mp4->i_sei_size, p_nalu, i_size );
380
+ p_mp4->i_sei_size = 0;
381
+
382
+ if( p_mp4->b_dts_compress )
383
+ {
384
+ if( p_mp4->i_numframe == 1 )
385
+ p_mp4->i_init_delta = (p_picture->i_dts + p_mp4->i_start_offset) * p_mp4->i_time_inc;
386
+ dts = p_mp4->i_numframe > p_mp4->i_delay_frames
387
+ ? p_picture->i_dts * p_mp4->i_time_inc
388
+ : p_mp4->i_numframe * (p_mp4->i_init_delta / p_mp4->i_dts_compress_multiplier);
389
+ cts = p_picture->i_pts * p_mp4->i_time_inc;
390
+ }
391
+ else
392
+ {
393
+ dts = (p_picture->i_dts + p_mp4->i_start_offset) * p_mp4->i_time_inc;
394
+ cts = (p_picture->i_pts + p_mp4->i_start_offset) * p_mp4->i_time_inc;
395
+ }
396
+
397
+ p_sample->dts = dts;
398
+ p_sample->cts = cts;
399
+ p_sample->index = p_mp4->i_sample_entry;
400
+ p_sample->prop.ra_flags = p_picture->b_keyframe ? ISOM_SAMPLE_RANDOM_ACCESS_FLAG_SYNC : ISOM_SAMPLE_RANDOM_ACCESS_FLAG_NONE;
401
+
402
+ if( p_mp4->b_fragments && p_mp4->i_numframe && p_sample->prop.ra_flags != ISOM_SAMPLE_RANDOM_ACCESS_FLAG_NONE )
403
+ {
404
+ MP4_FAIL_IF_ERR( lsmash_flush_pooled_samples( p_mp4->p_root, p_mp4->i_track, p_sample->dts - p_mp4->i_prev_dts ),
405
+ "failed to flush the rest of samples.\n" );
406
+ MP4_FAIL_IF_ERR( lsmash_create_fragment_movie( p_mp4->p_root ),
407
+ "failed to create a movie fragment.\n" );
408
+ }
409
+
410
+ /* Append data per sample. */
411
+ MP4_FAIL_IF_ERR( lsmash_append_sample( p_mp4->p_root, p_mp4->i_track, p_sample ),
412
+ "failed to append a video frame.\n" );
413
+
414
+ p_mp4->i_prev_dts = dts;
415
+ p_mp4->i_numframe++;
416
+
417
+ return i_size;
418
+}
419
+
420
+const cli_output_t mp4_output = { open_file, set_param, write_headers, write_frame, close_file };
421
x264-snapshot-20130723-2245.tar.bz2/output/output.h -> x264-snapshot-20140321-2245.tar.bz2/output/output.h
Changed
10
1
2
/*****************************************************************************
3
* output.h: x264 file output modules
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
* Loren Merritt <lorenm@u.washington.edu>
10
x264-snapshot-20130723-2245.tar.bz2/output/raw.c -> x264-snapshot-20140321-2245.tar.bz2/output/raw.c
Changed
19
1
2
/*****************************************************************************
3
* raw.c: raw muxer
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
* Loren Merritt <lorenm@u.washington.edu>
10
11
{
12
if( !strcmp( psz_filename, "-" ) )
13
*p_handle = stdout;
14
- else if( !(*p_handle = fopen( psz_filename, "w+b" )) )
15
+ else if( !(*p_handle = x264_fopen( psz_filename, "w+b" )) )
16
return -1;
17
18
return 0;
19
x264-snapshot-20130723-2245.tar.bz2/tools/checkasm-a.asm -> x264-snapshot-20140321-2245.tar.bz2/tools/checkasm-a.asm
Changed
10
1
2
;*****************************************************************************
3
;* checkasm-a.asm: assembly check tool
4
;*****************************************************************************
5
-;* Copyright (C) 2008-2013 x264 project
6
+;* Copyright (C) 2008-2014 x264 project
7
;*
8
;* Authors: Loren Merritt <lorenm@u.washington.edu>
9
;* Henrik Gramner <henrik@gramner.com>
10
x264-snapshot-20130723-2245.tar.bz2/tools/checkasm.c -> x264-snapshot-20140321-2245.tar.bz2/tools/checkasm.c
Changed
219
1
2
/*****************************************************************************
3
* checkasm.c: assembly check tool
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Loren Merritt <lorenm@u.washington.edu>
9
* Laurent Aimar <fenrir@via.ecp.fr>
10
11
b->cpu&X264_CPU_SLOW_ATOM && b->cpu&X264_CPU_CACHELINE_64 ? "_c64_atom" :
12
b->cpu&X264_CPU_CACHELINE_64 ? "_c64" :
13
b->cpu&X264_CPU_SLOW_SHUFFLE ? "_slowshuffle" :
14
- b->cpu&X264_CPU_SSE_MISALIGN ? "_misalign" :
15
b->cpu&X264_CPU_LZCNT ? "_lzcnt" :
16
b->cpu&X264_CPU_BMI2 ? "_bmi2" :
17
b->cpu&X264_CPU_BMI1 ? "_bmi1" :
18
19
b->cpu&X264_CPU_FAST_NEON_MRC ? "_fast_mrc" :
20
#endif
21
"",
22
- ((int64_t)10*b->cycles/b->den - nop_time)/4 );
23
+ (int64_t)(10*b->cycles/b->den - nop_time)/4 );
24
}
25
}
26
27
28
} \
29
else \
30
call_a( pixel_asm.sad_x3[i], pbuf1, pix2, pix2+6, pix2+1, (intptr_t)64, res_asm ); \
31
- if( memcmp(res_c, res_asm, sizeof(res_c)) ) \
32
+ if( memcmp(res_c, res_asm, N*sizeof(int)) ) \
33
{ \
34
ok = 0; \
35
fprintf( stderr, "sad_x"#N"[%d]: %d,%d,%d,%d != %d,%d,%d,%d [FAILED]\n", \
36
37
}
38
}
39
}
40
+
41
+ if( mc_a.plane_copy_deinterleave_rgb != mc_ref.plane_copy_deinterleave_rgb )
42
+ {
43
+ set_func_name( "plane_copy_deinterleave_rgb" );
44
+ used_asm = 1;
45
+ for( int i = 0; i < sizeof(plane_specs)/sizeof(*plane_specs); i++ )
46
+ {
47
+ int w = (plane_specs[i].w + 2) >> 2;
48
+ int h = plane_specs[i].h;
49
+ intptr_t src_stride = plane_specs[i].src_stride;
50
+ intptr_t dst_stride = ALIGN( w, 16 );
51
+ intptr_t offv = dst_stride*h + 16;
52
+
53
+ for( int pw = 3; pw <= 4; pw++ )
54
+ {
55
+ memset( pbuf3, 0, 0x1000 );
56
+ memset( pbuf4, 0, 0x1000 );
57
+ call_c( mc_c.plane_copy_deinterleave_rgb, pbuf3, dst_stride, pbuf3+offv, dst_stride, pbuf3+2*offv, dst_stride, pbuf1, src_stride, pw, w, h );
58
+ call_a( mc_a.plane_copy_deinterleave_rgb, pbuf4, dst_stride, pbuf4+offv, dst_stride, pbuf4+2*offv, dst_stride, pbuf1, src_stride, pw, w, h );
59
+ for( int y = 0; y < h; y++ )
60
+ if( memcmp( pbuf3+y*dst_stride+0*offv, pbuf4+y*dst_stride+0*offv, w ) ||
61
+ memcmp( pbuf3+y*dst_stride+1*offv, pbuf4+y*dst_stride+1*offv, w ) ||
62
+ memcmp( pbuf3+y*dst_stride+2*offv, pbuf4+y*dst_stride+2*offv, w ) )
63
+ {
64
+ ok = 0;
65
+ fprintf( stderr, "plane_copy_deinterleave_rgb FAILED: w=%d h=%d stride=%d pw=%d\n", w, h, (int)src_stride, pw );
66
+ break;
67
+ }
68
+ }
69
+ }
70
+ }
71
report( "plane_copy :" );
72
73
+ if( mc_a.plane_copy_deinterleave_v210 != mc_ref.plane_copy_deinterleave_v210 )
74
+ {
75
+ set_func_name( "plane_copy_deinterleave_v210" );
76
+ used_asm = 1;
77
+ for( int i = 0; i < sizeof(plane_specs)/sizeof(*plane_specs); i++ )
78
+ {
79
+ int w = (plane_specs[i].w + 1) >> 1;
80
+ int h = plane_specs[i].h;
81
+ intptr_t dst_stride = ALIGN( w, 16 );
82
+ intptr_t src_stride = (w + 47) / 48 * 128 / sizeof(uint32_t);
83
+ intptr_t offv = dst_stride*h + 32;
84
+ memset( pbuf3, 0, 0x1000 );
85
+ memset( pbuf4, 0, 0x1000 );
86
+ call_c( mc_c.plane_copy_deinterleave_v210, pbuf3, dst_stride, pbuf3+offv, dst_stride, (uint32_t *)buf1, src_stride, w, h );
87
+ call_a( mc_a.plane_copy_deinterleave_v210, pbuf4, dst_stride, pbuf4+offv, dst_stride, (uint32_t *)buf1, src_stride, w, h );
88
+ for( int y = 0; y < h; y++ )
89
+ if( memcmp( pbuf3+y*dst_stride, pbuf4+y*dst_stride, w*sizeof(uint16_t) ) ||
90
+ memcmp( pbuf3+y*dst_stride+offv, pbuf4+y*dst_stride+offv, w*sizeof(uint16_t) ) )
91
+ {
92
+ ok = 0;
93
+ fprintf( stderr, "plane_copy_deinterleave_v210 FAILED: w=%d h=%d stride=%d\n", w, h, (int)src_stride );
94
+ break;
95
+ }
96
+ }
97
+ }
98
+ report( "v210 :" );
99
+
100
if( mc_a.hpel_filter != mc_ref.hpel_filter )
101
{
102
pixel *srchpel = pbuf1+8+2*64;
103
104
INTEGRAL_INIT( integral_init8v, 9, sum, stride );
105
report( "integral init :" );
106
107
+ ok = 1; used_asm = 0;
108
if( mc_a.mbtree_propagate_cost != mc_ref.mbtree_propagate_cost )
109
{
110
- ok = 1; used_asm = 1;
111
+ used_asm = 1;
112
x264_emms();
113
for( int i = 0; i < 10; i++ )
114
{
115
- float fps_factor = (rand()&65535) / 256.;
116
- set_func_name( "mbtree_propagate" );
117
- int *dsta = (int*)buf3;
118
- int *dstc = dsta+400;
119
+ float fps_factor = (rand()&65535) / 65535.0f;
120
+ set_func_name( "mbtree_propagate_cost" );
121
+ int16_t *dsta = (int16_t*)buf3;
122
+ int16_t *dstc = dsta+400;
123
uint16_t *prop = (uint16_t*)buf1;
124
uint16_t *intra = (uint16_t*)buf4;
125
uint16_t *inter = intra+128;
126
127
{
128
ok &= abs( dstc[j]-dsta[j] ) <= 1 || fabs( (double)dstc[j]/dsta[j]-1 ) < 1e-4;
129
if( !ok )
130
- fprintf( stderr, "mbtree_propagate FAILED: %f !~= %f\n", (double)dstc[j], (double)dsta[j] );
131
+ fprintf( stderr, "mbtree_propagate_cost FAILED: %f !~= %f\n", (double)dstc[j], (double)dsta[j] );
132
}
133
}
134
- report( "mbtree propagate :" );
135
}
136
137
+ if( mc_a.mbtree_propagate_list != mc_ref.mbtree_propagate_list )
138
+ {
139
+ used_asm = 1;
140
+ for( int i = 0; i < 8; i++ )
141
+ {
142
+ set_func_name( "mbtree_propagate_list" );
143
+ x264_t h;
144
+ int height = 4;
145
+ int width = 128;
146
+ int size = width*height;
147
+ h.mb.i_mb_stride = width;
148
+ h.mb.i_mb_width = width;
149
+ h.mb.i_mb_height = height;
150
+
151
+ uint16_t *ref_costsc = (uint16_t*)buf3;
152
+ uint16_t *ref_costsa = (uint16_t*)buf4;
153
+ int16_t (*mvs)[2] = (int16_t(*)[2])(ref_costsc + size);
154
+ int16_t *propagate_amount = (int16_t*)(mvs + width);
155
+ uint16_t *lowres_costs = (uint16_t*)(propagate_amount + width);
156
+ h.scratch_buffer2 = (uint8_t*)(ref_costsa + size);
157
+ int bipred_weight = (rand()%63)+1;
158
+ int list = i&1;
159
+ for( int j = 0; j < size; j++ )
160
+ ref_costsc[j] = ref_costsa[j] = rand()&32767;
161
+ for( int j = 0; j < width; j++ )
162
+ {
163
+ static const uint8_t list_dist[2][8] = {{0,1,1,1,1,1,1,1},{1,1,3,3,3,3,3,2}};
164
+ for( int k = 0; k < 2; k++ )
165
+ mvs[j][k] = (rand()&127) - 64;
166
+ propagate_amount[j] = rand()&32767;
167
+ lowres_costs[j] = list_dist[list][rand()&7] << LOWRES_COST_SHIFT;
168
+ }
169
+
170
+ call_c1( mc_c.mbtree_propagate_list, &h, ref_costsc, mvs, propagate_amount, lowres_costs, bipred_weight, 0, width, list );
171
+ call_a1( mc_a.mbtree_propagate_list, &h, ref_costsa, mvs, propagate_amount, lowres_costs, bipred_weight, 0, width, list );
172
+
173
+ for( int j = 0; j < size && ok; j++ )
174
+ {
175
+ ok &= abs(ref_costsa[j] - ref_costsc[j]) <= 1;
176
+ if( !ok )
177
+ fprintf( stderr, "mbtree_propagate_list FAILED at %d: %d !~= %d\n", j, ref_costsc[j], ref_costsa[j] );
178
+ }
179
+
180
+ call_c2( mc_c.mbtree_propagate_list, &h, ref_costsc, mvs, propagate_amount, lowres_costs, bipred_weight, 0, width, list );
181
+ call_a2( mc_a.mbtree_propagate_list, &h, ref_costsa, mvs, propagate_amount, lowres_costs, bipred_weight, 0, width, list );
182
+ }
183
+ }
184
+ report( "mbtree :" );
185
+
186
if( mc_a.memcpy_aligned != mc_ref.memcpy_aligned )
187
{
188
set_func_name( "memcpy_aligned" );
189
190
}
191
192
h->param.rc.i_qp_min = 0;
193
- h->param.rc.i_qp_max = QP_MAX;
194
+ h->param.rc.i_qp_max = QP_MAX_SPEC;
195
x264_cqm_init( h );
196
x264_quant_init( h, 0, &qf_c );
197
x264_quant_init( h, cpu_ref, &qf_ref );
198
199
{
200
*cpu_ref = *cpu_new;
201
*cpu_new |= flags;
202
-#if BROKEN_STACK_ALIGNMENT
203
+#if STACK_ALIGNMENT < 16
204
*cpu_new |= X264_CPU_STACK_MOD4;
205
#endif
206
if( *cpu_new & X264_CPU_SSE2_IS_FAST )
207
208
ret |= add_flags( &cpu0, &cpu1, X264_CPU_SLOW_CTZ, "SSE2 SlowCTZ" );
209
cpu1 &= ~X264_CPU_SLOW_CTZ;
210
}
211
- if( x264_cpu_detect() & X264_CPU_SSE_MISALIGN )
212
- {
213
- ret |= add_flags( &cpu0, &cpu1, X264_CPU_SSE_MISALIGN, "SSE_Misalign" );
214
- cpu1 &= ~X264_CPU_SSE_MISALIGN;
215
- }
216
if( x264_cpu_detect() & X264_CPU_LZCNT )
217
{
218
ret |= add_flags( &cpu0, &cpu1, X264_CPU_LZCNT, "SSE_LZCNT" );
219
x264-snapshot-20130723-2245.tar.bz2/tools/cltostr.pl -> x264-snapshot-20140321-2245.tar.bz2/tools/cltostr.pl
Changed
9
1
2
# Perl script used for compiling OpenCL src into x264 binary
3
#
4
-# Copyright (C) 2013 x264 project
5
+# Copyright (C) 2013-2014 x264 project
6
# Authors: Steve Borho <sborho@multicorewareinc.com>
7
8
use Digest::MD5 qw(md5_hex);
9
x264-snapshot-20130723-2245.tar.bz2/version.sh -> x264-snapshot-20140321-2245.tar.bz2/version.sh
Changed
30
1
2
-#!/bin/bash
3
+#!/bin/sh
4
[ -n "$1" ] && cd $1
5
+
6
+git_version() {
7
+trap 'rm -f config.git-hash' EXIT
8
git rev-list HEAD | sort > config.git-hash
9
LOCALVER=`wc -l config.git-hash | awk '{print $1}'`
10
if [ $LOCALVER \> 1 ] ; then
11
12
VER="${VER}M"
13
fi
14
VER="$VER $(git rev-list HEAD -n 1 | cut -c 1-7)"
15
- echo "#define X264_VERSION \" r$VER\""
16
-else
17
- echo "#define X264_VERSION \"\""
18
- VER="x"
19
+ VERSION=" r$VER"
20
fi
21
-rm -f config.git-hash
22
+}
23
+
24
+VER="x"
25
+VERSION=""
26
+[ -d .git ] && (type git >/dev/null 2>&1) && git_version
27
+echo "#define X264_VERSION \"$VERSION\""
28
API=`grep '#define X264_BUILD' < x264.h | sed -e 's/.* \([1-9][0-9]*\).*/\1/'`
29
echo "#define X264_POINTVER \"0.$API.$VER\""
30
x264-snapshot-20130723-2245.tar.bz2/x264.c -> x264-snapshot-20140321-2245.tar.bz2/x264.c
Changed
418
1
2
/*****************************************************************************
3
* x264: top-level x264cli functions
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Loren Merritt <lorenm@u.washington.edu>
9
* Laurent Aimar <fenrir@via.ecp.fr>
10
* Steven Walters <kemuri9@gmail.com>
11
* Jason Garrett-Glaser <darkshikari@gmail.com>
12
* Kieran Kunhya <kieran@kunhya.com>
13
+ * Henrik Gramner <henrik@gramner.com>
14
*
15
* This program is free software; you can redistribute it and/or modify
16
* it under the terms of the GNU General Public License as published by
17
18
* For more information, contact us at licensing@x264.com.
19
*****************************************************************************/
20
21
+#ifdef _WIN32
22
+/* The following two defines must be located before the inclusion of any system header files. */
23
+#define WINVER 0x0500
24
+#define _WIN32_WINNT 0x0500
25
+#include <windows.h>
26
+#include <io.h> /* _setmode() */
27
+#include <fcntl.h> /* _O_BINARY */
28
+#endif
29
+
30
#include <signal.h>
31
#define _GNU_SOURCE
32
#include <getopt.h>
33
34
35
#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "x264", __VA_ARGS__ )
36
37
-#ifdef _WIN32
38
-#include <windows.h>
39
-#else
40
-#define GetConsoleTitle(t,n)
41
-#define SetConsoleTitle(t)
42
-#endif
43
-
44
#if HAVE_LAVF
45
#undef DECLARE_ALIGNED
46
#include <libavformat/avformat.h>
47
48
#include <ffms.h>
49
#endif
50
51
+#ifdef _WIN32
52
+#define CONSOLE_TITLE_SIZE 200
53
+static wchar_t org_console_title[CONSOLE_TITLE_SIZE] = L"";
54
+
55
+void x264_cli_set_console_title( const char *title )
56
+{
57
+ wchar_t title_utf16[CONSOLE_TITLE_SIZE];
58
+ if( utf8_to_utf16( title, title_utf16 ) )
59
+ SetConsoleTitleW( title_utf16 );
60
+}
61
+
62
+static int utf16_to_ansi( const wchar_t *utf16, char *ansi, int size )
63
+{
64
+ int invalid;
65
+ return WideCharToMultiByte( CP_ACP, WC_NO_BEST_FIT_CHARS, utf16, -1, ansi, size, NULL, &invalid ) && !invalid;
66
+}
67
+
68
+/* Some external libraries don't support Unicode in filenames,
69
+ * as a workaround we can try to get an ANSI filename instead. */
70
+int x264_ansi_filename( const char *filename, char *ansi_filename, int size, int create_file )
71
+{
72
+ wchar_t filename_utf16[MAX_PATH];
73
+ if( utf8_to_utf16( filename, filename_utf16 ) )
74
+ {
75
+ if( create_file )
76
+ {
77
+ /* Create the file using the Unicode filename if it doesn't already exist. */
78
+ FILE *fh = _wfopen( filename_utf16, L"ab" );
79
+ if( fh )
80
+ fclose( fh );
81
+ }
82
+
83
+ /* Check if the filename already is valid ANSI. */
84
+ if( utf16_to_ansi( filename_utf16, ansi_filename, size ) )
85
+ return 1;
86
+
87
+ /* Check for a legacy 8.3 short filename. */
88
+ int short_length = GetShortPathNameW( filename_utf16, filename_utf16, MAX_PATH );
89
+ if( short_length > 0 && short_length < MAX_PATH )
90
+ if( utf16_to_ansi( filename_utf16, ansi_filename, size ) )
91
+ return 1;
92
+ }
93
+ return 0;
94
+}
95
+
96
+/* Retrieve command line arguments as UTF-8. */
97
+static int get_argv_utf8( int *argc_ptr, char ***argv_ptr )
98
+{
99
+ int ret = 0;
100
+ wchar_t **argv_utf16 = CommandLineToArgvW( GetCommandLineW(), argc_ptr );
101
+ if( argv_utf16 )
102
+ {
103
+ int argc = *argc_ptr;
104
+ int offset = (argc+1) * sizeof(char*);
105
+ int size = offset;
106
+
107
+ for( int i = 0; i < argc; i++ )
108
+ size += WideCharToMultiByte( CP_UTF8, 0, argv_utf16[i], -1, NULL, 0, NULL, NULL );
109
+
110
+ char **argv = *argv_ptr = malloc( size );
111
+ if( argv )
112
+ {
113
+ for( int i = 0; i < argc; i++ )
114
+ {
115
+ argv[i] = (char*)argv + offset;
116
+ offset += WideCharToMultiByte( CP_UTF8, 0, argv_utf16[i], -1, argv[i], size-offset, NULL, NULL );
117
+ }
118
+ argv[argc] = NULL;
119
+ ret = 1;
120
+ }
121
+ LocalFree( argv_utf16 );
122
+ }
123
+ return ret;
124
+}
125
+#endif
126
+
127
/* Ctrl-C handler */
128
static volatile int b_ctrl_c = 0;
129
-static int b_exit_on_ctrl_c = 0;
130
static void sigint_handler( int a )
131
{
132
- if( b_exit_on_ctrl_c )
133
- exit(0);
134
b_ctrl_c = 1;
135
}
136
137
-static char UNUSED originalCTitle[200] = "";
138
-
139
typedef struct {
140
int b_progress;
141
int i_seek;
142
143
"raw",
144
"mkv",
145
"flv",
146
-#if HAVE_GPAC
147
+#if HAVE_GPAC || HAVE_LSMASH
148
"mp4",
149
#endif
150
0
151
152
fprintf( stderr, "%s [%s]: ", name, s_level );
153
va_list arg;
154
va_start( arg, fmt );
155
- vfprintf( stderr, fmt, arg );
156
+ x264_vfprintf( stderr, fmt, arg );
157
va_end( arg );
158
}
159
160
161
return;
162
va_list arg;
163
va_start( arg, fmt );
164
- vfprintf( stderr, fmt, arg );
165
+ x264_vfprintf( stderr, fmt, arg );
166
va_end( arg );
167
}
168
169
170
FAIL_IF_ERROR( x264_threading_init(), "unable to initialize threading\n" )
171
172
#ifdef _WIN32
173
- _setmode(_fileno(stdin), _O_BINARY);
174
- _setmode(_fileno(stdout), _O_BINARY);
175
-#endif
176
+ FAIL_IF_ERROR( !get_argv_utf8( &argc, &argv ), "unable to convert command line to UTF-8\n" )
177
178
- GetConsoleTitle( originalCTitle, sizeof(originalCTitle) );
179
+ GetConsoleTitleW( org_console_title, CONSOLE_TITLE_SIZE );
180
+ _setmode( _fileno( stdin ), _O_BINARY );
181
+ _setmode( _fileno( stdout ), _O_BINARY );
182
+ _setmode( _fileno( stderr ), _O_BINARY );
183
+#endif
184
185
/* Parse command line */
186
if( parse( argc, argv, &param, &opt ) < 0 )
187
ret = -1;
188
189
+#ifdef _WIN32
190
/* Restore title; it can be changed by input modules */
191
- SetConsoleTitle( originalCTitle );
192
+ SetConsoleTitleW( org_console_title );
193
+#endif
194
195
/* Control-C handler */
196
signal( SIGINT, sigint_handler );
197
198
if( opt.qpfile )
199
fclose( opt.qpfile );
200
201
- SetConsoleTitle( originalCTitle );
202
+#ifdef _WIN32
203
+ SetConsoleTitleW( org_console_title );
204
+ free( argv );
205
+#endif
206
207
return ret;
208
}
209
210
printf( INDENT );
211
for( int i = X264_CSP_NONE+1; i < X264_CSP_CLI_MAX; i++ )
212
{
213
- printf( "%s", x264_cli_csps[i].name );
214
- if( i+1 < X264_CSP_CLI_MAX )
215
- printf( ", " );
216
+ if( x264_cli_csps[i].name )
217
+ {
218
+ printf( "%s", x264_cli_csps[i].name );
219
+ if( i+1 < X264_CSP_CLI_MAX )
220
+ printf( ", " );
221
+ }
222
}
223
#if HAVE_LAVF
224
printf( "\n" );
225
printf( " - valid csps for `lavf' demuxer:\n" );
226
printf( INDENT );
227
size_t line_len = strlen( INDENT );
228
- for( enum PixelFormat i = PIX_FMT_NONE+1; i < PIX_FMT_NB; i++ )
229
+ for( enum PixelFormat i = AV_PIX_FMT_NONE+1; i < AV_PIX_FMT_NB; i++ )
230
{
231
const char *pfname = av_get_pix_fmt_name( i );
232
if( pfname )
233
234
}
235
printf( "%s", pfname );
236
line_len += name_len;
237
- if( i+1 < PIX_FMT_NB )
238
+ if( i+1 < AV_PIX_FMT_NB )
239
{
240
printf( ", " );
241
line_len += 2;
242
243
" .264 -> Raw bytestream\n"
244
" .mkv -> Matroska\n"
245
" .flv -> Flash Video\n"
246
- " .mp4 -> MP4 if compiled with GPAC support (%s)\n"
247
+ " .mp4 -> MP4 if compiled with GPAC or L-SMASH support (%s)\n"
248
"Output bit depth: %d (configured at compile time)\n"
249
"\n"
250
"Options:\n"
251
252
"no",
253
#endif
254
#if HAVE_GPAC
255
- "yes",
256
+ "gpac",
257
+#elif HAVE_LSMASH
258
+ "lsmash",
259
#else
260
"no",
261
#endif
262
263
264
H2( " --nal-hrd <string> Signal HRD information (requires vbv-bufsize)\n"
265
" - none, vbr, cbr (cbr not allowed in .mp4)\n" );
266
+ H2( " --filler Force hard-CBR and generate filler (implied by\n"
267
+ " --nal-hrd cbr)\n" );
268
H2( " --pic-struct Force pic_struct in Picture Timing SEI\n" );
269
H2( " --crop-rect <string> Add 'left,top,right,bottom' to the bitstream-level\n"
270
" cropping rectangle\n" );
271
272
H0( " --frames <integer> Maximum number of frames to encode\n" );
273
H0( " --level <string> Specify level (as defined by Annex A)\n" );
274
H1( " --bluray-compat Enable compatibility hacks for Blu-ray support\n" );
275
+ H1( " --avcintra-class <integer> Use compatibility hacks for AVC-Intra class\n"
276
+ " - 50, 100, 200\n" );
277
H1( " --stitchable Don't optimize headers based on video content\n"
278
" Ensures ability to recombine a segmented encode\n" );
279
H1( "\n" );
280
281
H2( " --no-asm Disable all CPU optimizations\n" );
282
H2( " --opencl Enable use of OpenCL\n" );
283
H2( " --opencl-clbin <string> Specify path of compiled OpenCL kernel cache\n" );
284
- H2( " --opencl-device <integer> Specify OpenCL device ordinal\n" );
285
- H2( " --visualize Show MB types overlayed on the encoded video\n" );
286
+ H2( " --opencl-device <integer> Specify OpenCL device ordinal\n" );
287
H2( " --dump-yuv <string> Save reconstructed frames\n" );
288
H2( " --sps-id <integer> Set SPS and PPS id numbers [%d]\n", defaults->i_sps_id );
289
H2( " --aud Use access unit delimiters\n" );
290
291
OPT_THREAD_INPUT,
292
OPT_QUIET,
293
OPT_NOPROGRESS,
294
- OPT_VISUALIZE,
295
OPT_LONGHELP,
296
OPT_PROFILE,
297
OPT_PRESET,
298
299
{ "b-pyramid", required_argument, NULL, 0 },
300
{ "open-gop", no_argument, NULL, 0 },
301
{ "bluray-compat", no_argument, NULL, 0 },
302
+ { "avcintra-class", required_argument, NULL, 0 },
303
{ "min-keyint", required_argument, NULL, 'i' },
304
{ "keyint", required_argument, NULL, 'I' },
305
{ "intra-refresh", no_argument, NULL, 0 },
306
307
{ "verbose", no_argument, NULL, 'v' },
308
{ "log-level", required_argument, NULL, OPT_LOG_LEVEL },
309
{ "no-progress", no_argument, NULL, OPT_NOPROGRESS },
310
- { "visualize", no_argument, NULL, OPT_VISUALIZE },
311
{ "dump-yuv", required_argument, NULL, 0 },
312
{ "sps-id", required_argument, NULL, 0 },
313
{ "aud", no_argument, NULL, 0 },
314
315
{ "output-csp", required_argument, NULL, OPT_OUTPUT_CSP },
316
{ "input-range", required_argument, NULL, OPT_INPUT_RANGE },
317
{ "stitchable", no_argument, NULL, 0 },
318
+ { "filler", no_argument, NULL, 0 },
319
{0, 0, 0, 0}
320
};
321
322
323
324
if( !strcasecmp( ext, "mp4" ) )
325
{
326
-#if HAVE_GPAC
327
+#if HAVE_GPAC || HAVE_LSMASH
328
cli_output = mp4_output;
329
param->b_annexb = 0;
330
param->b_repeat_headers = 0;
331
332
b_regular = b_regular && x264_is_regular_file_path( filename );
333
if( b_regular )
334
{
335
- FILE *f = fopen( filename, "r" );
336
+ FILE *f = x264_fopen( filename, "r" );
337
if( f )
338
{
339
b_regular = x264_is_regular_file( f );
340
341
int csp = info->csp & X264_CSP_MASK;
342
if( output_csp == X264_CSP_I420 && (csp < X264_CSP_I420 || csp > X264_CSP_NV12) )
343
param->i_csp = X264_CSP_I420;
344
- else if( output_csp == X264_CSP_I422 && (csp < X264_CSP_I422 || csp > X264_CSP_NV16) )
345
+ else if( output_csp == X264_CSP_I422 && (csp < X264_CSP_I422 || csp > X264_CSP_V210) )
346
param->i_csp = X264_CSP_I422;
347
else if( output_csp == X264_CSP_I444 && (csp < X264_CSP_I444 || csp > X264_CSP_YV24) )
348
param->i_csp = X264_CSP_I444;
349
350
input_opt.index_file = optarg;
351
break;
352
case OPT_QPFILE:
353
- opt->qpfile = fopen( optarg, "rb" );
354
+ opt->qpfile = x264_fopen( optarg, "rb" );
355
FAIL_IF_ERROR( !opt->qpfile, "can't open qpfile `%s'\n", optarg )
356
if( !x264_is_regular_file( opt->qpfile ) )
357
{
358
359
case OPT_NOPROGRESS:
360
opt->b_progress = 0;
361
break;
362
- case OPT_VISUALIZE:
363
-#if HAVE_VISUALIZE
364
- param->b_visualize = 1;
365
- b_exit_on_ctrl_c = 1;
366
-#else
367
- x264_cli_log( "x264", X264_LOG_WARNING, "not compiled with visualization support\n" );
368
-#endif
369
- break;
370
case OPT_TUNE:
371
case OPT_PRESET:
372
break;
373
374
tcfile_name = optarg;
375
break;
376
case OPT_TCFILE_OUT:
377
- opt->tcfile_out = fopen( optarg, "wb" );
378
+ opt->tcfile_out = x264_fopen( optarg, "wb" );
379
FAIL_IF_ERROR( !opt->tcfile_out, "can't open `%s'\n", optarg )
380
break;
381
case OPT_TIMEBASE:
382
383
info.fps_den = param->i_fps_den;
384
info.fullrange = input_opt.input_range == RANGE_PC;
385
info.interlaced = param->b_interlaced;
386
- info.sar_width = param->vui.i_sar_width;
387
- info.sar_height = param->vui.i_sar_height;
388
+ if( param->vui.i_sar_width > 0 && param->vui.i_sar_height > 0 )
389
+ {
390
+ info.sar_width = param->vui.i_sar_width;
391
+ info.sar_height = param->vui.i_sar_height;
392
+ }
393
info.tff = param->b_tff;
394
info.vfr = param->b_vfr_input;
395
396
397
#endif
398
399
/* override detected values by those specified by the user */
400
- if( param->vui.i_sar_width && param->vui.i_sar_height )
401
+ if( param->vui.i_sar_width > 0 && param->vui.i_sar_height > 0 )
402
{
403
info.sar_width = param->vui.i_sar_width;
404
info.sar_height = param->vui.i_sar_height;
405
406
eta/3600, (eta/60)%60, eta%60 );
407
}
408
else
409
- {
410
sprintf( buf, "x264 %d frames: %.2f fps, %.2f kb/s", i_frame, fps, bitrate );
411
- }
412
fprintf( stderr, "%s \r", buf+5 );
413
- SetConsoleTitle( buf );
414
+ x264_cli_set_console_title( buf );
415
fflush( stderr ); // needed in windows
416
return i_time;
417
}
418
x264-snapshot-20130723-2245.tar.bz2/x264.h -> x264-snapshot-20140321-2245.tar.bz2/x264.h
Changed
182
1
2
/*****************************************************************************
3
* x264.h: x264 public header
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
* Loren Merritt <lorenm@u.washington.edu>
10
11
12
#include "x264_config.h"
13
14
-#define X264_BUILD 135
15
+#define X264_BUILD 142
16
17
/* Application developers planning to link against a shared library version of
18
* libx264 from a Microsoft Visual Studio or similar development environment
19
20
int i_first_mb; /* If this NAL is a slice, the index of the first MB in the slice. */
21
int i_last_mb; /* If this NAL is a slice, the index of the last MB in the slice. */
22
23
- /* Size of payload in bytes. */
24
+ /* Size of payload (including any padding) in bytes. */
25
int i_payload;
26
/* If param->b_annexb is set, Annex-B bytestream with startcode.
27
* Otherwise, startcode is replaced with a 4-byte size.
28
* This size is the size used in mp4/similar muxing; it is equal to i_payload-4 */
29
uint8_t *p_payload;
30
+
31
+ /* Size of padding in bytes. */
32
+ int i_padding;
33
} x264_nal_t;
34
35
/****************************************************************************
36
37
#define X264_CPU_SSSE3 0x0000040
38
#define X264_CPU_SSE4 0x0000080 /* SSE4.1 */
39
#define X264_CPU_SSE42 0x0000100 /* SSE4.2 */
40
-#define X264_CPU_SSE_MISALIGN 0x0000200 /* Phenom support for misaligned SSE instruction arguments */
41
-#define X264_CPU_LZCNT 0x0000400 /* Phenom support for "leading zero count" instruction. */
42
-#define X264_CPU_AVX 0x0000800 /* AVX support: requires OS support even if YMM registers aren't used. */
43
-#define X264_CPU_XOP 0x0001000 /* AMD XOP */
44
-#define X264_CPU_FMA4 0x0002000 /* AMD FMA4 */
45
-#define X264_CPU_AVX2 0x0004000 /* AVX2 */
46
-#define X264_CPU_FMA3 0x0008000 /* Intel FMA3 */
47
-#define X264_CPU_BMI1 0x0010000 /* BMI1 */
48
-#define X264_CPU_BMI2 0x0020000 /* BMI2 */
49
+#define X264_CPU_LZCNT 0x0000200 /* Phenom support for "leading zero count" instruction. */
50
+#define X264_CPU_AVX 0x0000400 /* AVX support: requires OS support even if YMM registers aren't used. */
51
+#define X264_CPU_XOP 0x0000800 /* AMD XOP */
52
+#define X264_CPU_FMA4 0x0001000 /* AMD FMA4 */
53
+#define X264_CPU_AVX2 0x0002000 /* AVX2 */
54
+#define X264_CPU_FMA3 0x0004000 /* Intel FMA3 */
55
+#define X264_CPU_BMI1 0x0008000 /* BMI1 */
56
+#define X264_CPU_BMI2 0x0010000 /* BMI2 */
57
/* x86 modifiers */
58
-#define X264_CPU_CACHELINE_32 0x0040000 /* avoid memory loads that span the border between two cachelines */
59
-#define X264_CPU_CACHELINE_64 0x0080000 /* 32/64 is the size of a cacheline in bytes */
60
-#define X264_CPU_SSE2_IS_SLOW 0x0100000 /* avoid most SSE2 functions on Athlon64 */
61
-#define X264_CPU_SSE2_IS_FAST 0x0200000 /* a few functions are only faster on Core2 and Phenom */
62
-#define X264_CPU_SLOW_SHUFFLE 0x0400000 /* The Conroe has a slow shuffle unit (relative to overall SSE performance) */
63
-#define X264_CPU_STACK_MOD4 0x0800000 /* if stack is only mod4 and not mod16 */
64
-#define X264_CPU_SLOW_CTZ 0x1000000 /* BSR/BSF x86 instructions are really slow on some CPUs */
65
-#define X264_CPU_SLOW_ATOM 0x2000000 /* The Atom is terrible: slow SSE unaligned loads, slow
66
+#define X264_CPU_CACHELINE_32 0x0020000 /* avoid memory loads that span the border between two cachelines */
67
+#define X264_CPU_CACHELINE_64 0x0040000 /* 32/64 is the size of a cacheline in bytes */
68
+#define X264_CPU_SSE2_IS_SLOW 0x0080000 /* avoid most SSE2 functions on Athlon64 */
69
+#define X264_CPU_SSE2_IS_FAST 0x0100000 /* a few functions are only faster on Core2 and Phenom */
70
+#define X264_CPU_SLOW_SHUFFLE 0x0200000 /* The Conroe has a slow shuffle unit (relative to overall SSE performance) */
71
+#define X264_CPU_STACK_MOD4 0x0400000 /* if stack is only mod4 and not mod16 */
72
+#define X264_CPU_SLOW_CTZ 0x0800000 /* BSR/BSF x86 instructions are really slow on some CPUs */
73
+#define X264_CPU_SLOW_ATOM 0x1000000 /* The Atom is terrible: slow SSE unaligned loads, slow
74
* SIMD multiplies, slow SIMD variable shifts, slow pshufb,
75
* cacheline split penalties -- gather everything here that
76
* isn't shared by other CPUs to avoid making half a dozen
77
* new SLOW flags. */
78
-#define X264_CPU_SLOW_PSHUFB 0x4000000 /* such as on the Intel Atom */
79
-#define X264_CPU_SLOW_PALIGNR 0x8000000 /* such as on the AMD Bobcat */
80
+#define X264_CPU_SLOW_PSHUFB 0x2000000 /* such as on the Intel Atom */
81
+#define X264_CPU_SLOW_PALIGNR 0x4000000 /* such as on the AMD Bobcat */
82
83
/* PowerPC */
84
#define X264_CPU_ALTIVEC 0x0000001
85
86
#define X264_CSP_I422 0x0004 /* yuv 4:2:2 planar */
87
#define X264_CSP_YV16 0x0005 /* yvu 4:2:2 planar */
88
#define X264_CSP_NV16 0x0006 /* yuv 4:2:2, with one y plane and one packed u+v */
89
-#define X264_CSP_I444 0x0007 /* yuv 4:4:4 planar */
90
-#define X264_CSP_YV24 0x0008 /* yvu 4:4:4 planar */
91
-#define X264_CSP_BGR 0x0009 /* packed bgr 24bits */
92
-#define X264_CSP_BGRA 0x000a /* packed bgr 32bits */
93
-#define X264_CSP_RGB 0x000b /* packed rgb 24bits */
94
-#define X264_CSP_MAX 0x000c /* end of list */
95
+#define X264_CSP_V210 0x0007 /* 10-bit yuv 4:2:2 packed in 32 */
96
+#define X264_CSP_I444 0x0008 /* yuv 4:4:4 planar */
97
+#define X264_CSP_YV24 0x0009 /* yvu 4:4:4 planar */
98
+#define X264_CSP_BGR 0x000a /* packed bgr 24bits */
99
+#define X264_CSP_BGRA 0x000b /* packed bgr 32bits */
100
+#define X264_CSP_RGB 0x000c /* packed rgb 24bits */
101
+#define X264_CSP_MAX 0x000d /* end of list */
102
#define X264_CSP_VFLIP 0x1000 /* the csp is vertically flipped */
103
#define X264_CSP_HIGH_DEPTH 0x2000 /* the csp has a depth of 16 bits per pixel component */
104
105
106
int i_bframe_pyramid; /* Keep some B-frames as references: 0=off, 1=strict hierarchical, 2=normal */
107
int b_open_gop;
108
int b_bluray_compat;
109
+ int i_avcintra_class;
110
111
int b_deblocking_filter;
112
int i_deblocking_filter_alphac0; /* [-6, 6] -6 light filter, 6 strong */
113
114
int b_constrained_intra;
115
116
int i_cqm_preset;
117
- char *psz_cqm_file; /* JM format */
118
+ char *psz_cqm_file; /* filename (in UTF-8) of CQM file, JM format */
119
uint8_t cqm_4iy[16]; /* used only if i_cqm_preset == X264_CQM_CUSTOM */
120
uint8_t cqm_4py[16];
121
uint8_t cqm_4ic[16];
122
123
void (*pf_log)( void *, int i_level, const char *psz, va_list );
124
void *p_log_private;
125
int i_log_level;
126
- int b_visualize;
127
int b_full_recon; /* fully reconstruct frames, even when not necessary for encoding. Implied by psz_dump_yuv */
128
- char *psz_dump_yuv; /* filename for reconstructed frames */
129
+ char *psz_dump_yuv; /* filename (in UTF-8) for reconstructed frames */
130
131
/* Encoder analyser parameters */
132
struct
133
134
float f_ip_factor;
135
float f_pb_factor;
136
137
+ /* VBV filler: force CBR VBV and use filler bytes to ensure hard-CBR.
138
+ * Implied by NAL-HRD CBR. */
139
+ int b_filler;
140
+
141
int i_aq_mode; /* psy adaptive QP. (X264_AQ_*) */
142
float f_aq_strength;
143
int b_mb_tree; /* Macroblock-tree ratecontrol. */
144
145
146
/* 2pass */
147
int b_stat_write; /* Enable stat writing in psz_stat_out */
148
- char *psz_stat_out;
149
+ char *psz_stat_out; /* output filename (in UTF-8) of the 2pass stats file */
150
int b_stat_read; /* Read stat from psz_stat_in and use it */
151
- char *psz_stat_in;
152
+ char *psz_stat_in; /* input filename (in UTF-8) of the 2pass stats file */
153
154
/* 2pass params (same as ffmpeg ones) */
155
float f_qcompress; /* 0.0 => cbr, 1.0 => constant qp */
156
157
int b_opencl; /* use OpenCL when available */
158
int i_opencl_device; /* specify count of GPU devices to skip, for CLI users */
159
void *opencl_device_id; /* pass explicit cl_device_id as void*, for API users */
160
- char *psz_clbin_file; /* compiled OpenCL kernel cache file */
161
+ char *psz_clbin_file; /* filename (in UTF-8) of the compiled OpenCL kernel cache file */
162
163
/* Slicing parameters */
164
int i_slice_max_size; /* Max size per slice in bytes; includes estimated NAL overhead. */
165
166
/* x264_encoder_headers:
167
* return the SPS and PPS that will be used for the whole stream.
168
* *pi_nal is the number of NAL units outputted in pp_nal.
169
+ * returns the number of bytes in the returned NALs.
170
* returns negative on error.
171
* the payloads of all output NALs are guaranteed to be sequential in memory. */
172
int x264_encoder_headers( x264_t *, x264_nal_t **pp_nal, int *pi_nal );
173
/* x264_encoder_encode:
174
* encode one picture.
175
* *pi_nal is the number of NAL units outputted in pp_nal.
176
- * returns negative on error, zero if no NAL units returned.
177
+ * returns the number of bytes in the returned NALs.
178
+ * returns negative on error and zero if no NAL units returned.
179
* the payloads of all output NALs are guaranteed to be sequential in memory. */
180
int x264_encoder_encode( x264_t *, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t *pic_out );
181
/* x264_encoder_close:
182
x264-snapshot-20130723-2245.tar.bz2/x264cli.h -> x264-snapshot-20140321-2245.tar.bz2/x264cli.h
Changed
24
1
2
/*****************************************************************************
3
* x264cli.h: x264cli common
4
*****************************************************************************
5
- * Copyright (C) 2003-2013 x264 project
6
+ * Copyright (C) 2003-2014 x264 project
7
*
8
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
9
* Loren Merritt <lorenm@u.washington.edu>
10
11
void x264_cli_log( const char *name, int i_level, const char *fmt, ... );
12
void x264_cli_printf( int i_level, const char *fmt, ... );
13
14
+#ifdef _WIN32
15
+void x264_cli_set_console_title( const char *title );
16
+int x264_ansi_filename( const char *filename, char *ansi_filename, int size, int create_file );
17
+#else
18
+#define x264_cli_set_console_title( title )
19
+#endif
20
+
21
#define RETURN_IF_ERR( cond, name, ret, ... )\
22
if( cond )\
23
{\
24
x264-snapshot-20130723-2245.tar.bz2/x264dll.c -> x264-snapshot-20140321-2245.tar.bz2/x264dll.c
Changed
19
1
2
/*****************************************************************************
3
* x264dll: x264 DLLMain for win32
4
*****************************************************************************
5
- * Copyright (C) 2009-2013 x264 project
6
+ * Copyright (C) 2009-2014 x264 project
7
*
8
* Authors: Anton Mitrofanov <BugMaster@narod.ru>
9
*
10
11
#include <windows.h>
12
13
/* Callback for our DLL so we can initialize pthread */
14
-BOOL WINAPI DllMain( HANDLE hinstDLL, DWORD fdwReason, LPVOID lpvReserved )
15
+BOOL WINAPI DllMain( HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved )
16
{
17
#if PTW32_STATIC_LIB
18
switch( fdwReason )
19
x264-snapshot-20130723-2245.tar.bz2/x264res.rc -> x264-snapshot-20140321-2245.tar.bz2/x264res.rc
Changed
39
1
2
/*****************************************************************************
3
* x264res.rc: windows resource file
4
*****************************************************************************
5
- * Copyright (C) 2012-2013 x264 project
6
+ * Copyright (C) 2012-2014 x264 project
7
*
8
- * Authors: Henrik Gramner <hengar-6@student.ltu.se>
9
+ * Authors: Henrik Gramner <henrik@gramner.com>
10
*
11
* This program is free software; you can redistribute it and/or modify
12
* it under the terms of the GNU General Public License as published by
13
14
BEGIN
15
BLOCK "StringFileInfo"
16
BEGIN
17
- BLOCK "040904E4"
18
+ BLOCK "040904B0"
19
BEGIN
20
VALUE "CompanyName", "x264 project"
21
#ifdef DLL
22
23
#endif
24
VALUE "FileVersion", X264_POINTVER
25
VALUE "InternalName", "x264"
26
- VALUE "LegalCopyright", "Copyright (C) 2003-2013 x264 project"
27
+ VALUE "LegalCopyright", "Copyright (C) 2003-2014 x264 project"
28
#ifdef DLL
29
VALUE "OriginalFilename", "libx264-" xstr(X264_BUILD) ".dll"
30
#else
31
32
33
BLOCK "VarFileInfo"
34
BEGIN
35
- VALUE "Translation", 0x0409, 0x04E4
36
+ VALUE "Translation", 0x0409, 0x04B0 /* U.S. English (Unicode) */
37
END
38
END
39