Projects
Essentials
libx264
Sign Up
Log In
Username
Password
We truncated the diff of some files because they were too big. If you want to see the full diff for every file, click here.
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
Expand all
Collapse all
Changes of Revision 10
View file
libx264.changes
Changed
@@ -1,4 +1,9 @@ ------------------------------------------------------------------- +Wed Aug 5 13:04:18 UTC 2015 - idonmez@suse.com + +- update to 20150804 snapshot + +------------------------------------------------------------------- Sun Mar 1 09:33:42 UTC 2015 - i@margueirte.su - update version 20141218
View file
libx264.spec
Changed
@@ -16,8 +16,8 @@ # -%define soname 142 -%define svn 20141218 +%define soname 148 +%define svn 20150804 Name: libx264 Version: 0.%{soname}svn%{svn} Release: 0
View file
x264-snapshot-20141218-2245.tar.bz2/extras/gas-preprocessor.pl
Deleted
@@ -1,253 +0,0 @@ -#!/usr/bin/env perl -# by David Conrad -# This code is licensed under GPLv2 or later; go to gnu.org to read it -# (not that it much matters for an asm preprocessor) -# usage: set your assembler to be something like "perl gas-preprocessor.pl gcc" -use strict; - -# Apple's gas is ancient and doesn't support modern preprocessing features like -# .rept and has ugly macro syntax, among other things. Thus, this script -# implements the subset of the gas preprocessor used by x264 and ffmpeg -# that isn't supported by Apple's gas. - -my @gcc_cmd = @ARGV; -my @preprocess_c_cmd; - -if (grep /\.c$/, @gcc_cmd) { - # C file (inline asm?) - compile - @preprocess_c_cmd = (@gcc_cmd, "-S"); -} elsif (grep /\.S$/, @gcc_cmd) { - # asm file, just do C preprocessor - @preprocess_c_cmd = (@gcc_cmd, "-E"); -} else { - die "Unrecognized input filetype"; -} -@gcc_cmd = map { /\.[cS]$/ ? qw(-x assembler -) : $_ } @gcc_cmd; -@preprocess_c_cmd = map { /\.o$/ ? "-" : $_ } @preprocess_c_cmd; - -open(ASMFILE, "-|", @preprocess_c_cmd) || die "Error running preprocessor"; - -my $current_macro = ''; -my %macro_lines; -my %macro_args; -my %macro_args_default; - -my @pass1_lines; - -# pass 1: parse .macro -# note that the handling of arguments is probably overly permissive vs. 
gas -# but it should be the same for valid cases -while (<ASMFILE>) { - # comment out unsupported directives - s/\.type/@.type/x; - s/\.func/@.func/x; - s/\.endfunc/@.endfunc/x; - s/\.ltorg/@.ltorg/x; - s/\.size/@.size/x; - s/\.fpu/@.fpu/x; - - # the syntax for these is a little different - s/\.global/.globl/x; - # also catch .section .rodata since the equivalent to .const_data is .section __DATA,__const - s/(.*)\.rodata/.const_data/x; - s/\.int/.long/x; - s/\.float/.single/x; - - # catch unknown section names that aren't mach-o style (with a comma) - if (/.section ([^,]*)$/) { - die ".section $1 unsupported; figure out the mach-o section name and add it"; - } - - # macros creating macros is not handled (is that valid?) - if (/\.macro\s+([\d\w\.]+)\s*(.*)/) { - $current_macro = $1; - - # commas in the argument list are optional, so only use whitespace as the separator - my $arglist = $2; - $arglist =~ s/,/ /g; - - my @args = split(/\s+/, $arglist); - foreach my $i (0 .. $#args) { - my @argpair = split(/=/, $args[$i]); - $macro_args{$current_macro}[$i] = $argpair[0]; - $argpair[0] =~ s/:vararg$//; - $macro_args_default{$current_macro}{$argpair[0]} = $argpair[1]; - } - # ensure %macro_lines has the macro name added as a key - $macro_lines{$current_macro} = []; - } elsif (/\.endm/) { - if (!$current_macro) { - die "ERROR: .endm without .macro"; - } - $current_macro = ''; - } elsif ($current_macro) { - push(@{$macro_lines{$current_macro}}, $_); - } else { - expand_macros($_); - } -} - -sub expand_macros { - my $line = @_[0]; - if ($line =~ /(\S+:|)\s*([\w\d\.]+)\s*(.*)/ && exists $macro_lines{$2}) { - push(@pass1_lines, $1); - my $macro = $2; - - # commas are optional here too, but are syntactically important because - # parameters can be blank - my @arglist = split(/,/, $3); - my @args; - foreach (@arglist) { - my @whitespace_split = split(/\s+/, $_); - if (!@whitespace_split) { - push(@args, ''); - } else { - foreach (@whitespace_split) { - if (length($_)) { - 
push(@args, $_); - } - } - } - } - - my %replacements; - if ($macro_args_default{$macro}){ - %replacements = %{$macro_args_default{$macro}}; - } - - # construct hashtable of text to replace - foreach my $i (0 .. $#args) { - my $argname = $macro_args{$macro}[$i]; - - if ($args[$i] =~ m/=/) { - # arg=val references the argument name - # XXX: I'm not sure what the expected behaviour if a lot of - # these are mixed with unnamed args - my @named_arg = split(/=/, $args[$i]); - $replacements{$named_arg[0]} = $named_arg[1]; - } elsif ($i > $#{$macro_args{$macro}}) { - # more args given than the macro has named args - # XXX: is vararg allowed on arguments before the last? - $argname = $macro_args{$macro}[-1]; - if ($argname =~ s/:vararg$//) { - $replacements{$argname} .= ", $args[$i]"; - } else { - die "Too many arguments to macro $macro"; - } - } else { - $argname =~ s/:vararg$//; - $replacements{$argname} = $args[$i]; - } - } - - # apply replacements as regex - foreach (@{$macro_lines{$macro}}) { - my $macro_line = $_; - # do replacements by longest first, this avoids wrong replacement - # when argument names are subsets of each other - foreach (reverse sort {length $a <=> length $b} keys %replacements) { - $macro_line =~ s/\\$_/$replacements{$_}/g; - } - $macro_line =~ s/\\\(\)//g; # remove \() - expand_macros($macro_line); - } - } else { - push(@pass1_lines, $line); - } -} - -close(ASMFILE) or exit 1; -open(ASMFILE, "|-", @gcc_cmd) or die "Error running assembler"; - -my @sections; -my $num_repts; -my $rept_lines; - -my %literal_labels; # for ldr <reg>, =<expr> -my $literal_num = 0; - -# pass 2: parse .rept and .if variants -# NOTE: since we don't implement a proper parser, using .rept with a -# variable assigned from .set is not supported -foreach my $line (@pass1_lines) { - # textual comparison .if - # this assumes nothing else on the same line - if ($line =~ /\.ifnb\s+(.*)/) { - if ($1) { - $line = ".if 1\n"; - } else { - $line = ".if 0\n"; - } - } elsif ($line =~ 
/\.ifb\s+(.*)/) { - if ($1) { - $line = ".if 0\n"; - } else { - $line = ".if 1\n"; - } - } elsif ($line =~ /\.ifc\s+(.*)\s*,\s*(.*)/) { - if ($1 eq $2) { - $line = ".if 1\n"; - } else { - $line = ".if 0\n"; - } - } - - # handle .previous (only with regard to .section not .subsection) - if ($line =~ /\.(section|text|const_data)/) { - push(@sections, $line); - } elsif ($line =~ /\.previous/) { - if (!$sections[-2]) {
View file
x264-snapshot-20141218-2245.tar.bz2/extras/windowsPorts
Deleted
-(directory)
View file
x264-snapshot-20141218-2245.tar.bz2/extras/windowsPorts/basicDataTypeConversions.h
Deleted
@@ -1,85 +0,0 @@ -#ifndef __DATA_TYPE_CONVERSIONS_H__ -#define __DATA_TYPE_CONVERSIONS_H__ - -#include <stdint.h> -#include <wchar.h> - -#ifdef __cplusplus -namespace avxsynth { -#endif // __cplusplus - -typedef int64_t __int64; -typedef int32_t __int32; -#ifdef __cplusplus -typedef bool BOOL; -#else -typedef uint32_t BOOL; -#endif // __cplusplus -typedef void* HMODULE; -typedef void* LPVOID; -typedef void* PVOID; -typedef PVOID HANDLE; -typedef HANDLE HWND; -typedef HANDLE HINSTANCE; -typedef void* HDC; -typedef void* HBITMAP; -typedef void* HICON; -typedef void* HFONT; -typedef void* HGDIOBJ; -typedef void* HBRUSH; -typedef void* HMMIO; -typedef void* HACMSTREAM; -typedef void* HACMDRIVER; -typedef void* HIC; -typedef void* HACMOBJ; -typedef HACMSTREAM* LPHACMSTREAM; -typedef void* HACMDRIVERID; -typedef void* LPHACMDRIVER; -typedef unsigned char BYTE; -typedef BYTE* LPBYTE; -typedef char TCHAR; -typedef TCHAR* LPTSTR; -typedef const TCHAR* LPCTSTR; -typedef char* LPSTR; -typedef LPSTR LPOLESTR; -typedef const char* LPCSTR; -typedef LPCSTR LPCOLESTR; -typedef wchar_t WCHAR; -typedef unsigned short WORD; -typedef unsigned int UINT; -typedef UINT MMRESULT; -typedef uint32_t DWORD; -typedef DWORD COLORREF; -typedef DWORD FOURCC; -typedef DWORD HRESULT; -typedef DWORD* LPDWORD; -typedef DWORD* DWORD_PTR; -typedef int32_t LONG; -typedef int32_t* LONG_PTR; -typedef LONG_PTR LRESULT; -typedef uint32_t ULONG; -typedef uint32_t* ULONG_PTR; -//typedef __int64_t intptr_t; -typedef uint64_t _fsize_t; - - -// -// Structures -// - -typedef struct _GUID { - DWORD Data1; - WORD Data2; - WORD Data3; - BYTE Data4[8]; -} GUID; - -typedef GUID REFIID; -typedef GUID CLSID; -typedef CLSID* LPCLSID; -typedef GUID IID; - -#ifdef __cplusplus -}; // namespace avxsynth -#endif // __cplusplus -#endif // __DATA_TYPE_CONVERSIONS_H__
View file
x264-snapshot-20141218-2245.tar.bz2/extras/windowsPorts/windows2linux.h
Deleted
@@ -1,77 +0,0 @@ -#ifndef __WINDOWS2LINUX_H__ -#define __WINDOWS2LINUX_H__ - -/* - * LINUX SPECIFIC DEFINITIONS -*/ -// -// Data types conversions -// -#include <stdlib.h> -#include <string.h> -#include "basicDataTypeConversions.h" - -#ifdef __cplusplus -namespace avxsynth { -#endif // __cplusplus -// -// purposefully define the following MSFT definitions -// to mean nothing (as they do not mean anything on Linux) -// -#define __stdcall -#define __cdecl -#define noreturn -#define __declspec(x) -#define STDAPI extern "C" HRESULT -#define STDMETHODIMP HRESULT __stdcall -#define STDMETHODIMP_(x) x __stdcall - -#define STDMETHOD(x) virtual HRESULT x -#define STDMETHOD_(a, x) virtual a x - -#ifndef TRUE -#define TRUE true -#endif - -#ifndef FALSE -#define FALSE false -#endif - -#define S_OK (0x00000000) -#define S_FALSE (0x00000001) -#define E_NOINTERFACE (0X80004002) -#define E_POINTER (0x80004003) -#define E_FAIL (0x80004005) -#define E_OUTOFMEMORY (0x8007000E) - -#define INVALID_HANDLE_VALUE ((HANDLE)((LONG_PTR)-1)) -#define FAILED(hr) ((hr) & 0x80000000) -#define SUCCEEDED(hr) (!FAILED(hr)) - - -// -// Functions -// -#define MAKEDWORD(a,b,c,d) ((a << 24) | (b << 16) | (c << 8) | (d)) -#define MAKEWORD(a,b) ((a << 8) | (b)) - -#define lstrlen strlen -#define lstrcpy strcpy -#define lstrcmpi strcasecmp -#define _stricmp strcasecmp -#define InterlockedIncrement(x) __sync_fetch_and_add((x), 1) -#define InterlockedDecrement(x) __sync_fetch_and_sub((x), 1) -// Windows uses (new, old) ordering but GCC has (old, new) -#define InterlockedCompareExchange(x,y,z) __sync_val_compare_and_swap(x,z,y) - -#define UInt32x32To64(a, b) ( (uint64_t) ( ((uint64_t)((uint32_t)(a))) * ((uint32_t)(b)) ) ) -#define Int64ShrlMod32(a, b) ( (uint64_t) ( (uint64_t)(a) >> (b) ) ) -#define Int32x32To64(a, b) ((__int64)(((__int64)((long)(a))) * ((long)(b)))) - -#define MulDiv(nNumber, nNumerator, nDenominator) (int32_t) (((int64_t) (nNumber) * (int64_t) (nNumerator) + (int64_t) ((nDenominator)/2)) / 
(int64_t) (nDenominator)) - -#ifdef __cplusplus -}; // namespace avxsynth -#endif // __cplusplus - -#endif // __WINDOWS2LINUX_H__
View file
x264-snapshot-20141218-2245.tar.bz2/AUTHORS -> x264-snapshot-20150804-2245.tar.bz2/AUTHORS
Changed
@@ -1,8 +1,8 @@ # Contributors to x264 -# +# # The format of this file was inspired by the Linux kernel CREDITS file. # Authors are listed alphabetically. -# +# # The fields are: name (N), email (E), web-address (W), CVS account login (C), # PGP key ID and fingerprint (P), description (D), and snail-mail address (S).
View file
x264-snapshot-20141218-2245.tar.bz2/Makefile -> x264-snapshot-20150804-2245.tar.bz2/Makefile
Changed
@@ -87,12 +87,12 @@ endif X86SRC = $(X86SRC0:%=common/x86/%) -ifeq ($(ARCH),X86) +ifeq ($(SYS_ARCH),X86) ARCH_X86 = yes ASMSRC = $(X86SRC) common/x86/pixel-32.asm endif -ifeq ($(ARCH),X86_64) +ifeq ($(SYS_ARCH),X86_64) ARCH_X86 = yes ASMSRC = $(X86SRC:-32.asm=-64.asm) common/x86/trellis-64.asm endif @@ -106,7 +106,7 @@ endif # AltiVec optims -ifeq ($(ARCH),PPC) +ifeq ($(SYS_ARCH),PPC) ifneq ($(AS),) SRCS += common/ppc/mc.c common/ppc/pixel.c common/ppc/dct.c \ common/ppc/quant.c common/ppc/deblock.c \ @@ -115,7 +115,7 @@ endif # NEON optims -ifeq ($(ARCH),ARM) +ifeq ($(SYS_ARCH),ARM) ifneq ($(AS),) ASMSRC += common/arm/cpu-a.S common/arm/pixel-a.S common/arm/mc-a.S \ common/arm/dct-a.S common/arm/quant-a.S common/arm/deblock-a.S \ @@ -126,20 +126,32 @@ endif # AArch64 NEON optims -ifeq ($(ARCH),AARCH64) +ifeq ($(SYS_ARCH),AARCH64) ifneq ($(AS),) -ASMSRC += common/aarch64/dct-a.S \ +ASMSRC += common/aarch64/bitstream-a.S \ + common/aarch64/cabac-a.S \ + common/aarch64/dct-a.S \ common/aarch64/deblock-a.S \ common/aarch64/mc-a.S \ common/aarch64/pixel-a.S \ common/aarch64/predict-a.S \ common/aarch64/quant-a.S -SRCS += common/aarch64/mc-c.c \ +SRCS += common/aarch64/asm-offsets.c \ + common/aarch64/mc-c.c \ common/aarch64/predict-c.c OBJASM = $(ASMSRC:%.S=%.o) endif endif +# MSA optims +ifeq ($(SYS_ARCH),MIPS) +ifneq ($(findstring HAVE_MSA 1, $(CONFIG)),) +SRCS += common/mips/mc-c.c common/mips/dct-c.c \ + common/mips/deblock-c.c common/mips/pixel-c.c \ + common/mips/predict-c.c common/mips/quant-c.c +endif +endif + ifneq ($(HAVE_GETOPT_LONG),1) SRCCLI += extras/getopt.c endif @@ -264,7 +276,7 @@ rm -f $(SRC2:%.c=%.gcda) $(SRC2:%.c=%.gcno) *.dyn pgopti.dpi pgopti.dpi.lock *.pgd *.pgc distclean: clean - rm -f config.mak x264_config.h config.h config.log x264.pc x264.def + rm -f config.mak x264_config.h config.h config.log x264.pc x264.def conftest* install-cli: cli $(INSTALL) -d $(DESTDIR)$(bindir)
View file
x264-snapshot-20150804-2245.tar.bz2/common/aarch64/asm-offsets.c
Added
@@ -0,0 +1,42 @@ +/***************************************************************************** + * asm-offsets.c: check asm offsets for aarch64 + ***************************************************************************** + * Copyright (C) 2014-2015 x264 project + * + * Authors: Janne Grunau <janne-x264@jannau.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. + *****************************************************************************/ + +#include "common/common.h" +#include "asm-offsets.h" + +#define X264_CHECK_OFFSET(s, m, o) struct check_##s##_##m \ +{ \ + int m_##m[2 * (offsetof(s, m) == o) - 1]; \ +} + +X264_CHECK_OFFSET(x264_cabac_t, i_low, CABAC_I_LOW); +X264_CHECK_OFFSET(x264_cabac_t, i_range, CABAC_I_RANGE); +X264_CHECK_OFFSET(x264_cabac_t, i_queue, CABAC_I_QUEUE); +X264_CHECK_OFFSET(x264_cabac_t, i_bytes_outstanding, CABAC_I_BYTES_OUTSTANDING); +X264_CHECK_OFFSET(x264_cabac_t, p_start, CABAC_P_START); +X264_CHECK_OFFSET(x264_cabac_t, p, CABAC_P); +X264_CHECK_OFFSET(x264_cabac_t, p_end, CABAC_P_END); +X264_CHECK_OFFSET(x264_cabac_t, f8_bits_encoded, CABAC_F8_BITS_ENCODED); +X264_CHECK_OFFSET(x264_cabac_t, state, CABAC_STATE);
View file
x264-snapshot-20150804-2245.tar.bz2/common/aarch64/asm-offsets.h
Added
@@ -0,0 +1,39 @@ +/***************************************************************************** + * asm-offsets.h: asm offsets for aarch64 + ***************************************************************************** + * Copyright (C) 2014-2015 x264 project + * + * Authors: Janne Grunau <janne-x264@jannau.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. + *****************************************************************************/ + +#ifndef X264_AARCH64_ASM_OFFSETS_H +#define X264_AARCH64_ASM_OFFSETS_H + +#define CABAC_I_LOW 0x00 +#define CABAC_I_RANGE 0x04 +#define CABAC_I_QUEUE 0x08 +#define CABAC_I_BYTES_OUTSTANDING 0x0c +#define CABAC_P_START 0x10 +#define CABAC_P 0x18 +#define CABAC_P_END 0x20 +#define CABAC_F8_BITS_ENCODED 0x30 +#define CABAC_STATE 0x34 + +#endif
View file
x264-snapshot-20141218-2245.tar.bz2/common/aarch64/asm.S -> x264-snapshot-20150804-2245.tar.bz2/common/aarch64/asm.S
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * asm.S: AArch64 utility macros ***************************************************************************** - * Copyright (C) 2008-2014 x264 project + * Copyright (C) 2008-2015 x264 project * * Authors: Mans Rullgard <mans@mansr.com> * David Conrad <lessen42@gmail.com>
View file
x264-snapshot-20150804-2245.tar.bz2/common/aarch64/bitstream-a.S
Added
@@ -0,0 +1,82 @@ +/***************************************************************************** + * bitstream-a.S: aarch64 bitstream functions + ***************************************************************************** + * Copyright (C) 2014-2015 x264 project + * + * Authors: Janne Grunau <janne-x264@jannau.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. 
+ *****************************************************************************/ + +#include "asm.S" + +function x264_nal_escape_neon, export=1 + movi v0.16b, #0xff + movi v4.16b, #4 + mov w3, #3 + subs x6, x1, x2 + cbz x6, 99f +0: + cmn x6, #15 + b.lt 16f + mov x1, x2 + b 100f +16: + ld1 {v1.16b}, [x1], #16 + ext v2.16b, v0.16b, v1.16b, #14 + ext v3.16b, v0.16b, v1.16b, #15 + cmhi v7.16b, v4.16b, v1.16b + cmeq v5.16b, v2.16b, #0 + cmeq v6.16b, v3.16b, #0 + and v5.16b, v5.16b, v7.16b + and v5.16b, v5.16b, v6.16b + shrn v7.8b, v5.8h, #4 + mov x7, v7.d[0] + cbz x7, 16f + mov x6, #-16 +100: + umov w5, v0.b[14] + umov w4, v0.b[15] + orr w5, w4, w5, lsl #8 +101: + ldrb w4, [x1, x6] + orr w9, w4, w5, lsl #16 + cmp w9, #3 + b.hi 102f + strb w3, [x0], #1 + orr w5, w3, w5, lsl #8 +102: + adds x6, x6, #1 + strb w4, [x0], #1 + orr w5, w4, w5, lsl #8 + b.lt 101b + subs x6, x1, x2 + lsr w9, w5, #8 + mov v0.b[14], w9 + mov v0.b[15], w5 + b.lt 0b + + ret +16: + subs x6, x1, x2 + st1 {v1.16b}, [x0], #16 + mov v0.16b, v1.16b + b.lt 0b +99: + ret +endfunc
View file
x264-snapshot-20150804-2245.tar.bz2/common/aarch64/cabac-a.S
Added
@@ -0,0 +1,122 @@ +/***************************************************************************** + * cabac-a.S: aarch64 cabac + ***************************************************************************** + * Copyright (C) 2014-2015 x264 project + * + * Authors: Janne Grunau <janne-x264@jannau.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. 
+ *****************************************************************************/ + +#include "asm.S" +#include "asm-offsets.h" + +// w11 holds x264_cabac_t.i_low +// w12 holds x264_cabac_t.i_range + +function x264_cabac_encode_decision_asm, export=1 + movrel x8, X(x264_cabac_range_lps) + movrel x9, X(x264_cabac_transition) + add w10, w1, #CABAC_STATE + ldrb w3, [x0, x10] // i_state + ldr w12, [x0, #CABAC_I_RANGE] + and x4, x3, #~1 + asr w5, w12, #6 + add x8, x8, x4, lsl #1 + sub w5, w5, #4 + eor w6, w2, w3 // b ^ i_state + ldrb w4, [x8, x5] // i_range_lps + ldr w11, [x0, #CABAC_I_LOW] + sub w12, w12, w4 + tbz w6, #0, 1f // (b ^ i_state) & 1 + add w11, w11, w12 + mov w12, w4 +1: + orr w4, w2, w3, lsl #1 + ldrb w9, [x9, x4] + strb w9, [x0, x10] // i_state + +cabac_encode_renorm: + clz w5, w12 + ldr w2, [x0, #CABAC_I_QUEUE] + sub w5, w5, #23 + lsl w12, w12, w5 + lsl w11, w11, w5 +2: + adds w2, w2, w5 + str w12, [x0, #CABAC_I_RANGE] + b.lt 0f +cabac_putbyte: + mov w13, #0x400 + add w12, w2, #10 + lsl w13, w13, w2 + asr w4, w11, w12 // out + sub w2, w2, #8 + sub w13, w13, #1 + subs w5, w4, #0xff + and w11, w11, w13 + ldr w6, [x0, #CABAC_I_BYTES_OUTSTANDING] + str w2, [x0, #CABAC_I_QUEUE] + b.ne 1f + + add w6, w6, #1 + str w11, [x0, #CABAC_I_LOW] + str w6, [x0, #CABAC_I_BYTES_OUTSTANDING] + ret + +1: + ldr x7, [x0, #CABAC_P] + asr w5, w4, #8 // carry + ldrb w8, [x7, #-1] + add w8, w8, w5 + sub w5, w5, #1 + strb w8, [x7, #-1] + cbz w6, 3f +2: + subs w6, w6, #1 + strb w5, [x7], #1 + b.gt 2b +3: + strb w4, [x7], #1 + str wzr, [x0, #CABAC_I_BYTES_OUTSTANDING] + str x7, [x0, #CABAC_P] +0: + str w11, [x0, #CABAC_I_LOW] + str w2, [x0, #CABAC_I_QUEUE] + ret +endfunc + +function x264_cabac_encode_bypass_asm, export=1 + ldr w12, [x0, #CABAC_I_RANGE] + ldr w11, [x0, #CABAC_I_LOW] + ldr w2, [x0, #CABAC_I_QUEUE] + and w1, w1, w12 + add w11, w1, w11, lsl #1 + adds w2, w2, #1 + b.ge cabac_putbyte + str w11, [x0, #CABAC_I_LOW] + str w2, [x0, #CABAC_I_QUEUE] + ret +endfunc + +function 
x264_cabac_encode_terminal_asm, export=1 + ldr w12, [x0, #CABAC_I_RANGE] + ldr w11, [x0, #CABAC_I_LOW] + sub w12, w12, #2 + b cabac_encode_renorm +endfunc
View file
x264-snapshot-20141218-2245.tar.bz2/common/aarch64/dct-a.S -> x264-snapshot-20150804-2245.tar.bz2/common/aarch64/dct-a.S
Changed
@@ -1,9 +1,10 @@ /**************************************************************************** - * dct-a.S: AArch6464 transform and zigzag + * dct-a.S: aarch64 transform and zigzag ***************************************************************************** - * Copyright (C) 2009-2014 x264 project + * Copyright (C) 2009-2015 x264 project * * Authors: David Conrad <lessen42@gmail.com> + * Janne Grunau <janne-x264@jannau.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -32,6 +33,25 @@ .byte 26,27, 28,29, 22,23, 30,31 endconst +const scan4x4_field, align=4 +.byte 0,1, 2,3, 8,9, 4,5 +.byte 6,7, 10,11, 12,13, 14,15 +endconst + +const sub4x4_frame, align=4 +.byte 0, 1, 4, 8 +.byte 5, 2, 3, 6 +.byte 9, 12, 13, 10 +.byte 7, 11, 14, 15 +endconst + +const sub4x4_field, align=4 +.byte 0, 4, 1, 8 +.byte 12, 5, 9, 13 +.byte 2, 6, 10, 14 +.byte 3, 7, 11, 15 +endconst + // sum = a + (b>>shift) sub = (a>>shift) - b .macro SUMSUB_SHR shift sum sub a b t0 t1 sshr \t0, \b, #\shift @@ -602,56 +622,99 @@ ret endfunc +.macro sub4x4x2_dct_dc, dst, t0, t1, t2, t3, t4, t5, t6, t7 + ld1 {\t0\().8b}, [x1], x3 + ld1 {\t1\().8b}, [x2], x4 + ld1 {\t2\().8b}, [x1], x3 + ld1 {\t3\().8b}, [x2], x4 + usubl \t0\().8h, \t0\().8b, \t1\().8b + ld1 {\t4\().8b}, [x1], x3 + ld1 {\t5\().8b}, [x2], x4 + usubl \t1\().8h, \t2\().8b, \t3\().8b + ld1 {\t6\().8b}, [x1], x3 + ld1 {\t7\().8b}, [x2], x4 + add \dst\().8h, \t0\().8h, \t1\().8h + usubl \t2\().8h, \t4\().8b, \t5\().8b + usubl \t3\().8h, \t6\().8b, \t7\().8b + add \dst\().8h, \dst\().8h, \t2\().8h + add \dst\().8h, \dst\().8h, \t3\().8h +.endm + function x264_sub8x8_dct_dc_neon, export=1 mov x3, #FENC_STRIDE mov x4, #FDEC_STRIDE - ld1 {v16.8b}, [x1], x3 - ld1 {v17.8b}, [x2], x4 - usubl v16.8h, v16.8b, v17.8b - ld1 {v18.8b}, [x1], x3 - ld1 {v19.8b}, [x2], x4 - usubl v17.8h, v18.8b, v19.8b - ld1 {v20.8b}, [x1], x3 - ld1 {v21.8b}, [x2], x4 - usubl 
v18.8h, v20.8b, v21.8b - ld1 {v22.8b}, [x1], x3 - add v0.8h, v16.8h, v17.8h - ld1 {v23.8b}, [x2], x4 - usubl v19.8h, v22.8b, v23.8b - ld1 {v24.8b}, [x1], x3 - add v0.8h, v0.8h, v18.8h - ld1 {v25.8b}, [x2], x4 - usubl v20.8h, v24.8b, v25.8b - ld1 {v26.8b}, [x1], x3 - add v0.8h, v0.8h, v19.8h - ld1 {v27.8b}, [x2], x4 - usubl v21.8h, v26.8b, v27.8b - ld1 {v28.8b}, [x1], x3 - ld1 {v29.8b}, [x2], x4 - usubl v22.8h, v28.8b, v29.8b - ld1 {v30.8b}, [x1], x3 - add v1.8h, v20.8h, v21.8h - ld1 {v31.8b}, [x2], x4 - usubl v23.8h, v30.8b, v31.8b - add v1.8h, v1.8h, v22.8h - add v1.8h, v1.8h, v23.8h + sub4x4x2_dct_dc v0, v16, v17, v18, v19, v20, v21, v22, v23 + sub4x4x2_dct_dc v1, v24, v25, v26, v27, v28, v29, v30, v31 + + transpose v2.2d, v3.2d, v0.2d, v1.2d + SUMSUB_AB v0.8h, v1.8h, v2.8h, v3.8h + transpose v2.2d, v3.2d, v0.2d, v1.2d + SUMSUB_AB v0.8h, v1.8h, v2.8h, v3.8h transpose v2.2d, v3.2d, v0.2d, v1.2d - add v0.8h, v2.8h, v3.8h - sub v1.8h, v2.8h, v3.8h + addp v0.8h, v2.8h, v3.8h + addp v0.8h, v0.8h, v0.8h - transpose v2.2d, v3.2d, v0.2d, v1.2d + st1 {v0.4h}, [x0] + ret +endfunc + +function x264_sub8x16_dct_dc_neon, export=1 + mov x3, #FENC_STRIDE + mov x4, #FDEC_STRIDE + sub4x4x2_dct_dc v0, v16, v17, v18, v19, v20, v21, v22, v23 + sub4x4x2_dct_dc v1, v24, v25, v26, v27, v28, v29, v30, v31 + sub4x4x2_dct_dc v2, v16, v17, v18, v19, v20, v21, v22, v23 + sub4x4x2_dct_dc v3, v24, v25, v26, v27, v28, v29, v30, v31 - add v0.8h, v2.8h, v3.8h - sub v1.8h, v2.8h, v3.8h + addp v4.8h, v0.8h, v2.8h + addp v5.8h, v1.8h, v3.8h + + transpose v2.4s, v3.4s, v4.4s, v5.4s + SUMSUB_AB v0.8h, v1.8h, v2.8h, v3.8h + + transpose v2.4s, v3.4s, v0.4s, v1.4s + SUMSUB_AB v0.8h, v1.8h, v2.8h, v3.8h transpose v2.2d, v3.2d, v0.2d, v1.2d + SUMSUB_AB v0.8h, v1.8h, v2.8h, v3.8h + + trn1 v2.2d, v0.2d, v1.2d + trn2 v3.2d, v1.2d, v0.2d addp v0.8h, v2.8h, v3.8h - addp v0.8h, v0.8h, v0.8h - st1 {v0.4h}, [x0] + st1 {v0.8h}, [x0] + ret +endfunc + +function x264_zigzag_interleave_8x8_cavlc_neon, export=1 + mov 
x3, #7 + movi v31.4s, #1 + ld4 {v0.8h,v1.8h,v2.8h,v3.8h}, [x1], #64 + ld4 {v4.8h,v5.8h,v6.8h,v7.8h}, [x1], #64 + umax v16.8h, v0.8h, v4.8h + umax v17.8h, v1.8h, v5.8h + umax v18.8h, v2.8h, v6.8h + umax v19.8h, v3.8h, v7.8h + st1 {v0.8h}, [x0], #16 + st1 {v4.8h}, [x0], #16 + umaxp v16.8h, v16.8h, v17.8h + umaxp v18.8h, v18.8h, v19.8h + st1 {v1.8h}, [x0], #16 + st1 {v5.8h}, [x0], #16 + umaxp v16.8h, v16.8h, v18.8h + st1 {v2.8h}, [x0], #16 + st1 {v6.8h}, [x0], #16 + cmhi v16.4s, v16.4s, v31.4s + st1 {v3.8h}, [x0], #16 + and v16.16b, v16.16b, v31.16b + st1 {v7.8h}, [x0], #16 + st1 {v16.b}[0], [x2], #1 + st1 {v16.b}[4], [x2], x3 + st1 {v16.b}[8], [x2], #1 + st1 {v16.b}[12], [x2] ret endfunc @@ -664,3 +727,282 @@ st1 {v2.16b,v3.16b}, [x0] ret endfunc + +.macro zigzag_sub_4x4 f ac +function x264_zigzag_sub_4x4\ac\()_\f\()_neon, export=1 + mov x9, #FENC_STRIDE + mov x4, #FDEC_STRIDE + movrel x5, sub4x4_\f + mov x6, x2 + ld1 {v0.s}[0], [x1], x9 + ld1 {v0.s}[1], [x1], x9 + ld1 {v0.s}[2], [x1], x9 + ld1 {v0.s}[3], [x1], x9 + ld1 {v16.16b}, [x5] + ld1 {v1.s}[0], [x2], x4 + ld1 {v1.s}[1], [x2], x4 + ld1 {v1.s}[2], [x2], x4 + ld1 {v1.s}[3], [x2], x4 + tbl v2.16b, {v0.16b}, v16.16b + tbl v3.16b, {v1.16b}, v16.16b + st1 {v0.s}[0], [x6], x4 + usubl v4.8h, v2.8b, v3.8b
View file
x264-snapshot-20141218-2245.tar.bz2/common/aarch64/dct.h -> x264-snapshot-20150804-2245.tar.bz2/common/aarch64/dct.h
Changed
@@ -1,9 +1,10 @@ /***************************************************************************** - * dct.h: AArch64 transform and zigzag + * dct.h: aarch64 transform and zigzag ***************************************************************************** - * Copyright (C) 2009-2014 x264 project + * Copyright (C) 2009-2015 x264 project * * Authors: David Conrad <lessen42@gmail.com> + * Janne Grunau <janne-x264@jannau.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -40,6 +41,7 @@ void x264_add8x8_idct_dc_neon( uint8_t *p_dst, int16_t dct[4] ); void x264_add16x16_idct_dc_neon( uint8_t *p_dst, int16_t dct[16] ); void x264_sub8x8_dct_dc_neon( int16_t dct[4], uint8_t *pix1, uint8_t *pix2 ); +void x264_sub8x16_dct_dc_neon( int16_t dct[8], uint8_t *pix1, uint8_t *pix2 ); void x264_sub8x8_dct8_neon( int16_t dct[64], uint8_t *pix1, uint8_t *pix2 ); void x264_sub16x16_dct8_neon( int16_t dct[4][64], uint8_t *pix1, uint8_t *pix2 ); @@ -48,5 +50,18 @@ void x264_add16x16_idct8_neon( uint8_t *p_dst, int16_t dct[4][64] ); void x264_zigzag_scan_4x4_frame_neon( int16_t level[16], int16_t dct[16] ); +void x264_zigzag_scan_4x4_field_neon( int16_t level[16], int16_t dct[16] ); +void x264_zigzag_scan_8x8_frame_neon( int16_t level[64], int16_t dct[64] ); +void x264_zigzag_scan_8x8_field_neon( int16_t level[64], int16_t dct[64] ); + +int x264_zigzag_sub_4x4_field_neon( dctcoef level[16], const pixel *p_src, pixel *p_dst ); +int x264_zigzag_sub_4x4ac_field_neon( dctcoef level[16], const pixel *p_src, pixel *p_dst, dctcoef *dc ); +int x264_zigzag_sub_4x4_frame_neon( dctcoef level[16], const pixel *p_src, pixel *p_dst ); +int x264_zigzag_sub_4x4ac_frame_neon( dctcoef level[16], const pixel *p_src, pixel *p_dst, dctcoef *dc ); + +int x264_zigzag_sub_8x8_field_neon( dctcoef level[16], const pixel *p_src, pixel *p_dst ); +int x264_zigzag_sub_8x8_frame_neon( dctcoef level[16], const pixel 
*p_src, pixel *p_dst ); + +void x264_zigzag_interleave_8x8_cavlc_neon( dctcoef *dst, dctcoef *src, uint8_t *nnz ); #endif
View file
x264-snapshot-20141218-2245.tar.bz2/common/aarch64/deblock-a.S -> x264-snapshot-20150804-2245.tar.bz2/common/aarch64/deblock-a.S
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * deblock.S: aarch64 deblocking ***************************************************************************** - * Copyright (C) 2009-2014 x264 project + * Copyright (C) 2009-2015 x264 project * * Authors: Mans Rullgard <mans@mansr.com> * Janne Grunau <janne-x264@jannau.net> @@ -180,6 +180,202 @@ ret endfunc +.macro h264_loop_filter_start_intra + orr w4, w2, w3 + cmp w4, #0 + b.ne 1f + ret +1: + dup v30.16b, w2 // alpha + dup v31.16b, w3 // beta +.endm + +.macro h264_loop_filter_luma_intra + uabd v16.16b, v7.16b, v0.16b // abs(p0 - q0) + uabd v17.16b, v6.16b, v7.16b // abs(p1 - p0) + uabd v18.16b, v1.16b, v0.16b // abs(q1 - q0) + cmhi v19.16b, v30.16b, v16.16b // < alpha + cmhi v17.16b, v31.16b, v17.16b // < beta + cmhi v18.16b, v31.16b, v18.16b // < beta + + movi v29.16b, #2 + ushr v30.16b, v30.16b, #2 // alpha >> 2 + add v30.16b, v30.16b, v29.16b // (alpha >> 2) + 2 + cmhi v16.16b, v30.16b, v16.16b // < (alpha >> 2) + 2 + + and v19.16b, v19.16b, v17.16b + and v19.16b, v19.16b, v18.16b + shrn v20.8b, v19.8h, #4 + mov x4, v20.d[0] + cbz x4, 9f + + ushll v20.8h, v6.8b, #1 + ushll v22.8h, v1.8b, #1 + ushll2 v21.8h, v6.16b, #1 + ushll2 v23.8h, v1.16b, #1 + uaddw v20.8h, v20.8h, v7.8b + uaddw v22.8h, v22.8h, v0.8b + uaddw2 v21.8h, v21.8h, v7.16b + uaddw2 v23.8h, v23.8h, v0.16b + uaddw v20.8h, v20.8h, v1.8b + uaddw v22.8h, v22.8h, v6.8b + uaddw2 v21.8h, v21.8h, v1.16b + uaddw2 v23.8h, v23.8h, v6.16b + + rshrn v24.8b, v20.8h, #2 // p0'_1 + rshrn v25.8b, v22.8h, #2 // q0'_1 + rshrn2 v24.16b, v21.8h, #2 // p0'_1 + rshrn2 v25.16b, v23.8h, #2 // q0'_1 + + uabd v17.16b, v5.16b, v7.16b // abs(p2 - p0) + uabd v18.16b, v2.16b, v0.16b // abs(q2 - q0) + cmhi v17.16b, v31.16b, v17.16b // < beta + cmhi v18.16b, v31.16b, v18.16b // < beta + + and v17.16b, v16.16b, v17.16b // if_2 && if_3 + and v18.16b, v16.16b, v18.16b // if_2 && if_4 + + not v30.16b, v17.16b + not v31.16b, v18.16b + + and 
v30.16b, v30.16b, v19.16b // if_1 && !(if_2 && if_3) + and v31.16b, v31.16b, v19.16b // if_1 && !(if_2 && if_4) + + and v17.16b, v19.16b, v17.16b // if_1 && if_2 && if_3 + and v18.16b, v19.16b, v18.16b // if_1 && if_2 && if_4 + + //calc p, v7, v6, v5, v4, v17, v7, v6, v5, v4 + uaddl v26.8h, v5.8b, v7.8b + uaddl2 v27.8h, v5.16b, v7.16b + uaddw v26.8h, v26.8h, v0.8b + uaddw2 v27.8h, v27.8h, v0.16b + add v20.8h, v20.8h, v26.8h + add v21.8h, v21.8h, v27.8h + uaddw v20.8h, v20.8h, v0.8b + uaddw2 v21.8h, v21.8h, v0.16b + rshrn v20.8b, v20.8h, #3 // p0'_2 + rshrn2 v20.16b, v21.8h, #3 // p0'_2 + uaddw v26.8h, v26.8h, v6.8b + uaddw2 v27.8h, v27.8h, v6.16b + rshrn v21.8b, v26.8h, #2 // p1'_2 + rshrn2 v21.16b, v27.8h, #2 // p1'_2 + uaddl v28.8h, v4.8b, v5.8b + uaddl2 v29.8h, v4.16b, v5.16b + shl v28.8h, v28.8h, #1 + shl v29.8h, v29.8h, #1 + add v28.8h, v28.8h, v26.8h + add v29.8h, v29.8h, v27.8h + rshrn v19.8b, v28.8h, #3 // p2'_2 + rshrn2 v19.16b, v29.8h, #3 // p2'_2 + + //calc q, v0, v1, v2, v3, v18, v0, v1, v2, v3 + uaddl v26.8h, v2.8b, v0.8b + uaddl2 v27.8h, v2.16b, v0.16b + uaddw v26.8h, v26.8h, v7.8b + uaddw2 v27.8h, v27.8h, v7.16b + add v22.8h, v22.8h, v26.8h + add v23.8h, v23.8h, v27.8h + uaddw v22.8h, v22.8h, v7.8b + uaddw2 v23.8h, v23.8h, v7.16b + rshrn v22.8b, v22.8h, #3 // q0'_2 + rshrn2 v22.16b, v23.8h, #3 // q0'_2 + uaddw v26.8h, v26.8h, v1.8b + uaddw2 v27.8h, v27.8h, v1.16b + rshrn v23.8b, v26.8h, #2 // q1'_2 + rshrn2 v23.16b, v27.8h, #2 // q1'_2 + uaddl v28.8h, v2.8b, v3.8b + uaddl2 v29.8h, v2.16b, v3.16b + shl v28.8h, v28.8h, #1 + shl v29.8h, v29.8h, #1 + add v28.8h, v28.8h, v26.8h + add v29.8h, v29.8h, v27.8h + rshrn v26.8b, v28.8h, #3 // q2'_2 + rshrn2 v26.16b, v29.8h, #3 // q2'_2 + + bit v7.16b, v24.16b, v30.16b // p0'_1 + bit v0.16b, v25.16b, v31.16b // q0'_1 + bit v7.16b, v20.16b, v17.16b // p0'_2 + bit v6.16b, v21.16b, v17.16b // p1'_2 + bit v5.16b, v19.16b, v17.16b // p2'_2 + bit v0.16b, v22.16b, v18.16b // q0'_2 + bit v1.16b, v23.16b, v18.16b // q1'_2 
+ bit v2.16b, v26.16b, v18.16b // q2'_2 +.endm + +function x264_deblock_v_luma_intra_neon, export=1 + h264_loop_filter_start_intra + + ld1 {v0.16b}, [x0], x1 // q0 + ld1 {v1.16b}, [x0], x1 // q1 + ld1 {v2.16b}, [x0], x1 // q2 + ld1 {v3.16b}, [x0], x1 // q3 + sub x0, x0, x1, lsl #3 + ld1 {v4.16b}, [x0], x1 // p3 + ld1 {v5.16b}, [x0], x1 // p2 + ld1 {v6.16b}, [x0], x1 // p1 + ld1 {v7.16b}, [x0] // p0 + + h264_loop_filter_luma_intra + + sub x0, x0, x1, lsl #1 + st1 {v5.16b}, [x0], x1 // p2 + st1 {v6.16b}, [x0], x1 // p1 + st1 {v7.16b}, [x0], x1 // p0 + st1 {v0.16b}, [x0], x1 // q0 + st1 {v1.16b}, [x0], x1 // q1 + st1 {v2.16b}, [x0] // q2 +9: + ret +endfunc + +function x264_deblock_h_luma_intra_neon, export=1 + h264_loop_filter_start_intra + + sub x0, x0, #4 + ld1 {v4.8b}, [x0], x1 + ld1 {v5.8b}, [x0], x1 + ld1 {v6.8b}, [x0], x1 + ld1 {v7.8b}, [x0], x1 + ld1 {v0.8b}, [x0], x1 + ld1 {v1.8b}, [x0], x1 + ld1 {v2.8b}, [x0], x1 + ld1 {v3.8b}, [x0], x1 + ld1 {v4.d}[1], [x0], x1 + ld1 {v5.d}[1], [x0], x1 + ld1 {v6.d}[1], [x0], x1 + ld1 {v7.d}[1], [x0], x1 + ld1 {v0.d}[1], [x0], x1 + ld1 {v1.d}[1], [x0], x1 + ld1 {v2.d}[1], [x0], x1 + ld1 {v3.d}[1], [x0], x1 + + transpose_8x16.b v4, v5, v6, v7, v0, v1, v2, v3, v21, v23 + + h264_loop_filter_luma_intra + + transpose_8x16.b v4, v5, v6, v7, v0, v1, v2, v3, v21, v23 + + sub x0, x0, x1, lsl #4 + st1 {v4.8b}, [x0], x1 + st1 {v5.8b}, [x0], x1 + st1 {v6.8b}, [x0], x1 + st1 {v7.8b}, [x0], x1 + st1 {v0.8b}, [x0], x1 + st1 {v1.8b}, [x0], x1 + st1 {v2.8b}, [x0], x1 + st1 {v3.8b}, [x0], x1 + st1 {v4.d}[1], [x0], x1 + st1 {v5.d}[1], [x0], x1 + st1 {v6.d}[1], [x0], x1
View file
x264-snapshot-20141218-2245.tar.bz2/common/aarch64/mc-a.S -> x264-snapshot-20150804-2245.tar.bz2/common/aarch64/mc-a.S
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * mc.S: aarch64 motion compensation ***************************************************************************** - * Copyright (C) 2009-2014 x264 project + * Copyright (C) 2009-2015 x264 project * * Authors: David Conrad <lessen42@gmail.com> * Janne Grunau <janne-x264@jannau.net> @@ -1253,6 +1253,34 @@ ret endfunc +function x264_plane_copy_neon, export=1 + add x8, x4, #15 + and x4, x8, #~15 + sub x1, x1, x4 + sub x3, x3, x4 +1: + mov w8, w4 +16: + tst w8, #16 + b.eq 32f + subs w8, w8, #16 + ldr q0, [x2], #16 + str q0, [x0], #16 + b.eq 0f +32: + subs w8, w8, #32 + ldp q0, q1, [x2], #32 + stp q0, q1, [x0], #32 + b.gt 32b +0: + subs w5, w5, #1 + add x2, x2, x3 + add x0, x0, x1 + b.gt 1b + + ret +endfunc + function x264_plane_copy_deinterleave_neon, export=1 add w9, w6, #15 and w9, w9, #0xfffffff0 @@ -1363,3 +1391,279 @@ ret endfunc + +.macro integral4h p1, p2 + ext v1.8b, \p1\().8b, \p2\().8b, #1 + ext v2.8b, \p1\().8b, \p2\().8b, #2 + ext v3.8b, \p1\().8b, \p2\().8b, #3 + uaddl v0.8h, \p1\().8b, v1.8b + uaddl v4.8h, v2.8b, v3.8b + add v0.8h, v0.8h, v4.8h + add v0.8h, v0.8h, v5.8h +.endm + +function integral_init4h_neon, export=1 + sub x3, x0, x2 + ld1 {v6.8b,v7.8b}, [x1], #16 +1: + subs x2, x2, #16 + ld1 {v5.8h}, [x3], #16 + integral4h v6, v7 + ld1 {v6.8b}, [x1], #8 + ld1 {v5.8h}, [x3], #16 + st1 {v0.8h}, [x0], #16 + integral4h v7, v6 + ld1 {v7.8b}, [x1], #8 + st1 {v0.8h}, [x0], #16 + b.gt 1b + ret +endfunc + +.macro integral8h p1, p2, s + ext v1.8b, \p1\().8b, \p2\().8b, #1 + ext v2.8b, \p1\().8b, \p2\().8b, #2 + ext v3.8b, \p1\().8b, \p2\().8b, #3 + ext v4.8b, \p1\().8b, \p2\().8b, #4 + ext v5.8b, \p1\().8b, \p2\().8b, #5 + ext v6.8b, \p1\().8b, \p2\().8b, #6 + ext v7.8b, \p1\().8b, \p2\().8b, #7 + uaddl v0.8h, \p1\().8b, v1.8b + uaddl v2.8h, v2.8b, v3.8b + uaddl v4.8h, v4.8b, v5.8b + uaddl v6.8h, v6.8b, v7.8b + add v0.8h, v0.8h, v2.8h + add v4.8h, v4.8h, v6.8h + add 
v0.8h, v0.8h, v4.8h + add v0.8h, v0.8h, \s\().8h +.endm + +function integral_init8h_neon, export=1 + sub x3, x0, x2 + ld1 {v16.8b,v17.8b}, [x1], #16 +1: + subs x2, x2, #16 + ld1 {v18.8h}, [x3], #16 + integral8h v16, v17, v18 + ld1 {v16.8b}, [x1], #8 + ld1 {v18.8h}, [x3], #16 + st1 {v0.8h}, [x0], #16 + integral8h v17, v16, v18 + ld1 {v17.8b}, [x1], #8 + st1 {v0.8h}, [x0], #16 + b.gt 1b + ret +endfunc + +function integral_init4v_neon, export=1 + mov x3, x0 + add x4, x0, x2, lsl #3 + add x8, x0, x2, lsl #4 + sub x2, x2, #8 + ld1 {v20.8h,v21.8h,v22.8h}, [x3], #48 + ld1 {v16.8h,v17.8h,v18.8h}, [x8], #48 +1: + subs x2, x2, #16 + ld1 {v24.8h,v25.8h}, [x4], #32 + ext v0.16b, v20.16b, v21.16b, #8 + ext v1.16b, v21.16b, v22.16b, #8 + ext v2.16b, v16.16b, v17.16b, #8 + ext v3.16b, v17.16b, v18.16b, #8 + sub v24.8h, v24.8h, v20.8h + sub v25.8h, v25.8h, v21.8h + add v0.8h, v0.8h, v20.8h + add v1.8h, v1.8h, v21.8h + add v2.8h, v2.8h, v16.8h + add v3.8h, v3.8h, v17.8h + st1 {v24.8h}, [x1], #16 + st1 {v25.8h}, [x1], #16 + mov v20.16b, v22.16b + mov v16.16b, v18.16b + sub v0.8h, v2.8h, v0.8h + sub v1.8h, v3.8h, v1.8h + ld1 {v21.8h,v22.8h}, [x3], #32 + ld1 {v17.8h,v18.8h}, [x8], #32 + st1 {v0.8h}, [x0], #16 + st1 {v1.8h}, [x0], #16 + b.gt 1b +2: + ret +endfunc + +function integral_init8v_neon, export=1 + add x2, x0, x1, lsl #4 + sub x1, x1, #8 + ands x3, x1, #16 - 1 + b.eq 1f + subs x1, x1, #8 + ld1 {v0.8h}, [x0] + ld1 {v2.8h}, [x2], #16 + sub v4.8h, v2.8h, v0.8h + st1 {v4.8h}, [x0], #16 + b.le 2f +1: + subs x1, x1, #16 + ld1 {v0.8h,v1.8h}, [x0] + ld1 {v2.8h,v3.8h}, [x2], #32 + sub v4.8h, v2.8h, v0.8h + sub v5.8h, v3.8h, v1.8h + st1 {v4.8h}, [x0], #16 + st1 {v5.8h}, [x0], #16 + b.gt 1b +2: + ret +endfunc + +function x264_mbtree_propagate_cost_neon, export=1 + ld1r {v5.4s}, [x5] +8: + subs w6, w6, #8 + ld1 {v1.8h}, [x1], #16 + ld1 {v2.8h}, [x2], #16 + ld1 {v3.8h}, [x3], #16 + ld1 {v4.8h}, [x4], #16 + bic v3.8h, #0xc0, lsl #8 + umin v3.8h, v2.8h, v3.8h + umull v20.4s, v2.4h, v4.4h // 
propagate_intra + umull2 v21.4s, v2.8h, v4.8h // propagate_intra + usubl v22.4s, v2.4h, v3.4h // propagate_num + usubl2 v23.4s, v2.8h, v3.8h // propagate_num + uxtl v26.4s, v2.4h // propagate_denom + uxtl2 v27.4s, v2.8h // propagate_denom + uxtl v24.4s, v1.4h + uxtl2 v25.4s, v1.8h + ucvtf v20.4s, v20.4s + ucvtf v21.4s, v21.4s + ucvtf v26.4s, v26.4s + ucvtf v27.4s, v27.4s + ucvtf v22.4s, v22.4s + ucvtf v23.4s, v23.4s + frecpe v28.4s, v26.4s + frecpe v29.4s, v27.4s + ucvtf v24.4s, v24.4s + ucvtf v25.4s, v25.4s + frecps v30.4s, v28.4s, v26.4s + frecps v31.4s, v29.4s, v27.4s
View file
x264-snapshot-20141218-2245.tar.bz2/common/aarch64/mc-c.c -> x264-snapshot-20150804-2245.tar.bz2/common/aarch64/mc-c.c
Changed
@@ -1,9 +1,10 @@ /***************************************************************************** * mc-c.c: aarch64 motion compensation ***************************************************************************** - * Copyright (C) 2009-2014 x264 project + * Copyright (C) 2009-2015 x264 project * * Authors: David Conrad <lessen42@gmail.com> + * Janne Grunau <janne-x264@jannau.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -48,6 +49,8 @@ void x264_pixel_avg2_w16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int ); void x264_pixel_avg2_w20_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int ); +void x264_plane_copy_neon( pixel *dst, intptr_t i_dst, + pixel *src, intptr_t i_src, int w, int h ); void x264_plane_copy_deinterleave_neon( pixel *dstu, intptr_t i_dstu, pixel *dstv, intptr_t i_dstv, pixel *src, intptr_t i_src, int w, int h ); @@ -89,8 +92,14 @@ void x264_mc_copy_w16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int ); void x264_mc_chroma_neon( uint8_t *, uint8_t *, intptr_t, uint8_t *, intptr_t, int, int, int, int ); +void integral_init4h_neon( uint16_t *, uint8_t *, intptr_t ); +void integral_init4v_neon( uint16_t *, uint16_t *, intptr_t ); +void integral_init8h_neon( uint16_t *, uint8_t *, intptr_t ); +void integral_init8v_neon( uint16_t *, intptr_t ); void x264_frame_init_lowres_core_neon( uint8_t *, uint8_t *, uint8_t *, uint8_t *, uint8_t *, intptr_t, intptr_t, int, int ); +void x264_mbtree_propagate_cost_neon( int16_t *, uint16_t *, uint16_t *, uint16_t *, uint16_t *, float *, int ); + #if !HIGH_BIT_DEPTH static void x264_weight_cache_neon( x264_t *h, x264_weight_t *w ) { @@ -132,9 +141,6 @@ x264_mc_copy_w16_neon, }; -static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1}; -static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2}; - static void mc_luma_neon( uint8_t *dst, intptr_t i_dst_stride, 
uint8_t *src[4], intptr_t i_src_stride, int mvx, int mvy, @@ -142,13 +148,13 @@ { int qpel_idx = ((mvy&3)<<2) + (mvx&3); intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2); - uint8_t *src1 = src[hpel_ref0[qpel_idx]] + offset; + uint8_t *src1 = src[x264_hpel_ref0[qpel_idx]] + offset; if ( (mvy&3) == 3 ) // explict if() to force conditional add src1 += i_src_stride; if( qpel_idx & 5 ) /* qpel interpolation needed */ { - uint8_t *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); + uint8_t *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); x264_pixel_avg_wtab_neon[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, src2, i_height ); @@ -168,13 +174,13 @@ { int qpel_idx = ((mvy&3)<<2) + (mvx&3); intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2); - uint8_t *src1 = src[hpel_ref0[qpel_idx]] + offset; + uint8_t *src1 = src[x264_hpel_ref0[qpel_idx]] + offset; if ( (mvy&3) == 3 ) // explict if() to force conditional add src1 += i_src_stride; if( qpel_idx & 5 ) /* qpel interpolation needed */ { - uint8_t *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); + uint8_t *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); x264_pixel_avg_wtab_neon[i_width>>2]( dst, *i_dst_stride, src1, i_src_stride, src2, i_height ); @@ -199,6 +205,89 @@ int height, int16_t *buf ); #endif // !HIGH_BIT_DEPTH +#define CLIP_ADD(s,x) (s) = X264_MIN((s)+(x),(1<<15)-1) +#define CLIP_ADD2(s,x)\ +do\ +{\ + CLIP_ADD((s)[0], (x)[0]);\ + CLIP_ADD((s)[1], (x)[1]);\ +} while(0) + +void x264_mbtree_propagate_list_internal_neon( int16_t (*mvs)[2], + int16_t *propagate_amount, + uint16_t *lowres_costs, + int16_t *output, + int bipred_weight, int mb_y, + int len ); + +static void x264_mbtree_propagate_list_neon( x264_t *h, uint16_t *ref_costs, + int16_t (*mvs)[2], + int16_t *propagate_amount, + uint16_t *lowres_costs, + int bipred_weight, int mb_y, + int len, int list ) +{ + int16_t *current = h->scratch_buffer2; + + x264_mbtree_propagate_list_internal_neon( mvs, 
propagate_amount, + lowres_costs, current, + bipred_weight, mb_y, len ); + + unsigned stride = h->mb.i_mb_stride; + unsigned width = h->mb.i_mb_width; + unsigned height = h->mb.i_mb_height; + + for( unsigned i = 0; i < len; current += 32 ) + { + int end = X264_MIN( i+8, len ); + for( ; i < end; i++, current += 2 ) + { + if( !(lowres_costs[i] & (1 << (list+LOWRES_COST_SHIFT))) ) + continue; + + unsigned mbx = current[0]; + unsigned mby = current[1]; + unsigned idx0 = mbx + mby * stride; + unsigned idx2 = idx0 + stride; + + /* Shortcut for the simple/common case of zero MV */ + if( !M32( mvs[i] ) ) + { + CLIP_ADD( ref_costs[idx0], current[16] ); + continue; + } + + if( mbx < width-1 && mby < height-1 ) + { + CLIP_ADD2( ref_costs+idx0, current+16 ); + CLIP_ADD2( ref_costs+idx2, current+32 ); + } + else + { + /* Note: this takes advantage of unsigned representation to + * catch negative mbx/mby. */ + if( mby < height ) + { + if( mbx < width ) + CLIP_ADD( ref_costs[idx0+0], current[16] ); + if( mbx+1 < width ) + CLIP_ADD( ref_costs[idx0+1], current[17] ); + } + if( mby+1 < height ) + { + if( mbx < width ) + CLIP_ADD( ref_costs[idx2+0], current[32] ); + if( mbx+1 < width ) + CLIP_ADD( ref_costs[idx2+1], current[33] ); + } + } + } + } +} + +#undef CLIP_ADD +#undef CLIP_ADD2 + void x264_mc_init_aarch64( int cpu, x264_mc_functions_t *pf ) { #if !HIGH_BIT_DEPTH @@ -217,6 +306,7 @@ pf->copy[PIXEL_8x8] = x264_mc_copy_w8_neon; pf->copy[PIXEL_4x4] = x264_mc_copy_w4_neon; + pf->plane_copy = x264_plane_copy_neon; pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_neon; pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_neon; pf->plane_copy_interleave = x264_plane_copy_interleave_neon; @@ -245,5 +335,16 @@ pf->get_ref = get_ref_neon; pf->hpel_filter = x264_hpel_filter_neon; pf->frame_init_lowres_core = x264_frame_init_lowres_core_neon; + + pf->integral_init4h = integral_init4h_neon; + pf->integral_init8h = integral_init8h_neon; + pf->integral_init4v = 
integral_init4v_neon; + pf->integral_init8v = integral_init8v_neon; + + pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_neon; + pf->mbtree_propagate_list = x264_mbtree_propagate_list_neon; + + pf->memcpy_aligned = x264_memcpy_aligned_neon; + pf->memzero_aligned = x264_memzero_aligned_neon; #endif // !HIGH_BIT_DEPTH }
View file
x264-snapshot-20141218-2245.tar.bz2/common/aarch64/mc.h -> x264-snapshot-20150804-2245.tar.bz2/common/aarch64/mc.h
Changed
@@ -1,7 +1,9 @@ /***************************************************************************** * mc.h: aarch64 motion compensation ***************************************************************************** - * Copyright (C) 2014 x264 project + * Copyright (C) 2014-2015 x264 project + * + * Authors: Janne Grunau <janne-x264@jannau.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by
View file
x264-snapshot-20141218-2245.tar.bz2/common/aarch64/pixel-a.S -> x264-snapshot-20150804-2245.tar.bz2/common/aarch64/pixel-a.S
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * pixel.S: aarch64 pixel metrics ***************************************************************************** - * Copyright (C) 2009-2014 x264 project + * Copyright (C) 2009-2015 x264 project * * Authors: David Conrad <lessen42@gmail.com> * Janne Grunau <janne-x264@jannau.net> @@ -114,6 +114,7 @@ SAD_FUNC 4, 4 SAD_FUNC 4, 8 +SAD_FUNC 4, 16 SAD_FUNC 8, 4 SAD_FUNC 8, 8 SAD_FUNC 8, 16 @@ -148,7 +149,7 @@ \first v17.8h, v2.8b, v0.8b ld1 {v3.8b}, [x3], x5 ld1 {v1.8b}, [x1], x5 - \first v18.8h, v3.8b, v0.8b + \first v18.8h, v3.8b, v0.8b uabal v16.8h, v1.8b, v5.8b ld1 {v2.8b}, [x2], x5 ld1 {v3.8b}, [x3], x5 @@ -248,6 +249,56 @@ SAD_X_FUNC 4, 16, 16 +function x264_pixel_vsad_neon, export=1 + subs w2, w2, #2 + ld1 {v0.16b}, [x0], x1 + ld1 {v1.16b}, [x0], x1 + uabdl v6.8h, v0.8b, v1.8b + uabdl2 v7.8h, v0.16b, v1.16b + b.le 2f +1: + subs w2, w2, #2 + ld1 {v0.16b}, [x0], x1 + uabal v6.8h, v1.8b, v0.8b + uabal2 v7.8h, v1.16b, v0.16b + ld1 {v1.16b}, [x0], x1 + b.lt 2f + uabal v6.8h, v0.8b, v1.8b + uabal2 v7.8h, v0.16b, v1.16b + b.gt 1b +2: + add v5.8h, v6.8h, v7.8h + uaddlv s0, v5.8h + fmov w0, s0 + ret +endfunc + +function x264_pixel_asd8_neon, export=1 + sub w4, w4, #2 + ld1 {v0.8b}, [x0], x1 + ld1 {v1.8b}, [x2], x3 + ld1 {v2.8b}, [x0], x1 + ld1 {v3.8b}, [x2], x3 + usubl v16.8h, v0.8b, v1.8b +1: + subs w4, w4, #2 + ld1 {v4.8b}, [x0], x1 + ld1 {v5.8b}, [x2], x3 + usubl v17.8h, v2.8b, v3.8b + usubl v18.8h, v4.8b, v5.8b + add v16.8h, v16.8h, v17.8h + ld1 {v2.8b}, [x0], x1 + ld1 {v3.8b}, [x2], x3 + add v16.8h, v16.8h, v18.8h + b.gt 1b + usubl v17.8h, v2.8b, v3.8b + add v16.8h, v16.8h, v17.8h + saddlv s0, v16.8h + abs v0.2s, v0.2s + fmov w0, s0 + ret +endfunc + .macro SSD_START_4 ld1 {v16.s}[0], [x0], x1 ld1 {v17.s}[0], [x2], x3 @@ -343,12 +394,84 @@ SSD_FUNC 4, 4 SSD_FUNC 4, 8 +SSD_FUNC 4, 16 SSD_FUNC 8, 4 SSD_FUNC 8, 8 SSD_FUNC 8, 16 SSD_FUNC 16, 8 SSD_FUNC 16, 16 + +function 
x264_pixel_ssd_nv12_core_neon, export=1 + sxtw x8, w4 + add x8, x8, #8 + and x8, x8, #~15 + movi v6.2d, #0 + movi v7.2d, #0 + sub x1, x1, x8, lsl #1 + sub x3, x3, x8, lsl #1 +1: + subs w8, w4, #16 + ld2 {v0.8b,v1.8b}, [x0], #16 + ld2 {v2.8b,v3.8b}, [x2], #16 + ld2 {v24.8b,v25.8b}, [x0], #16 + ld2 {v26.8b,v27.8b}, [x2], #16 + + usubl v16.8h, v0.8b, v2.8b + usubl v17.8h, v1.8b, v3.8b + smull v20.4s, v16.4h, v16.4h + smull v21.4s, v17.4h, v17.4h + usubl v18.8h, v24.8b, v26.8b + usubl v19.8h, v25.8b, v27.8b + smlal2 v20.4s, v16.8h, v16.8h + smlal2 v21.4s, v17.8h, v17.8h + + b.lt 4f + b.eq 3f +2: + smlal v20.4s, v18.4h, v18.4h + smlal v21.4s, v19.4h, v19.4h + ld2 {v0.8b,v1.8b}, [x0], #16 + ld2 {v2.8b,v3.8b}, [x2], #16 + smlal2 v20.4s, v18.8h, v18.8h + smlal2 v21.4s, v19.8h, v19.8h + + subs w8, w8, #16 + usubl v16.8h, v0.8b, v2.8b + usubl v17.8h, v1.8b, v3.8b + smlal v20.4s, v16.4h, v16.4h + smlal v21.4s, v17.4h, v17.4h + ld2 {v24.8b,v25.8b}, [x0], #16 + ld2 {v26.8b,v27.8b}, [x2], #16 + smlal2 v20.4s, v16.8h, v16.8h + smlal2 v21.4s, v17.8h, v17.8h + b.lt 4f + + usubl v18.8h, v24.8b, v26.8b + usubl v19.8h, v25.8b, v27.8b + b.gt 2b +3: + smlal v20.4s, v18.4h, v18.4h + smlal v21.4s, v19.4h, v19.4h + smlal2 v20.4s, v18.8h, v18.8h + smlal2 v21.4s, v19.8h, v19.8h +4: + subs w5, w5, #1 + uaddw v6.2d, v6.2d, v20.2s + uaddw v7.2d, v7.2d, v21.2s + add x0, x0, x1 + add x2, x2, x3 + uaddw2 v6.2d, v6.2d, v20.4s + uaddw2 v7.2d, v7.2d, v21.4s + b.gt 1b + + addp v6.2d, v6.2d, v7.2d + st1 {v6.d}[0], [x6] + st1 {v6.d}[1], [x7] + + ret +endfunc + .macro pixel_var_8 h function x264_pixel_var_8x\h\()_neon, export=1 ld1 {v16.8b}, [x0], x1 @@ -800,10 +923,65 @@ b x264_satd_8x4v_8x8h_neon endfunc +function x264_pixel_satd_4x16_neon, export=1 + mov x4, x30 + ld1 {v1.s}[0], [x2], x3 + ld1 {v0.s}[0], [x0], x1 + ld1 {v3.s}[0], [x2], x3 + ld1 {v2.s}[0], [x0], x1 + ld1 {v5.s}[0], [x2], x3 + ld1 {v4.s}[0], [x0], x1 + ld1 {v7.s}[0], [x2], x3 + ld1 {v6.s}[0], [x0], x1 + ld1 {v1.s}[1], [x2], x3 + ld1 
{v0.s}[1], [x0], x1 + ld1 {v3.s}[1], [x2], x3 + ld1 {v2.s}[1], [x0], x1 + ld1 {v5.s}[1], [x2], x3 + ld1 {v4.s}[1], [x0], x1 + ld1 {v7.s}[1], [x2], x3 + ld1 {v6.s}[1], [x0], x1 + usubl v16.8h, v0.8b, v1.8b + usubl v17.8h, v2.8b, v3.8b + usubl v18.8h, v4.8b, v5.8b + usubl v19.8h, v6.8b, v7.8b + ld1 {v1.s}[0], [x2], x3 + ld1 {v0.s}[0], [x0], x1 + ld1 {v3.s}[0], [x2], x3 + ld1 {v2.s}[0], [x0], x1 + ld1 {v5.s}[0], [x2], x3 + ld1 {v4.s}[0], [x0], x1
View file
x264-snapshot-20141218-2245.tar.bz2/common/aarch64/pixel.h -> x264-snapshot-20150804-2245.tar.bz2/common/aarch64/pixel.h
Changed
@@ -1,9 +1,10 @@ /***************************************************************************** * pixel.h: aarch64 pixel metrics ***************************************************************************** - * Copyright (C) 2009-2014 x264 project + * Copyright (C) 2009-2015 x264 project * * Authors: David Conrad <lessen42@gmail.com> + * Janne Grunau <janne-x264@jannau.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -32,6 +33,7 @@ ret x264_pixel_##name##_8x16_##suffix args;\ ret x264_pixel_##name##_8x8_##suffix args;\ ret x264_pixel_##name##_8x4_##suffix args;\ + ret x264_pixel_##name##_4x16_##suffix args;\ ret x264_pixel_##name##_4x8_##suffix args;\ ret x264_pixel_##name##_4x4_##suffix args;\ @@ -47,8 +49,14 @@ DECL_X1( satd, neon ) DECL_X1( ssd, neon ) + +void x264_pixel_ssd_nv12_core_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int, int, uint64_t *, uint64_t * ); + +int x264_pixel_vsad_neon( uint8_t *, intptr_t, int ); + int x264_pixel_sa8d_8x8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t ); int x264_pixel_sa8d_16x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t ); +uint64_t x264_pixel_sa8d_satd_16x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t ); uint64_t x264_pixel_var_8x8_neon ( uint8_t *, intptr_t ); uint64_t x264_pixel_var_8x16_neon ( uint8_t *, intptr_t ); @@ -66,4 +74,6 @@ int sums[2][4] ); float x264_pixel_ssim_end4_neon( int sum0[5][4], int sum1[5][4], int width ); +int x264_pixel_asd8_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int ); + #endif
View file
x264-snapshot-20141218-2245.tar.bz2/common/aarch64/predict-a.S -> x264-snapshot-20150804-2245.tar.bz2/common/aarch64/predict-a.S
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * predict.S: aarch64 intra prediction ***************************************************************************** - * Copyright (C) 2009-2014 x264 project + * Copyright (C) 2009-2015 x264 project * * Authors: David Conrad <lessen42@gmail.com> * Mans Rullgard <mans@mansr.com> @@ -436,14 +436,25 @@ endfunc function x264_predict_8x8c_dc_left_neon, export=1 - sub x2, x0, #1 + ldrb w2, [x0, #0 * FDEC_STRIDE - 1] + ldrb w3, [x0, #1 * FDEC_STRIDE - 1] + ldrb w4, [x0, #2 * FDEC_STRIDE - 1] + ldrb w5, [x0, #3 * FDEC_STRIDE - 1] mov x1, #FDEC_STRIDE - ldcol.8 v0, x2, x1 - uaddlp v0.4h, v0.8b - addp v0.4h, v0.4h, v0.4h + add w2, w2, w3 + add w3, w4, w5 + ldrb w6, [x0, #4 * FDEC_STRIDE - 1] + ldrb w7, [x0, #5 * FDEC_STRIDE - 1] + ldrb w8, [x0, #6 * FDEC_STRIDE - 1] + ldrb w9, [x0, #7 * FDEC_STRIDE - 1] + add w6, w6, w7 + add w7, w8, w9 + add w2, w2, w3 + add w6, w6, w7 + dup v0.8h, w2 + dup v1.8h, w6 rshrn v0.8b, v0.8h, #2 - dup v1.8b, v0.b[1] - dup v0.8b, v0.b[0] + rshrn v1.8b, v1.8h, #2 b pred8x8c_dc_end endfunc @@ -546,6 +557,223 @@ endfunc +.macro loadsum4 wd, t1, t2, t3, x, idx + ldrb \wd, [\x, #(\idx + 0) * FDEC_STRIDE - 1] + ldrb \t1, [\x, #(\idx + 1) * FDEC_STRIDE - 1] + ldrb \t2, [\x, #(\idx + 2) * FDEC_STRIDE - 1] + ldrb \t3, [\x, #(\idx + 3) * FDEC_STRIDE - 1] + add \wd, \wd, \t1 + add \t1, \t2, \t3 + add \wd, \wd, \t1 +.endm + +function x264_predict_8x16c_h_neon, export=1 + sub x2, x0, #1 + add x3, x0, #FDEC_STRIDE - 1 + mov x7, #2 * FDEC_STRIDE + add x1, x0, #FDEC_STRIDE +.rept 4 + ld1r {v0.8b}, [x2], x7 + ld1r {v1.8b}, [x3], x7 + ld1r {v2.8b}, [x2], x7 + ld1r {v3.8b}, [x3], x7 + st1 {v0.8b}, [x0], x7 + st1 {v1.8b}, [x1], x7 + st1 {v2.8b}, [x0], x7 + st1 {v3.8b}, [x1], x7 +.endr + ret +endfunc + +function x264_predict_8x16c_v_neon, export=1 + sub x1, x0, #FDEC_STRIDE + mov x2, #2 * FDEC_STRIDE + ld1 {v0.8b}, [x1], x2 +.rept 8 + st1 {v0.8b}, [x0], x2 + st1 {v0.8b}, 
[x1], x2 +.endr + ret +endfunc + +function x264_predict_8x16c_p_neon, export=1 + movrel x4, p16weight + ld1 {v17.8h}, [x4] + sub x3, x0, #FDEC_STRIDE + mov x1, #FDEC_STRIDE + add x2, x3, #4 + sub x3, x3, #1 + + ld1 {v0.8b}, [x3] + ld1 {v2.8b}, [x2], x1 + ldcol.8 v1, x3, x1 + add x3, x3, x1 + ldcol.8 v3, x3, x1 + ext v4.8b, v2.8b, v2.8b, #3 + ext v5.8b, v3.8b, v3.8b, #7 + rev32 v0.8b, v0.8b + rev64 v1.8b, v1.8b + + uaddl v4.8h, v5.8b, v4.8b // a * 1/16 + + usubl v2.8h, v2.8b, v0.8b + mul v2.8h, v2.8h, v17.8h + saddlp v2.4s, v2.8h + addp v2.4s, v2.4s, v2.4s // H + + usubl v3.8h, v3.8b, v1.8b + mul v3.8h, v3.8h, v17.8h + saddlp v3.4s, v3.8h + addp v3.4s, v3.4s, v3.4s + addp v3.4s, v3.4s, v3.4s // V + + ext v17.16b, v17.16b, v17.16b, #14 + + shl v4.4h, v4.4h, #4 // a + shl v6.2s, v2.2s, #4 // 16 * H + shl v7.2s, v3.2s, #2 // 4 * V + add v2.2s, v2.2s, v6.2s // 17 * H + add v3.2s, v3.2s, v7.2s // 5 * V + rshrn v2.4h, v2.4s, #5 // b + rshrn v3.4h, v3.4s, #6 // c + + mov v17.h[0], wzr + + sub v4.4h, v4.4h, v2.4h // a - b + shl v6.4h, v2.4h, #1 // 2 * b + add v4.4h, v4.4h, v3.4h // a - b + c + shl v7.4h, v3.4h, #3 // 8 * c + sub v4.4h, v4.4h, v6.4h // a - 3b + c + sub v4.4h, v4.4h, v7.4h // a - 3b - 7c + + mul v0.8h, v17.8h, v2.h[0] // 0,1,2,3,4,5,6,7 * b + dup v1.8h, v4.h[0] // i00 + dup v2.8h, v3.h[0] // c + add v1.8h, v1.8h, v0.8h // pix + {0..7}*b + mov x3, #16 +1: + subs x3, x3, #2 + sqrshrun v4.8b, v1.8h, #5 + add v1.8h, v1.8h, v2.8h + sqrshrun v5.8b, v1.8h, #5 + st1 {v4.8b}, [x0], x1 + add v1.8h, v1.8h, v2.8h + st1 {v5.8b}, [x0], x1 + b.ne 1b + ret +endfunc + +function x264_predict_8x16c_dc_neon, export=1 + sub x3, x0, #FDEC_STRIDE + mov x1, #FDEC_STRIDE + ld1 {v6.8b}, [x3] + loadsum4 w2, w3, w4, w5, x0, 0 + uaddlp v6.4h, v6.8b + dup v22.8h, w2 // s2 + loadsum4 w6, w7, w8, w9, x0, 4 + addp v6.4h, v6.4h, v6.4h // s0, s1 + dup v23.8h, w6 // s3 + loadsum4 w2, w3, w4, w5, x0, 8 + dup v20.8h, v6.h[0] // s0 + dup v24.8h, w2 // s4 + loadsum4 w6, w7, w8, w9, x0, 12 + dup 
v21.8h, v6.h[1] // s1 + dup v25.8h, w6 // s5 + + ext v16.16b, v20.16b, v21.16b, #8 + ext v17.16b, v22.16b, v21.16b, #8 + ext v1.16b, v23.16b, v21.16b, #8 + ext v2.16b, v24.16b, v21.16b, #8 + ext v3.16b, v25.16b, v21.16b, #8 + + add v0.8h, v16.8h, v17.8h + add v1.8h, v1.8h, v23.8h + add v2.8h, v2.8h, v24.8h + add v3.8h, v3.8h, v25.8h + + rshrn v0.8b, v0.8h, #3 + rshrn v1.8b, v1.8h, #3 + rshrn v2.8b, v2.8h, #3 + rshrn v3.8b, v3.8h, #3 +.irp idx, 0, 1, 2, 3 +.rept 4 + st1 {v\idx\().8b}, [x0], x1 +.endr +.endr + ret +endfunc + +function x264_predict_8x16c_dc_left_neon, export=1 + mov x1, #FDEC_STRIDE + ldrb w2, [x0, # 0 * FDEC_STRIDE - 1] + ldrb w3, [x0, # 1 * FDEC_STRIDE - 1] + ldrb w4, [x0, # 2 * FDEC_STRIDE - 1] + ldrb w5, [x0, # 3 * FDEC_STRIDE - 1] + add w2, w2, w3 + + ldrb w6, [x0, # 4 * FDEC_STRIDE - 1]
View file
x264-snapshot-20141218-2245.tar.bz2/common/aarch64/predict-c.c -> x264-snapshot-20150804-2245.tar.bz2/common/aarch64/predict-c.c
Changed
@@ -1,9 +1,10 @@ /***************************************************************************** * predict.c: aarch64 intra prediction ***************************************************************************** - * Copyright (C) 2009-2014 x264 project + * Copyright (C) 2009-2015 x264 project * * Authors: David Conrad <lessen42@gmail.com> + * Janne Grunau <janne-x264@jannau.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -35,6 +36,10 @@ void x264_predict_8x8c_dc_left_neon( uint8_t *src ); void x264_predict_8x8c_p_neon( uint8_t *src ); +void x264_predict_8x16c_dc_left_neon( uint8_t *src ); +void x264_predict_8x16c_dc_top_neon( uint8_t *src ); +void x264_predict_8x16c_p_neon( uint8_t *src ); + void x264_predict_8x8_ddl_neon( uint8_t *src, uint8_t edge[36] ); void x264_predict_8x8_ddr_neon( uint8_t *src, uint8_t edge[36] ); void x264_predict_8x8_vl_neon( uint8_t *src, uint8_t edge[36] ); @@ -80,6 +85,22 @@ #endif // !HIGH_BIT_DEPTH } + +void x264_predict_8x16c_init_aarch64( int cpu, x264_predict_t pf[7] ) +{ + if (!(cpu&X264_CPU_NEON)) + return; + +#if !HIGH_BIT_DEPTH + pf[I_PRED_CHROMA_V ] = x264_predict_8x16c_v_neon; + pf[I_PRED_CHROMA_H ] = x264_predict_8x16c_h_neon; + pf[I_PRED_CHROMA_DC] = x264_predict_8x16c_dc_neon; + pf[I_PRED_CHROMA_P ] = x264_predict_8x16c_p_neon; + pf[I_PRED_CHROMA_DC_LEFT]= x264_predict_8x16c_dc_left_neon; + pf[I_PRED_CHROMA_DC_TOP ]= x264_predict_8x16c_dc_top_neon; +#endif // !HIGH_BIT_DEPTH +} + void x264_predict_8x8_init_aarch64( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter ) { if (!(cpu&X264_CPU_NEON))
View file
x264-snapshot-20141218-2245.tar.bz2/common/aarch64/predict.h -> x264-snapshot-20150804-2245.tar.bz2/common/aarch64/predict.h
Changed
@@ -1,9 +1,10 @@ /***************************************************************************** * predict.h: aarch64 intra prediction ***************************************************************************** - * Copyright (C) 2009-2014 x264 project + * Copyright (C) 2009-2015 x264 project * * Authors: David Conrad <lessen42@gmail.com> + * Janne Grunau <janne-x264@jannau.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -40,6 +41,9 @@ void x264_predict_8x8c_dc_neon( uint8_t *src ); void x264_predict_8x8c_h_neon( uint8_t *src ); void x264_predict_8x8c_v_neon( uint8_t *src ); +void x264_predict_8x16c_v_neon( uint8_t *src ); +void x264_predict_8x16c_h_neon( uint8_t *src ); +void x264_predict_8x16c_dc_neon( uint8_t *src ); void x264_predict_16x16_v_neon( uint8_t *src ); void x264_predict_16x16_h_neon( uint8_t *src ); void x264_predict_16x16_dc_neon( uint8_t *src ); @@ -47,6 +51,7 @@ void x264_predict_4x4_init_aarch64( int cpu, x264_predict_t pf[12] ); void x264_predict_8x8_init_aarch64( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter ); void x264_predict_8x8c_init_aarch64( int cpu, x264_predict_t pf[7] ); +void x264_predict_8x16c_init_aarch64( int cpu, x264_predict_t pf[7] ); void x264_predict_16x16_init_aarch64( int cpu, x264_predict_t pf[7] ); #endif /* X264_AARCH64_PREDICT_H */
View file
x264-snapshot-20141218-2245.tar.bz2/common/aarch64/quant-a.S -> x264-snapshot-20150804-2245.tar.bz2/common/aarch64/quant-a.S
Changed
@@ -1,9 +1,10 @@ /**************************************************************************** * quant.S: arm quantization and level-run ***************************************************************************** - * Copyright (C) 2009-2014 x264 project + * Copyright (C) 2009-2015 x264 project * * Authors: David Conrad <lessen42@gmail.com> + * Janne Grunau <janne-x264@jannau.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -300,6 +301,118 @@ ret endfunc +.macro decimate_score_1x size +function x264_decimate_score\size\()_neon, export=1 + ld1 {v0.8h,v1.8h}, [x0] + movrel x5, X(x264_decimate_table4) + movi v3.16b, #0x01 + sqxtn v0.8b, v0.8h + sqxtn2 v0.16b, v1.8h + abs v2.16b, v0.16b + cmeq v1.16b, v0.16b, #0 + cmhi v2.16b, v2.16b, v3.16b + shrn v1.8b, v1.8h, #4 + shrn v2.8b, v2.8h, #4 + fmov x2, d2 + fmov x1, d1 + cbnz x2, 9f + mvn x1, x1 + mov w0, #0 + cbz x1, 0f +.ifc \size, 15 + lsr x1, x1, #1 +.endif + rbit x1, x1 +1: + clz x3, x1 + lsr x6, x3, #2 + lsl x1, x1, x3 + ldrb w7, [x5, x6] + cbz x1, 2f + lsl x1, x1, #4 + add w0, w0, w7 + cbnz x1, 1b + ret +2: + add w0, w0, w7 +0: + ret +9: + mov w0, #9 + ret +endfunc +.endm + +decimate_score_1x 15 +decimate_score_1x 16 + +const mask64, align=6 + .byte 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 + .byte 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 +endconst + +function x264_decimate_score64_neon, export=1 + ld1 {v0.8h,v1.8h}, [x0], #32 + ld1 {v2.8h,v3.8h}, [x0], #32 + ld1 {v4.8h,v5.8h}, [x0], #32 + ld1 {v6.8h,v7.8h}, [x0] + movrel x6, mask64 + movi v31.16b, #0x01 + sqxtn v16.8b, v1.8h + sqxtn2 v16.16b, v0.8h + sqxtn v17.8b, v3.8h + sqxtn2 v17.16b, v2.8h + sqxtn v18.8b, v5.8h + sqxtn2 v18.16b, v4.8h + sqxtn v19.8b, v7.8h + sqxtn2 v19.16b, v6.8h + abs v4.16b, v16.16b + abs v5.16b, v17.16b + abs v6.16b, v18.16b + abs v7.16b, v19.16b + ld1 {v30.16b}, [x6] + cmeq v0.16b, v16.16b, #0 + cmeq v1.16b, v17.16b, #0 + cmeq 
v2.16b, v18.16b, #0 + cmeq v3.16b, v19.16b, #0 + umax v4.16b, v4.16b, v5.16b + umax v6.16b, v6.16b, v7.16b + and v0.16b, v0.16b, v30.16b + and v1.16b, v1.16b, v30.16b + and v2.16b, v2.16b, v30.16b + and v3.16b, v3.16b, v30.16b + umax v4.16b, v4.16b, v6.16b + addp v0.16b, v1.16b, v0.16b + addp v2.16b, v3.16b, v2.16b + cmhi v4.16b, v4.16b, v31.16b + addp v0.16b, v2.16b, v0.16b + shrn v4.8b, v4.8h, #4 + addp v0.16b, v0.16b, v0.16b + fmov x2, d4 + fmov x1, d0 + cbnz x2, 9f + mvn x1, x1 + mov w0, #0 + cbz x1, 0f + movrel x5, X(x264_decimate_table8) +1: + clz x3, x1 + lsl x1, x1, x3 + ldrb w7, [x5, x3] + cbz x1, 2f + lsl x1, x1, #1 + add w0, w0, w7 + cbnz x1, 1b + ret +2: + add w0, w0, w7 +0: + ret +9: + mov w0, #9 + ret +endfunc + // int coeff_last( int16_t *l ) function x264_coeff_last4_aarch64, export=1 ldr x2, [x0] @@ -384,3 +497,105 @@ sub w0, w3, w2 ret endfunc + +.macro coeff_level_run_start size + add x6, x1, #23 // runlevel->mask + mov w7, #0 + mov w8, #0 + mov w9, #1 + and x6, x6, #~15 + mov w4, #\size - 1 +.endm + +.macro coeff_level_run shift + clz x3, x2 + subs w4, w4, w3, lsr #\shift + str w4, [x1], #4 +1: + ldrh w5, [x0, x4, lsl #1] + strh w5, [x6], #2 + add w7, w7, #1 + lsl w10, w9, w4 + orr w8, w8, w10 + b.le 2f + add w3, w3, #1 << \shift + sub w4, w4, #1 + and x3, x3, #~((1 << \shift) - 1) + lsl x2, x2, x3 + clz x3, x2 + subs w4, w4, w3, lsr #\shift + b.ge 1b +2: + str w8, [x1] + mov w0, w7 +.endm + +function x264_coeff_level_run4_aarch64, export=1 + ldr x2, [x0] + + coeff_level_run_start 4 + + coeff_level_run 4 + + ret +endfunc + +.macro X264_COEFF_LEVEL_RUN size +function x264_coeff_level_run\size\()_neon, export=1 +.if \size == 15 + sub x0, x0, #2 +.endif +.if \size < 15 + .equ shiftw, 3 + ld1 {v0.8h}, [x0] + uqxtn v0.8b, v0.8h + cmtst v0.8b, v0.8b, v0.8b +.else + .equ shiftw, 2 + ld1 {v0.8h,v1.8h}, [x0] + uqxtn v0.8b, v0.8h + uqxtn2 v0.16b, v1.8h + cmtst v0.16b, v0.16b, v0.16b + shrn v0.8b, v0.8h, #4 +.endif + fmov x2, d0 +.if \size == 15 + add x0, 
x0, #2 +.endif
View file
x264-snapshot-20141218-2245.tar.bz2/common/aarch64/quant.h -> x264-snapshot-20150804-2245.tar.bz2/common/aarch64/quant.h
Changed
@@ -1,9 +1,10 @@ /***************************************************************************** * quant.h: arm quantization and level-run ***************************************************************************** - * Copyright (C) 2005-2014 x264 project + * Copyright (C) 2005-2015 x264 project * * Authors: David Conrad <lessen42@gmail.com> + * Janne Grunau <janne-x264@jannau.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -38,10 +39,21 @@ void x264_dequant_4x4_neon( int16_t dct[16], int dequant_mf[6][16], int i_qp ); void x264_dequant_8x8_neon( int16_t dct[64], int dequant_mf[6][64], int i_qp ); +int x264_decimate_score15_neon( int16_t * ); +int x264_decimate_score16_neon( int16_t * ); +int x264_decimate_score64_neon( int16_t * ); + int x264_coeff_last4_aarch64( int16_t * ); int x264_coeff_last8_aarch64( int16_t * ); int x264_coeff_last15_neon( int16_t * ); int x264_coeff_last16_neon( int16_t * ); int x264_coeff_last64_neon( int16_t * ); +int x264_coeff_level_run4_aarch64( int16_t *, x264_run_level_t * ); +int x264_coeff_level_run8_neon( int16_t *, x264_run_level_t * ); +int x264_coeff_level_run15_neon( int16_t *, x264_run_level_t * ); +int x264_coeff_level_run16_neon( int16_t *, x264_run_level_t * ); + +void x264_denoise_dct_neon( dctcoef *, uint32_t *, udctcoef *, int ); + #endif
View file
x264-snapshot-20141218-2245.tar.bz2/common/arm/asm.S -> x264-snapshot-20150804-2245.tar.bz2/common/arm/asm.S
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * asm.S: arm utility macros ***************************************************************************** - * Copyright (C) 2008-2014 x264 project + * Copyright (C) 2008-2015 x264 project * * Authors: Mans Rullgard <mans@mansr.com> * David Conrad <lessen42@gmail.com>
View file
x264-snapshot-20141218-2245.tar.bz2/common/arm/cpu-a.S -> x264-snapshot-20150804-2245.tar.bz2/common/arm/cpu-a.S
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * cpu-a.S: arm cpu detection ***************************************************************************** - * Copyright (C) 2009-2014 x264 project + * Copyright (C) 2009-2015 x264 project * * Authors: David Conrad <lessen42@gmail.com> *
View file
x264-snapshot-20141218-2245.tar.bz2/common/arm/dct-a.S -> x264-snapshot-20150804-2245.tar.bz2/common/arm/dct-a.S
Changed
@@ -1,7 +1,7 @@ /**************************************************************************** * dct-a.S: arm transform and zigzag ***************************************************************************** - * Copyright (C) 2009-2014 x264 project + * Copyright (C) 2009-2015 x264 project * * Authors: David Conrad <lessen42@gmail.com> *
View file
x264-snapshot-20141218-2245.tar.bz2/common/arm/dct.h -> x264-snapshot-20150804-2245.tar.bz2/common/arm/dct.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * dct.h: arm transform and zigzag ***************************************************************************** - * Copyright (C) 2009-2014 x264 project + * Copyright (C) 2009-2015 x264 project * * Authors: David Conrad <lessen42@gmail.com> *
View file
x264-snapshot-20141218-2245.tar.bz2/common/arm/deblock-a.S -> x264-snapshot-20150804-2245.tar.bz2/common/arm/deblock-a.S
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * deblock.S: arm deblocking ***************************************************************************** - * Copyright (C) 2009-2014 x264 project + * Copyright (C) 2009-2015 x264 project * * Authors: Mans Rullgard <mans@mansr.com> *
View file
x264-snapshot-20141218-2245.tar.bz2/common/arm/mc-a.S -> x264-snapshot-20150804-2245.tar.bz2/common/arm/mc-a.S
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * mc.S: arm motion compensation ***************************************************************************** - * Copyright (C) 2009-2014 x264 project + * Copyright (C) 2009-2015 x264 project * * Authors: David Conrad <lessen42@gmail.com> * Mans Rullgard <mans@mansr.com> @@ -1566,6 +1566,30 @@ pop {r4-r7, pc} endfunc +function x264_plane_copy_swap_neon + push {r4-r5, lr} + ldrd r4, r5, [sp, #12] + add lr, r4, #15 + bic lr, lr, #15 + sub r1, r1, lr, lsl #1 + sub r3, r3, lr, lsl #1 +1: + vld1.8 {q0, q1}, [r2]! + subs lr, lr, #16 + vrev16.8 q0, q0 + vrev16.8 q1, q1 + vst1.8 {q0, q1}, [r0]! + bgt 1b + + subs r5, r5, #1 + add r0, r0, r1 + add r2, r2, r3 + mov lr, r4 + bgt 1b + + pop {r4-r5, pc} +endfunc + function x264_store_interleave_chroma_neon push {lr} ldr lr, [sp, #4]
View file
x264-snapshot-20141218-2245.tar.bz2/common/arm/mc-c.c -> x264-snapshot-20150804-2245.tar.bz2/common/arm/mc-c.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * mc-c.c: arm motion compensation ***************************************************************************** - * Copyright (C) 2009-2014 x264 project + * Copyright (C) 2009-2015 x264 project * * Authors: David Conrad <lessen42@gmail.com> * @@ -57,6 +57,8 @@ void x264_plane_copy_interleave_neon( pixel *dst, intptr_t i_dst, pixel *srcu, intptr_t i_srcu, pixel *srcv, intptr_t i_srcv, int w, int h ); +void x264_plane_copy_swap_neon( pixel *dst, intptr_t i_dst, + pixel *src, intptr_t i_src, int w, int h ); void x264_store_interleave_chroma_neon( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height ); void x264_load_deinterleave_chroma_fdec_neon( pixel *dst, pixel *src, intptr_t i_src, int height ); @@ -136,9 +138,6 @@ x264_mc_copy_w16_neon, }; -static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1}; -static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2}; - static void mc_luma_neon( uint8_t *dst, intptr_t i_dst_stride, uint8_t *src[4], intptr_t i_src_stride, int mvx, int mvy, @@ -146,13 +145,13 @@ { int qpel_idx = ((mvy&3)<<2) + (mvx&3); intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2); - uint8_t *src1 = src[hpel_ref0[qpel_idx]] + offset; + uint8_t *src1 = src[x264_hpel_ref0[qpel_idx]] + offset; if ( (mvy&3) == 3 ) // explict if() to force conditional add src1 += i_src_stride; if( qpel_idx & 5 ) /* qpel interpolation needed */ { - uint8_t *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); + uint8_t *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); x264_pixel_avg_wtab_neon[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, src2, i_height ); @@ -172,13 +171,13 @@ { int qpel_idx = ((mvy&3)<<2) + (mvx&3); intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2); - uint8_t *src1 = src[hpel_ref0[qpel_idx]] + offset; + uint8_t *src1 = src[x264_hpel_ref0[qpel_idx]] + offset; if ( (mvy&3) == 3 ) // explict if() to 
force conditional add src1 += i_src_stride; if( qpel_idx & 5 ) /* qpel interpolation needed */ { - uint8_t *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); + uint8_t *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); x264_pixel_avg_wtab_neon[i_width>>2]( dst, *i_dst_stride, src1, i_src_stride, src2, i_height ); @@ -243,6 +242,7 @@ pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_neon; pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_neon; pf->plane_copy_interleave = x264_plane_copy_interleave_neon; + pf->plane_copy_swap = x264_plane_copy_swap_neon; pf->store_interleave_chroma = x264_store_interleave_chroma_neon; pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_neon;
View file
x264-snapshot-20141218-2245.tar.bz2/common/arm/mc.h -> x264-snapshot-20150804-2245.tar.bz2/common/arm/mc.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * mc.h: arm motion compensation ***************************************************************************** - * Copyright (C) 2009-2014 x264 project + * Copyright (C) 2009-2015 x264 project * * Authors: David Conrad <lessen42@gmail.com> *
View file
x264-snapshot-20141218-2245.tar.bz2/common/arm/pixel-a.S -> x264-snapshot-20150804-2245.tar.bz2/common/arm/pixel-a.S
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * pixel.S: arm pixel metrics ***************************************************************************** - * Copyright (C) 2009-2014 x264 project + * Copyright (C) 2009-2015 x264 project * * Authors: David Conrad <lessen42@gmail.com> *
View file
x264-snapshot-20141218-2245.tar.bz2/common/arm/pixel.h -> x264-snapshot-20150804-2245.tar.bz2/common/arm/pixel.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * pixel.h: arm pixel metrics ***************************************************************************** - * Copyright (C) 2009-2014 x264 project + * Copyright (C) 2009-2015 x264 project * * Authors: David Conrad <lessen42@gmail.com> *
View file
x264-snapshot-20141218-2245.tar.bz2/common/arm/predict-a.S -> x264-snapshot-20150804-2245.tar.bz2/common/arm/predict-a.S
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * predict.S: arm intra prediction ***************************************************************************** - * Copyright (C) 2009-2014 x264 project + * Copyright (C) 2009-2015 x264 project * * Authors: David Conrad <lessen42@gmail.com> * Mans Rullgard <mans@mansr.com>
View file
x264-snapshot-20141218-2245.tar.bz2/common/arm/predict-c.c -> x264-snapshot-20150804-2245.tar.bz2/common/arm/predict-c.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * predict.c: arm intra prediction ***************************************************************************** - * Copyright (C) 2009-2014 x264 project + * Copyright (C) 2009-2015 x264 project * * Authors: David Conrad <lessen42@gmail.com> *
View file
x264-snapshot-20141218-2245.tar.bz2/common/arm/predict.h -> x264-snapshot-20150804-2245.tar.bz2/common/arm/predict.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * predict.h: arm intra prediction ***************************************************************************** - * Copyright (C) 2009-2014 x264 project + * Copyright (C) 2009-2015 x264 project * * Authors: David Conrad <lessen42@gmail.com> *
View file
x264-snapshot-20141218-2245.tar.bz2/common/arm/quant-a.S -> x264-snapshot-20150804-2245.tar.bz2/common/arm/quant-a.S
Changed
@@ -1,7 +1,7 @@ /**************************************************************************** * quant.S: arm quantization and level-run ***************************************************************************** - * Copyright (C) 2009-2014 x264 project + * Copyright (C) 2009-2015 x264 project * * Authors: David Conrad <lessen42@gmail.com> *
View file
x264-snapshot-20141218-2245.tar.bz2/common/arm/quant.h -> x264-snapshot-20150804-2245.tar.bz2/common/arm/quant.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * quant.h: arm quantization and level-run ***************************************************************************** - * Copyright (C) 2005-2014 x264 project + * Copyright (C) 2005-2015 x264 project * * Authors: David Conrad <lessen42@gmail.com> *
View file
x264-snapshot-20141218-2245.tar.bz2/common/bitstream.c -> x264-snapshot-20150804-2245.tar.bz2/common/bitstream.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * bitstream.c: bitstream writing ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Fiona Glaser <fiona@x264.com> @@ -54,6 +54,8 @@ void x264_cabac_block_residual_internal_sse2_lzcnt ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb ); void x264_cabac_block_residual_internal_avx2_bmi2 ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb ); +uint8_t *x264_nal_escape_neon( uint8_t *dst, uint8_t *src, uint8_t *end ); + /**************************************************************************** * x264_nal_encode: ****************************************************************************/ @@ -142,4 +144,8 @@ } #endif #endif +#if ARCH_AARCH64 + if( cpu&X264_CPU_NEON ) + pf->nal_escape = x264_nal_escape_neon; +#endif }
View file
x264-snapshot-20141218-2245.tar.bz2/common/bitstream.h -> x264-snapshot-20150804-2245.tar.bz2/common/bitstream.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * bitstream.h: bitstream writing ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Fiona Glaser <fiona@x264.com>
View file
x264-snapshot-20141218-2245.tar.bz2/common/cabac.c -> x264-snapshot-20150804-2245.tar.bz2/common/cabac.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * cabac.c: arithmetic coder ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu>
View file
x264-snapshot-20141218-2245.tar.bz2/common/cabac.h -> x264-snapshot-20150804-2245.tar.bz2/common/cabac.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * cabac.h: arithmetic coder ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Laurent Aimar <fenrir@via.ecp.fr> @@ -72,6 +72,10 @@ #define x264_cabac_encode_decision x264_cabac_encode_decision_asm #define x264_cabac_encode_bypass x264_cabac_encode_bypass_asm #define x264_cabac_encode_terminal x264_cabac_encode_terminal_asm +#elif defined(ARCH_AARCH64) +#define x264_cabac_encode_decision x264_cabac_encode_decision_asm +#define x264_cabac_encode_bypass x264_cabac_encode_bypass_asm +#define x264_cabac_encode_terminal x264_cabac_encode_terminal_asm #else #define x264_cabac_encode_decision x264_cabac_encode_decision_c #define x264_cabac_encode_bypass x264_cabac_encode_bypass_c
View file
x264-snapshot-20141218-2245.tar.bz2/common/common.c -> x264-snapshot-20150804-2245.tar.bz2/common/common.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * common.c: misc common functions ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Laurent Aimar <fenrir@via.ecp.fr> @@ -579,6 +579,7 @@ { char *name_buf = NULL; int b_error = 0; + int errortype = X264_PARAM_BAD_VALUE; int name_was_bool; int value_was_null = !value; int i; @@ -595,6 +596,8 @@ { char *c; name_buf = strdup(name); + if( !name_buf ) + return X264_PARAM_BAD_NAME; while( (c = strchr( name_buf, '_' )) ) *c = '-'; name = name_buf; @@ -617,20 +620,23 @@ !strcasecmp(value, "auto") || atobool(value) ? x264_cpu_detect() : 0; if( b_error ) { - char *buf = strdup(value); - char *tok, UNUSED *saveptr=NULL, *init; - b_error = 0; - p->cpu = 0; - for( init=buf; (tok=strtok_r(init, ",", &saveptr)); init=NULL ) + char *buf = strdup( value ); + if( buf ) { - for( i=0; x264_cpu_names[i].flags && strcasecmp(tok, x264_cpu_names[i].name); i++ ); - p->cpu |= x264_cpu_names[i].flags; - if( !x264_cpu_names[i].flags ) - b_error = 1; + char *tok, UNUSED *saveptr=NULL, *init; + b_error = 0; + p->cpu = 0; + for( init=buf; (tok=strtok_r(init, ",", &saveptr)); init=NULL ) + { + for( i=0; x264_cpu_names[i].flags && strcasecmp(tok, x264_cpu_names[i].name); i++ ); + p->cpu |= x264_cpu_names[i].flags; + if( !x264_cpu_names[i].flags ) + b_error = 1; + } + free( buf ); + if( (p->cpu&X264_CPU_SSSE3) && !(p->cpu&X264_CPU_SSE2_IS_SLOW) ) + p->cpu |= X264_CPU_SSE2_IS_FAST; } - free( buf ); - if( (p->cpu&X264_CPU_SSSE3) && !(p->cpu&X264_CPU_SSE2_IS_SLOW) ) - p->cpu |= X264_CPU_SSE2_IS_FAST; } } OPT("threads") @@ -1049,7 +1055,10 @@ OPT("opencl-device") p->i_opencl_device = atoi( value ); else - return X264_PARAM_BAD_NAME; + { + b_error = 1; + errortype = X264_PARAM_BAD_NAME; + } #undef OPT #undef OPT2 #undef atobool @@ -1060,7 +1069,7 
@@ free( name_buf ); b_error |= value_was_null && !name_was_bool; - return b_error ? X264_PARAM_BAD_VALUE : 0; + return b_error ? errortype : 0; } /**************************************************************************** @@ -1133,6 +1142,7 @@ [X264_CSP_I420] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256/2, 256/2 } }, [X264_CSP_YV12] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256/2, 256/2 } }, [X264_CSP_NV12] = { 2, { 256*1, 256*1 }, { 256*1, 256/2 }, }, + [X264_CSP_NV21] = { 2, { 256*1, 256*1 }, { 256*1, 256/2 }, }, [X264_CSP_I422] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256*1, 256*1 } }, [X264_CSP_YV16] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256*1, 256*1 } }, [X264_CSP_NV16] = { 2, { 256*1, 256*1 }, { 256*1, 256*1 }, }, @@ -1265,29 +1275,36 @@ char *x264_slurp_file( const char *filename ) { int b_error = 0; - size_t i_size; + int64_t i_size; char *buf; FILE *fh = x264_fopen( filename, "rb" ); if( !fh ) return NULL; + b_error |= fseek( fh, 0, SEEK_END ) < 0; b_error |= ( i_size = ftell( fh ) ) <= 0; + if( WORD_SIZE == 4 ) + b_error |= i_size > INT32_MAX; b_error |= fseek( fh, 0, SEEK_SET ) < 0; if( b_error ) goto error; + buf = x264_malloc( i_size+2 ); if( !buf ) goto error; + b_error |= fread( buf, 1, i_size, fh ) != i_size; - if( buf[i_size-1] != '\n' ) - buf[i_size++] = '\n'; - buf[i_size] = 0; fclose( fh ); if( b_error ) { x264_free( buf ); return NULL; } + + if( buf[i_size-1] != '\n' ) + buf[i_size++] = '\n'; + buf[i_size] = '\0'; + return buf; error: fclose( fh );
View file
x264-snapshot-20141218-2245.tar.bz2/common/common.h -> x264-snapshot-20150804-2245.tar.bz2/common/common.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * common.h: misc common functions ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu>
View file
x264-snapshot-20141218-2245.tar.bz2/common/cpu.c -> x264-snapshot-20150804-2245.tar.bz2/common/cpu.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * cpu.c: cpu detection ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Laurent Aimar <fenrir@via.ecp.fr> @@ -67,8 +67,8 @@ {"AVX", AVX}, {"XOP", AVX|X264_CPU_XOP}, {"FMA4", AVX|X264_CPU_FMA4}, - {"AVX2", AVX|X264_CPU_AVX2}, {"FMA3", AVX|X264_CPU_FMA3}, + {"AVX2", AVX|X264_CPU_FMA3|X264_CPU_AVX2}, #undef AVX #undef SSE2 #undef MMX2 @@ -92,6 +92,8 @@ #elif ARCH_AARCH64 {"ARMv8", X264_CPU_ARMV8}, {"NEON", X264_CPU_NEON}, +#elif ARCH_MIPS + {"MSA", X264_CPU_MSA}, #endif {"", 0}, }; @@ -419,6 +421,17 @@ return X264_CPU_ARMV8 | X264_CPU_NEON; } +#elif ARCH_MIPS + +uint32_t x264_cpu_detect( void ) +{ + uint32_t flags = 0; +#if HAVE_MSA + flags |= X264_CPU_MSA; +#endif + return flags; +} + #else uint32_t x264_cpu_detect( void )
View file
x264-snapshot-20141218-2245.tar.bz2/common/cpu.h -> x264-snapshot-20150804-2245.tar.bz2/common/cpu.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * cpu.h: cpu detection ***************************************************************************** - * Copyright (C) 2004-2014 x264 project + * Copyright (C) 2004-2015 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * @@ -45,7 +45,6 @@ #define x264_emms() #endif #define x264_sfence x264_cpu_sfence -void x264_safe_intel_cpu_indicator_init( void ); /* kludge: * gcc can't give variables any greater alignment than the stack frame has.
View file
x264-snapshot-20141218-2245.tar.bz2/common/dct.c -> x264-snapshot-20150804-2245.tar.bz2/common/dct.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * dct.c: transform and zigzag ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Laurent Aimar <fenrir@via.ecp.fr> @@ -38,6 +38,9 @@ #if ARCH_AARCH64 # include "aarch64/dct.h" #endif +#if ARCH_MIPS +# include "mips/dct.h" +#endif /* the inverse of the scaling factors introduced by 8x8 fdct */ /* uint32 is for the asm implementation of trellis. the actual values fit in uint16. */ @@ -747,8 +750,32 @@ dctf->add8x8_idct8 = x264_add8x8_idct8_neon; dctf->add16x16_idct8= x264_add16x16_idct8_neon; +#if ARCH_AARCH64 + dctf->sub8x16_dct_dc= x264_sub8x16_dct_dc_neon; +#endif + } +#endif + +#if HAVE_MSA + if( cpu&X264_CPU_MSA ) + { + dctf->sub4x4_dct = x264_sub4x4_dct_msa; + dctf->sub8x8_dct = x264_sub8x8_dct_msa; + dctf->sub16x16_dct = x264_sub16x16_dct_msa; + dctf->sub8x8_dct_dc = x264_sub8x8_dct_dc_msa; + dctf->sub8x16_dct_dc = x264_sub8x16_dct_dc_msa; + dctf->dct4x4dc = x264_dct4x4dc_msa; + dctf->idct4x4dc = x264_idct4x4dc_msa; + dctf->add4x4_idct = x264_add4x4_idct_msa; + dctf->add8x8_idct = x264_add8x8_idct_msa; + dctf->add8x8_idct_dc = x264_add8x8_idct_dc_msa; + dctf->add16x16_idct = x264_add16x16_idct_msa; + dctf->add16x16_idct_dc = x264_add16x16_idct_dc_msa; + dctf->add8x8_idct8 = x264_add8x8_idct8_msa; + dctf->add16x16_idct8 = x264_add16x16_idct8_msa; } #endif + #endif // HIGH_BIT_DEPTH } @@ -1004,7 +1031,20 @@ #endif #if HAVE_ARMV6 || ARCH_AARCH64 if( cpu&X264_CPU_NEON ) - pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_neon; + { + pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_neon; +#if ARCH_AARCH64 + pf_interlaced->scan_4x4 = x264_zigzag_scan_4x4_field_neon; + pf_interlaced->scan_8x8 = x264_zigzag_scan_8x8_field_neon; + pf_interlaced->sub_4x4 = x264_zigzag_sub_4x4_field_neon; + 
pf_interlaced->sub_4x4ac = x264_zigzag_sub_4x4ac_field_neon; + pf_interlaced->sub_8x8 = x264_zigzag_sub_8x8_field_neon; + pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_neon; + pf_progressive->sub_4x4 = x264_zigzag_sub_4x4_frame_neon; + pf_progressive->sub_4x4ac = x264_zigzag_sub_4x4ac_frame_neon; + pf_progressive->sub_8x8 = x264_zigzag_sub_8x8_frame_neon; +#endif // ARCH_AARCH64 + } #endif // HAVE_ARMV6 || ARCH_AARCH64 #endif // HIGH_BIT_DEPTH @@ -1047,4 +1087,21 @@ } #endif // HIGH_BIT_DEPTH #endif +#if !HIGH_BIT_DEPTH +#if ARCH_AARCH64 + if( cpu&X264_CPU_NEON ) + { + pf_interlaced->interleave_8x8_cavlc = + pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_neon; + } +#endif // ARCH_AARCH64 +#endif // !HIGH_BIT_DEPTH +#if !HIGH_BIT_DEPTH +#if HAVE_MSA + if( cpu&X264_CPU_MSA ) + { + pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_msa; + } +#endif +#endif }
View file
x264-snapshot-20141218-2245.tar.bz2/common/dct.h -> x264-snapshot-20150804-2245.tar.bz2/common/dct.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * dct.h: transform and zigzag ***************************************************************************** - * Copyright (C) 2004-2014 x264 project + * Copyright (C) 2004-2015 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> *
View file
x264-snapshot-20141218-2245.tar.bz2/common/deblock.c -> x264-snapshot-20150804-2245.tar.bz2/common/deblock.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * deblock.c: deblocking ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu> @@ -612,8 +612,10 @@ uint8_t (*bs)[8][4] = h->mb.cache.deblock_strength; if( intra_cur ) { - memset( &bs[0][1], 3, 3*4*sizeof(uint8_t) ); - memset( &bs[1][1], 3, 3*4*sizeof(uint8_t) ); + M32( bs[0][1] ) = 0x03030303; + M64( bs[0][2] ) = 0x0303030303030303ULL; + M32( bs[1][1] ) = 0x03030303; + M64( bs[1][2] ) = 0x0303030303030303ULL; } else h->loopf.deblock_strength( h->mb.cache.non_zero_count, h->mb.cache.ref, h->mb.cache.mv, @@ -737,6 +739,32 @@ void x264_deblock_strength_neon( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE], int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], int mvy_limit, int bframe ); +#if ARCH_AARCH64 +void x264_deblock_h_chroma_422_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +void x264_deblock_h_chroma_mbaff_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +void x264_deblock_h_chroma_intra_mbaff_neon( uint8_t *pix, intptr_t stride, int alpha, int beta ); +void x264_deblock_h_chroma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta ); +void x264_deblock_h_chroma_422_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta ); +void x264_deblock_v_chroma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta ); +void x264_deblock_h_luma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta ); +void x264_deblock_v_luma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta ); +#endif +#endif + +#if !HIGH_BIT_DEPTH +#if HAVE_MSA +void x264_deblock_v_luma_msa( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +void x264_deblock_h_luma_msa( uint8_t 
*pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +void x264_deblock_v_chroma_msa( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +void x264_deblock_h_chroma_msa( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +void x264_deblock_v_luma_intra_msa( uint8_t *pix, intptr_t stride, int alpha, int beta ); +void x264_deblock_h_luma_intra_msa( uint8_t *pix, intptr_t stride, int alpha, int beta ); +void x264_deblock_v_chroma_intra_msa( uint8_t *pix, intptr_t stride, int alpha, int beta ); +void x264_deblock_h_chroma_intra_msa( uint8_t *pix, intptr_t stride, int alpha, int beta ); +void x264_deblock_strength_msa( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE], + int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], int mvy_limit, + int bframe ); +#endif #endif void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mbaff ) @@ -835,18 +863,43 @@ { pf->deblock_luma[1] = x264_deblock_v_luma_altivec; pf->deblock_luma[0] = x264_deblock_h_luma_altivec; - } + } #endif // HAVE_ALTIVEC #if HAVE_ARMV6 || ARCH_AARCH64 - if( cpu&X264_CPU_NEON ) - { + if( cpu&X264_CPU_NEON ) + { pf->deblock_luma[1] = x264_deblock_v_luma_neon; pf->deblock_luma[0] = x264_deblock_h_luma_neon; pf->deblock_chroma[1] = x264_deblock_v_chroma_neon; pf->deblock_h_chroma_420 = x264_deblock_h_chroma_neon; +#if ARCH_AARCH64 + pf->deblock_chroma_420_mbaff = x264_deblock_h_chroma_mbaff_neon; + pf->deblock_chroma_420_intra_mbaff = x264_deblock_h_chroma_intra_mbaff_neon; + pf->deblock_h_chroma_420_intra = x264_deblock_h_chroma_intra_neon; + pf->deblock_h_chroma_422 = x264_deblock_h_chroma_422_neon; + pf->deblock_h_chroma_422_intra = x264_deblock_h_chroma_422_intra_neon; + pf->deblock_chroma_intra[1] = x264_deblock_v_chroma_intra_neon; + pf->deblock_luma_intra[0] = x264_deblock_h_luma_intra_neon; + pf->deblock_luma_intra[1] = x264_deblock_v_luma_intra_neon; +#endif pf->deblock_strength = x264_deblock_strength_neon; - } + } +#endif + 
+#if HAVE_MSA + if( cpu&X264_CPU_MSA ) + { + pf->deblock_luma[1] = x264_deblock_v_luma_msa; + pf->deblock_luma[0] = x264_deblock_h_luma_msa; + pf->deblock_chroma[1] = x264_deblock_v_chroma_msa; + pf->deblock_h_chroma_420 = x264_deblock_h_chroma_msa; + pf->deblock_luma_intra[1] = x264_deblock_v_luma_intra_msa; + pf->deblock_luma_intra[0] = x264_deblock_h_luma_intra_msa; + pf->deblock_chroma_intra[1] = x264_deblock_v_chroma_intra_msa; + pf->deblock_h_chroma_420_intra = x264_deblock_h_chroma_intra_msa; + pf->deblock_strength = x264_deblock_strength_msa; + } #endif #endif // !HIGH_BIT_DEPTH
View file
x264-snapshot-20141218-2245.tar.bz2/common/frame.c -> x264-snapshot-20150804-2245.tar.bz2/common/frame.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * frame.c: frame handling ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu> @@ -47,6 +47,7 @@ switch( external_csp & X264_CSP_MASK ) { case X264_CSP_NV12: + case X264_CSP_NV21: case X264_CSP_I420: case X264_CSP_YV12: return X264_CSP_NV12; @@ -77,7 +78,7 @@ #if ARCH_X86 || ARCH_X86_64 if( h->param.cpu&X264_CPU_CACHELINE_64 ) align = 64; - else if( h->param.cpu&X264_CPU_CACHELINE_32 || h->param.cpu&X264_CPU_AVX2 ) + else if( h->param.cpu&X264_CPU_CACHELINE_32 || h->param.cpu&X264_CPU_AVX ) align = 32; #endif #if ARCH_PPC @@ -387,7 +388,15 @@ return -1; } - dst->i_type = src->i_type; + if( src->i_type < X264_TYPE_AUTO || src->i_type > X264_TYPE_KEYFRAME ) + { + x264_log( h, X264_LOG_WARNING, "forced frame type (%d) at %d is unknown\n", src->i_type, h->frames.i_input ); + dst->i_forced_type = X264_TYPE_AUTO; + } + else + dst->i_forced_type = src->i_type; + + dst->i_type = dst->i_forced_type; dst->i_qpplus1 = src->i_qpplus1; dst->i_pts = dst->i_reordered_pts = src->i_pts; dst->param = src->param; @@ -435,6 +444,12 @@ h->mc.plane_copy( dst->plane[1], dst->i_stride[1], (pixel*)pix[1], stride[1]/sizeof(pixel), h->param.i_width, h->param.i_height>>v_shift ); } + else if( i_csp == X264_CSP_NV21 ) + { + get_plane_ptr( h, src, &pix[1], &stride[1], 1, 0, v_shift ); + h->mc.plane_copy_swap( dst->plane[1], dst->i_stride[1], (pixel*)pix[1], + stride[1]/sizeof(pixel), h->param.i_width>>1, h->param.i_height>>v_shift ); + } else if( i_csp == X264_CSP_I420 || i_csp == X264_CSP_I422 || i_csp == X264_CSP_YV12 || i_csp == X264_CSP_YV16 ) { int uv_swap = i_csp == X264_CSP_YV12 || i_csp == X264_CSP_YV16;
View file
x264-snapshot-20141218-2245.tar.bz2/common/frame.h -> x264-snapshot-20150804-2245.tar.bz2/common/frame.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * frame.h: frame handling ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu> @@ -39,6 +39,7 @@ int i_poc; int i_delta_poc[2]; int i_type; + int i_forced_type; int i_qpplus1; int64_t i_pts; int64_t i_dts;
View file
x264-snapshot-20141218-2245.tar.bz2/common/macroblock.c -> x264-snapshot-20150804-2245.tar.bz2/common/macroblock.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * macroblock.c: macroblock common functions ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Fiona Glaser <fiona@x264.com> * Laurent Aimar <fenrir@via.ecp.fr> @@ -1158,7 +1158,7 @@ { // Looking at the bottom field so always take the bottom macroblock of the pair. h->mb.cache.topright_ref[l][0] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table->ref[0]]; - h->mb.cache.topright_ref[l][1] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table->ref[0]]; + h->mb.cache.topright_ref[l][1] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table->ref[1]]; h->mb.cache.topright_ref[l][2] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table->ref[2]]; CP32( h->mb.cache.topright_mv[l][0], mv[h->mb.left_b4[0] + 3 + s4x4*4 + s4x4*left_index_table->mv[0]] ); CP32( h->mb.cache.topright_mv[l][1], mv[h->mb.left_b4[0] + 3 + s4x4*4 + s4x4*left_index_table->mv[1]] ); @@ -1436,8 +1436,10 @@ uint8_t (*bs)[8][4] = h->mb.cache.deblock_strength; if( IS_INTRA( h->mb.i_type ) ) { - memset( bs[0][1], 3, 3*4*sizeof(uint8_t) ); - memset( bs[1][1], 3, 3*4*sizeof(uint8_t) ); + M32( bs[0][1] ) = 0x03030303; + M64( bs[0][2] ) = 0x0303030303030303ULL; + M32( bs[1][1] ) = 0x03030303; + M64( bs[1][2] ) = 0x0303030303030303ULL; return; } @@ -1450,7 +1452,9 @@ M32( bs[0][0] ) = 0x02020202; M32( bs[0][2] ) = 0x02020202; M32( bs[0][4] ) = 0x02020202; - memset( bs[1][0], 2, 5*4*sizeof(uint8_t) ); /* [1][1] and [1][3] has to be set for 4:2:2 */ + M64( bs[1][0] ) = 0x0202020202020202ULL; /* [1][1] and [1][3] has to be set for 4:2:2 */ + M64( bs[1][2] ) = 0x0202020202020202ULL; + M32( bs[1][4] ) = 0x02020202; return; } }
View file
x264-snapshot-20141218-2245.tar.bz2/common/macroblock.h -> x264-snapshot-20150804-2245.tar.bz2/common/macroblock.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * macroblock.h: macroblock common functions ***************************************************************************** - * Copyright (C) 2005-2014 x264 project + * Copyright (C) 2005-2015 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Laurent Aimar <fenrir@via.ecp.fr>
View file
x264-snapshot-20141218-2245.tar.bz2/common/mc.c -> x264-snapshot-20150804-2245.tar.bz2/common/mc.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * mc.c: motion compensation ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu> @@ -38,6 +38,9 @@ #if ARCH_AARCH64 #include "aarch64/mc.h" #endif +#if ARCH_MIPS +#include "mips/mc.h" +#endif static inline void pixel_avg( pixel *dst, intptr_t i_dst_stride, @@ -189,8 +192,8 @@ } } -static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1}; -static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2}; +const uint8_t x264_hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1}; +const uint8_t x264_hpel_ref1[16] = {0,0,1,0,2,2,3,2,2,2,3,2,2,2,3,2}; static void mc_luma( pixel *dst, intptr_t i_dst_stride, pixel *src[4], intptr_t i_src_stride, @@ -199,11 +202,11 @@ { int qpel_idx = ((mvy&3)<<2) + (mvx&3); int offset = (mvy>>2)*i_src_stride + (mvx>>2); - pixel *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride; + pixel *src1 = src[x264_hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride; if( qpel_idx & 5 ) /* qpel interpolation needed */ { - pixel *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); + pixel *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); pixel_avg( dst, i_dst_stride, src1, i_src_stride, src2, i_src_stride, i_width, i_height ); if( weight->weightfn ) @@ -222,11 +225,11 @@ { int qpel_idx = ((mvy&3)<<2) + (mvx&3); int offset = (mvy>>2)*i_src_stride + (mvx>>2); - pixel *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride; + pixel *src1 = src[x264_hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride; if( qpel_idx & 5 ) /* qpel interpolation needed */ { - pixel *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); + pixel *src2 = src[x264_hpel_ref1[qpel_idx]] + offset 
+ ((mvx&3) == 3); pixel_avg( dst, *i_dst_stride, src1, i_src_stride, src2, i_src_stride, i_width, i_height ); if( weight->weightfn ) @@ -299,6 +302,17 @@ } } +void x264_plane_copy_swap_c( pixel *dst, intptr_t i_dst, + pixel *src, intptr_t i_src, int w, int h ) +{ + for( int y=0; y<h; y++, dst+=i_dst, src+=i_src ) + for( int x=0; x<2*w; x+=2 ) + { + dst[x] = src[x+1]; + dst[x+1] = src[x]; + } +} + void x264_plane_copy_interleave_c( pixel *dst, intptr_t i_dst, pixel *srcu, intptr_t i_srcu, pixel *srcv, intptr_t i_srcv, int w, int h ) @@ -612,6 +626,7 @@ pf->load_deinterleave_chroma_fdec = load_deinterleave_chroma_fdec; pf->plane_copy = x264_plane_copy_c; + pf->plane_copy_swap = x264_plane_copy_swap_c; pf->plane_copy_interleave = x264_plane_copy_interleave_c; pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_c; pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_c; @@ -647,6 +662,10 @@ #if ARCH_AARCH64 x264_mc_init_aarch64( cpu, pf ); #endif +#if HAVE_MSA + if( cpu&X264_CPU_MSA ) + x264_mc_init_mips( cpu, pf ); +#endif if( cpu_independent ) {
View file
x264-snapshot-20141218-2245.tar.bz2/common/mc.h -> x264-snapshot-20150804-2245.tar.bz2/common/mc.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * mc.h: motion compensation ***************************************************************************** - * Copyright (C) 2004-2014 x264 project + * Copyright (C) 2004-2015 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * @@ -41,6 +41,8 @@ } ALIGNED_16( x264_weight_t ); extern const x264_weight_t x264_weight_none[3]; +extern const uint8_t x264_hpel_ref0[16]; +extern const uint8_t x264_hpel_ref1[16]; #define SET_WEIGHT( w, b, s, d, o )\ {\ @@ -86,6 +88,7 @@ void (*load_deinterleave_chroma_fdec)( pixel *dst, pixel *src, intptr_t i_src, int height ); void (*plane_copy)( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h ); + void (*plane_copy_swap)( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h ); void (*plane_copy_interleave)( pixel *dst, intptr_t i_dst, pixel *srcu, intptr_t i_srcu, pixel *srcv, intptr_t i_srcv, int w, int h ); /* may write up to 15 pixels off the end of each plane */
View file
x264-snapshot-20150804-2245.tar.bz2/common/mips
Added
+(directory)
View file
x264-snapshot-20150804-2245.tar.bz2/common/mips/dct-c.c
Added
@@ -0,0 +1,525 @@ +/***************************************************************************** + * dct-c.c: msa transform and zigzag + ***************************************************************************** + * Copyright (C) 2015 x264 project + * + * Authors: Rishikesh More <rishikesh.more@imgtec.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. 
+ *****************************************************************************/ + +#include "common/common.h" +#include "macros.h" + +#if !HIGH_BIT_DEPTH +#define AVC_ITRANS_H( in0, in1, in2, in3, out0, out1, out2, out3 ) \ +{ \ + v8i16 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ + \ + tmp0_m = in0 + in2; \ + tmp1_m = in0 - in2; \ + tmp2_m = in1 >> 1; \ + tmp2_m = tmp2_m - in3; \ + tmp3_m = in3 >> 1; \ + tmp3_m = in1 + tmp3_m; \ + \ + BUTTERFLY_4( tmp0_m, tmp1_m, tmp2_m, tmp3_m, out0, out1, out2, out3 ); \ +} + +static void avc_dct4x4dc_msa( int16_t *p_src, int16_t *p_dst, + int32_t i_src_stride ) +{ + v8i16 src0, src1, src2, src3, ver_res0, ver_res1, ver_res2, ver_res3; + v4i32 src0_r, src1_r, src2_r, src3_r, tmp0, tmp1, tmp2, tmp3; + v4i32 hor_res0, hor_res1, hor_res2, hor_res3; + v4i32 ver_res0_r, ver_res1_r, ver_res2_r, ver_res3_r; + + LD_SH4( p_src, i_src_stride, src0, src1, src2, src3 ); + UNPCK_R_SH_SW( src0, src0_r ); + UNPCK_R_SH_SW( src1, src1_r ); + UNPCK_R_SH_SW( src2, src2_r ); + UNPCK_R_SH_SW( src3, src3_r ); + BUTTERFLY_4( src0_r, src2_r, src3_r, src1_r, + tmp0, tmp3, tmp2, tmp1 ); + BUTTERFLY_4( tmp0, tmp1, tmp2, tmp3, + hor_res0, hor_res3, hor_res2, hor_res1 ); + TRANSPOSE4x4_SW_SW( hor_res0, hor_res1, hor_res2, hor_res3, + hor_res0, hor_res1, hor_res2, hor_res3 ); + BUTTERFLY_4( hor_res0, hor_res2, hor_res3, hor_res1, + tmp0, tmp3, tmp2, tmp1 ); + BUTTERFLY_4( tmp0, tmp1, tmp2, tmp3, + ver_res0_r, ver_res3_r, ver_res2_r, ver_res1_r ); + SRARI_W4_SW( ver_res0_r, ver_res1_r, ver_res2_r, ver_res3_r, 1 ); + PCKEV_H4_SH( ver_res0_r, ver_res0_r, ver_res1_r, ver_res1_r, + ver_res2_r, ver_res2_r, ver_res3_r, ver_res3_r, + ver_res0, ver_res1, ver_res2, ver_res3 ); + PCKOD_D2_SH( ver_res1, ver_res0, ver_res3, ver_res2, ver_res0, ver_res2 ); + ST_SH2( ver_res0, ver_res2, p_dst, 8 ); +} + +static void avc_sub4x4_dct_msa( uint8_t *p_src, int32_t i_src_stride, + uint8_t *p_ref, int32_t i_dst_stride, + int16_t *p_dst ) +{ + uint32_t i_src0, i_src1, i_src2, i_src3; + 
uint32_t i_ref0, i_ref1, i_ref2, i_ref3; + v16i8 src = { 0 }; + v16i8 ref = { 0 }; + v16u8 inp0, inp1; + v8i16 diff0, diff1, diff2, diff3; + v8i16 temp0, temp1, temp2, temp3; + + LW4( p_src, i_src_stride, i_src0, i_src1, i_src2, i_src3 ); + LW4( p_ref, i_dst_stride, i_ref0, i_ref1, i_ref2, i_ref3 ); + + INSERT_W4_SB( i_src0, i_src1, i_src2, i_src3, src ); + INSERT_W4_SB( i_ref0, i_ref1, i_ref2, i_ref3, ref ); + + ILVRL_B2_UB( src, ref, inp0, inp1 ); + + HSUB_UB2_SH( inp0, inp1, diff0, diff2 ); + + diff1 = ( v8i16 ) __msa_ilvl_d( ( v2i64 ) diff0, ( v2i64 ) diff0 ); + diff3 = ( v8i16 ) __msa_ilvl_d( ( v2i64 ) diff2, ( v2i64 ) diff2 ); + + BUTTERFLY_4( diff0, diff1, diff2, diff3, temp0, temp1, temp2, temp3 ); + + diff0 = temp0 + temp1; + diff1 = ( temp3 << 1 ) + temp2; + diff2 = temp0 - temp1; + diff3 = temp3 - ( temp2 << 1 ); + + TRANSPOSE4x4_SH_SH( diff0, diff1, diff2, diff3, + temp0, temp1, temp2, temp3 ); + BUTTERFLY_4( temp0, temp1, temp2, temp3, diff0, diff1, diff2, diff3 ); + + temp0 = diff0 + diff1; + temp1 = ( diff3 << 1 ) + diff2; + temp2 = diff0 - diff1; + temp3 = diff3 - ( diff2 << 1 ); + + ILVR_D2_UB( temp1, temp0, temp3, temp2, inp0, inp1 ); + ST_UB2( inp0, inp1, p_dst, 8 ); +} + +static void avc_zigzag_scan_4x4_frame_msa( int16_t pi_dct[16], + int16_t pi_level[16] ) +{ + v8i16 src0, src1; + v8i16 mask0 = { 0, 4, 1, 2, 5, 8, 12, 9 }; + v8i16 mask1 = { 6, 3, 7, 10, 13, 14, 11, 15 }; + + LD_SH2( pi_dct, 8, src0, src1 ); + VSHF_H2_SH( src0, src1, src0, src1, mask0, mask1, mask0, mask1 ); + ST_SH2( mask0, mask1, pi_level, 8 ); +} + +static void avc_idct4x4_addblk_msa( uint8_t *p_dst, int16_t *p_src, + int32_t i_dst_stride ) +{ + v8i16 src0, src1, src2, src3; + v8i16 hres0, hres1, hres2, hres3; + v8i16 vres0, vres1, vres2, vres3; + v8i16 zeros = { 0 }; + + LD4x4_SH( p_src, src0, src1, src2, src3 ); + AVC_ITRANS_H( src0, src1, src2, src3, hres0, hres1, hres2, hres3 ); + TRANSPOSE4x4_SH_SH( hres0, hres1, hres2, hres3, + hres0, hres1, hres2, hres3 ); + 
AVC_ITRANS_H( hres0, hres1, hres2, hres3, vres0, vres1, vres2, vres3 ); + SRARI_H4_SH( vres0, vres1, vres2, vres3, 6 ); + ADDBLK_ST4x4_UB( vres0, vres1, vres2, vres3, p_dst, i_dst_stride ); + ST_SH2( zeros, zeros, p_src, 8 ); +} + +static void avc_idct4x4_addblk_dc_msa( uint8_t *p_dst, int16_t *p_src, + int32_t i_dst_stride ) +{ + int16_t i_dc; + uint32_t i_src0, i_src1, i_src2, i_src3; + v16u8 pred = { 0 }; + v16i8 out; + v8i16 input_dc, pred_r, pred_l; + + i_dc = ( p_src[0] + 32 ) >> 6; + input_dc = __msa_fill_h( i_dc ); + p_src[ 0 ] = 0; + + LW4( p_dst, i_dst_stride, i_src0, i_src1, i_src2, i_src3 ); + INSERT_W4_UB( i_src0, i_src1, i_src2, i_src3, pred ); + UNPCK_UB_SH( pred, pred_r, pred_l ); + + pred_r += input_dc; + pred_l += input_dc; + + CLIP_SH2_0_255( pred_r, pred_l ); + out = __msa_pckev_b( ( v16i8 ) pred_l, ( v16i8 ) pred_r ); + ST4x4_UB( out, out, 0, 1, 2, 3, p_dst, i_dst_stride ); +} + +static void avc_idct8_addblk_msa( uint8_t *p_dst, int16_t *p_src, + int32_t i_dst_stride ) +{ + v8i16 src0, src1, src2, src3, src4, src5, src6, src7; + v8i16 vec0, vec1, vec2, vec3; + v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + v8i16 res0, res1, res2, res3, res4, res5, res6, res7; + v4i32 tmp0_r, tmp1_r, tmp2_r, tmp3_r, tmp4_r, tmp5_r, tmp6_r, tmp7_r; + v4i32 tmp0_l, tmp1_l, tmp2_l, tmp3_l, tmp4_l, tmp5_l, tmp6_l, tmp7_l; + v4i32 vec0_r, vec1_r, vec2_r, vec3_r, vec0_l, vec1_l, vec2_l, vec3_l; + v4i32 res0_r, res1_r, res2_r, res3_r, res4_r, res5_r, res6_r, res7_r; + v4i32 res0_l, res1_l, res2_l, res3_l, res4_l, res5_l, res6_l, res7_l; + v16i8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; + v16i8 zeros = { 0 }; + + p_src[ 0 ] += 32; + + LD_SH8( p_src, 8, src0, src1, src2, src3, src4, src5, src6, src7 ); + + vec0 = src0 + src4; + vec1 = src0 - src4; + vec2 = src2 >> 1; + vec2 = vec2 - src6; + vec3 = src6 >> 1; + vec3 = src2 + vec3;
View file
x264-snapshot-20150804-2245.tar.bz2/common/mips/dct.h
Added
@@ -0,0 +1,49 @@ +/***************************************************************************** + * dct.h: msa transform and zigzag + ***************************************************************************** + * Copyright (C) 2015 x264 project + * + * Authors: Rishikesh More <rishikesh.more@imgtec.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. 
+ *****************************************************************************/ + +#ifndef X264_MIPS_DCT_H +#define X264_MIPS_DCT_H + +void x264_dct4x4dc_msa( int16_t d[16] ); +void x264_idct4x4dc_msa( int16_t d[16] ); +void x264_add4x4_idct_msa( uint8_t *p_dst, int16_t pi_dct[16] ); +void x264_add8x8_idct_msa( uint8_t *p_dst, int16_t pi_dct[4][16] ); +void x264_add16x16_idct_msa( uint8_t *p_dst, int16_t pi_dct[16][16] ); +void x264_add8x8_idct8_msa( uint8_t *p_dst, int16_t pi_dct[64] ); +void x264_add16x16_idct8_msa( uint8_t *p_dst, int16_t pi_dct[4][64] ); +void x264_add8x8_idct_dc_msa( uint8_t *p_dst, int16_t pi_dct[4] ); +void x264_add16x16_idct_dc_msa( uint8_t *p_dst, int16_t pi_dct[16] ); +void x264_sub4x4_dct_msa( int16_t p_dst[16], uint8_t *p_src, uint8_t *p_ref ); +void x264_sub8x8_dct_msa( int16_t p_dst[4][16], uint8_t *p_src, + uint8_t *p_ref ); +void x264_sub16x16_dct_msa( int16_t p_dst[16][16], uint8_t *p_src, + uint8_t *p_ref ); +void x264_sub8x8_dct_dc_msa( int16_t pi_dct[4], uint8_t *p_pix1, + uint8_t *p_pix2 ); +void x264_sub8x16_dct_dc_msa( int16_t pi_dct[8], uint8_t *p_pix1, + uint8_t *p_pix2 ); +void x264_zigzag_scan_4x4_frame_msa( int16_t pi_level[16], int16_t pi_dct[16] ); + +#endif
View file
x264-snapshot-20150804-2245.tar.bz2/common/mips/deblock-c.c
Added
@@ -0,0 +1,2010 @@ +/***************************************************************************** + * deblock-c.c: msa deblocking + ***************************************************************************** + * Copyright (C) 2015 x264 project + * + * Authors: Neha Rana <neha.rana@imgtec.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. 
+ *****************************************************************************/ + +#include "common/common.h" +#include "macros.h" + +#if !HIGH_BIT_DEPTH +#define AVC_LPF_P0P1P2_OR_Q0Q1Q2( p3_or_q3_org_in, p0_or_q0_org_in, \ + q3_or_p3_org_in, p1_or_q1_org_in, \ + p2_or_q2_org_in, q1_or_p1_org_in, \ + p0_or_q0_out, p1_or_q1_out, p2_or_q2_out ) \ +{ \ + v8i16 threshold; \ + v8i16 const3 = __msa_ldi_h( 3 ); \ + \ + threshold = p0_or_q0_org_in + q3_or_p3_org_in; \ + threshold += p1_or_q1_org_in; \ + \ + p0_or_q0_out = threshold << 1; \ + p0_or_q0_out += p2_or_q2_org_in; \ + p0_or_q0_out += q1_or_p1_org_in; \ + p0_or_q0_out = __msa_srari_h( p0_or_q0_out, 3 ); \ + \ + p1_or_q1_out = p2_or_q2_org_in + threshold; \ + p1_or_q1_out = __msa_srari_h( p1_or_q1_out, 2 ); \ + \ + p2_or_q2_out = p2_or_q2_org_in * const3; \ + p2_or_q2_out += p3_or_q3_org_in; \ + p2_or_q2_out += p3_or_q3_org_in; \ + p2_or_q2_out += threshold; \ + p2_or_q2_out = __msa_srari_h( p2_or_q2_out, 3 ); \ +} + +/* data[-u32_u_img_width] = ( uint8_t )( ( 2 * p1 + p0 + q1 + 2 ) >> 2 ); */ +#define AVC_LPF_P0_OR_Q0( p0_or_q0_org_in, q1_or_p1_org_in, \ + p1_or_q1_org_in, p0_or_q0_out ) \ +{ \ + p0_or_q0_out = p0_or_q0_org_in + q1_or_p1_org_in; \ + p0_or_q0_out += p1_or_q1_org_in; \ + p0_or_q0_out += p1_or_q1_org_in; \ + p0_or_q0_out = __msa_srari_h( p0_or_q0_out, 2 ); \ +} + +#define AVC_LPF_P1_OR_Q1( p0_or_q0_org_in, q0_or_p0_org_in, \ + p1_or_q1_org_in, p2_or_q2_org_in, \ + negate_tc_in, tc_in, p1_or_q1_out ) \ +{ \ + v8i16 clip3, temp; \ + \ + clip3 = ( v8i16 ) __msa_aver_u_h( ( v8u16 ) p0_or_q0_org_in, \ + ( v8u16 ) q0_or_p0_org_in ); \ + temp = p1_or_q1_org_in << 1; \ + clip3 -= temp; \ + clip3 = __msa_ave_s_h( p2_or_q2_org_in, clip3 ); \ + clip3 = CLIP_SH( clip3, negate_tc_in, tc_in ); \ + p1_or_q1_out = p1_or_q1_org_in + clip3; \ +} + +#define AVC_LPF_P0Q0( q0_or_p0_org_in, p0_or_q0_org_in, \ + p1_or_q1_org_in, q1_or_p1_org_in, \ + negate_threshold_in, threshold_in, \ + p0_or_q0_out, q0_or_p0_out ) \ +{ 
\ + v8i16 q0_sub_p0, p1_sub_q1, delta; \ + \ + q0_sub_p0 = q0_or_p0_org_in - p0_or_q0_org_in; \ + p1_sub_q1 = p1_or_q1_org_in - q1_or_p1_org_in; \ + q0_sub_p0 <<= 2; \ + p1_sub_q1 += 4; \ + delta = q0_sub_p0 + p1_sub_q1; \ + delta >>= 3; \ + \ + delta = CLIP_SH( delta, negate_threshold_in, threshold_in ); \ + \ + p0_or_q0_out = p0_or_q0_org_in + delta; \ + q0_or_p0_out = q0_or_p0_org_in - delta; \ + \ + CLIP_SH2_0_255( p0_or_q0_out, q0_or_p0_out ); \ +} + +static void avc_loopfilter_luma_intra_edge_hor_msa( uint8_t *p_data, + uint8_t u_alpha_in, + uint8_t u_beta_in, + uint32_t u_img_width ) +{ + v16u8 p2_asub_p0, q2_asub_q0, p0_asub_q0; + v16u8 alpha, beta; + v16u8 is_less_than, is_less_than_beta, negate_is_less_than_beta; + v16u8 p2, p1, p0, q0, q1, q2; + v16u8 p3_org, p2_org, p1_org, p0_org, q0_org, q1_org, q2_org, q3_org; + v8i16 p1_org_r, p0_org_r, q0_org_r, q1_org_r; + v8i16 p1_org_l, p0_org_l, q0_org_l, q1_org_l; + v8i16 p2_r = { 0 }; + v8i16 p1_r = { 0 }; + v8i16 p0_r = { 0 }; + v8i16 q0_r = { 0 }; + v8i16 q1_r = { 0 }; + v8i16 q2_r = { 0 }; + v8i16 p2_l = { 0 }; + v8i16 p1_l = { 0 }; + v8i16 p0_l = { 0 }; + v8i16 q0_l = { 0 }; + v8i16 q1_l = { 0 }; + v8i16 q2_l = { 0 }; + v16u8 tmp_flag; + v16i8 zero = { 0 }; + + alpha = ( v16u8 ) __msa_fill_b( u_alpha_in ); + beta = ( v16u8 ) __msa_fill_b( u_beta_in ); + + LD_UB4( p_data - ( u_img_width << 1 ), u_img_width, + p1_org, p0_org, q0_org, q1_org ); + + { + v16u8 p1_asub_p0, q1_asub_q0, is_less_than_alpha; + + p0_asub_q0 = __msa_asub_u_b( p0_org, q0_org ); + p1_asub_p0 = __msa_asub_u_b( p1_org, p0_org ); + q1_asub_q0 = __msa_asub_u_b( q1_org, q0_org ); + + is_less_than_alpha = ( p0_asub_q0 < alpha ); + is_less_than_beta = ( p1_asub_p0 < beta ); + is_less_than = is_less_than_beta & is_less_than_alpha; + is_less_than_beta = ( q1_asub_q0 < beta ); + is_less_than = is_less_than_beta & is_less_than; + } + + if( !__msa_test_bz_v( is_less_than ) ) + { + q2_org = LD_UB( p_data + ( 2 * u_img_width ) ); + p3_org = LD_UB( 
p_data - ( u_img_width << 2 ) ); + p2_org = LD_UB( p_data - ( 3 * u_img_width ) ); + + UNPCK_UB_SH( p1_org, p1_org_r, p1_org_l ); + UNPCK_UB_SH( p0_org, p0_org_r, p0_org_l ); + UNPCK_UB_SH( q0_org, q0_org_r, q0_org_l ); + + tmp_flag = alpha >> 2; + tmp_flag = tmp_flag + 2; + tmp_flag = ( p0_asub_q0 < tmp_flag ); + + p2_asub_p0 = __msa_asub_u_b( p2_org, p0_org ); + is_less_than_beta = ( p2_asub_p0 < beta ); + is_less_than_beta = is_less_than_beta & tmp_flag; + negate_is_less_than_beta = __msa_xori_b( is_less_than_beta, 0xff ); + is_less_than_beta = is_less_than_beta & is_less_than; + negate_is_less_than_beta = negate_is_less_than_beta & is_less_than; + { + v8u16 is_less_than_beta_l, is_less_than_beta_r; + + q1_org_r = ( v8i16 ) __msa_ilvr_b( zero, ( v16i8 ) q1_org ); + + is_less_than_beta_r = + ( v8u16 ) __msa_sldi_b( ( v16i8 ) is_less_than_beta, zero, 8 ); + if( !__msa_test_bz_v( ( v16u8 ) is_less_than_beta_r ) ) + { + v8i16 p3_org_r; + + ILVR_B2_SH( zero, p3_org, zero, p2_org, p3_org_r, p2_r ); + AVC_LPF_P0P1P2_OR_Q0Q1Q2( p3_org_r, p0_org_r, + q0_org_r, p1_org_r, + p2_r, q1_org_r, p0_r, p1_r, p2_r ); + } + + q1_org_l = ( v8i16 ) __msa_ilvl_b( zero, ( v16i8 ) q1_org ); + + is_less_than_beta_l = + ( v8u16 ) __msa_sldi_b( zero, ( v16i8 ) is_less_than_beta, 8 ); + + if( !__msa_test_bz_v( ( v16u8 ) is_less_than_beta_l ) ) + { + v8i16 p3_org_l; + + ILVL_B2_SH( zero, p3_org, zero, p2_org, p3_org_l, p2_l ); + AVC_LPF_P0P1P2_OR_Q0Q1Q2( p3_org_l, p0_org_l, + q0_org_l, p1_org_l, + p2_l, q1_org_l, p0_l, p1_l, p2_l );
View file
x264-snapshot-20150804-2245.tar.bz2/common/mips/macros.h
Added
@@ -0,0 +1,1952 @@ +/***************************************************************************** + * macros.h: msa macros + ***************************************************************************** + * Copyright (C) 2015 x264 project + * + * Authors: Rishikesh More <rishikesh.more@imgtec.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. + *****************************************************************************/ + +#ifndef X264_MIPS_MACROS_H +#define X264_MIPS_MACROS_H + +#include <stdint.h> +#include <msa.h> + +#define LD_B( RTYPE, p_src ) *( ( RTYPE * )( p_src ) ) +#define LD_UB( ... ) LD_B( v16u8, __VA_ARGS__ ) +#define LD_SB( ... ) LD_B( v16i8, __VA_ARGS__ ) + +#define LD_H( RTYPE, p_src ) *( ( RTYPE * )( p_src ) ) +#define LD_SH( ... ) LD_H( v8i16, __VA_ARGS__ ) + +#define LD_W( RTYPE, p_src ) *( ( RTYPE * )( p_src ) ) +#define LD_SW( ... ) LD_W( v4i32, __VA_ARGS__ ) + +#define ST_B( RTYPE, in, p_dst ) *( ( RTYPE * )( p_dst ) ) = ( in ) +#define ST_UB( ... ) ST_B( v16u8, __VA_ARGS__ ) +#define ST_SB( ... ) ST_B( v16i8, __VA_ARGS__ ) + +#define ST_H( RTYPE, in, p_dst ) *( ( RTYPE * )( p_dst ) ) = ( in ) +#define ST_UH( ... 
) ST_H( v8u16, __VA_ARGS__ ) +#define ST_SH( ... ) ST_H( v8i16, __VA_ARGS__ ) + +#if ( __mips_isa_rev >= 6 ) + #define LH( p_src ) \ + ( { \ + uint8_t *p_src_m = ( uint8_t * ) ( p_src ); \ + uint16_t u_val_h_m; \ + \ + asm volatile ( \ + "lh %[u_val_h_m], %[p_src_m] \n\t" \ + \ + : [u_val_h_m] "=r" ( u_val_h_m ) \ + : [p_src_m] "m" ( *p_src_m ) \ + ); \ + \ + u_val_h_m; \ + } ) + + #define LW( p_src ) \ + ( { \ + uint8_t *p_src_m = ( uint8_t * ) ( p_src ); \ + uint32_t u_val_w_m; \ + \ + asm volatile ( \ + "lw %[u_val_w_m], %[p_src_m] \n\t" \ + \ + : [u_val_w_m] "=r" ( u_val_w_m ) \ + : [p_src_m] "m" ( *p_src_m ) \ + ); \ + \ + u_val_w_m; \ + } ) + + #if ( __mips == 64 ) + #define LD( p_src ) \ + ( { \ + uint8_t *p_src_m = ( uint8_t * ) ( p_src ); \ + uint64_t u_val_d_m = 0; \ + \ + asm volatile ( \ + "ld %[u_val_d_m], %[p_src_m] \n\t" \ + \ + : [u_val_d_m] "=r" ( u_val_d_m ) \ + : [p_src_m] "m" ( *p_src_m ) \ + ); \ + \ + u_val_d_m; \ + } ) + #else // !( __mips == 64 ) + #define LD( p_src ) \ + ( { \ + uint8_t *p_src_m = ( uint8_t * ) ( p_src ); \ + uint32_t u_val0_m, u_val1_m; \ + uint64_t u_val_d_m = 0; \ + \ + u_val0_m = LW( p_src_m ); \ + u_val1_m = LW( p_src_m + 4 ); \ + \ + u_val_d_m = ( uint64_t ) ( u_val1_m ); \ + u_val_d_m = ( uint64_t ) ( ( u_val_d_m << 32 ) & \ + 0xFFFFFFFF00000000 ); \ + u_val_d_m = ( uint64_t ) ( u_val_d_m | ( uint64_t ) u_val0_m ); \ + \ + u_val_d_m; \ + } ) + #endif // ( __mips == 64 ) + + #define SH( u_val, p_dst ) \ + { \ + uint8_t *p_dst_m = ( uint8_t * ) ( p_dst ); \ + uint16_t u_val_h_m = ( u_val ); \ + \ + asm volatile ( \ + "sh %[u_val_h_m], %[p_dst_m] \n\t" \ + \ + : [p_dst_m] "=m" ( *p_dst_m ) \ + : [u_val_h_m] "r" ( u_val_h_m ) \ + ); \ + } + + #define SW( u_val, p_dst ) \ + { \ + uint8_t *p_dst_m = ( uint8_t * ) ( p_dst ); \ + uint32_t u_val_w_m = ( u_val ); \ + \ + asm volatile ( \ + "sw %[u_val_w_m], %[p_dst_m] \n\t" \ + \ + : [p_dst_m] "=m" ( *p_dst_m ) \ + : [u_val_w_m] "r" ( u_val_w_m ) \ + ); \ + } + + #define SD( 
u_val, p_dst ) \ + { \ + uint8_t *p_dst_m = ( uint8_t * ) ( p_dst ); \ + uint64_t u_val_d_m = ( u_val ); \ + \ + asm volatile ( \ + "sd %[u_val_d_m], %[p_dst_m] \n\t" \ + \ + : [p_dst_m] "=m" ( *p_dst_m ) \ + : [u_val_d_m] "r" ( u_val_d_m ) \ + ); \ + } + +#else // !( __mips_isa_rev >= 6 ) + #define LH( p_src ) \ + ( { \ + uint8_t *p_src_m = ( uint8_t * ) ( p_src ); \ + uint16_t u_val_h_m; \ + \ + asm volatile ( \ + "ulh %[u_val_h_m], %[p_src_m] \n\t" \ + \ + : [u_val_h_m] "=r" ( u_val_h_m ) \ + : [p_src_m] "m" ( *p_src_m ) \ + ); \ + \ + u_val_h_m; \ + } ) + + #define LW( p_src ) \ + ( { \ + uint8_t *p_src_m = ( uint8_t * ) ( p_src ); \ + uint32_t u_val_w_m; \ + \ + asm volatile ( \ + "ulw %[u_val_w_m], %[p_src_m] \n\t" \ + \ + : [u_val_w_m] "=r" ( u_val_w_m ) \ + : [p_src_m] "m" ( *p_src_m ) \ + ); \ + \ + u_val_w_m; \ + } ) + + #if ( __mips == 64 ) + #define LD( p_src ) \ + ( { \ + uint8_t *p_src_m = ( uint8_t * ) ( p_src ); \ + uint64_t u_val_d_m = 0; \ + \ + asm volatile ( \ + "uld %[u_val_d_m], %[p_src_m] \n\t" \ + \ + : [u_val_d_m] "=r" ( u_val_d_m ) \ + : [p_src_m] "m" ( *p_src_m ) \ + ); \ + \ + u_val_d_m; \ + } )
View file
x264-snapshot-20150804-2245.tar.bz2/common/mips/mc-c.c
Added
@@ -0,0 +1,3807 @@ +/***************************************************************************** + * mc-c.c: msa motion compensation + ***************************************************************************** + * Copyright (C) 2015 x264 project + * + * Authors: Neha Rana <neha.rana@imgtec.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. 
+ *****************************************************************************/ + +#include "common/common.h" +#include "macros.h" +#include "mc.h" + +#if !HIGH_BIT_DEPTH +static const uint8_t pu_luma_mask_arr[16 * 8] = +{ + /* 8 width cases */ + 0, 5, 1, 6, 2, 7, 3, 8, 4, 9, 5, 10, 6, 11, 7, 12, + 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 9, 7, 10, 8, 11, + 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, + /* 4 width cases */ + 0, 5, 1, 6, 2, 7, 3, 8, 16, 21, 17, 22, 18, 23, 19, 24, + 1, 4, 2, 5, 3, 6, 4, 7, 17, 20, 18, 21, 19, 22, 20, 23, + 2, 3, 3, 4, 4, 5, 5, 6, 18, 19, 19, 20, 20, 21, 21, 22, + 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, + 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26 +}; + +static const uint8_t pu_chroma_mask_arr[16 * 5] = +{ + 0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20, + 0, 2, 2, 4, 4, 6, 6, 8, 16, 18, 18, 20, 20, 22, 22, 24, + 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, + 0, 1, 1, 2, 16, 17, 17, 18, 4, 5, 5, 6, 6, 7, 7, 8, + 0, 1, 1, 2, 16, 17, 17, 18, 16, 17, 17, 18, 18, 19, 19, 20 +}; + +void x264_mc_copy_w16_msa( uint8_t *p_dst, intptr_t i_dst_stride, + uint8_t *p_src, intptr_t i_src_stride, + int32_t i_height ); +void x264_mc_copy_w8_msa( uint8_t *p_dst, intptr_t i_dst_stride, + uint8_t *p_src, intptr_t i_src_stride, + int32_t i_height ); +void x264_mc_copy_w4_msa( uint8_t *p_dst, intptr_t i_dst_stride, uint8_t *p_src, + intptr_t i_src_stride, int32_t i_height ); +void x264_memzero_aligned_msa( void *p_dst, size_t n ); + +void x264_pixel_avg_16x16_msa( uint8_t *p_pix1, intptr_t i_pix1_stride, + uint8_t *p_pix2, intptr_t i_pix2_stride, + uint8_t *p_pix3, intptr_t i_pix3_stride, + int32_t i_weight ); +void x264_pixel_avg_16x8_msa( uint8_t *p_pix1, intptr_t i_pix1_stride, + uint8_t *p_pix2, intptr_t i_pix2_stride, + uint8_t *p_pix3, intptr_t i_pix3_stride, + int32_t i_weight ); +void x264_pixel_avg_8x16_msa( uint8_t *p_pix1, intptr_t i_pix1_stride, + uint8_t *p_pix2, intptr_t i_pix2_stride, + uint8_t 
*p_pix3, intptr_t i_pix3_stride, + int32_t i_weight ); +void x264_pixel_avg_8x8_msa( uint8_t *p_pix1, intptr_t i_pix1_stride, + uint8_t *p_pix2, intptr_t i_pix2_stride, + uint8_t *p_pix3, intptr_t i_pix3_stride, + int32_t i_weight ); +void x264_pixel_avg_8x4_msa( uint8_t *p_pix1, intptr_t i_pix1_stride, + uint8_t *p_pix2, intptr_t i_pix2_stride, + uint8_t *p_pix3, intptr_t i_pix3_stride, + int32_t i_weight ); +void x264_pixel_avg_4x16_msa( uint8_t *p_pix1, intptr_t pix1_stride, + uint8_t *p_pix2, intptr_t pix2_stride, + uint8_t *p_pix3, intptr_t pix3_stride, + int32_t i_weight ); +void x264_pixel_avg_4x8_msa( uint8_t *p_pix1, intptr_t i_pix1_stride, + uint8_t *p_pix2, intptr_t i_pix2_stride, + uint8_t *p_pix3, intptr_t i_pix3_stride, + int32_t i_weight ); +void x264_pixel_avg_4x4_msa( uint8_t *p_pix1, intptr_t i_pix1_stride, + uint8_t *p_pix2, intptr_t i_pix2_stride, + uint8_t *p_pix3, intptr_t i_pix3_stride, + int32_t i_weight ); +void x264_pixel_avg_4x2_msa( uint8_t *p_pix1, intptr_t i_pix1_stride, + uint8_t *p_pix2, intptr_t i_pix2_stride, + uint8_t *p_pix3, intptr_t i_pix3_stride, + int32_t i_weight ); + +void x264_mc_weight_w20_msa( uint8_t *p_dst, intptr_t i_dst_stride, + uint8_t *p_src, intptr_t i_src_stride, + const x264_weight_t *pWeight, int32_t i_height ); +void x264_mc_weight_w4_msa( uint8_t *p_dst, intptr_t i_dst_stride, + uint8_t *p_src, intptr_t i_src_stride, + const x264_weight_t *pWeight, int32_t i_height ); +void x264_mc_weight_w8_msa( uint8_t *p_dst, intptr_t i_dst_stride, + uint8_t *p_src, intptr_t i_src_stride, + const x264_weight_t *pWeight, int32_t i_height ); +void x264_mc_weight_w16_msa( uint8_t *p_dst, intptr_t i_dst_stride, + uint8_t *p_src, intptr_t i_src_stride, + const x264_weight_t *pWeight, int32_t i_height ); + +weight_fn_t x264_mc_weight_wtab_msa[6] = +{ + x264_mc_weight_w4_msa, + x264_mc_weight_w4_msa, + x264_mc_weight_w8_msa, + x264_mc_weight_w16_msa, + x264_mc_weight_w16_msa, + x264_mc_weight_w20_msa, +}; + +void 
x264_mc_luma_msa( uint8_t *p_dst, intptr_t i_dst_stride, + uint8_t *p_src[4], intptr_t i_src_stride, + int32_t m_vx, int32_t m_vy, + int32_t i_width, int32_t i_height, + const x264_weight_t *pWeight ); +uint8_t *x264_get_ref_msa( uint8_t *p_dst, intptr_t *p_dst_stride, + uint8_t *p_src[4], intptr_t i_src_stride, + int32_t m_vx, int32_t m_vy, + int32_t i_width, int32_t i_height, + const x264_weight_t *pWeight ); +void x264_mc_chroma_msa( uint8_t *p_dst_u, uint8_t *p_dst_v, + intptr_t i_dst_stride, + uint8_t *p_src, intptr_t i_src_stride, + int32_t m_vx, int32_t m_vy, + int32_t i_width, int32_t i_height ); +void x264_hpel_filter_msa( uint8_t *p_dsth, uint8_t *p_dst_v, + uint8_t *p_dstc, uint8_t *p_src, + intptr_t i_stride, int32_t i_width, + int32_t i_height, int16_t *p_buf ); + +void x264_plane_copy_interleave_msa( uint8_t *p_dst, intptr_t i_dst_stride, + uint8_t *p_src0, intptr_t i_src_stride0, + uint8_t *p_src1, intptr_t i_src_stride1, + int32_t i_width, int32_t i_height ); +void x264_plane_copy_deinterleave_msa( uint8_t *p_dst0, intptr_t i_dst_stride0, + uint8_t *p_dst1, intptr_t i_dst_stride1, + uint8_t *p_src, intptr_t i_src_stride, + int32_t i_width, int32_t i_height ); +void x264_plane_copy_deinterleave_rgb_msa( uint8_t *p_dst0, + intptr_t i_dst_stride0, + uint8_t *p_dst1, + intptr_t i_dst_stride1, + uint8_t *p_dst2, + intptr_t i_dst_stride2, + uint8_t *p_src, + intptr_t i_src_stride, + int32_t i_src_width, int32_t i_width, + int32_t i_height ); +void x264_store_interleave_chroma_msa( uint8_t *p_dst, intptr_t i_dst_stride, + uint8_t *p_src0, uint8_t *p_src1, + int32_t i_height ); +void x264_load_deinterleave_chroma_fenc_msa( uint8_t *p_dst, uint8_t *p_src, + intptr_t i_src_stride, + int32_t i_height ); +void x264_load_deinterleave_chroma_fdec_msa( uint8_t *p_dst, uint8_t *p_src, + intptr_t i_src_stride, + int32_t i_height ); +void x264_frame_init_lowres_core_msa( uint8_t *p_src, uint8_t *p_dst0, + uint8_t *p_dst1, uint8_t *p_dst2, + uint8_t *p_dst3, intptr_t 
i_src_stride, + intptr_t i_dst_stride, int32_t i_width, + int32_t i_height ); + +static void avc_luma_hz_16w_msa( uint8_t *p_src, int32_t i_src_stride, + uint8_t *p_dst, int32_t i_dst_stride, + int32_t i_height ) +{ + uint32_t u_loop_cnt, u_h4w; + v16u8 dst0; + v16i8 src0, src1, src2, src3, src4, src5, src6, src7; + v8i16 res0, res1, res2, res3, res4, res5, res6, res7; + v16i8 mask0, mask1, mask2; + v16i8 vec0, vec1, vec2, vec3, vec4, vec5; + v16i8 vec6, vec7, vec8, vec9, vec10, vec11; + v16i8 minus5b = __msa_ldi_b( -5 ); + v16i8 plus20b = __msa_ldi_b( 20 ); + + u_h4w = i_height % 4; + LD_SB3( &pu_luma_mask_arr[0], 16, mask0, mask1, mask2 ); + + for( u_loop_cnt = ( i_height >> 2 ); u_loop_cnt--; ) + { + LD_SB2( p_src, 8, src0, src1 ); + p_src += i_src_stride; + LD_SB2( p_src, 8, src2, src3 ); + p_src += i_src_stride;
View file
x264-snapshot-20150804-2245.tar.bz2/common/mips/mc.h
Added
@@ -0,0 +1,31 @@ +/***************************************************************************** + * mc.h: msa motion compensation + ***************************************************************************** + * Copyright (C) 2015 x264 project + * + * Authors: Neha Rana <neha.rana@imgtec.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. + *****************************************************************************/ + +#ifndef X264_MIPS_MC_H +#define X264_MIPS_MC_H + +void x264_mc_init_mips( int cpu, x264_mc_functions_t *pf ); + +#endif
View file
x264-snapshot-20150804-2245.tar.bz2/common/mips/pixel-c.c
Added
@@ -0,0 +1,1491 @@ +/***************************************************************************** + * pixel-c.c: msa pixel metrics + ***************************************************************************** + * Copyright (C) 2015 x264 project + * + * Authors: Mandar Sahastrabuddhe <mandar.sahastrabuddhe@imgtec.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. 
+ *****************************************************************************/ + +#include "common/common.h" +#include "macros.h" +#include "pixel.h" +#include "predict.h" + +#if !HIGH_BIT_DEPTH +#define CALC_MSE_B( src, ref, var ) \ +{ \ + v16u8 src_l0_m, src_l1_m; \ + v8i16 res_l0_m, res_l1_m; \ + \ + ILVRL_B2_UB( src, ref, src_l0_m, src_l1_m ); \ + HSUB_UB2_SH( src_l0_m, src_l1_m, res_l0_m, res_l1_m ); \ + DPADD_SH2_SW( res_l0_m, res_l1_m, res_l0_m, res_l1_m, var, var ); \ +} + +#define CALC_MSE_AVG_B( src, ref, var, sub ) \ +{ \ + v16u8 src_l0_m, src_l1_m; \ + v8i16 res_l0_m, res_l1_m; \ + \ + ILVRL_B2_UB( src, ref, src_l0_m, src_l1_m ); \ + HSUB_UB2_SH( src_l0_m, src_l1_m, res_l0_m, res_l1_m ); \ + DPADD_SH2_SW( res_l0_m, res_l1_m, res_l0_m, res_l1_m, var, var ); \ + \ + sub += res_l0_m + res_l1_m; \ +} + +#define VARIANCE_WxH( sse, diff, shift ) \ + ( ( sse ) - ( ( ( uint32_t )( diff ) * ( diff ) ) >> ( shift ) ) ) + +static uint32_t sad_4width_msa( uint8_t *p_src, int32_t i_src_stride, + uint8_t *p_ref, int32_t i_ref_stride, + int32_t i_height ) +{ + int32_t i_ht_cnt; + uint32_t u_src0, u_src1, u_src2, u_src3, u_ref0, u_ref1, u_ref2, u_ref3; + v16u8 src = { 0 }; + v16u8 ref = { 0 }; + v16u8 diff; + v8u16 sad = { 0 }; + + for ( i_ht_cnt = ( i_height >> 2 ); i_ht_cnt--; ) + { + LW4( p_src, i_src_stride, u_src0, u_src1, u_src2, u_src3 ); + p_src += ( 4 * i_src_stride ); + LW4( p_ref, i_ref_stride, u_ref0, u_ref1, u_ref2, u_ref3 ); + p_ref += ( 4 * i_ref_stride ); + + INSERT_W4_UB( u_src0, u_src1, u_src2, u_src3, src ); + INSERT_W4_UB( u_ref0, u_ref1, u_ref2, u_ref3, ref ); + + diff = __msa_asub_u_b( src, ref ); + sad += __msa_hadd_u_h( diff, diff ); + } + + return ( HADD_UH_U32( sad ) ); +} + +static uint32_t sad_8width_msa( uint8_t *p_src, int32_t i_src_stride, + uint8_t *p_ref, int32_t i_ref_stride, + int32_t i_height ) +{ + int32_t i_ht_cnt; + v16u8 src0, src1, src2, src3, ref0, ref1, ref2, ref3; + v8u16 sad = { 0 }; + + for ( i_ht_cnt = ( i_height >> 2 ); 
i_ht_cnt--; ) + { + LD_UB4( p_src, i_src_stride, src0, src1, src2, src3 ); + p_src += ( 4 * i_src_stride ); + LD_UB4( p_ref, i_ref_stride, ref0, ref1, ref2, ref3 ); + p_ref += ( 4 * i_ref_stride ); + + PCKEV_D4_UB( src1, src0, src3, src2, ref1, ref0, ref3, ref2, + src0, src1, ref0, ref1 ); + sad += SAD_UB2_UH( src0, src1, ref0, ref1 ); + } + + return ( HADD_UH_U32( sad ) ); +} + +static uint32_t sad_16width_msa( uint8_t *p_src, int32_t i_src_stride, + uint8_t *p_ref, int32_t i_ref_stride, + int32_t i_height ) +{ + int32_t i_ht_cnt; + v16u8 src0, src1, ref0, ref1; + v8u16 sad = { 0 }; + + for ( i_ht_cnt = ( i_height >> 2 ); i_ht_cnt--; ) + { + LD_UB2( p_src, i_src_stride, src0, src1 ); + p_src += ( 2 * i_src_stride ); + LD_UB2( p_ref, i_ref_stride, ref0, ref1 ); + p_ref += ( 2 * i_ref_stride ); + sad += SAD_UB2_UH( src0, src1, ref0, ref1 ); + + LD_UB2( p_src, i_src_stride, src0, src1 ); + p_src += ( 2 * i_src_stride ); + LD_UB2( p_ref, i_ref_stride, ref0, ref1 ); + p_ref += ( 2 * i_ref_stride ); + sad += SAD_UB2_UH( src0, src1, ref0, ref1 ); + } + + return ( HADD_UH_U32( sad ) ); +} + +static void sad_4width_x3d_msa( uint8_t *p_src, int32_t i_src_stride, + uint8_t *p_ref0, uint8_t *p_ref1, + uint8_t *p_ref2, int32_t i_ref_stride, + int32_t i_height, uint32_t *pu_sad_array ) +{ + int32_t i_ht_cnt; + v16u8 src = { 0 }; + uint32_t src0, src1, src2, src3, load0, load1, load2, load3; + v16u8 ref0 = { 0 }; + v16u8 ref1 = { 0 }; + v16u8 ref2 = { 0 }; + v16u8 diff; + v8u16 sad0 = { 0 }; + v8u16 sad1 = { 0 }; + v8u16 sad2 = { 0 }; + + for ( i_ht_cnt = ( i_height >> 2 ); i_ht_cnt--; ) + { + LW4( p_src, i_src_stride, src0, src1, src2, src3 ); + INSERT_W4_UB( src0, src1, src2, src3, src ); + p_src += ( 4 * i_src_stride ); + + LW4( p_ref0, i_ref_stride, load0, load1, load2, load3 ); + INSERT_W4_UB( load0, load1, load2, load3, ref0 ); + p_ref0 += ( 4 * i_ref_stride ); + + LW4( p_ref1, i_ref_stride, load0, load1, load2, load3 ); + INSERT_W4_UB( load0, load1, load2, load3, ref1 ); 
+ p_ref1 += ( 4 * i_ref_stride ); + + LW4( p_ref2, i_ref_stride, load0, load1, load2, load3 ); + INSERT_W4_UB( load0, load1, load2, load3, ref2 ); + p_ref2 += ( 4 * i_ref_stride ); + + diff = __msa_asub_u_b( src, ref0 ); + sad0 += __msa_hadd_u_h( diff, diff ); + + diff = __msa_asub_u_b( src, ref1 ); + sad1 += __msa_hadd_u_h( diff, diff ); + + diff = __msa_asub_u_b( src, ref2 ); + sad2 += __msa_hadd_u_h( diff, diff ); + } + + pu_sad_array[0] = HADD_UH_U32( sad0 ); + pu_sad_array[1] = HADD_UH_U32( sad1 ); + pu_sad_array[2] = HADD_UH_U32( sad2 ); +} + +static void sad_8width_x3d_msa( uint8_t *p_src, int32_t i_src_stride, + uint8_t *p_ref0, uint8_t *p_ref1, + uint8_t *p_ref2, int32_t i_ref_stride, + int32_t i_height, uint32_t *pu_sad_array ) +{ + int32_t i_ht_cnt; + v16u8 src0, src1, src2, src3; + v16u8 ref0, ref1, ref00, ref11, ref22, ref33; + v8u16 sad0 = { 0 }; + v8u16 sad1 = { 0 }; + v8u16 sad2 = { 0 }; + + for ( i_ht_cnt = ( i_height >> 2 ); i_ht_cnt--; ) + { + LD_UB4( p_src, i_src_stride, src0, src1, src2, src3 ); + p_src += ( 4 * i_src_stride ); + LD_UB4( p_ref0, i_ref_stride, ref00, ref11, ref22, ref33 );
View file
x264-snapshot-20150804-2245.tar.bz2/common/mips/pixel.h
Added
@@ -0,0 +1,170 @@ +/***************************************************************************** + * pixel.h: msa pixel metrics + ***************************************************************************** + * Copyright (C) 2015 x264 project + * + * Authors: Mandar Sahastrabuddhe <mandar.sahastrabuddhe@imgtec.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. 
+ *****************************************************************************/ + +#ifndef X264_MIPS_SAD_H +#define X264_MIPS_SAD_H + +int32_t x264_pixel_sad_16x16_msa( uint8_t *p_src, intptr_t i_src_stride, + uint8_t *p_ref, intptr_t i_ref_stride ); +int32_t x264_pixel_sad_16x8_msa( uint8_t *p_src, intptr_t i_src_stride, + uint8_t *p_ref, intptr_t i_ref_stride ); +int32_t x264_pixel_sad_8x16_msa( uint8_t *p_src, intptr_t i_src_stride, + uint8_t *p_ref, intptr_t i_ref_stride ); +int32_t x264_pixel_sad_8x8_msa( uint8_t *p_src, intptr_t i_src_stride, + uint8_t *p_ref, intptr_t i_ref_stride ); +int32_t x264_pixel_sad_8x4_msa( uint8_t *p_src, intptr_t i_src_stride, + uint8_t *p_ref, intptr_t i_ref_stride ); +int32_t x264_pixel_sad_4x16_msa( uint8_t *p_src, intptr_t i_src_stride, + uint8_t *p_ref, intptr_t i_ref_stride ); +int32_t x264_pixel_sad_4x8_msa( uint8_t *p_src, intptr_t i_src_stride, + uint8_t *p_ref, intptr_t i_ref_stride ); +int32_t x264_pixel_sad_4x4_msa( uint8_t *p_src, intptr_t i_src_stride, + uint8_t *p_ref, intptr_t i_ref_stride ); +void x264_pixel_sad_x4_16x16_msa( uint8_t *p_src, uint8_t *p_ref0, + uint8_t *p_ref1, uint8_t *p_ref2, + uint8_t *p_ref3, intptr_t i_ref_stride, + int32_t p_sad_array[4] ); +void x264_pixel_sad_x4_16x8_msa( uint8_t *p_src, uint8_t *p_ref0, + uint8_t *p_ref1, uint8_t *p_ref2, + uint8_t *p_ref3, intptr_t i_ref_stride, + int32_t p_sad_array[4] ); +void x264_pixel_sad_x4_8x16_msa( uint8_t *p_src, uint8_t *p_ref0, + uint8_t *p_ref1, uint8_t *p_ref2, + uint8_t *p_ref3, intptr_t i_ref_stride, + int32_t p_sad_array[4] ); +void x264_pixel_sad_x4_8x8_msa( uint8_t *p_src, uint8_t *p_ref0, + uint8_t *p_ref1, uint8_t *p_ref2, + uint8_t *p_ref3, intptr_t i_ref_stride, + int32_t p_sad_array[4] ); +void x264_pixel_sad_x4_8x4_msa( uint8_t *p_src, uint8_t *p_ref0, + uint8_t *p_ref1, uint8_t *p_ref2, + uint8_t *p_ref3, intptr_t i_ref_stride, + int32_t p_sad_array[4] ); +void x264_pixel_sad_x4_4x8_msa( uint8_t *p_src, uint8_t *p_ref0, + uint8_t 
*p_ref1, uint8_t *p_ref2, + uint8_t *p_ref3, intptr_t i_ref_stride, + int32_t p_sad_array[4] ); +void x264_pixel_sad_x4_4x4_msa( uint8_t *p_src, uint8_t *p_ref0, + uint8_t *p_ref1, uint8_t *p_ref2, + uint8_t *p_ref3, intptr_t i_ref_stride, + int32_t p_sad_array[4] ); +void x264_pixel_sad_x3_16x16_msa( uint8_t *p_src, uint8_t *p_ref0, + uint8_t *p_ref1, uint8_t *p_ref2, + intptr_t i_ref_stride, + int32_t p_sad_array[3] ); +void x264_pixel_sad_x3_16x8_msa( uint8_t *p_src, uint8_t *p_ref0, + uint8_t *p_ref1, uint8_t *p_ref2, + intptr_t i_ref_stride, + int32_t p_sad_array[3] ); +void x264_pixel_sad_x3_8x16_msa( uint8_t *p_src, uint8_t *p_ref0, + uint8_t *p_ref1, uint8_t *p_ref2, + intptr_t i_ref_stride, + int32_t p_sad_array[3] ); +void x264_pixel_sad_x3_8x8_msa( uint8_t *p_src, uint8_t *p_ref0, + uint8_t *p_ref1, uint8_t *p_ref2, + intptr_t i_ref_stride, + int32_t p_sad_array[3] ); +void x264_pixel_sad_x3_8x4_msa( uint8_t *p_src, uint8_t *p_ref0, + uint8_t *p_ref1, uint8_t *p_ref2, + intptr_t i_ref_stride, + int32_t p_sad_array[3] ); +void x264_pixel_sad_x3_4x8_msa( uint8_t *p_src, uint8_t *p_ref0, + uint8_t *p_ref1, uint8_t *p_ref2, + intptr_t i_ref_stride, + int32_t p_sad_array[3] ); +void x264_pixel_sad_x3_4x4_msa( uint8_t *p_src, uint8_t *p_ref0, + uint8_t *p_ref1, uint8_t *p_ref2, + intptr_t i_ref_stride, + int32_t p_sad_array[3] ); +int32_t x264_pixel_ssd_16x16_msa( uint8_t *p_src, intptr_t i_src_stride, + uint8_t *p_ref, intptr_t i_ref_stride ); +int32_t x264_pixel_ssd_16x8_msa( uint8_t *p_src, intptr_t i_src_stride, + uint8_t *p_ref, intptr_t i_ref_stride ); +int32_t x264_pixel_ssd_8x16_msa( uint8_t *p_src, intptr_t i_src_stride, + uint8_t *p_ref, intptr_t i_ref_stride ); +int32_t x264_pixel_ssd_8x8_msa( uint8_t *p_src, intptr_t i_src_stride, + uint8_t *p_ref, intptr_t i_ref_stride ); +int32_t x264_pixel_ssd_8x4_msa( uint8_t *p_src, intptr_t i_src_stride, + uint8_t *p_ref, intptr_t i_ref_stride ); +int32_t x264_pixel_ssd_4x16_msa( uint8_t *p_src, intptr_t 
i_src_stride, + uint8_t *p_ref, intptr_t i_ref_stride ); +int32_t x264_pixel_ssd_4x8_msa( uint8_t *p_src, intptr_t i_src_stride, + uint8_t *p_ref, intptr_t i_ref_stride ); +int32_t x264_pixel_ssd_4x4_msa( uint8_t *p_src, intptr_t i_src_stride, + uint8_t *p_ref, intptr_t i_ref_stride ); +void x264_intra_sad_x3_4x4_msa( uint8_t *p_enc, uint8_t *p_dec, + int32_t p_sad_array[3] ); +void x264_intra_sad_x3_16x16_msa( uint8_t *p_enc, uint8_t *p_dec, + int32_t p_sad_array[3] ); +void x264_intra_sad_x3_8x8_msa( uint8_t *p_enc, uint8_t p_edge[36], + int32_t p_sad_array[3] ); +void x264_intra_sad_x3_8x8c_msa( uint8_t *p_enc, uint8_t *p_dec, + int32_t p_sad_array[3] ); +void x264_ssim_4x4x2_core_msa( const uint8_t *p_pix1, intptr_t i_stride1, + const uint8_t *p_pix2, intptr_t i_stride2, + int32_t i_sums[2][4] ); +uint64_t x264_pixel_hadamard_ac_8x8_msa( uint8_t *p_pix, intptr_t i_stride ); +uint64_t x264_pixel_hadamard_ac_8x16_msa( uint8_t *p_pix, intptr_t i_stride ); +uint64_t x264_pixel_hadamard_ac_16x8_msa( uint8_t *p_pix, intptr_t i_stride ); +uint64_t x264_pixel_hadamard_ac_16x16_msa( uint8_t *p_pix, intptr_t i_stride ); +int32_t x264_pixel_satd_4x4_msa( uint8_t *p_pix1, intptr_t i_stride, + uint8_t *p_pix2, intptr_t i_stride2 ); +int32_t x264_pixel_satd_4x8_msa( uint8_t *p_pix1, intptr_t i_stride, + uint8_t *p_pix2, intptr_t i_stride2 ); +int32_t x264_pixel_satd_4x16_msa( uint8_t *p_pix1, intptr_t i_stride, + uint8_t *p_pix2, intptr_t i_stride2 ); +int32_t x264_pixel_satd_8x4_msa( uint8_t *p_pix1, intptr_t i_stride, + uint8_t *p_pix2, intptr_t i_stride2 ); +int32_t x264_pixel_satd_8x8_msa( uint8_t *p_pix1, intptr_t i_stride, + uint8_t *p_pix2, intptr_t i_stride2 ); +int32_t x264_pixel_satd_8x16_msa( uint8_t *p_pix1, intptr_t i_stride, + uint8_t *p_pix2, intptr_t i_stride2 ); +int32_t x264_pixel_satd_16x8_msa( uint8_t *p_pix1, intptr_t i_stride, + uint8_t *p_pix2, intptr_t i_stride2 ); +int32_t x264_pixel_satd_16x16_msa( uint8_t *p_pix1, intptr_t i_stride, + uint8_t 
*p_pix2, intptr_t i_stride2 ); +int32_t x264_pixel_sa8d_8x8_msa( uint8_t *p_pix1, intptr_t i_stride, + uint8_t *p_pix2, intptr_t i_stride2 ); +int32_t x264_pixel_sa8d_16x16_msa( uint8_t *p_pix1, intptr_t i_stride, + uint8_t *p_pix2, intptr_t i_stride2 ); +void x264_intra_satd_x3_4x4_msa( uint8_t *p_enc, uint8_t *p_dec, + int32_t p_sad_array[3] ); +void x264_intra_satd_x3_16x16_msa( uint8_t *p_enc, uint8_t *p_dec, + int32_t p_sad_array[3] ); +void x264_intra_sa8d_x3_8x8_msa( uint8_t *p_enc, uint8_t p_edge[36], + int32_t p_sad_array[3] ); +void x264_intra_satd_x3_8x8c_msa( uint8_t *p_enc, uint8_t *p_dec, + int32_t p_sad_array[3] ); +uint64_t x264_pixel_var_16x16_msa( uint8_t *p_pix, intptr_t i_stride ); +uint64_t x264_pixel_var_8x16_msa( uint8_t *p_pix, intptr_t i_stride ); +uint64_t x264_pixel_var_8x8_msa( uint8_t *p_pix, intptr_t i_stride ); +int32_t x264_pixel_var2_8x16_msa( uint8_t *p_pix1, intptr_t i_stride1, + uint8_t *p_pix2, intptr_t i_stride2, + int32_t *p_ssd ); +int32_t x264_pixel_var2_8x8_msa( uint8_t *p_pix1, intptr_t i_stride1, + uint8_t *p_pix2, intptr_t i_stride2, + int32_t *p_ssd ); + +#endif
View file
x264-snapshot-20150804-2245.tar.bz2/common/mips/predict-c.c
Added
@@ -0,0 +1,607 @@ +/***************************************************************************** + * predict-c.c: msa intra prediction + ***************************************************************************** + * Copyright (C) 2015 x264 project + * + * Authors: Mandar Sahastrabuddhe <mandar.sahastrabuddhe@imgtec.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. 
+ *****************************************************************************/ + +#include "common/common.h" +#include "macros.h" + +#if !HIGH_BIT_DEPTH +static void intra_predict_vert_4x4_msa( uint8_t *p_src, uint8_t *p_dst, + int32_t i_dst_stride ) +{ + uint32_t u_src_data; + + u_src_data = LW( p_src ); + + SW4( u_src_data, u_src_data, u_src_data, u_src_data, p_dst, i_dst_stride ); +} + +static void intra_predict_vert_8x8_msa( uint8_t *p_src, uint8_t *p_dst, + int32_t i_dst_stride ) +{ + uint64_t u_out; + + u_out = LD( p_src ); + + SD4( u_out, u_out, u_out, u_out, p_dst, i_dst_stride ); + p_dst += ( 4 * i_dst_stride ); + SD4( u_out, u_out, u_out, u_out, p_dst, i_dst_stride ); +} + +static void intra_predict_vert_16x16_msa( uint8_t *p_src, uint8_t *p_dst, + int32_t i_dst_stride ) +{ + v16u8 src0 = LD_UB( p_src ); + + ST_UB8( src0, src0, src0, src0, src0, src0, src0, src0, p_dst, + i_dst_stride ); + p_dst += ( 8 * i_dst_stride ); + ST_UB8( src0, src0, src0, src0, src0, src0, src0, src0, p_dst, + i_dst_stride ); +} + +static void intra_predict_horiz_4x4_msa( uint8_t *p_src, int32_t i_src_stride, + uint8_t *p_dst, int32_t i_dst_stride ) +{ + uint32_t u_out0, u_out1, u_out2, u_out3; + + u_out0 = p_src[0 * i_src_stride] * 0x01010101; + u_out1 = p_src[1 * i_src_stride] * 0x01010101; + u_out2 = p_src[2 * i_src_stride] * 0x01010101; + u_out3 = p_src[3 * i_src_stride] * 0x01010101; + + SW4( u_out0, u_out1, u_out2, u_out3, p_dst, i_dst_stride ); +} + +static void intra_predict_horiz_8x8_msa( uint8_t *p_src, int32_t i_src_stride, + uint8_t *p_dst, int32_t i_dst_stride ) +{ + uint64_t u_out0, u_out1, u_out2, u_out3, u_out4, u_out5, u_out6, u_out7; + + u_out0 = p_src[0 * i_src_stride] * 0x0101010101010101ull; + u_out1 = p_src[1 * i_src_stride] * 0x0101010101010101ull; + u_out2 = p_src[2 * i_src_stride] * 0x0101010101010101ull; + u_out3 = p_src[3 * i_src_stride] * 0x0101010101010101ull; + u_out4 = p_src[4 * i_src_stride] * 0x0101010101010101ull; + u_out5 = p_src[5 * 
i_src_stride] * 0x0101010101010101ull; + u_out6 = p_src[6 * i_src_stride] * 0x0101010101010101ull; + u_out7 = p_src[7 * i_src_stride] * 0x0101010101010101ull; + + SD4( u_out0, u_out1, u_out2, u_out3, p_dst, i_dst_stride ); + p_dst += ( 4 * i_dst_stride ); + SD4( u_out4, u_out5, u_out6, u_out7, p_dst, i_dst_stride ); +} + +static void intra_predict_horiz_16x16_msa( uint8_t *p_src, int32_t i_src_stride, + uint8_t *p_dst, + int32_t i_dst_stride ) +{ + uint32_t u_row; + uint8_t u_inp0, u_inp1, u_inp2, u_inp3; + v16u8 src0, src1, src2, src3; + + for ( u_row = 4; u_row--; ) + { + u_inp0 = p_src[0]; + p_src += i_src_stride; + u_inp1 = p_src[0]; + p_src += i_src_stride; + u_inp2 = p_src[0]; + p_src += i_src_stride; + u_inp3 = p_src[0]; + p_src += i_src_stride; + + src0 = ( v16u8 ) __msa_fill_b( u_inp0 ); + src1 = ( v16u8 ) __msa_fill_b( u_inp1 ); + src2 = ( v16u8 ) __msa_fill_b( u_inp2 ); + src3 = ( v16u8 ) __msa_fill_b( u_inp3 ); + + ST_UB4( src0, src1, src2, src3, p_dst, i_dst_stride ); + p_dst += ( 4 * i_dst_stride ); + } +} + +static void intra_predict_dc_4x4_msa( uint8_t *p_src_top, uint8_t *p_src_left, + int32_t i_src_stride_left, + uint8_t *p_dst, int32_t i_dst_stride, + uint8_t is_above, uint8_t is_left ) +{ + uint32_t u_row; + uint32_t u_out, u_addition = 0; + v16u8 src_above, store; + v8u16 sum_above; + v4u32 sum; + + if ( is_left && is_above ) + { + src_above = LD_UB( p_src_top ); + + sum_above = __msa_hadd_u_h( src_above, src_above ); + sum = __msa_hadd_u_w( sum_above, sum_above ); + u_addition = __msa_copy_u_w( ( v4i32 ) sum, 0 ); + + for ( u_row = 0; u_row < 4; u_row++ ) + { + u_addition += p_src_left[u_row * i_src_stride_left]; + } + + u_addition = ( u_addition + 4 ) >> 3; + store = ( v16u8 ) __msa_fill_b( u_addition ); + } + else if ( is_left ) + { + for ( u_row = 0; u_row < 4; u_row++ ) + { + u_addition += p_src_left[u_row * i_src_stride_left]; + } + + u_addition = ( u_addition + 2 ) >> 2; + store = ( v16u8 ) __msa_fill_b( u_addition ); + } + else if ( 
is_above ) + { + src_above = LD_UB( p_src_top ); + + sum_above = __msa_hadd_u_h( src_above, src_above ); + sum = __msa_hadd_u_w( sum_above, sum_above ); + sum = ( v4u32 ) __msa_srari_w( ( v4i32 ) sum, 2 ); + store = ( v16u8 ) __msa_splati_b( ( v16i8 ) sum, 0 ); + } + else + { + store = ( v16u8 ) __msa_ldi_b( 128 ); + } + + u_out = __msa_copy_u_w( ( v4i32 ) store, 0 ); + + SW4( u_out, u_out, u_out, u_out, p_dst, i_dst_stride ); +} + +static void intra_predict_dc_8x8_msa( uint8_t *p_src_top, uint8_t *p_src_left, + uint8_t *p_dst, int32_t i_dst_stride ) +{ + uint64_t u_val0, u_val1; + v16i8 store; + v16u8 src = { 0 }; + v8u16 sum_h; + v4u32 sum_w; + v2u64 sum_d; + + u_val0 = LD( p_src_top ); + u_val1 = LD( p_src_left ); + INSERT_D2_UB( u_val0, u_val1, src ); + sum_h = __msa_hadd_u_h( src, src ); + sum_w = __msa_hadd_u_w( sum_h, sum_h ); + sum_d = __msa_hadd_u_d( sum_w, sum_w ); + sum_w = ( v4u32 ) __msa_pckev_w( ( v4i32 ) sum_d, ( v4i32 ) sum_d ); + sum_d = __msa_hadd_u_d( sum_w, sum_w ); + sum_w = ( v4u32 ) __msa_srari_w( ( v4i32 ) sum_d, 4 );
View file
x264-snapshot-20150804-2245.tar.bz2/common/mips/predict.h
Added
@@ -0,0 +1,48 @@ +/***************************************************************************** + * predict.h: msa intra prediction + ***************************************************************************** + * Copyright (C) 2015 x264 project + * + * Authors: Rishikesh More <rishikesh.more@imgtec.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. 
+ *****************************************************************************/ + +#ifndef X264_MIPS_PREDICT_H +#define X264_MIPS_PREDICT_H + +void x264_intra_predict_dc_16x16_msa( uint8_t *p_src ); +void x264_intra_predict_dc_left_16x16_msa( uint8_t *p_src ); +void x264_intra_predict_dc_top_16x16_msa( uint8_t *p_src ); +void x264_intra_predict_dc_128_16x16_msa( uint8_t *p_src ); +void x264_intra_predict_hor_16x16_msa( uint8_t *p_src ); +void x264_intra_predict_vert_16x16_msa( uint8_t *p_src ); +void x264_intra_predict_plane_16x16_msa( uint8_t *p_src ); +void x264_intra_predict_dc_4blk_8x8_msa( uint8_t *p_src ); +void x264_intra_predict_hor_8x8_msa( uint8_t *p_src ); +void x264_intra_predict_vert_8x8_msa( uint8_t *p_src ); +void x264_intra_predict_plane_8x8_msa( uint8_t *p_src ); +void x264_intra_predict_ddl_8x8_msa( uint8_t *p_src, uint8_t pu_xyz[36] ); +void x264_intra_predict_dc_8x8_msa( uint8_t *p_src, uint8_t pu_xyz[36] ); +void x264_intra_predict_h_8x8_msa( uint8_t *p_src, uint8_t pu_xyz[36] ); +void x264_intra_predict_v_8x8_msa( uint8_t *p_src, uint8_t pu_xyz[36] ); +void x264_intra_predict_dc_4x4_msa( uint8_t *p_src ); +void x264_intra_predict_hor_4x4_msa( uint8_t *p_src ); +void x264_intra_predict_vert_4x4_msa( uint8_t *p_src ); + +#endif
View file
x264-snapshot-20150804-2245.tar.bz2/common/mips/quant-c.c
Added
@@ -0,0 +1,630 @@ +/***************************************************************************** + * quant-c.c: msa quantization and level-run + ***************************************************************************** + * Copyright (C) 2015 x264 project + * + * Authors: Rishikesh More <rishikesh.more@imgtec.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. 
+ *****************************************************************************/ + +#include "common/common.h" +#include "macros.h" + +#if !HIGH_BIT_DEPTH +static void avc_dequant_4x4_msa( int16_t *p_dct, int32_t pi_dequant_mf[6][16], + int32_t i_qp ) +{ + const int32_t i_mf = i_qp % 6; + const int32_t q_bits = i_qp / 6 - 4; + v8i16 dct0, dct1; + v4i32 dequant_m_f0, dequant_m_f1, dequant_m_f2, dequant_m_f3; + + LD_SH2( p_dct, 8, dct0, dct1 ); + + LD_SW2( pi_dequant_mf[i_mf], 4, dequant_m_f0, dequant_m_f1 ); + LD_SW2( pi_dequant_mf[i_mf] + 8, 4, dequant_m_f2, dequant_m_f3 ); + + if ( q_bits >= 0 ) + { + v8i16 dequant_mf_h0, dequant_mf_h1, q_bits_vec; + + q_bits_vec = __msa_fill_h( q_bits ); + + PCKEV_H2_SH( dequant_m_f1, dequant_m_f0, dequant_m_f3, dequant_m_f2, + dequant_mf_h0, dequant_mf_h1 ); + + dct0 *= dequant_mf_h0; + dct1 *= dequant_mf_h1; + dct0 <<= q_bits_vec; + dct1 <<= q_bits_vec; + ST_SH2( dct0, dct1, p_dct, 8 ); + } + else + { + const int32_t q_bits_add = 1 << ( -q_bits - 1 ); + v4i32 dct_signed_w0, dct_signed_w1, dct_signed_w2, dct_signed_w3; + v4i32 q_bits_vec, q_bits_vec_add; + + q_bits_vec_add = __msa_fill_w( q_bits_add ); + q_bits_vec = __msa_fill_w( -q_bits ); + + UNPCK_SH_SW( dct0, dct_signed_w0, dct_signed_w1 ); + UNPCK_SH_SW( dct1, dct_signed_w2, dct_signed_w3 ); + + dct_signed_w0 *= dequant_m_f0; + dct_signed_w1 *= dequant_m_f1; + dct_signed_w2 *= dequant_m_f2; + dct_signed_w3 *= dequant_m_f3; + dct_signed_w0 += q_bits_vec_add; + dct_signed_w1 += q_bits_vec_add; + dct_signed_w2 += q_bits_vec_add; + dct_signed_w3 += q_bits_vec_add; + + SRA_4V( dct_signed_w0, dct_signed_w1, dct_signed_w2, dct_signed_w3, + q_bits_vec ); + PCKEV_H2_SH( dct_signed_w1, dct_signed_w0, dct_signed_w3, dct_signed_w2, + dct0, dct1 ); + ST_SH2( dct0, dct1, p_dct, 8 ); + } +} + +static void avc_dequant_8x8_msa( int16_t *p_dct, int32_t pi_dequant_mf[6][64], + int32_t i_qp ) +{ + const int32_t i_mf = i_qp % 6; + const int32_t q_bits = i_qp / 6 - 6; + v8i16 dct0, dct1, dct2, 
dct3, dct4, dct5, dct6, dct7; + v4i32 dequant_m_f0, dequant_m_f1, dequant_m_f2, dequant_m_f3; + v4i32 dequant_m_f4, dequant_m_f5, dequant_m_f6, dequant_m_f7; + v4i32 dequant_m_f8, dequant_m_f9, dequant_m_f10, dequant_m_f11; + v4i32 dequant_m_f12, dequant_m_f13, dequant_m_f14, dequant_m_f15; + + LD_SH8( p_dct, 8, dct0, dct1, dct2, dct3, dct4, dct5, dct6, dct7 ); + + LD_SW2( pi_dequant_mf[i_mf], 4, dequant_m_f0, dequant_m_f1 ); + LD_SW2( pi_dequant_mf[i_mf] + 8, 4, dequant_m_f2, dequant_m_f3 ); + LD_SW2( pi_dequant_mf[i_mf] + 16, 4, dequant_m_f4, dequant_m_f5 ); + LD_SW2( pi_dequant_mf[i_mf] + 24, 4, dequant_m_f6, dequant_m_f7 ); + LD_SW2( pi_dequant_mf[i_mf] + 32, 4, dequant_m_f8, dequant_m_f9 ); + LD_SW2( pi_dequant_mf[i_mf] + 40, 4, dequant_m_f10, dequant_m_f11 ); + LD_SW2( pi_dequant_mf[i_mf] + 48, 4, dequant_m_f12, dequant_m_f13 ); + LD_SW2( pi_dequant_mf[i_mf] + 56, 4, dequant_m_f14, dequant_m_f15 ); + + if ( q_bits >= 0 ) + { + v8i16 q_bits_vec; + v8i16 dequant_mf_h0, dequant_mf_h1, dequant_mf_h2, dequant_mf_h3; + v8i16 dequant_mf_h4, dequant_mf_h5, dequant_mf_h6, dequant_mf_h7; + + q_bits_vec = __msa_fill_h( q_bits ); + + PCKEV_H4_SH( dequant_m_f1, dequant_m_f0, dequant_m_f3, dequant_m_f2, + dequant_m_f5, dequant_m_f4, dequant_m_f7, dequant_m_f6, + dequant_mf_h0, dequant_mf_h1, + dequant_mf_h2, dequant_mf_h3 ); + PCKEV_H4_SH( dequant_m_f9, dequant_m_f8, dequant_m_f11, dequant_m_f10, + dequant_m_f13, dequant_m_f12, dequant_m_f15, dequant_m_f14, + dequant_mf_h4, dequant_mf_h5, + dequant_mf_h6, dequant_mf_h7 ); + + dct0 *= dequant_mf_h0; + dct1 *= dequant_mf_h1; + dct2 *= dequant_mf_h2; + dct3 *= dequant_mf_h3; + dct4 *= dequant_mf_h4; + dct5 *= dequant_mf_h5; + dct6 *= dequant_mf_h6; + dct7 *= dequant_mf_h7; + + SLLI_4V( dct0, dct1, dct2, dct3, q_bits_vec ); + SLLI_4V( dct4, dct5, dct6, dct7, q_bits_vec ); + + ST_SH8( dct0, dct1, dct2, dct3, dct4, dct5, dct6, dct7, p_dct, 8 ); + } + else + { + const int32_t q_bits_add = 1 << ( -q_bits - 1 ); + v4i32 
dct_signed_w0, dct_signed_w1, dct_signed_w2, dct_signed_w3; + v4i32 dct_signed_w4, dct_signed_w5, dct_signed_w6, dct_signed_w7; + v4i32 dct_signed_w8, dct_signed_w9, dct_signed_w10, dct_signed_w11; + v4i32 dct_signed_w12, dct_signed_w13, dct_signed_w14, dct_signed_w15; + v4i32 q_bits_vec, q_bits_vec_add; + + q_bits_vec_add = __msa_fill_w( q_bits_add ); + q_bits_vec = __msa_fill_w( -q_bits ); + + UNPCK_SH_SW( dct0, dct_signed_w0, dct_signed_w1 ); + UNPCK_SH_SW( dct1, dct_signed_w2, dct_signed_w3 ); + UNPCK_SH_SW( dct2, dct_signed_w4, dct_signed_w5 ); + UNPCK_SH_SW( dct3, dct_signed_w6, dct_signed_w7 ); + UNPCK_SH_SW( dct4, dct_signed_w8, dct_signed_w9 ); + UNPCK_SH_SW( dct5, dct_signed_w10, dct_signed_w11 ); + UNPCK_SH_SW( dct6, dct_signed_w12, dct_signed_w13 ); + UNPCK_SH_SW( dct7, dct_signed_w14, dct_signed_w15 ); + + dct_signed_w0 *= dequant_m_f0; + dct_signed_w1 *= dequant_m_f1; + dct_signed_w2 *= dequant_m_f2; + dct_signed_w3 *= dequant_m_f3; + dct_signed_w4 *= dequant_m_f4; + dct_signed_w5 *= dequant_m_f5; + dct_signed_w6 *= dequant_m_f6; + dct_signed_w7 *= dequant_m_f7; + dct_signed_w8 *= dequant_m_f8; + dct_signed_w9 *= dequant_m_f9; + dct_signed_w10 *= dequant_m_f10; + dct_signed_w11 *= dequant_m_f11; + dct_signed_w12 *= dequant_m_f12; + dct_signed_w13 *= dequant_m_f13; + dct_signed_w14 *= dequant_m_f14; + dct_signed_w15 *= dequant_m_f15; + + dct_signed_w0 += q_bits_vec_add; + dct_signed_w1 += q_bits_vec_add; + dct_signed_w2 += q_bits_vec_add; + dct_signed_w3 += q_bits_vec_add; + dct_signed_w4 += q_bits_vec_add; + dct_signed_w5 += q_bits_vec_add; + dct_signed_w6 += q_bits_vec_add; + dct_signed_w7 += q_bits_vec_add; + dct_signed_w8 += q_bits_vec_add; + dct_signed_w9 += q_bits_vec_add; + dct_signed_w10 += q_bits_vec_add; + dct_signed_w11 += q_bits_vec_add; + dct_signed_w12 += q_bits_vec_add; + dct_signed_w13 += q_bits_vec_add; + dct_signed_w14 += q_bits_vec_add; + dct_signed_w15 += q_bits_vec_add; + + SRA_4V( dct_signed_w0, dct_signed_w1, dct_signed_w2, 
dct_signed_w3, + q_bits_vec ); + SRA_4V( dct_signed_w4, dct_signed_w5, dct_signed_w6, dct_signed_w7, + q_bits_vec ); + SRA_4V( dct_signed_w8, dct_signed_w9, dct_signed_w10, dct_signed_w11,
View file
x264-snapshot-20150804-2245.tar.bz2/common/mips/quant.h
Added
@@ -0,0 +1,43 @@ +/***************************************************************************** + * quant.h: msa quantization and level-run + ***************************************************************************** + * Copyright (C) 2015 x264 project + * + * Authors: Rishikesh More <rishikesh.more@imgtec.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. 
+ *****************************************************************************/ + +#ifndef X264_MIPS_QUANT_H +#define X264_MIPS_QUANT_H + +void x264_dequant_4x4_msa( int16_t *p_dct, int32_t pi_dequant_mf[6][16], + int32_t i_qp ); +void x264_dequant_8x8_msa( int16_t *p_dct, int32_t pi_dequant_mf[6][64], + int32_t i_qp ); +void x264_dequant_4x4_dc_msa( int16_t *p_dct, int32_t pi_dequant_mf[6][16], + int32_t i_qp ); +int32_t x264_quant_4x4_msa( int16_t *p_dct, uint16_t *p_mf, uint16_t *p_bias ); +int32_t x264_quant_4x4x4_msa( int16_t p_dct[4][16], + uint16_t pu_mf[16], uint16_t pu_bias[16] ); +int32_t x264_quant_8x8_msa( int16_t *p_dct, uint16_t *p_mf, uint16_t *p_bias ); +int32_t x264_quant_4x4_dc_msa( int16_t *p_dct, int32_t i_mf, int32_t i_bias ); +int32_t x264_coeff_last64_msa( int16_t *p_src ); +int32_t x264_coeff_last16_msa( int16_t *p_src ); + +#endif
View file
x264-snapshot-20141218-2245.tar.bz2/common/mvpred.c -> x264-snapshot-20150804-2245.tar.bz2/common/mvpred.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * mvpred.c: motion vector prediction ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Fiona Glaser <fiona@x264.com>
View file
x264-snapshot-20141218-2245.tar.bz2/common/opencl.c -> x264-snapshot-20150804-2245.tar.bz2/common/opencl.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * opencl.c: OpenCL initialization and kernel compilation ***************************************************************************** - * Copyright (C) 2012-2014 x264 project + * Copyright (C) 2012-2015 x264 project * * Authors: Steve Borho <sborho@multicorewareinc.com> * Anton Mitrofanov <BugMaster@narod.ru>
View file
x264-snapshot-20141218-2245.tar.bz2/common/opencl.h -> x264-snapshot-20150804-2245.tar.bz2/common/opencl.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * opencl.h: OpenCL structures and defines ***************************************************************************** - * Copyright (C) 2012-2014 x264 project + * Copyright (C) 2012-2015 x264 project * * Authors: Steve Borho <sborho@multicorewareinc.com> * Anton Mitrofanov <BugMaster@narod.ru>
View file
x264-snapshot-20141218-2245.tar.bz2/common/osdep.c -> x264-snapshot-20150804-2245.tar.bz2/common/osdep.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * osdep.c: platform-specific code ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Steven Walters <kemuri9@gmail.com> * Laurent Aimar <fenrir@via.ecp.fr> @@ -94,51 +94,6 @@ } #endif -#if HAVE_MMX -#ifdef __INTEL_COMPILER -/* Agner's patch to Intel's CPU dispatcher from pages 131-132 of - * http://agner.org/optimize/optimizing_cpp.pdf (2011-01-30) - * adapted to x264's cpu schema. */ - -// Global variable indicating cpu -int __intel_cpu_indicator = 0; -// CPU dispatcher function -void x264_intel_cpu_indicator_init( void ) -{ - unsigned int cpu = x264_cpu_detect(); - if( cpu&X264_CPU_AVX ) - __intel_cpu_indicator = 0x20000; - else if( cpu&X264_CPU_SSE42 ) - __intel_cpu_indicator = 0x8000; - else if( cpu&X264_CPU_SSE4 ) - __intel_cpu_indicator = 0x2000; - else if( cpu&X264_CPU_SSSE3 ) - __intel_cpu_indicator = 0x1000; - else if( cpu&X264_CPU_SSE3 ) - __intel_cpu_indicator = 0x800; - else if( cpu&X264_CPU_SSE2 && !(cpu&X264_CPU_SSE2_IS_SLOW) ) - __intel_cpu_indicator = 0x200; - else if( cpu&X264_CPU_SSE ) - __intel_cpu_indicator = 0x80; - else if( cpu&X264_CPU_MMX2 ) - __intel_cpu_indicator = 8; - else - __intel_cpu_indicator = 1; -} - -/* __intel_cpu_indicator_init appears to have a non-standard calling convention that - * assumes certain registers aren't preserved, so we'll route it through a function - * that backs up all the registers. */ -void __intel_cpu_indicator_init( void ) -{ - x264_safe_intel_cpu_indicator_init(); -} -#else -void x264_intel_cpu_indicator_init( void ) -{} -#endif -#endif - #ifdef _WIN32 /* Functions for dealing with Unicode on Windows. */ FILE *x264_fopen( const char *filename, const char *mode )
View file
x264-snapshot-20141218-2245.tar.bz2/common/osdep.h -> x264-snapshot-20150804-2245.tar.bz2/common/osdep.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * osdep.h: platform-specific code ***************************************************************************** - * Copyright (C) 2007-2014 x264 project + * Copyright (C) 2007-2015 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Laurent Aimar <fenrir@via.ecp.fr>
View file
x264-snapshot-20141218-2245.tar.bz2/common/pixel.c -> x264-snapshot-20150804-2245.tar.bz2/common/pixel.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * pixel.c: pixel metrics ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Laurent Aimar <fenrir@via.ecp.fr> @@ -42,6 +42,9 @@ # include "aarch64/pixel.h" # include "aarch64/predict.h" #endif +#if ARCH_MIPS +# include "mips/pixel.h" +#endif /**************************************************************************** @@ -598,8 +601,8 @@ INTRA_MBCMP(satd, 4x4, v, h, dc, , _neon, _neon ) INTRA_MBCMP( sad, 8x8, dc, h, v, c, _neon, _neon ) INTRA_MBCMP(satd, 8x8, dc, h, v, c, _neon, _neon ) -INTRA_MBCMP( sad, 8x16, dc, h, v, c, _neon, _c ) -INTRA_MBCMP(satd, 8x16, dc, h, v, c, _neon, _c ) +INTRA_MBCMP( sad, 8x16, dc, h, v, c, _neon, _neon ) +INTRA_MBCMP(satd, 8x16, dc, h, v, c, _neon, _neon ) INTRA_MBCMP( sad, 16x16, v, h, dc, , _neon, _neon ) INTRA_MBCMP(satd, 16x16, v, h, dc, , _neon, _neon ) #endif @@ -1409,25 +1412,28 @@ #if ARCH_AARCH64 if( cpu&X264_CPU_NEON ) { - INIT7( sad, _neon ); + INIT8( sad, _neon ); // AArch64 has no distinct instructions for aligned load/store - INIT7_NAME( sad_aligned, sad, _neon ); + INIT8_NAME( sad_aligned, sad, _neon ); INIT7( sad_x3, _neon ); INIT7( sad_x4, _neon ); - INIT7( ssd, _neon ); - INIT7( satd, _neon ); + INIT8( ssd, _neon ); + INIT8( satd, _neon ); INIT7( satd_x3, _neon ); INIT7( satd_x4, _neon ); INIT4( hadamard_ac, _neon ); pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_neon; pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_neon; + pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_neon; pixf->var[PIXEL_8x8] = x264_pixel_var_8x8_neon; pixf->var[PIXEL_8x16] = x264_pixel_var_8x16_neon; pixf->var[PIXEL_16x16] = x264_pixel_var_16x16_neon; pixf->var2[PIXEL_8x8] = x264_pixel_var2_8x8_neon; pixf->var2[PIXEL_8x16] = x264_pixel_var2_8x16_neon; + pixf->vsad = 
x264_pixel_vsad_neon; + pixf->asd8 = x264_pixel_asd8_neon; pixf->intra_sad_x3_4x4 = x264_intra_sad_x3_4x4_neon; pixf->intra_satd_x3_4x4 = x264_intra_satd_x3_4x4_neon; @@ -1440,11 +1446,44 @@ pixf->intra_sad_x3_16x16 = x264_intra_sad_x3_16x16_neon; pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_neon; + pixf->ssd_nv12_core = x264_pixel_ssd_nv12_core_neon; pixf->ssim_4x4x2_core = x264_pixel_ssim_4x4x2_core_neon; pixf->ssim_end4 = x264_pixel_ssim_end4_neon; } #endif // ARCH_AARCH64 +#if HAVE_MSA + if( cpu&X264_CPU_MSA ) + { + INIT8( sad, _msa ); + INIT8_NAME( sad_aligned, sad, _msa ); + INIT8( ssd, _msa ); + INIT7( sad_x3, _msa ); + INIT7( sad_x4, _msa ); + INIT8( satd, _msa ); + INIT4( hadamard_ac, _msa ); + + pixf->intra_sad_x3_4x4 = x264_intra_sad_x3_4x4_msa; + pixf->intra_sad_x3_8x8 = x264_intra_sad_x3_8x8_msa; + pixf->intra_sad_x3_8x8c = x264_intra_sad_x3_8x8c_msa; + pixf->intra_sad_x3_16x16 = x264_intra_sad_x3_16x16_msa; + pixf->intra_satd_x3_4x4 = x264_intra_satd_x3_4x4_msa; + pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_msa; + pixf->intra_satd_x3_8x8c = x264_intra_satd_x3_8x8c_msa; + pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_msa; + + pixf->ssim_4x4x2_core = x264_ssim_4x4x2_core_msa; + + pixf->var[PIXEL_16x16] = x264_pixel_var_16x16_msa; + pixf->var[PIXEL_8x16] = x264_pixel_var_8x16_msa; + pixf->var[PIXEL_8x8] = x264_pixel_var_8x8_msa; + pixf->var2[PIXEL_8x16] = x264_pixel_var2_8x16_msa; + pixf->var2[PIXEL_8x8] = x264_pixel_var2_8x8_msa; + pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16; + pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8; + } +#endif // HAVE_MSA + #endif // HIGH_BIT_DEPTH #if HAVE_ALTIVEC if( cpu&X264_CPU_ALTIVEC )
View file
x264-snapshot-20141218-2245.tar.bz2/common/pixel.h -> x264-snapshot-20150804-2245.tar.bz2/common/pixel.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * pixel.c: pixel metrics ***************************************************************************** - * Copyright (C) 2004-2014 x264 project + * Copyright (C) 2004-2015 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Fiona Glaser <fiona@x264.com>
View file
x264-snapshot-20141218-2245.tar.bz2/common/ppc/dct.c -> x264-snapshot-20150804-2245.tar.bz2/common/ppc/dct.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * dct.c: ppc transform and zigzag ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Guillaume Poirier <gpoirier@mplayerhq.hu> * Eric Petit <eric.petit@lapsus.org> @@ -264,7 +264,7 @@ vec_u8_t lv = vec_ld(0, dest); \ vec_u8_t dstv = vec_perm(lv, zero_u8v, (vec_u8_t)perm_ldv); \ vec_s16_t idct_sh6 = vec_sra(idctv, sixv); \ - vec_u16_t dst16 = (vec_u16_t)vec_mergeh(zero_u8v, dstv); \ + vec_u16_t dst16 = vec_u8_to_u16_h(dstv); \ vec_s16_t idstsum = vec_adds(idct_sh6, (vec_s16_t)dst16); \ vec_u8_t idstsum8 = vec_s16_to_u8(idstsum); \ /* unaligned store */ \ @@ -384,7 +384,7 @@ vec_u8_t lv = vec_ld( 7, dest ); \ vec_u8_t dstv = vec_perm( hv, lv, (vec_u8_t)perm_ldv ); \ vec_s16_t idct_sh6 = vec_sra(idctv, sixv); \ - vec_u16_t dst16 = (vec_u16_t)vec_mergeh(zero_u8v, dstv); \ + vec_u16_t dst16 = vec_u8_to_u16_h(dstv); \ vec_s16_t idstsum = vec_adds(idct_sh6, (vec_s16_t)dst16); \ vec_u8_t idstsum8 = vec_packsu(zero_s16v, idstsum); \ /* unaligned store */ \
View file
x264-snapshot-20141218-2245.tar.bz2/common/ppc/dct.h -> x264-snapshot-20150804-2245.tar.bz2/common/ppc/dct.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * dct.h: ppc transform and zigzag ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Eric Petit <eric.petit@lapsus.org> * Guillaume Poirier <gpoirier@mplayerhq.hu>
View file
x264-snapshot-20141218-2245.tar.bz2/common/ppc/deblock.c -> x264-snapshot-20150804-2245.tar.bz2/common/ppc/deblock.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * deblock.c: ppc deblocking ***************************************************************************** - * Copyright (C) 2007-2014 x264 project + * Copyright (C) 2007-2015 x264 project * * Authors: Guillaume Poirier <gpoirier@mplayerhq.hu> *
View file
x264-snapshot-20141218-2245.tar.bz2/common/ppc/mc.c -> x264-snapshot-20150804-2245.tar.bz2/common/ppc/mc.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * mc.c: ppc motion compensation ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Eric Petit <eric.petit@lapsus.org> * Guillaume Poirier <gpoirier@mplayerhq.hu> @@ -40,24 +40,19 @@ typedef void (*pf_mc_t)( uint8_t *src, intptr_t i_src, uint8_t *dst, intptr_t i_dst, int i_height ); - -static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1}; -static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2}; - - static inline int x264_tapfilter( uint8_t *pix, int i_pix_next ) { return pix[-2*i_pix_next] - 5*pix[-1*i_pix_next] + 20*(pix[0] + pix[1*i_pix_next]) - 5*pix[ 2*i_pix_next] + pix[ 3*i_pix_next]; } + static inline int x264_tapfilter1( uint8_t *pix ) { return pix[-2] - 5*pix[-1] + 20*(pix[0] + pix[1]) - 5*pix[ 2] + pix[ 3]; } - static inline void x264_pixel_avg2_w4_altivec( uint8_t *dst, intptr_t i_dst, uint8_t *src1, intptr_t i_src1, uint8_t *src2, int i_height ) @@ -181,10 +176,10 @@ { int qpel_idx = ((mvy&3)<<2) + (mvx&3); intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2); - uint8_t *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride; + uint8_t *src1 = src[x264_hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride; if( qpel_idx & 5 ) /* qpel interpolation needed */ { - uint8_t *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); + uint8_t *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); switch( i_width ) { @@ -229,10 +224,10 @@ { int qpel_idx = ((mvy&3)<<2) + (mvx&3); intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2); - uint8_t *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride; + uint8_t *src1 = src[x264_hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride; if( qpel_idx & 5 ) /* qpel interpolation needed */ { - uint8_t *src2 = 
src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); + uint8_t *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); switch( i_width ) { case 4: @@ -296,6 +291,12 @@ } } +#ifdef WORDS_BIGENDIAN +#define VSLD(a,b,n) vec_sld(a,b,n) +#else +#define VSLD(a,b,n) vec_sld(b,a,16-n) +#endif + static void mc_chroma_altivec_4xh( uint8_t *dstu, uint8_t *dstv, intptr_t i_dst_stride, uint8_t *src, intptr_t i_src_stride, int mvx, int mvy, int i_height ) @@ -321,8 +322,13 @@ vec_u16_t src0v_16, src1v_16, src2v_16, src3v_16, dstv16; vec_u16_t shiftv, k32v; +#ifdef WORDS_BIGENDIAN static const vec_u8_t perm0v = CV(1,5,9,13,1,5,9,13,1,5,9,13,1,5,9,13); static const vec_u8_t perm1v = CV(3,7,11,15,3,7,11,15,3,7,11,15,3,7,11,15); +#else + static const vec_u8_t perm0v = CV(0,4,8,12,0,4,8,12,0,4,8,12,0,4,8,12); + static const vec_u8_t perm1v = CV(2,6,10,14,2,6,10,14,2,6,10,14,2,6,10,14); +#endif coeff0v = vec_ld( 0, coeff ); coeff3v = vec_splat( coeff0v, 3 ); @@ -334,7 +340,7 @@ VEC_LOAD( src, src2v_8, 9, vec_u8_t, src ); src2v_16 = vec_u8_to_u16( src2v_8 ); - src3v_16 = vec_u8_to_u16( vec_sld( src2v_8, src2v_8, 2 ) ); + src3v_16 = vec_u8_to_u16( VSLD( src2v_8, src2v_8, 2 ) ); for( int y = 0; y < i_height; y += 2 ) { @@ -342,7 +348,7 @@ src1v_16 = src3v_16; VEC_LOAD( srcp, src2v_8, 9, vec_u8_t, src ); src2v_16 = vec_u8_to_u16( src2v_8 ); - src3v_16 = vec_u8_to_u16( vec_sld( src2v_8, src2v_8, 2 ) ); + src3v_16 = vec_u8_to_u16( VSLD( src2v_8, src2v_8, 2 ) ); dstv16 = vec_mladd( coeff0v, src0v_16, k32v ); dstv16 = vec_mladd( coeff1v, src1v_16, dstv16 ); @@ -364,7 +370,7 @@ src1v_16 = src3v_16; VEC_LOAD( srcp, src2v_8, 9, vec_u8_t, src ); src2v_16 = vec_u8_to_u16( src2v_8 ); - src3v_16 = vec_u8_to_u16( vec_sld( src2v_8, src2v_8, 2 ) ); + src3v_16 = vec_u8_to_u16( VSLD( src2v_8, src2v_8, 2 ) ); dstv16 = vec_mladd( coeff0v, src0v_16, k32v ); dstv16 = vec_mladd( coeff1v, src1v_16, dstv16 ); @@ -420,12 +426,17 @@ k32v = vec_sl( vec_splat_u16( 1 ), vec_splat_u16( 5 ) ); shiftv = 
vec_splat_u16( 6 ); +#ifdef WORDS_BIGENDIAN static const vec_u8_t perm0v = CV(1,5,9,13,17,21,25,29,0,0,0,0,0,0,0,0); static const vec_u8_t perm1v = CV(3,7,11,15,19,23,27,31,0,0,0,0,0,0,0,0); +#else + static const vec_u8_t perm0v = CV(0,4,8,12,16,20,24,28,1,1,1,1,1,1,1,1); + static const vec_u8_t perm1v = CV(2,6,10,14,18,22,26,30,1,1,1,1,1,1,1,1); +#endif VEC_LOAD( src, src2v_8, 16, vec_u8_t, src ); VEC_LOAD( src+16, src3v_8, 2, vec_u8_t, src ); - src3v_8 = vec_sld( src2v_8, src3v_8, 2 ); + src3v_8 = VSLD( src2v_8, src3v_8, 2 ); for( int y = 0; y < i_height; y += 2 ) { @@ -434,7 +445,7 @@ VEC_LOAD( srcp, src2v_8, 16, vec_u8_t, src ); VEC_LOAD( srcp+16, src3v_8, 2, vec_u8_t, src ); - src3v_8 = vec_sld( src2v_8, src3v_8, 2 ); + src3v_8 = VSLD( src2v_8, src3v_8, 2 ); src0v_16h = vec_u8_to_u16_h( src0v_8 ); src0v_16l = vec_u8_to_u16_l( src0v_8 ); @@ -472,7 +483,7 @@ VEC_LOAD( srcp, src2v_8, 16, vec_u8_t, src ); VEC_LOAD( srcp+16, src3v_8, 2, vec_u8_t, src ); - src3v_8 = vec_sld( src2v_8, src3v_8, 2 ); + src3v_8 = VSLD( src2v_8, src3v_8, 2 ); src0v_16h = vec_u8_to_u16_h( src0v_8 ); src0v_16l = vec_u8_to_u16_l( src0v_8 ); @@ -555,11 +566,11 @@ VEC_LOAD_G( &src[x- 2+i_stride*y], src1v, 16, vec_u8_t); \ VEC_LOAD_G( &src[x+14+i_stride*y], src6v, 16, vec_u8_t); \ \ - src2v = vec_sld( src1v, src6v, 1 ); \ - src3v = vec_sld( src1v, src6v, 2 ); \ - src4v = vec_sld( src1v, src6v, 3 ); \ - src5v = vec_sld( src1v, src6v, 4 ); \ - src6v = vec_sld( src1v, src6v, 5 ); \ + src2v = VSLD( src1v, src6v, 1 ); \ + src3v = VSLD( src1v, src6v, 2 ); \ + src4v = VSLD( src1v, src6v, 3 ); \ + src5v = VSLD( src1v, src6v, 4 ); \ + src6v = VSLD( src1v, src6v, 5 ); \ \ temp1v = vec_u8_to_s16_h( src1v ); \ temp2v = vec_u8_to_s16_h( src2v ); \ @@ -634,12 +645,12 @@ #define HPEL_FILTER_CENTRAL() \ { \ - temp1v = vec_sld( tempav, tempbv, 12 ); \ - temp2v = vec_sld( tempav, tempbv, 14 ); \ + temp1v = VSLD( tempav, tempbv, 12 ); \ + temp2v = VSLD( tempav, tempbv, 14 ); \ temp3v = tempbv; \ - temp4v = 
vec_sld( tempbv, tempcv, 2 ); \ - temp5v = vec_sld( tempbv, tempcv, 4 ); \ - temp6v = vec_sld( tempbv, tempcv, 6 ); \ + temp4v = VSLD( tempbv, tempcv, 2 ); \ + temp5v = VSLD( tempbv, tempcv, 4 ); \ + temp6v = VSLD( tempbv, tempcv, 6 ); \ \ HPEL_FILTER_2( temp1v, temp2v, temp3v, \ temp4v, temp5v, temp6v ); \ @@ -647,12 +658,12 @@ dest1v = vec_add( temp1v, thirtytwov ); \ dest1v = vec_sra( dest1v, sixv ); \ \ - temp1v = vec_sld( tempbv, tempcv, 12 ); \ - temp2v = vec_sld( tempbv, tempcv, 14 ); \ + temp1v = VSLD( tempbv, tempcv, 12 ); \ + temp2v = VSLD( tempbv, tempcv, 14 ); \ temp3v = tempcv; \ - temp4v = vec_sld( tempcv, tempdv, 2 ); \ - temp5v = vec_sld( tempcv, tempdv, 4 ); \ - temp6v = vec_sld( tempcv, tempdv, 6 ); \ + temp4v = VSLD( tempcv, tempdv, 2 ); \
View file
x264-snapshot-20141218-2245.tar.bz2/common/ppc/mc.h -> x264-snapshot-20150804-2245.tar.bz2/common/ppc/mc.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * mc.h: ppc motion compensation ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Eric Petit <eric.petit@lapsus.org> *
View file
x264-snapshot-20141218-2245.tar.bz2/common/ppc/pixel.c -> x264-snapshot-20150804-2245.tar.bz2/common/ppc/pixel.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * pixel.c: ppc pixel metrics ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Eric Petit <eric.petit@lapsus.org> * Guillaume Poirier <gpoirier@mplayerhq.hu>
View file
x264-snapshot-20141218-2245.tar.bz2/common/ppc/pixel.h -> x264-snapshot-20150804-2245.tar.bz2/common/ppc/pixel.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * pixel.h: ppc pixel metrics ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Eric Petit <eric.petit@lapsus.org> *
View file
x264-snapshot-20141218-2245.tar.bz2/common/ppc/ppccommon.h -> x264-snapshot-20150804-2245.tar.bz2/common/ppc/ppccommon.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * ppccommon.h: ppc utility macros ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Eric Petit <eric.petit@lapsus.org> * @@ -81,10 +81,17 @@ /*********************************************************************** * 8 <-> 16 bits conversions **********************************************************************/ +#ifdef WORDS_BIGENDIAN #define vec_u8_to_u16_h(v) (vec_u16_t) vec_mergeh( zero_u8v, (vec_u8_t) v ) #define vec_u8_to_u16_l(v) (vec_u16_t) vec_mergel( zero_u8v, (vec_u8_t) v ) #define vec_u8_to_s16_h(v) (vec_s16_t) vec_mergeh( zero_u8v, (vec_u8_t) v ) #define vec_u8_to_s16_l(v) (vec_s16_t) vec_mergel( zero_u8v, (vec_u8_t) v ) +#else +#define vec_u8_to_u16_h(v) (vec_u16_t) vec_mergeh( (vec_u8_t) v, zero_u8v ) +#define vec_u8_to_u16_l(v) (vec_u16_t) vec_mergel( (vec_u8_t) v, zero_u8v ) +#define vec_u8_to_s16_h(v) (vec_s16_t) vec_mergeh( (vec_u8_t) v, zero_u8v ) +#define vec_u8_to_s16_l(v) (vec_s16_t) vec_mergel( (vec_u8_t) v, zero_u8v ) +#endif #define vec_u8_to_u16(v) vec_u8_to_u16_h(v) #define vec_u8_to_s16(v) vec_u8_to_s16_h(v) @@ -96,10 +103,17 @@ /*********************************************************************** * 16 <-> 32 bits conversions **********************************************************************/ +#ifdef WORDS_BIGENDIAN #define vec_u16_to_u32_h(v) (vec_u32_t) vec_mergeh( zero_u16v, (vec_u16_t) v ) #define vec_u16_to_u32_l(v) (vec_u32_t) vec_mergel( zero_u16v, (vec_u16_t) v ) #define vec_u16_to_s32_h(v) (vec_s32_t) vec_mergeh( zero_u16v, (vec_u16_t) v ) #define vec_u16_to_s32_l(v) (vec_s32_t) vec_mergel( zero_u16v, (vec_u16_t) v ) +#else +#define vec_u16_to_u32_h(v) (vec_u32_t) vec_mergeh( (vec_u16_t) v, zero_u16v ) +#define vec_u16_to_u32_l(v) (vec_u32_t) vec_mergel( (vec_u16_t) v, zero_u16v ) +#define 
vec_u16_to_s32_h(v) (vec_s32_t) vec_mergeh( (vec_u16_t) v, zero_u16v ) +#define vec_u16_to_s32_l(v) (vec_s32_t) vec_mergel( (vec_u16_t) v, zero_u16v ) +#endif #define vec_u16_to_u32(v) vec_u16_to_u32_h(v) #define vec_u16_to_s32(v) vec_u16_to_s32_h(v)
View file
x264-snapshot-20141218-2245.tar.bz2/common/ppc/predict.c -> x264-snapshot-20150804-2245.tar.bz2/common/ppc/predict.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * predict.c: ppc intra prediction ***************************************************************************** - * Copyright (C) 2007-2014 x264 project + * Copyright (C) 2007-2015 x264 project * * Authors: Guillaume Poirier <gpoirier@mplayerhq.hu> *
View file
x264-snapshot-20141218-2245.tar.bz2/common/ppc/predict.h -> x264-snapshot-20150804-2245.tar.bz2/common/ppc/predict.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * predict.h: ppc intra prediction ***************************************************************************** - * Copyright (C) 2007-2014 x264 project + * Copyright (C) 2007-2015 x264 project * * Authors: Guillaume Poirier <gpoirier@mplayerhq.hu> *
View file
x264-snapshot-20141218-2245.tar.bz2/common/ppc/quant.c -> x264-snapshot-20150804-2245.tar.bz2/common/ppc/quant.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * quant.c: ppc quantization ***************************************************************************** - * Copyright (C) 2007-2014 x264 project + * Copyright (C) 2007-2015 x264 project * * Authors: Guillaume Poirier <gpoirier@mplayerhq.hu> * @@ -251,6 +251,14 @@ vec_st(dctv, 8*y, dct); \ } +#ifdef WORDS_BIGENDIAN +#define VEC_MULE vec_mule +#define VEC_MULO vec_mulo +#else +#define VEC_MULE vec_mulo +#define VEC_MULO vec_mule +#endif + #define DEQUANT_SHR() \ { \ dctv = vec_ld(8*y, dct); \ @@ -259,14 +267,14 @@ mf1v = vec_ld(16*y, dequant_mf[i_mf]); \ mf2v = vec_ld(16+16*y, dequant_mf[i_mf]); \ \ - multEvenvA = vec_mule(dct1v, (vec_s16_t)mf1v); \ - multOddvA = vec_mulo(dct1v, (vec_s16_t)mf1v); \ + multEvenvA = VEC_MULE(dct1v, (vec_s16_t)mf1v); \ + multOddvA = VEC_MULO(dct1v, (vec_s16_t)mf1v); \ temp1v = vec_add(vec_sl(multEvenvA, sixteenv), multOddvA); \ temp1v = vec_add(temp1v, fv); \ temp1v = vec_sra(temp1v, i_qbitsv); \ \ - multEvenvA = vec_mule(dct2v, (vec_s16_t)mf2v); \ - multOddvA = vec_mulo(dct2v, (vec_s16_t)mf2v); \ + multEvenvA = VEC_MULE(dct2v, (vec_s16_t)mf2v); \ + multOddvA = VEC_MULO(dct2v, (vec_s16_t)mf2v); \ temp2v = vec_add(vec_sl(multEvenvA, sixteenv), multOddvA); \ temp2v = vec_add(temp2v, fv); \ temp2v = vec_sra(temp2v, i_qbitsv); \
View file
x264-snapshot-20141218-2245.tar.bz2/common/ppc/quant.h -> x264-snapshot-20150804-2245.tar.bz2/common/ppc/quant.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * quant.c: ppc quantization ***************************************************************************** - * Copyright (C) 2007-2014 x264 project + * Copyright (C) 2007-2015 x264 project * * Authors: Guillaume Poirier <gpoirier@mplayerhq.hu> *
View file
x264-snapshot-20141218-2245.tar.bz2/common/predict.c -> x264-snapshot-20150804-2245.tar.bz2/common/predict.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * predict.c: intra prediction ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu> @@ -43,6 +43,9 @@ #if ARCH_AARCH64 # include "aarch64/predict.h" #endif +#if ARCH_MIPS +# include "mips/predict.h" +#endif /**************************************************************************** * 16x16 prediction for intra luma block @@ -906,6 +909,21 @@ #if ARCH_AARCH64 x264_predict_16x16_init_aarch64( cpu, pf ); #endif + +#if !HIGH_BIT_DEPTH +#if HAVE_MSA + if( cpu&X264_CPU_MSA ) + { + pf[I_PRED_16x16_V ] = x264_intra_predict_vert_16x16_msa; + pf[I_PRED_16x16_H ] = x264_intra_predict_hor_16x16_msa; + pf[I_PRED_16x16_DC] = x264_intra_predict_dc_16x16_msa; + pf[I_PRED_16x16_P ] = x264_intra_predict_plane_16x16_msa; + pf[I_PRED_16x16_DC_LEFT]= x264_intra_predict_dc_left_16x16_msa; + pf[I_PRED_16x16_DC_TOP ]= x264_intra_predict_dc_top_16x16_msa; + pf[I_PRED_16x16_DC_128 ]= x264_intra_predict_dc_128_16x16_msa; + } +#endif +#endif } void x264_predict_8x8c_init( int cpu, x264_predict_t pf[7] ) @@ -934,6 +952,15 @@ #if ARCH_AARCH64 x264_predict_8x8c_init_aarch64( cpu, pf ); #endif + +#if !HIGH_BIT_DEPTH +#if HAVE_MSA + if( cpu&X264_CPU_MSA ) + { + pf[I_PRED_CHROMA_P ] = x264_intra_predict_plane_8x8_msa; + } +#endif +#endif } void x264_predict_8x16c_init( int cpu, x264_predict_t pf[7] ) @@ -949,6 +976,10 @@ #if HAVE_MMX x264_predict_8x16c_init_mmx( cpu, pf ); #endif + +#if ARCH_AARCH64 + x264_predict_8x16c_init_aarch64( cpu, pf ); +#endif } void x264_predict_8x8_init( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter ) @@ -978,6 +1009,15 @@ #if ARCH_AARCH64 x264_predict_8x8_init_aarch64( cpu, pf, predict_filter ); #endif + +#if !HIGH_BIT_DEPTH +#if HAVE_MSA + if( 
cpu&X264_CPU_MSA ) + { + pf[I_PRED_8x8_DDL] = x264_intra_predict_ddl_8x8_msa; + } +#endif +#endif } void x264_predict_4x4_init( int cpu, x264_predict_t pf[12] )
View file
x264-snapshot-20141218-2245.tar.bz2/common/predict.h -> x264-snapshot-20150804-2245.tar.bz2/common/predict.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * predict.h: intra prediction ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Laurent Aimar <fenrir@via.ecp.fr>
View file
x264-snapshot-20141218-2245.tar.bz2/common/quant.c -> x264-snapshot-20150804-2245.tar.bz2/common/quant.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * quant.c: quantization and level-run ***************************************************************************** - * Copyright (C) 2005-2014 x264 project + * Copyright (C) 2005-2015 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Fiona Glaser <fiona@x264.com> @@ -40,6 +40,9 @@ #if ARCH_AARCH64 # include "aarch64/quant.h" #endif +#if ARCH_MIPS +# include "mips/quant.h" +#endif #define QUANT_ONE( coef, mf, f ) \ { \ @@ -714,7 +717,8 @@ #endif // HAVE_MMX #if HAVE_ALTIVEC - if( cpu&X264_CPU_ALTIVEC ) { + if( cpu&X264_CPU_ALTIVEC ) + { pf->quant_2x2_dc = x264_quant_2x2_dc_altivec; pf->quant_4x4_dc = x264_quant_4x4_dc_altivec; pf->quant_4x4 = x264_quant_4x4_altivec; @@ -753,6 +757,32 @@ { pf->coeff_last4 = x264_coeff_last4_aarch64; pf->coeff_last8 = x264_coeff_last8_aarch64; + pf->coeff_level_run4 = x264_coeff_level_run4_aarch64; + } + if( cpu&X264_CPU_NEON ) + { + pf->coeff_level_run8 = x264_coeff_level_run8_neon; + pf->coeff_level_run[ DCT_LUMA_AC] = x264_coeff_level_run15_neon; + pf->coeff_level_run[ DCT_LUMA_4x4] = x264_coeff_level_run16_neon; + pf->decimate_score15 = x264_decimate_score15_neon; + pf->decimate_score16 = x264_decimate_score16_neon; + pf->decimate_score64 = x264_decimate_score64_neon; + pf->denoise_dct = x264_denoise_dct_neon; + } +#endif + +#if HAVE_MSA + if( cpu&X264_CPU_MSA ) + { + pf->quant_4x4 = x264_quant_4x4_msa; + pf->quant_4x4_dc = x264_quant_4x4_dc_msa; + pf->quant_4x4x4 = x264_quant_4x4x4_msa; + pf->quant_8x8 = x264_quant_8x8_msa; + pf->dequant_4x4 = x264_dequant_4x4_msa; + pf->dequant_4x4_dc = x264_dequant_4x4_dc_msa; + pf->dequant_8x8 = x264_dequant_8x8_msa; + pf->coeff_last[DCT_LUMA_4x4] = x264_coeff_last16_msa; + pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_msa; } #endif #endif // HIGH_BIT_DEPTH
View file
x264-snapshot-20141218-2245.tar.bz2/common/quant.h -> x264-snapshot-20150804-2245.tar.bz2/common/quant.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * quant.h: quantization and level-run ***************************************************************************** - * Copyright (C) 2005-2014 x264 project + * Copyright (C) 2005-2015 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Fiona Glaser <fiona@x264.com>
View file
x264-snapshot-20141218-2245.tar.bz2/common/rectangle.c -> x264-snapshot-20150804-2245.tar.bz2/common/rectangle.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * rectangle.c: rectangle filling ***************************************************************************** - * Copyright (C) 2010-2014 x264 project + * Copyright (C) 2010-2015 x264 project * * Authors: Fiona Glaser <fiona@x264.com> *
View file
x264-snapshot-20141218-2245.tar.bz2/common/rectangle.h -> x264-snapshot-20150804-2245.tar.bz2/common/rectangle.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * rectangle.h: rectangle filling ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Fiona Glaser <fiona@x264.com> * Loren Merritt <lorenm@u.washington.edu>
View file
x264-snapshot-20141218-2245.tar.bz2/common/set.c -> x264-snapshot-20150804-2245.tar.bz2/common/set.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * set.c: quantization init ***************************************************************************** - * Copyright (C) 2005-2014 x264 project + * Copyright (C) 2005-2015 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> *
View file
x264-snapshot-20141218-2245.tar.bz2/common/set.h -> x264-snapshot-20150804-2245.tar.bz2/common/set.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * set.h: quantization init ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Laurent Aimar <fenrir@via.ecp.fr>
View file
x264-snapshot-20141218-2245.tar.bz2/common/threadpool.c -> x264-snapshot-20150804-2245.tar.bz2/common/threadpool.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * threadpool.c: thread pooling ***************************************************************************** - * Copyright (C) 2010-2014 x264 project + * Copyright (C) 2010-2015 x264 project * * Authors: Steven Walters <kemuri9@gmail.com> *
View file
x264-snapshot-20141218-2245.tar.bz2/common/threadpool.h -> x264-snapshot-20150804-2245.tar.bz2/common/threadpool.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * threadpool.h: thread pooling ***************************************************************************** - * Copyright (C) 2010-2014 x264 project + * Copyright (C) 2010-2015 x264 project * * Authors: Steven Walters <kemuri9@gmail.com> *
View file
x264-snapshot-20141218-2245.tar.bz2/common/vlc.c -> x264-snapshot-20150804-2245.tar.bz2/common/vlc.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * vlc.c : vlc tables ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Fiona Glaser <fiona@x264.com>
View file
x264-snapshot-20141218-2245.tar.bz2/common/win32thread.c -> x264-snapshot-20150804-2245.tar.bz2/common/win32thread.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * win32thread.c: windows threading ***************************************************************************** - * Copyright (C) 2010-2014 x264 project + * Copyright (C) 2010-2015 x264 project * * Authors: Steven Walters <kemuri9@gmail.com> * Pegasys Inc. <http://www.pegasys-inc.com> @@ -138,7 +138,7 @@ if( !win32_cond ) return -1; cond->ptr = win32_cond; - win32_cond->semaphore = CreateSemaphore( NULL, 0, 0x7fffffff, NULL ); + win32_cond->semaphore = CreateSemaphoreW( NULL, 0, 0x7fffffff, NULL ); if( !win32_cond->semaphore ) return -1; @@ -147,7 +147,7 @@ if( x264_pthread_mutex_init( &win32_cond->mtx_broadcast, NULL ) ) return -1; - win32_cond->waiters_done = CreateEvent( NULL, FALSE, FALSE, NULL ); + win32_cond->waiters_done = CreateEventW( NULL, FALSE, FALSE, NULL ); if( !win32_cond->waiters_done ) return -1;
View file
x264-snapshot-20141218-2245.tar.bz2/common/win32thread.h -> x264-snapshot-20150804-2245.tar.bz2/common/win32thread.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * win32thread.h: windows threading ***************************************************************************** - * Copyright (C) 2010-2014 x264 project + * Copyright (C) 2010-2015 x264 project * * Authors: Steven Walters <kemuri9@gmail.com> *
View file
x264-snapshot-20141218-2245.tar.bz2/common/x86/bitstream-a.asm -> x264-snapshot-20150804-2245.tar.bz2/common/x86/bitstream-a.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* bitstream-a.asm: x86 bitstream functions ;***************************************************************************** -;* Copyright (C) 2010-2014 x264 project +;* Copyright (C) 2010-2015 x264 project ;* ;* Authors: Fiona Glaser <fiona@x264.com> ;* Henrik Gramner <henrik@gramner.com>
View file
x264-snapshot-20141218-2245.tar.bz2/common/x86/cabac-a.asm -> x264-snapshot-20150804-2245.tar.bz2/common/x86/cabac-a.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* cabac-a.asm: x86 cabac ;***************************************************************************** -;* Copyright (C) 2008-2014 x264 project +;* Copyright (C) 2008-2015 x264 project ;* ;* Authors: Loren Merritt <lorenm@u.washington.edu> ;* Fiona Glaser <fiona@x264.com>
View file
x264-snapshot-20141218-2245.tar.bz2/common/x86/const-a.asm -> x264-snapshot-20150804-2245.tar.bz2/common/x86/const-a.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* const-a.asm: x86 global constants ;***************************************************************************** -;* Copyright (C) 2010-2014 x264 project +;* Copyright (C) 2010-2015 x264 project ;* ;* Authors: Loren Merritt <lorenm@u.washington.edu> ;* Fiona Glaser <fiona@x264.com>
View file
x264-snapshot-20141218-2245.tar.bz2/common/x86/cpu-a.asm -> x264-snapshot-20150804-2245.tar.bz2/common/x86/cpu-a.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* cpu-a.asm: x86 cpu utilities ;***************************************************************************** -;* Copyright (C) 2003-2014 x264 project +;* Copyright (C) 2003-2015 x264 project ;* ;* Authors: Laurent Aimar <fenrir@via.ecp.fr> ;* Loren Merritt <lorenm@u.washington.edu> @@ -145,53 +145,3 @@ cglobal cpu_sfence sfence ret - -cextern intel_cpu_indicator_init - -;----------------------------------------------------------------------------- -; void safe_intel_cpu_indicator_init( void ); -;----------------------------------------------------------------------------- -cglobal safe_intel_cpu_indicator_init - push r0 - push r1 - push r2 - push r3 - push r4 - push r5 - push r6 -%if ARCH_X86_64 - push r7 - push r8 - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 -%endif - push rbp - mov rbp, rsp -%if WIN64 - sub rsp, 32 ; shadow space -%endif - and rsp, ~31 - call intel_cpu_indicator_init - leave -%if ARCH_X86_64 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop r8 - pop r7 -%endif - pop r6 - pop r5 - pop r4 - pop r3 - pop r2 - pop r1 - pop r0 - ret
View file
x264-snapshot-20141218-2245.tar.bz2/common/x86/dct-32.asm -> x264-snapshot-20150804-2245.tar.bz2/common/x86/dct-32.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* dct-32.asm: x86_32 transform and zigzag ;***************************************************************************** -;* Copyright (C) 2003-2014 x264 project +;* Copyright (C) 2003-2015 x264 project ;* ;* Authors: Loren Merritt <lorenm@u.washington.edu> ;* Holger Lubitz <holger@lubitz.org>
View file
x264-snapshot-20141218-2245.tar.bz2/common/x86/dct-64.asm -> x264-snapshot-20150804-2245.tar.bz2/common/x86/dct-64.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* dct-64.asm: x86_64 transform and zigzag ;***************************************************************************** -;* Copyright (C) 2003-2014 x264 project +;* Copyright (C) 2003-2015 x264 project ;* ;* Authors: Loren Merritt <lorenm@u.washington.edu> ;* Holger Lubitz <holger@lubitz.org>
View file
x264-snapshot-20141218-2245.tar.bz2/common/x86/dct-a.asm -> x264-snapshot-20150804-2245.tar.bz2/common/x86/dct-a.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* dct-a.asm: x86 transform and zigzag ;***************************************************************************** -;* Copyright (C) 2003-2014 x264 project +;* Copyright (C) 2003-2015 x264 project ;* ;* Authors: Holger Lubitz <holger@lubitz.org> ;* Loren Merritt <lorenm@u.washington.edu>
View file
x264-snapshot-20141218-2245.tar.bz2/common/x86/dct.h -> x264-snapshot-20150804-2245.tar.bz2/common/x86/dct.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * dct.h: x86 transform and zigzag ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Laurent Aimar <fenrir@via.ecp.fr>
View file
x264-snapshot-20141218-2245.tar.bz2/common/x86/deblock-a.asm -> x264-snapshot-20150804-2245.tar.bz2/common/x86/deblock-a.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* deblock-a.asm: x86 deblocking ;***************************************************************************** -;* Copyright (C) 2005-2014 x264 project +;* Copyright (C) 2005-2015 x264 project ;* ;* Authors: Loren Merritt <lorenm@u.washington.edu> ;* Fiona Glaser <fiona@x264.com>
View file
x264-snapshot-20141218-2245.tar.bz2/common/x86/mc-a.asm -> x264-snapshot-20150804-2245.tar.bz2/common/x86/mc-a.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* mc-a.asm: x86 motion compensation ;***************************************************************************** -;* Copyright (C) 2003-2014 x264 project +;* Copyright (C) 2003-2015 x264 project ;* ;* Authors: Loren Merritt <lorenm@u.washington.edu> ;* Fiona Glaser <fiona@x264.com>
View file
x264-snapshot-20141218-2245.tar.bz2/common/x86/mc-a2.asm -> x264-snapshot-20150804-2245.tar.bz2/common/x86/mc-a2.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* mc-a2.asm: x86 motion compensation ;***************************************************************************** -;* Copyright (C) 2005-2014 x264 project +;* Copyright (C) 2005-2015 x264 project ;* ;* Authors: Loren Merritt <lorenm@u.washington.edu> ;* Fiona Glaser <fiona@x264.com> @@ -40,6 +40,7 @@ deinterleave_shuf: times 2 db 0,2,4,6,8,10,12,14,1,3,5,7,9,11,13,15 %if HIGH_BIT_DEPTH +copy_swap_shuf: times 2 db 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13 v210_mask: times 4 dq 0xc00ffc003ff003ff v210_luma_shuf: times 2 db 1,2,4,5,6,7,9,10,12,13,14,15,12,13,14,15 v210_chroma_shuf: times 2 db 0,1,2,3,5,6,8,9,10,11,13,14,10,11,13,14 @@ -50,6 +51,7 @@ deinterleave_shuf32a: SHUFFLE_MASK_W 0,2,4,6,8,10,12,14 deinterleave_shuf32b: SHUFFLE_MASK_W 1,3,5,7,9,11,13,15 %else +copy_swap_shuf: times 2 db 1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14 deinterleave_rgb_shuf: db 0,3,6,9,1,4,7,10,2,5,8,11,-1,-1,-1,-1 db 0,4,8,12,1,5,9,13,2,6,10,14,-1,-1,-1,-1 @@ -913,64 +915,90 @@ %undef sfence %endif ; !HIGH_BIT_DEPTH +%macro PREFETCHNT_ITER 2 ; src, bytes/iteration + %assign %%i 4*(%2) ; prefetch 4 iterations ahead. is this optimal? 
+ %rep (%2+63) / 64 ; assume 64 byte cache lines + prefetchnta [%1+%%i] + %assign %%i %%i + 64 + %endrep +%endmacro + ;----------------------------------------------------------------------------- -; void plane_copy_core( pixel *dst, intptr_t i_dst, -; pixel *src, intptr_t i_src, int w, int h ) +; void plane_copy(_swap)_core( pixel *dst, intptr_t i_dst, +; pixel *src, intptr_t i_src, int w, int h ) ;----------------------------------------------------------------------------- -; assumes i_dst and w are multiples of 16, and i_dst>w -INIT_MMX -cglobal plane_copy_core_mmx2, 6,7 - FIX_STRIDES r1, r3, r4d -%if HIGH_BIT_DEPTH == 0 +; assumes i_dst and w are multiples of mmsize, and i_dst>w +%macro PLANE_COPY_CORE 1 ; swap +%if %1 +cglobal plane_copy_swap_core, 6,7 + mova m4, [copy_swap_shuf] +%else +cglobal plane_copy_core, 6,7 +%endif + FIX_STRIDES r1, r3 +%if %1 && HIGH_BIT_DEPTH + shl r4d, 2 +%elif %1 || HIGH_BIT_DEPTH + add r4d, r4d +%else movsxdifnidn r4, r4d %endif - sub r1, r4 - sub r3, r4 + add r0, r4 + add r2, r4 + neg r4 .loopy: - lea r6d, [r4-63] + lea r6, [r4+4*mmsize] +%if %1 + test r6d, r6d + jg .skip +%endif .loopx: - prefetchnta [r2+256] - movq m0, [r2 ] - movq m1, [r2+ 8] - movntq [r0 ], m0 - movntq [r0+ 8], m1 - movq m2, [r2+16] - movq m3, [r2+24] - movntq [r0+16], m2 - movntq [r0+24], m3 - movq m4, [r2+32] - movq m5, [r2+40] - movntq [r0+32], m4 - movntq [r0+40], m5 - movq m6, [r2+48] - movq m7, [r2+56] - movntq [r0+48], m6 - movntq [r0+56], m7 - add r2, 64 - add r0, 64 - sub r6d, 64 - jg .loopx - prefetchnta [r2+256] - add r6d, 63 - jle .end16 -.loop16: - movq m0, [r2 ] - movq m1, [r2+8] - movntq [r0 ], m0 - movntq [r0+8], m1 - add r2, 16 - add r0, 16 - sub r6d, 16 - jg .loop16 -.end16: + PREFETCHNT_ITER r2+r6, 4*mmsize + movu m0, [r2+r6-4*mmsize] + movu m1, [r2+r6-3*mmsize] + movu m2, [r2+r6-2*mmsize] + movu m3, [r2+r6-1*mmsize] +%if %1 + pshufb m0, m4 + pshufb m1, m4 + pshufb m2, m4 + pshufb m3, m4 +%endif + movnta [r0+r6-4*mmsize], m0 + movnta 
[r0+r6-3*mmsize], m1 + movnta [r0+r6-2*mmsize], m2 + movnta [r0+r6-1*mmsize], m3 + add r6, 4*mmsize + jle .loopx +.skip: + PREFETCHNT_ITER r2+r6, 4*mmsize + sub r6, 4*mmsize + jz .end +.loop_end: + movu m0, [r2+r6] +%if %1 + pshufb m0, m4 +%endif + movnta [r0+r6], m0 + add r6, mmsize + jl .loop_end +.end: add r0, r1 add r2, r3 - dec r5d + dec r5d jg .loopy sfence - emms RET +%endmacro +INIT_XMM sse +PLANE_COPY_CORE 0 +INIT_XMM ssse3 +PLANE_COPY_CORE 1 +INIT_YMM avx +PLANE_COPY_CORE 0 +INIT_YMM avx2 +PLANE_COPY_CORE 1 %macro INTERLEAVE 4-5 ; dst, srcu, srcv, is_aligned, nt_hint %if HIGH_BIT_DEPTH @@ -2136,7 +2164,7 @@ INIT_YMM avx MBTREE_AVX 8 -INIT_YMM avx2,fma3 +INIT_YMM avx2 MBTREE_AVX 7 %macro MBTREE_PROPAGATE_LIST 0
View file
x264-snapshot-20141218-2245.tar.bz2/common/x86/mc-c.c -> x264-snapshot-20150804-2245.tar.bz2/common/x86/mc-c.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * mc-c.c: x86 motion compensation ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu> @@ -90,8 +90,12 @@ void x264_prefetch_fenc_420_mmx2( pixel *, intptr_t, pixel *, intptr_t, int ); void x264_prefetch_fenc_422_mmx2( pixel *, intptr_t, pixel *, intptr_t, int ); void x264_prefetch_ref_mmx2( pixel *, intptr_t, int ); -void x264_plane_copy_core_mmx2( pixel *, intptr_t, pixel *, intptr_t, int w, int h ); +void x264_plane_copy_core_sse( pixel *, intptr_t, pixel *, intptr_t, int w, int h ); +void x264_plane_copy_core_avx( pixel *, intptr_t, pixel *, intptr_t, int w, int h ); void x264_plane_copy_c( pixel *, intptr_t, pixel *, intptr_t, int w, int h ); +void x264_plane_copy_swap_core_ssse3( pixel *, intptr_t, pixel *, intptr_t, int w, int h ); +void x264_plane_copy_swap_core_avx2 ( pixel *, intptr_t, pixel *, intptr_t, int w, int h ); +void x264_plane_copy_swap_c( pixel *, intptr_t, pixel *, intptr_t, int w, int h ); void x264_plane_copy_interleave_core_mmx2( pixel *dst, intptr_t i_dst, pixel *srcu, intptr_t i_srcu, pixel *srcv, intptr_t i_srcv, int w, int h ); @@ -167,8 +171,8 @@ uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len ); void x264_mbtree_propagate_cost_fma4( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs, uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len ); -void x264_mbtree_propagate_cost_avx2_fma3( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs, - uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len ); +void x264_mbtree_propagate_cost_avx2( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs, + uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len ); 
#define MC_CHROMA(cpu)\ void x264_mc_chroma_##cpu( pixel *dstu, pixel *dstv, intptr_t i_dst, pixel *src, intptr_t i_src,\ @@ -363,9 +367,6 @@ } #endif // !HIGH_BIT_DEPTH -static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1}; -static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2}; - #define MC_LUMA(name,instr1,instr2)\ static void mc_luma_##name( pixel *dst, intptr_t i_dst_stride,\ pixel *src[4], intptr_t i_src_stride,\ @@ -374,10 +375,10 @@ {\ int qpel_idx = ((mvy&3)<<2) + (mvx&3);\ int offset = (mvy>>2)*i_src_stride + (mvx>>2);\ - pixel *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;\ + pixel *src1 = src[x264_hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;\ if( qpel_idx & 5 ) /* qpel interpolation needed */\ {\ - pixel *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);\ + pixel *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);\ x264_pixel_avg_wtab_##instr1[i_width>>2](\ dst, i_dst_stride, src1, i_src_stride,\ src2, i_height );\ @@ -412,10 +413,10 @@ {\ int qpel_idx = ((mvy&3)<<2) + (mvx&3);\ int offset = (mvy>>2)*i_src_stride + (mvx>>2);\ - pixel *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;\ + pixel *src1 = src[x264_hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;\ if( qpel_idx & 5 ) /* qpel interpolation needed */\ {\ - pixel *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);\ + pixel *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);\ x264_pixel_avg_wtab_##name[i_width>>2](\ dst, *i_dst_stride, src1, i_src_stride,\ src2, i_height );\ @@ -492,39 +493,94 @@ #endif #endif // HIGH_BIT_DEPTH -static void x264_plane_copy_mmx2( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h ) -{ - int c_w = 16/sizeof(pixel) - 1; - if( w < 256 ) { // tiny resolutions don't want non-temporal hints. dunno the exact threshold. 
- x264_plane_copy_c( dst, i_dst, src, i_src, w, h ); - } else if( !(w&c_w) ) { - x264_plane_copy_core_mmx2( dst, i_dst, src, i_src, w, h ); - } else if( i_src > 0 ) { - // have to use plain memcpy on the last line (in memory order) to avoid overreading src - x264_plane_copy_core_mmx2( dst, i_dst, src, i_src, (w+c_w)&~c_w, h-1 ); - memcpy( dst+i_dst*(h-1), src+i_src*(h-1), w*sizeof(pixel) ); - } else { - memcpy( dst, src, w*sizeof(pixel) ); - x264_plane_copy_core_mmx2( dst+i_dst, i_dst, src+i_src, i_src, (w+c_w)&~c_w, h-1 ); - } +#define PLANE_COPY(align, cpu)\ +static void x264_plane_copy_##cpu( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h )\ +{\ + int c_w = (align) / sizeof(pixel) - 1;\ + if( w < 256 ) /* tiny resolutions don't want non-temporal hints. dunno the exact threshold. */\ + x264_plane_copy_c( dst, i_dst, src, i_src, w, h );\ + else if( !(w&c_w) )\ + x264_plane_copy_core_##cpu( dst, i_dst, src, i_src, w, h );\ + else\ + {\ + if( --h > 0 )\ + {\ + if( i_src > 0 )\ + {\ + x264_plane_copy_core_##cpu( dst, i_dst, src, i_src, (w+c_w)&~c_w, h );\ + dst += i_dst * h;\ + src += i_src * h;\ + }\ + else\ + x264_plane_copy_core_##cpu( dst+i_dst, i_dst, src+i_src, i_src, (w+c_w)&~c_w, h );\ + }\ + /* use plain memcpy on the last line (in memory order) to avoid overreading src. 
*/\ + memcpy( dst, src, w*sizeof(pixel) );\ + }\ +} + +PLANE_COPY(16, sse) +PLANE_COPY(32, avx) + +#define PLANE_COPY_SWAP(align, cpu)\ +static void x264_plane_copy_swap_##cpu( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h )\ +{\ + int c_w = (align>>1) / sizeof(pixel) - 1;\ + if( !(w&c_w) )\ + x264_plane_copy_swap_core_##cpu( dst, i_dst, src, i_src, w, h );\ + else if( w > c_w )\ + {\ + if( --h > 0 )\ + {\ + if( i_src > 0 )\ + {\ + x264_plane_copy_swap_core_##cpu( dst, i_dst, src, i_src, (w+c_w)&~c_w, h );\ + dst += i_dst * h;\ + src += i_src * h;\ + }\ + else\ + x264_plane_copy_swap_core_##cpu( dst+i_dst, i_dst, src+i_src, i_src, (w+c_w)&~c_w, h );\ + }\ + x264_plane_copy_swap_core_##cpu( dst, 0, src, 0, w&~c_w, 1 );\ + for( int x = 2*(w&~c_w); x < 2*w; x += 2 )\ + {\ + dst[x] = src[x+1];\ + dst[x+1] = src[x];\ + }\ + }\ + else\ + x264_plane_copy_swap_c( dst, i_dst, src, i_src, w, h );\ } +PLANE_COPY_SWAP(16, ssse3) +PLANE_COPY_SWAP(32, avx2) + #define PLANE_INTERLEAVE(cpu) \ static void x264_plane_copy_interleave_##cpu( pixel *dst, intptr_t i_dst,\ pixel *srcu, intptr_t i_srcu,\ pixel *srcv, intptr_t i_srcv, int w, int h )\ {\ - if( !(w&15) ) {\ + int c_w = 16 / sizeof(pixel) - 1;\ + if( !(w&c_w) )\ x264_plane_copy_interleave_core_##cpu( dst, i_dst, srcu, i_srcu, srcv, i_srcv, w, h );\ - } else if( w < 16 || (i_srcu ^ i_srcv) ) {\ - x264_plane_copy_interleave_c( dst, i_dst, srcu, i_srcu, srcv, i_srcv, w, h );\ - } else if( i_srcu > 0 ) {\ - x264_plane_copy_interleave_core_##cpu( dst, i_dst, srcu, i_srcu, srcv, i_srcv, (w+15)&~15, h-1 );\ - x264_plane_copy_interleave_c( dst+i_dst*(h-1), 0, srcu+i_srcu*(h-1), 0, srcv+i_srcv*(h-1), 0, w, 1 );\ - } else {\ + else if( w > c_w && (i_srcu ^ i_srcv) >= 0 ) /* only works correctly for strides with identical signs */\ + {\ + if( --h > 0 )\ + {\ + if( i_srcu > 0 )\ + {\ + x264_plane_copy_interleave_core_##cpu( dst, i_dst, srcu, i_srcu, srcv, i_srcv, (w+c_w)&~c_w, h );\ + dst += i_dst * h;\ + srcu += 
i_srcu * h;\ + srcv += i_srcv * h;\ + }\ + else\ + x264_plane_copy_interleave_core_##cpu( dst+i_dst, i_dst, srcu+i_srcu, i_srcu, srcv+i_srcv, i_srcv, (w+c_w)&~c_w, h );\ + }\ x264_plane_copy_interleave_c( dst, 0, srcu, 0, srcv, 0, w, 1 );\ - x264_plane_copy_interleave_core_##cpu( dst+i_dst, i_dst, srcu+i_srcu, i_srcu, srcv+i_srcv, i_srcv, (w+15)&~15, h-1 );\ }\ + else\ + x264_plane_copy_interleave_c( dst, i_dst, srcu, i_srcu, srcv, i_srcv, w, h );\ } PLANE_INTERLEAVE(mmx2) @@ -666,7 +722,6 @@ pf->prefetch_fenc_422 = x264_prefetch_fenc_422_mmx2; pf->prefetch_ref = x264_prefetch_ref_mmx2; - pf->plane_copy = x264_plane_copy_mmx2; pf->plane_copy_interleave = x264_plane_copy_interleave_mmx2; pf->store_interleave_chroma = x264_store_interleave_chroma_mmx2; @@ -695,6 +750,7 @@ { pf->memcpy_aligned = x264_memcpy_aligned_sse; pf->memzero_aligned = x264_memzero_aligned_sse;
View file
x264-snapshot-20141218-2245.tar.bz2/common/x86/mc.h -> x264-snapshot-20150804-2245.tar.bz2/common/x86/mc.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * mc.h: x86 motion compensation ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Laurent Aimar <fenrir@via.ecp.fr>
View file
x264-snapshot-20141218-2245.tar.bz2/common/x86/pixel-32.asm -> x264-snapshot-20150804-2245.tar.bz2/common/x86/pixel-32.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* pixel-32.asm: x86_32 pixel metrics ;***************************************************************************** -;* Copyright (C) 2003-2014 x264 project +;* Copyright (C) 2003-2015 x264 project ;* ;* Authors: Loren Merritt <lorenm@u.washington.edu> ;* Laurent Aimar <fenrir@via.ecp.fr>
View file
x264-snapshot-20141218-2245.tar.bz2/common/x86/pixel-a.asm -> x264-snapshot-20150804-2245.tar.bz2/common/x86/pixel-a.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* pixel.asm: x86 pixel metrics ;***************************************************************************** -;* Copyright (C) 2003-2014 x264 project +;* Copyright (C) 2003-2015 x264 project ;* ;* Authors: Loren Merritt <lorenm@u.washington.edu> ;* Holger Lubitz <holger@lubitz.org>
View file
x264-snapshot-20141218-2245.tar.bz2/common/x86/pixel.h -> x264-snapshot-20150804-2245.tar.bz2/common/x86/pixel.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * pixel.h: x86 pixel metrics ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu>
View file
x264-snapshot-20141218-2245.tar.bz2/common/x86/predict-a.asm -> x264-snapshot-20150804-2245.tar.bz2/common/x86/predict-a.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* predict-a.asm: x86 intra prediction ;***************************************************************************** -;* Copyright (C) 2005-2014 x264 project +;* Copyright (C) 2005-2015 x264 project ;* ;* Authors: Loren Merritt <lorenm@u.washington.edu> ;* Holger Lubitz <holger@lubitz.org>
View file
x264-snapshot-20141218-2245.tar.bz2/common/x86/predict-c.c -> x264-snapshot-20150804-2245.tar.bz2/common/x86/predict-c.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * predict-c.c: intra prediction ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu>
View file
x264-snapshot-20141218-2245.tar.bz2/common/x86/predict.h -> x264-snapshot-20150804-2245.tar.bz2/common/x86/predict.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * predict.h: x86 intra prediction ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu>
View file
x264-snapshot-20141218-2245.tar.bz2/common/x86/quant-a.asm -> x264-snapshot-20150804-2245.tar.bz2/common/x86/quant-a.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* quant-a.asm: x86 quantization and level-run ;***************************************************************************** -;* Copyright (C) 2005-2014 x264 project +;* Copyright (C) 2005-2015 x264 project ;* ;* Authors: Loren Merritt <lorenm@u.washington.edu> ;* Fiona Glaser <fiona@x264.com>
View file
x264-snapshot-20141218-2245.tar.bz2/common/x86/quant.h -> x264-snapshot-20150804-2245.tar.bz2/common/x86/quant.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * quant.h: x86 quantization and level-run ***************************************************************************** - * Copyright (C) 2005-2014 x264 project + * Copyright (C) 2005-2015 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Fiona Glaser <fiona@x264.com>
View file
x264-snapshot-20141218-2245.tar.bz2/common/x86/sad-a.asm -> x264-snapshot-20150804-2245.tar.bz2/common/x86/sad-a.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* sad-a.asm: x86 sad functions ;***************************************************************************** -;* Copyright (C) 2003-2014 x264 project +;* Copyright (C) 2003-2015 x264 project ;* ;* Authors: Loren Merritt <lorenm@u.washington.edu> ;* Fiona Glaser <fiona@x264.com>
View file
x264-snapshot-20141218-2245.tar.bz2/common/x86/sad16-a.asm -> x264-snapshot-20150804-2245.tar.bz2/common/x86/sad16-a.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* sad16-a.asm: x86 high depth sad functions ;***************************************************************************** -;* Copyright (C) 2010-2014 x264 project +;* Copyright (C) 2010-2015 x264 project ;* ;* Authors: Oskar Arvidsson <oskar@irock.se> ;* Henrik Gramner <henrik@gramner.com>
View file
x264-snapshot-20141218-2245.tar.bz2/common/x86/trellis-64.asm -> x264-snapshot-20150804-2245.tar.bz2/common/x86/trellis-64.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* trellis-64.asm: x86_64 trellis quantization ;***************************************************************************** -;* Copyright (C) 2012-2014 x264 project +;* Copyright (C) 2012-2015 x264 project ;* ;* Authors: Loren Merritt <lorenm@u.washington.edu> ;*
View file
x264-snapshot-20141218-2245.tar.bz2/common/x86/util.h -> x264-snapshot-20150804-2245.tar.bz2/common/x86/util.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * util.h: x86 inline asm ***************************************************************************** - * Copyright (C) 2008-2014 x264 project + * Copyright (C) 2008-2015 x264 project * * Authors: Fiona Glaser <fiona@x264.com> * Loren Merritt <lorenm@u.washington.edu>
View file
x264-snapshot-20141218-2245.tar.bz2/common/x86/x86inc.asm -> x264-snapshot-20150804-2245.tar.bz2/common/x86/x86inc.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* x86inc.asm: x264asm abstraction layer ;***************************************************************************** -;* Copyright (C) 2005-2014 x264 project +;* Copyright (C) 2005-2015 x264 project ;* ;* Authors: Loren Merritt <lorenm@u.washington.edu> ;* Anton Mitrofanov <BugMaster@narod.ru> @@ -64,6 +64,15 @@ %endif %endif +%define FORMAT_ELF 0 +%ifidn __OUTPUT_FORMAT__,elf + %define FORMAT_ELF 1 +%elifidn __OUTPUT_FORMAT__,elf32 + %define FORMAT_ELF 1 +%elifidn __OUTPUT_FORMAT__,elf64 + %define FORMAT_ELF 1 +%endif + %ifdef PREFIX %define mangle(x) _ %+ x %else @@ -74,10 +83,6 @@ SECTION .rodata align=%1 %endmacro -%macro SECTION_TEXT 0-1 16 - SECTION .text align=%1 -%endmacro - %if WIN64 %define PIC %elif ARCH_X86_64 == 0 @@ -90,6 +95,10 @@ default rel %endif +%ifdef __NASM_VER__ + %use smartalign +%endif + ; Macros to eliminate most code duplication between x86_32 and x86_64: ; Currently this works only for leaf functions which load all their arguments ; into registers at the start, and make no other use of the stack. Luckily that @@ -675,7 +684,7 @@ CAT_XDEFINE cglobaled_, %2, 1 %endif %xdefine current_function %2 - %ifidn __OUTPUT_FORMAT__,elf + %if FORMAT_ELF global %2:function %%VISIBILITY %else global %2 @@ -701,14 +710,16 @@ ; like cextern, but without the prefix %macro cextern_naked 1 - %xdefine %1 mangle(%1) + %ifdef PREFIX + %xdefine %1 mangle(%1) + %endif CAT_XDEFINE cglobaled_, %1, 1 extern %1 %endmacro %macro const 1-2+ %xdefine %1 mangle(private_prefix %+ _ %+ %1) - %ifidn __OUTPUT_FORMAT__,elf + %if FORMAT_ELF global %1:data hidden %else global %1 @@ -716,10 +727,9 @@ %1: %2 %endmacro -; This is needed for ELF, otherwise the GNU linker assumes the stack is -; executable by default. 
-%ifidn __OUTPUT_FORMAT__,elf -SECTION .note.GNU-stack noalloc noexec nowrite progbits +; This is needed for ELF, otherwise the GNU linker assumes the stack is executable by default. +%if FORMAT_ELF + [SECTION .note.GNU-stack noalloc noexec nowrite progbits] %endif ; cpuflags @@ -738,8 +748,8 @@ %assign cpuflags_avx (1<<11)| cpuflags_sse42 %assign cpuflags_xop (1<<12)| cpuflags_avx %assign cpuflags_fma4 (1<<13)| cpuflags_avx -%assign cpuflags_avx2 (1<<14)| cpuflags_avx -%assign cpuflags_fma3 (1<<15)| cpuflags_avx +%assign cpuflags_fma3 (1<<14)| cpuflags_avx +%assign cpuflags_avx2 (1<<15)| cpuflags_fma3 %assign cpuflags_cache32 (1<<16) %assign cpuflags_cache64 (1<<17) @@ -789,9 +799,17 @@ %endif %if ARCH_X86_64 || cpuflag(sse2) - CPU amdnop + %ifdef __NASM_VER__ + ALIGNMODE k8 + %else + CPU amdnop + %endif %else - CPU basicnop + %ifdef __NASM_VER__ + ALIGNMODE nop + %else + CPU basicnop + %endif %endif %endmacro @@ -868,7 +886,7 @@ %assign %%i 0 %rep num_mmregs CAT_XDEFINE m, %%i, ymm %+ %%i - CAT_XDEFINE nymm, %%i, %%i + CAT_XDEFINE nnymm, %%i, %%i %assign %%i %%i+1 %endrep INIT_CPUFLAGS %1 @@ -1070,6 +1088,8 @@ %ifdef cpuname %if notcpuflag(%2) %error use of ``%1'' %2 instruction in cpuname function: current_function + %elif cpuflags_%2 < cpuflags_sse && notcpuflag(sse2) && __sizeofreg > 8 + %error use of ``%1'' sse2 instruction in cpuname function: current_function %endif %endif %endif @@ -1206,7 +1226,7 @@ AVX_INSTR minss, sse, 1, 0, 1 AVX_INSTR movapd, sse2 AVX_INSTR movaps, sse -AVX_INSTR movd +AVX_INSTR movd, mmx AVX_INSTR movddup, sse3 AVX_INSTR movdqa, sse2 AVX_INSTR movdqu, sse2 @@ -1222,7 +1242,7 @@ AVX_INSTR movntdqa, sse4 AVX_INSTR movntpd, sse2 AVX_INSTR movntps, sse -AVX_INSTR movq +AVX_INSTR movq, mmx AVX_INSTR movsd, sse2, 1, 0, 0 AVX_INSTR movshdup, sse3 AVX_INSTR movsldup, sse3 @@ -1468,13 +1488,15 @@ FMA4_INSTR fnmsubsd, fnmsub132sd, fnmsub213sd, fnmsub231sd FMA4_INSTR fnmsubss, fnmsub132ss, fnmsub213ss, fnmsub231ss -; workaround: vpbroadcastq is 
broken in x86_32 due to a yasm bug -%if ARCH_X86_64 == 0 -%macro vpbroadcastq 2 -%if sizeof%1 == 16 - movddup %1, %2 -%else - vbroadcastsd %1, %2 -%endif -%endmacro +; workaround: vpbroadcastq is broken in x86_32 due to a yasm bug (fixed in 1.3.0) +%ifdef __YASM_VER__ + %if __YASM_VERSION_ID__ < 0x01030000 && ARCH_X86_64 == 0 + %macro vpbroadcastq 2 + %if sizeof%1 == 16 + movddup %1, %2 + %else + vbroadcastsd %1, %2 + %endif + %endmacro + %endif %endif
View file
x264-snapshot-20141218-2245.tar.bz2/common/x86/x86util.asm -> x264-snapshot-20150804-2245.tar.bz2/common/x86/x86util.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* x86util.asm: x86 utility macros ;***************************************************************************** -;* Copyright (C) 2008-2014 x264 project +;* Copyright (C) 2008-2015 x264 project ;* ;* Authors: Holger Lubitz <holger@lubitz.org> ;* Loren Merritt <lorenm@u.washington.edu>
View file
x264-snapshot-20141218-2245.tar.bz2/config.guess -> x264-snapshot-20150804-2245.tar.bz2/config.guess
Changed
@@ -979,6 +979,9 @@ ppc64:Linux:*:*) echo powerpc64-unknown-linux-gnu exit ;; + ppc64le:Linux:*:*) + echo powerpc64le-unknown-linux-gnu + exit ;; ppc:Linux:*:*) echo powerpc-unknown-linux-gnu exit ;;
View file
x264-snapshot-20141218-2245.tar.bz2/configure -> x264-snapshot-20150804-2245.tar.bz2/configure
Changed
@@ -77,7 +77,7 @@ # several non gcc compilers issue an incredibly large number of warnings on any warning level, # suppress them by disabling all warnings rather than having to use #pragmas to disable most of them for arg in $*; do - [ $arg = -ffast-math ] && arg= + [ "$arg" = -ffast-math ] && arg= [[ "$arg" = -falign-loops* ]] && arg= [ "$arg" = -fno-tree-vectorize ] && arg= [ "$arg" = -Wshadow ] && arg= @@ -105,10 +105,10 @@ cl_ldflags() { for arg in $*; do arg=${arg/LIBPATH/libpath} - [ ${arg#-libpath:} == $arg -a ${arg#-l} != $arg ] && arg=${arg#-l}.lib - [ ${arg#-L} != $arg ] && arg=-libpath:${arg#-L} - [ $arg = -Wl,--large-address-aware ] && arg=-largeaddressaware - [ $arg = -s ] && arg= + [ "${arg#-libpath:}" == "$arg" -a "${arg#-l}" != "$arg" ] && arg=${arg#-l}.lib + [ "${arg#-L}" != "$arg" ] && arg=-libpath:${arg#-L} + [ "$arg" = -Wl,--large-address-aware ] && arg=-largeaddressaware + [ "$arg" = -s ] && arg= [ "$arg" = -Wl,-Bsymbolic ] && arg= [ "$arg" = -fno-tree-vectorize ] && arg= [ "$arg" = -Werror ] && arg= @@ -119,6 +119,7 @@ arg=${arg/pthreadGC/pthreadVC} [ "$arg" = avifil32.lib ] && arg=vfw32.lib [ "$arg" = gpac_static.lib ] && arg=libgpac_static.lib + [ "$arg" = x264.lib ] && arg=libx264.lib [ -n "$arg" ] && echo -n "$arg " done @@ -143,7 +144,9 @@ log_check "for $3 in $1"; fi rm -f conftest.c - [ -n "$1" ] && echo "#include <$1>" > conftest.c + for arg in $1; do + echo "#include <$arg>" >> conftest.c + done echo "int main (void) { $3 return 0; }" >> conftest.c if [ $compiler_style = MS ]; then cc_cmd="$CC conftest.c $(cc_cflags $CFLAGS $CHECK_CFLAGS $2) -link $(cl_ldflags $2 $LDFLAGSCLI $LDFLAGS)" @@ -172,7 +175,9 @@ cpp_check() { log_check "whether $3 is true" rm -f conftest.c - [ -n "$1" ] && echo "#include <$1>" > conftest.c + for arg in $1; do + echo "#include <$arg>" >> conftest.c + done echo -e "#if !($3) \n#error $4 \n#endif " >> conftest.c if [ $compiler_style = MS ]; then cpp_cmd="$CC conftest.c $(cc_cflags $CFLAGS $2) -P" @@ -256,6 
+261,48 @@ exit 1 } +configure_system_override() { + log_check "system libx264 configuration" + x264_config_path="$1/x264_config.h" + if [ -e "$x264_config_path" ]; then + res=$? + log_ok + arg="$(grep '#define X264_GPL ' $x264_config_path | sed -e 's/#define X264_GPL *//; s/ *$//')" + if [ -n "$arg" ]; then + [ "$arg" = 0 ] && arg="no" || arg="yes" + [ "$arg" != "$gpl" ] && die "Incompatible license with system libx264" + fi + arg="$(grep '#define X264_BIT_DEPTH ' $x264_config_path | sed -e 's/#define X264_BIT_DEPTH *//; s/ *$//')" + if [ -n "$arg" ]; then + if [ "$arg" != "$bit_depth" ]; then + echo "Override output bit depth with system libx264 configuration" + bit_depth="$arg" + fi + fi + arg="$(grep '#define X264_CHROMA_FORMAT ' $x264_config_path | sed -e 's/#define X264_CHROMA_FORMAT *//; s/ *$//')" + if [ -n "$arg" ]; then + [ "$arg" = 0 ] && arg="all" || arg="${arg#X264_CSP_I}" + if [ "$arg" != "$chroma_format" ]; then + echo "Override output chroma format with system libx264 configuration" + chroma_format="$arg" + fi + fi + arg="$(grep '#define X264_INTERLACED ' $x264_config_path | sed -e 's/#define X264_INTERLACED *//; s/ *$//')" + if [ -n "$arg" ]; then + [ "$arg" = 0 ] && arg="no" || arg="yes" + if [ "$arg" != "$interlaced" ]; then + echo "Override interlaced encoding support with system libx264 configuration" + interlaced="$arg" + fi + fi + else + res=$? 
+ log_fail + log_msg "Failed search path was: $x264_config_path" + fi + return $res +} + rm -f x264_config.h config.h config.mak config.log x264.pc x264.def conftest* SRCPATH="$(cd $(dirname $0); pwd)" @@ -311,7 +358,8 @@ # list of all preprocessor HAVE values we can define CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F SWSCALE \ - LAVF FFMS GPAC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL THP LSMASH X86_INLINE_ASM AS_FUNC" + LAVF FFMS GPAC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL THP LSMASH X86_INLINE_ASM AS_FUNC INTEL_DISPATCHER \ + MSA" # parse options @@ -458,6 +506,8 @@ host_vendor="${host%%-*}" host_os="${host#*-}" +trap 'rm -f conftest*' EXIT + # test for use of compilers that require specific handling cc_base=`basename "$CC"` QPRE="-" @@ -600,9 +650,9 @@ case $host_cpu in i*86) ARCH="X86" - AS="yasm" + AS="${AS-yasm}" AS_EXT=".asm" - ASFLAGS="$ASFLAGS -O2 -DARCH_X86_64=0 -I\$(SRCPATH)/common/x86/" + ASFLAGS="$ASFLAGS -DARCH_X86_64=0 -I\$(SRCPATH)/common/x86/" if [ $compiler = GNU ]; then if [[ "$asm" == auto && "$CFLAGS" != *-march* ]]; then CFLAGS="$CFLAGS -march=i686" @@ -629,39 +679,39 @@ stack_alignment=4 fi if [ "$SYS" = MACOSX ]; then - ASFLAGS="$ASFLAGS -f macho -DPREFIX" + ASFLAGS="$ASFLAGS -f macho32 -DPREFIX" elif [ "$SYS" = WINDOWS -o "$SYS" = CYGWIN ]; then ASFLAGS="$ASFLAGS -f win32 -DPREFIX" LDFLAGS="$LDFLAGS -Wl,--large-address-aware" [ $compiler = GNU ] && LDFLAGS="$LDFLAGS -Wl,--nxcompat -Wl,--dynamicbase" [ $compiler = GNU ] && RCFLAGS="--target=pe-i386 $RCFLAGS" else - ASFLAGS="$ASFLAGS -f elf" + ASFLAGS="$ASFLAGS -f elf32" fi ;; x86_64) ARCH="X86_64" - AS="yasm" + AS="${AS-yasm}" AS_EXT=".asm" ASFLAGS="$ASFLAGS -DARCH_X86_64=1 -I\$(SRCPATH)/common/x86/" [ $compiler = GNU ] && CFLAGS="-m64 $CFLAGS" && LDFLAGS="-m64 $LDFLAGS" if [ "$SYS" = MACOSX ]; then - ASFLAGS="$ASFLAGS -f macho64 -m amd64 -DPIC -DPREFIX" + ASFLAGS="$ASFLAGS -f macho64 -DPIC -DPREFIX" if cc_check '' 
"-arch x86_64"; then CFLAGS="$CFLAGS -arch x86_64" LDFLAGS="$LDFLAGS -arch x86_64" fi elif [ "$SYS" = WINDOWS -o "$SYS" = CYGWIN ]; then - ASFLAGS="$ASFLAGS -f win32 -m amd64" + ASFLAGS="$ASFLAGS -f win64" # only the GNU toolchain is inconsistent in prefixing function names with _ [ $compiler = GNU ] && cc_check "" "-S" && grep -q "_main:" conftest && ASFLAGS="$ASFLAGS -DPREFIX" [ $compiler = GNU ] && LDFLAGS="$LDFLAGS -Wl,--nxcompat -Wl,--dynamicbase" [ $compiler = GNU ] && RCFLAGS="--target=pe-x86-64 $RCFLAGS" else - ASFLAGS="$ASFLAGS -f elf -m amd64" + ASFLAGS="$ASFLAGS -f elf64" fi ;; - powerpc|powerpc64) + powerpc*) ARCH="PPC" if [ $asm = auto ] ; then define HAVE_ALTIVEC @@ -678,13 +728,15 @@ sparc) ARCH="SPARC" ;; - mips|mipsel|mips64|mips64el) + mips*) ARCH="MIPS" + AS="${AS-${CC}}" + AS_EXT=".c" ;; arm*) ARCH="ARM" if [ "$SYS" = MACOSX ] ; then - AS="${AS-extras/gas-preprocessor.pl $CC}" + AS="${AS-${SRCPATH}/tools/gas-preprocessor.pl -arch arm -- ${CC}}" ASFLAGS="$ASFLAGS -DPREFIX -DPIC" # apple's ld doesn't support movw/movt relocations at all # build for armv7 by default if ! echo $CFLAGS | grep -Eq '\-arch' ; then @@ -698,7 +750,7 @@
View file
x264-snapshot-20141218-2245.tar.bz2/doc/vui.txt -> x264-snapshot-20150804-2245.tar.bz2/doc/vui.txt
Changed
@@ -16,14 +16,14 @@ * How do I use it? You can derive the SAR of an image from the width, height and the display aspect ratio (DAR) of the image as follows: - + SAR_x DAR_x * height ----- = -------------- SAR_y DAR_y * width - + for example: width x height = 704x576, DAR = 4:3 ==> SAR = 2304:2112 or 12:11 - + Please note that if your material is a digitized analog signal, you should not use this equation to calculate the SAR. Refer to the manual of your digitizing equipment or this link instead. @@ -36,7 +36,7 @@ correction of aspect ratios, and there are just few exceptions. You should even use it, if the SAR of your material is 1:1, as the default of x264 is "SAR not defined". - + 2. Overscan ------------ @@ -49,7 +49,7 @@ analog signal. Instead it refers to the "overscan" process on a display that shows only a part of the image. What that part is depends on the display. - + * How do I use this option? As I'm not sure about what part of the image is shown when the display uses an overscan process, I can't provide you with rules or examples. The safe @@ -72,7 +72,7 @@ * What is it? A purely informative setting, that explains what the type of your analog video was, before you digitized it. - + * How do I use this option? Just set it to the desired value. ( e.g. NTSC, PAL ) If you transcode from MPEG2, you may find the value for this option in the @@ -101,11 +101,11 @@ or want to make sure that your material is played back without oversaturation, set if to on. Please note that the default for this option in x264 is off, which is not a safe assumption. - + * Should I use this option? Yes, but there are few decoders/ media players that distinguish between the two options. - + 5. 
Color Primaries, Transfer Characteristics, Matrix Coefficients ------------------------------------------------------------------- @@ -120,7 +120,7 @@ profile of the digitizing equipment is known, it is possible to correct the colors and gamma of the decoded h264 stream in a way that the video stream looks the same, regardless of the digitizing equipment used. - + * How do I use these options? If you are able to find out which characteristics your digitizing equipment uses, (see the equipment documentation or make reference measurements) @@ -170,9 +170,8 @@ chroma sample location in that direction is equal to one of the luma samples. H264 Annex E contains images that tell you how to "transform" your Chroma Sample Location into a value of 0 to 5 that you can pass to x264. - + * Should I use this option? Unless you are a perfectionist, don't bother. Media players ignore this setting, and favor their own (fixed) assumed Chroma Sample Location. -
View file
x264-snapshot-20141218-2245.tar.bz2/encoder/analyse.c -> x264-snapshot-20150804-2245.tar.bz2/encoder/analyse.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * analyse.c: macroblock analysis ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu>
View file
x264-snapshot-20141218-2245.tar.bz2/encoder/analyse.h -> x264-snapshot-20150804-2245.tar.bz2/encoder/analyse.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * analyse.h: macroblock analysis ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu>
View file
x264-snapshot-20141218-2245.tar.bz2/encoder/cabac.c -> x264-snapshot-20150804-2245.tar.bz2/encoder/cabac.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * cabac.c: cabac bitstream writing ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu>
View file
x264-snapshot-20141218-2245.tar.bz2/encoder/cavlc.c -> x264-snapshot-20150804-2245.tar.bz2/encoder/cavlc.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * cavlc.c: cavlc bitstream writing ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu>
View file
x264-snapshot-20141218-2245.tar.bz2/encoder/encoder.c -> x264-snapshot-20150804-2245.tar.bz2/encoder/encoder.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * encoder.c: top-level encoder functions ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu> @@ -32,6 +32,9 @@ #include "ratecontrol.h" #include "macroblock.h" #include "me.h" +#if HAVE_INTEL_DISPATCHER +#include "extras/intel_dispatcher.h" +#endif //#define DEBUG_MB_TYPE @@ -471,12 +474,12 @@ int i_csp = h->param.i_csp & X264_CSP_MASK; #if X264_CHROMA_FORMAT - if( CHROMA_FORMAT != CHROMA_420 && i_csp >= X264_CSP_I420 && i_csp <= X264_CSP_NV12 ) + if( CHROMA_FORMAT != CHROMA_420 && i_csp >= X264_CSP_I420 && i_csp < X264_CSP_I422 ) { x264_log( h, X264_LOG_ERROR, "not compiled with 4:2:0 support\n" ); return -1; } - else if( CHROMA_FORMAT != CHROMA_422 && i_csp >= X264_CSP_I422 && i_csp <= X264_CSP_V210 ) + else if( CHROMA_FORMAT != CHROMA_422 && i_csp >= X264_CSP_I422 && i_csp < X264_CSP_I444 ) { x264_log( h, X264_LOG_ERROR, "not compiled with 4:2:2 support\n" ); return -1; @@ -489,36 +492,41 @@ #endif if( i_csp <= X264_CSP_NONE || i_csp >= X264_CSP_MAX ) { - x264_log( h, X264_LOG_ERROR, "invalid CSP (only I420/YV12/NV12/I422/YV16/NV16/I444/YV24/BGR/BGRA/RGB supported)\n" ); + x264_log( h, X264_LOG_ERROR, "invalid CSP (only I420/YV12/NV12/NV21/I422/YV16/NV16/I444/YV24/BGR/BGRA/RGB supported)\n" ); return -1; } - if( i_csp < X264_CSP_I444 && h->param.i_width % 2 ) + int w_mod = i_csp < X264_CSP_I444 ? 2 : 1; + int h_mod = (i_csp < X264_CSP_I422 ? 
2 : 1) << PARAM_INTERLACED; + if( h->param.i_width % w_mod ) { - x264_log( h, X264_LOG_ERROR, "width not divisible by 2 (%dx%d)\n", - h->param.i_width, h->param.i_height ); + x264_log( h, X264_LOG_ERROR, "width not divisible by %d (%dx%d)\n", + w_mod, h->param.i_width, h->param.i_height ); return -1; } - - if( i_csp < X264_CSP_I422 && PARAM_INTERLACED && h->param.i_height % 4 ) + if( h->param.i_height % h_mod ) { - x264_log( h, X264_LOG_ERROR, "height not divisible by 4 (%dx%d)\n", - h->param.i_width, h->param.i_height ); + x264_log( h, X264_LOG_ERROR, "height not divisible by %d (%dx%d)\n", + h_mod, h->param.i_width, h->param.i_height ); return -1; } - if( (i_csp < X264_CSP_I422 || PARAM_INTERLACED) && h->param.i_height % 2 ) + if( h->param.crop_rect.i_left >= h->param.i_width || + h->param.crop_rect.i_right >= h->param.i_width || + h->param.crop_rect.i_top >= h->param.i_height || + h->param.crop_rect.i_bottom >= h->param.i_height || + h->param.crop_rect.i_left + h->param.crop_rect.i_right >= h->param.i_width || + h->param.crop_rect.i_top + h->param.crop_rect.i_bottom >= h->param.i_height ) { - x264_log( h, X264_LOG_ERROR, "height not divisible by 2 (%dx%d)\n", - h->param.i_width, h->param.i_height ); + x264_log( h, X264_LOG_ERROR, "invalid crop-rect %u,%u,%u,%u\n", h->param.crop_rect.i_left, + h->param.crop_rect.i_top, h->param.crop_rect.i_right, h->param.crop_rect.i_bottom ); return -1; } - - if( (h->param.crop_rect.i_left + h->param.crop_rect.i_right ) >= h->param.i_width || - (h->param.crop_rect.i_top + h->param.crop_rect.i_bottom) >= h->param.i_height ) + if( h->param.crop_rect.i_left % w_mod || h->param.crop_rect.i_right % w_mod || + h->param.crop_rect.i_top % h_mod || h->param.crop_rect.i_bottom % h_mod ) { - x264_log( h, X264_LOG_ERROR, "invalid crop-rect %u,%u,%u,%u\n", h->param.crop_rect.i_left, - h->param.crop_rect.i_top, h->param.crop_rect.i_right, h->param.crop_rect.i_bottom ); + x264_log( h, X264_LOG_ERROR, "crop-rect %u,%u,%u,%u not divisible by 
%dx%d\n", h->param.crop_rect.i_left, + h->param.crop_rect.i_top, h->param.crop_rect.i_right, h->param.crop_rect.i_bottom, w_mod, h_mod ); return -1; } @@ -529,7 +537,13 @@ } if( h->param.i_threads == X264_THREADS_AUTO ) + { h->param.i_threads = x264_cpu_num_processors() * (h->param.b_sliced_threads?2:3)/2; + /* Avoid too many threads as they don't improve performance and + * complicate VBV. Capped at an arbitrary 2 rows per thread. */ + int max_threads = X264_MAX( 1, (h->param.i_height+15)/16 / 2 ); + h->param.i_threads = X264_MIN( h->param.i_threads, max_threads ); + } int max_sliced_threads = X264_MAX( 1, (h->param.i_height+15)/16 / 4 ); if( h->param.i_threads > 1 ) { @@ -583,7 +597,20 @@ h->param.i_dpb_size = 1; } - h->param.i_frame_packing = x264_clip3( h->param.i_frame_packing, -1, 5 ); + if( h->param.i_frame_packing < -1 || h->param.i_frame_packing > 7 ) + { + x264_log( h, X264_LOG_WARNING, "ignoring unknown frame packing value\n" ); + h->param.i_frame_packing = -1; + } + if( h->param.i_frame_packing == 7 && + ((h->param.i_width - h->param.crop_rect.i_left - h->param.crop_rect.i_right) % 3 || + (h->param.i_height - h->param.crop_rect.i_top - h->param.crop_rect.i_bottom) % 3) ) + { + x264_log( h, X264_LOG_ERROR, "cropped resolution %dx%d not compatible with tile format frame packing\n", + h->param.i_width - h->param.crop_rect.i_left - h->param.crop_rect.i_right, + h->param.i_height - h->param.crop_rect.i_top - h->param.crop_rect.i_bottom ); + return -1; + } /* Detect default ffmpeg settings and terminate with an error. 
*/ if( b_open ) @@ -1050,7 +1077,7 @@ h->param.analyse.intra &= ~X264_ANALYSE_I8x8; } h->param.analyse.i_trellis = x264_clip3( h->param.analyse.i_trellis, 0, 2 ); - h->param.rc.i_aq_mode = x264_clip3( h->param.rc.i_aq_mode, 0, 2 ); + h->param.rc.i_aq_mode = x264_clip3( h->param.rc.i_aq_mode, 0, 3 ); h->param.rc.f_aq_strength = x264_clip3f( h->param.rc.f_aq_strength, 0, 3 ); if( h->param.rc.f_aq_strength == 0 ) h->param.rc.i_aq_mode = 0; @@ -1390,6 +1417,10 @@ if( param->param_free ) param->param_free( param ); +#if HAVE_INTEL_DISPATCHER + x264_intel_dispatcher_override(); +#endif + if( x264_threading_init() ) { x264_log( h, X264_LOG_ERROR, "unable to initialize threading\n" ); @@ -1676,6 +1707,7 @@ else if( !x264_is_regular_file( f ) ) { x264_log( h, X264_LOG_ERROR, "dump_yuv: incompatible with non-regular file %s\n", h->param.psz_dump_yuv ); + fclose( f ); goto fail; } fclose( f ); @@ -3213,6 +3245,12 @@ /* ------------------- Setup new frame from picture -------------------- */ if( pic_in != NULL ) { + if( h->lookahead->b_exit_thread ) + { + x264_log( h, X264_LOG_ERROR, "lookahead thread is already stopped\n" ); + return -1; + } + /* 1: Copy the picture to a frame and move it to a buffer */ x264_frame_t *fenc = x264_frame_pop_unused( h, 0 ); if( !fenc ) @@ -4087,14 +4125,14 @@ if( h->stat.i_frame_count[SLICE_TYPE_I] > 0 ) { int64_t *i_mb_count = h->stat.i_mb_count[SLICE_TYPE_I]; - double i_count = h->stat.i_frame_count[SLICE_TYPE_I] * h->mb.i_mb_count / 100.0; + double i_count = (double)h->stat.i_frame_count[SLICE_TYPE_I] * h->mb.i_mb_count / 100.0; x264_print_intra( i_mb_count, i_count, b_print_pcm, buf ); x264_log( h, X264_LOG_INFO, "mb I %s\n", buf ); } if( h->stat.i_frame_count[SLICE_TYPE_P] > 0 ) { int64_t *i_mb_count = h->stat.i_mb_count[SLICE_TYPE_P]; - double i_count = h->stat.i_frame_count[SLICE_TYPE_P] * h->mb.i_mb_count / 100.0; + double i_count = (double)h->stat.i_frame_count[SLICE_TYPE_P] * h->mb.i_mb_count / 100.0; int64_t *i_mb_size = 
i_mb_count_size[SLICE_TYPE_P]; x264_print_intra( i_mb_count, i_count, b_print_pcm, buf ); x264_log( h, X264_LOG_INFO, @@ -4110,7 +4148,7 @@ if( h->stat.i_frame_count[SLICE_TYPE_B] > 0 ) { int64_t *i_mb_count = h->stat.i_mb_count[SLICE_TYPE_B]; - double i_count = h->stat.i_frame_count[SLICE_TYPE_B] * h->mb.i_mb_count / 100.0; + double i_count = (double)h->stat.i_frame_count[SLICE_TYPE_B] * h->mb.i_mb_count / 100.0; double i_mb_list_count; int64_t *i_mb_size = i_mb_count_size[SLICE_TYPE_B]; int64_t list_count[3] = {0}; /* 0 == L0, 1 == L1, 2 == BI */
View file
x264-snapshot-20141218-2245.tar.bz2/encoder/lookahead.c -> x264-snapshot-20150804-2245.tar.bz2/encoder/lookahead.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * lookahead.c: high-level lookahead functions ***************************************************************************** - * Copyright (C) 2010-2014 Avail Media and x264 project + * Copyright (C) 2010-2015 Avail Media and x264 project * * Authors: Michael Kazmier <mkazmier@availmedia.com> * Alex Giladi <agiladi@availmedia.com>
View file
x264-snapshot-20141218-2245.tar.bz2/encoder/macroblock.c -> x264-snapshot-20150804-2245.tar.bz2/encoder/macroblock.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * macroblock.c: macroblock encoding ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu>
View file
x264-snapshot-20141218-2245.tar.bz2/encoder/macroblock.h -> x264-snapshot-20150804-2245.tar.bz2/encoder/macroblock.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * macroblock.h: macroblock encoding ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Laurent Aimar <fenrir@via.ecp.fr>
View file
x264-snapshot-20141218-2245.tar.bz2/encoder/me.c -> x264-snapshot-20150804-2245.tar.bz2/encoder/me.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * me.c: motion estimation ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Laurent Aimar <fenrir@via.ecp.fr>
View file
x264-snapshot-20141218-2245.tar.bz2/encoder/me.h -> x264-snapshot-20150804-2245.tar.bz2/encoder/me.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * me.h: motion estimation ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Laurent Aimar <fenrir@via.ecp.fr>
View file
x264-snapshot-20141218-2245.tar.bz2/encoder/ratecontrol.c -> x264-snapshot-20150804-2245.tar.bz2/encoder/ratecontrol.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * ratecontrol.c: ratecontrol ***************************************************************************** - * Copyright (C) 2005-2014 x264 project + * Copyright (C) 2005-2015 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Michael Niedermayer <michaelni@gmx.at> @@ -96,6 +96,7 @@ /* VBV stuff */ double buffer_size; int64_t buffer_fill_final; + int64_t buffer_fill_final_min; double buffer_fill; /* planned buffer, if all in-progress frames hit their bit budget */ double buffer_rate; /* # of bits added to buffer_fill after each frame */ double vbv_max_rate; /* # of bits added to buffer_fill per second */ @@ -301,10 +302,6 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame, float *quant_offsets ) { - /* constants chosen to result in approximately the same overall bitrate as without AQ. - * FIXME: while they're written in 5 significant digits, they're only tuned to 2. */ - float strength; - float avg_adj = 0.f; /* Initialize frame stats */ for( int i = 0; i < 3; i++ ) { @@ -348,23 +345,30 @@ /* Actual adaptive quantization */ else { - if( h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE ) + /* constants chosen to result in approximately the same overall bitrate as without AQ. + * FIXME: while they're written in 5 significant digits, they're only tuned to 2. 
*/ + float strength; + float avg_adj = 0.f; + float bias_strength = 0.f; + + if( h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE || h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE_BIASED ) { - float bit_depth_correction = powf(1 << (BIT_DEPTH-8), 0.5f); + float bit_depth_correction = 1.f / (1 << (2*(BIT_DEPTH-8))); float avg_adj_pow2 = 0.f; for( int mb_y = 0; mb_y < h->mb.i_mb_height; mb_y++ ) for( int mb_x = 0; mb_x < h->mb.i_mb_width; mb_x++ ) { uint32_t energy = x264_ac_energy_mb( h, mb_x, mb_y, frame ); - float qp_adj = powf( energy + 1, 0.125f ); + float qp_adj = powf( energy * bit_depth_correction + 1, 0.125f ); frame->f_qp_offset[mb_x + mb_y*h->mb.i_mb_stride] = qp_adj; avg_adj += qp_adj; avg_adj_pow2 += qp_adj * qp_adj; } avg_adj /= h->mb.i_mb_count; avg_adj_pow2 /= h->mb.i_mb_count; - strength = h->param.rc.f_aq_strength * avg_adj / bit_depth_correction; - avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - (14.f * bit_depth_correction)) / avg_adj; + strength = h->param.rc.f_aq_strength * avg_adj; + avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - 14.f) / avg_adj; + bias_strength = h->param.rc.f_aq_strength; } else strength = h->param.rc.f_aq_strength * 1.0397f; @@ -374,7 +378,12 @@ { float qp_adj; int mb_xy = mb_x + mb_y*h->mb.i_mb_stride; - if( h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE ) + if( h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE_BIASED ) + { + qp_adj = frame->f_qp_offset[mb_xy]; + qp_adj = strength * (qp_adj - avg_adj) + bias_strength * (1.f - 14.f / (qp_adj * qp_adj)); + } + else if( h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE ) { qp_adj = frame->f_qp_offset[mb_xy]; qp_adj = strength * (qp_adj - avg_adj); @@ -724,7 +733,8 @@ if( h->param.rc.f_vbv_buffer_init > 1. 
) h->param.rc.f_vbv_buffer_init = x264_clip3f( h->param.rc.f_vbv_buffer_init / h->param.rc.i_vbv_buffer_size, 0, 1 ); h->param.rc.f_vbv_buffer_init = x264_clip3f( X264_MAX( h->param.rc.f_vbv_buffer_init, rc->buffer_rate / rc->buffer_size ), 0, 1); - rc->buffer_fill_final = rc->buffer_size * h->param.rc.f_vbv_buffer_init * h->sps->vui.i_time_scale; + rc->buffer_fill_final = + rc->buffer_fill_final_min = rc->buffer_size * h->param.rc.f_vbv_buffer_init * h->sps->vui.i_time_scale; rc->b_vbv = 1; rc->b_vbv_min_rate = !rc->b_2pass && h->param.rc.i_rc_method == X264_RC_ABR @@ -776,11 +786,11 @@ if( h->param.i_nal_hrd ) { uint64_t denom = (uint64_t)h->sps->vui.hrd.i_bit_rate_unscaled * h->sps->vui.i_time_scale; - uint64_t num = 180000; + uint64_t num = 90000; x264_reduce_fraction64( &num, &denom ); - rc->hrd_multiply_denom = 180000 / num; + rc->hrd_multiply_denom = 90000 / num; - double bits_required = log2( 180000 / rc->hrd_multiply_denom ) + double bits_required = log2( 90000 / rc->hrd_multiply_denom ) + log2( h->sps->vui.i_time_scale ) + log2( h->sps->vui.hrd.i_cpb_size_unscaled ); if( bits_required >= 63 ) @@ -822,6 +832,7 @@ int num_preds = h->param.b_sliced_threads * h->param.i_threads + 1; CHECKED_MALLOC( rc->pred, 5 * sizeof(predictor_t) * num_preds ); CHECKED_MALLOC( rc->pred_b_from_p, sizeof(predictor_t) ); + static const float pred_coeff_table[3] = { 1.0, 1.0, 1.5 }; for( int i = 0; i < 3; i++ ) { rc->last_qscale_for[i] = qp2qscale( ABR_INIT_QP ); @@ -829,8 +840,8 @@ rc->lmax[i] = qp2qscale( h->param.rc.i_qp_max ); for( int j = 0; j < num_preds; j++ ) { - rc->pred[i+j*5].coeff_min = 2.0 / 4; - rc->pred[i+j*5].coeff = 2.0; + rc->pred[i+j*5].coeff_min = pred_coeff_table[i] / 2; + rc->pred[i+j*5].coeff = pred_coeff_table[i]; rc->pred[i+j*5].count = 1.0; rc->pred[i+j*5].decay = 0.5; rc->pred[i+j*5].offset = 0.0; @@ -844,7 +855,11 @@ rc->row_preds[i][j].offset = 0.0; } } - *rc->pred_b_from_p = rc->pred[0]; + rc->pred_b_from_p->coeff_min = 0.5 / 2; + 
rc->pred_b_from_p->coeff = 0.5; + rc->pred_b_from_p->count = 1.0; + rc->pred_b_from_p->decay = 0.5; + rc->pred_b_from_p->offset = 0.0; if( parse_zones( h ) < 0 ) { @@ -1914,15 +1929,16 @@ h->fenc->hrd_timing.cpb_removal_time = rc->nrt_first_access_unit + (double)(h->fenc->i_cpb_delay - h->i_cpb_delay_pir_offset) * h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale; - double cpb_earliest_arrival_time = h->fenc->hrd_timing.cpb_removal_time - (double)rc->initial_cpb_removal_delay / 90000; if( h->fenc->b_keyframe ) { - rc->nrt_first_access_unit = h->fenc->hrd_timing.cpb_removal_time; - rc->initial_cpb_removal_delay = h->initial_cpb_removal_delay; - rc->initial_cpb_removal_delay_offset = h->initial_cpb_removal_delay_offset; + rc->nrt_first_access_unit = h->fenc->hrd_timing.cpb_removal_time; + rc->initial_cpb_removal_delay = h->initial_cpb_removal_delay; + rc->initial_cpb_removal_delay_offset = h->initial_cpb_removal_delay_offset; } - else - cpb_earliest_arrival_time -= (double)rc->initial_cpb_removal_delay_offset / 90000; + + double cpb_earliest_arrival_time = h->fenc->hrd_timing.cpb_removal_time - (double)rc->initial_cpb_removal_delay / 90000; + if( !h->fenc->b_keyframe ) + cpb_earliest_arrival_time -= (double)rc->initial_cpb_removal_delay_offset / 90000; if( h->sps->vui.hrd.b_cbr_hrd ) h->fenc->hrd_timing.cpb_initial_arrival_time = rc->previous_cpb_final_arrival_time; @@ -2095,7 +2111,7 @@ int bitrate = h->sps->vui.hrd.i_bit_rate_unscaled; x264_ratecontrol_t *rcc = h->rc; x264_ratecontrol_t *rct = h->thread[0]->rc; - uint64_t buffer_size = (uint64_t)h->sps->vui.hrd.i_cpb_size_unscaled * h->sps->vui.i_time_scale; + int64_t buffer_size = (int64_t)h->sps->vui.hrd.i_cpb_size_unscaled * h->sps->vui.i_time_scale; if( rcc->last_satd >= h->mb.i_mb_count ) update_predictor( &rct->pred[h->sh.i_type], qp2qscale( rcc->qpa_rc ), rcc->last_satd, bits ); @@ -2103,32 +2119,45 @@ if( !rcc->b_vbv ) return filler; - rct->buffer_fill_final -= (uint64_t)bits * 
h->sps->vui.i_time_scale; + uint64_t buffer_diff = (uint64_t)bits * h->sps->vui.i_time_scale; + rct->buffer_fill_final -= buffer_diff; + rct->buffer_fill_final_min -= buffer_diff; - if( rct->buffer_fill_final < 0 ) + if( rct->buffer_fill_final_min < 0 ) { - double underflow = (double)rct->buffer_fill_final / h->sps->vui.i_time_scale; + double underflow = (double)rct->buffer_fill_final_min / h->sps->vui.i_time_scale; if( rcc->rate_factor_max_increment && rcc->qpm >= rcc->qp_novbv + rcc->rate_factor_max_increment ) x264_log( h, X264_LOG_DEBUG, "VBV underflow due to CRF-max (frame %d, %.0f bits)\n", h->i_frame, underflow ); else x264_log( h, X264_LOG_WARNING, "VBV underflow (frame %d, %.0f bits)\n", h->i_frame, underflow ); + rct->buffer_fill_final = + rct->buffer_fill_final_min = 0; } - rct->buffer_fill_final = X264_MAX( rct->buffer_fill_final, 0 ); if( h->param.i_avcintra_class ) - rct->buffer_fill_final += buffer_size; + buffer_diff = buffer_size; else - rct->buffer_fill_final += (uint64_t)bitrate * h->sps->vui.i_num_units_in_tick * h->fenc->i_cpb_duration; - - if( h->param.rc.b_filler && rct->buffer_fill_final > buffer_size ) - { - int64_t scale = (int64_t)h->sps->vui.i_time_scale * 8; - filler = (rct->buffer_fill_final - buffer_size + scale - 1) / scale;
View file
x264-snapshot-20141218-2245.tar.bz2/encoder/ratecontrol.h -> x264-snapshot-20150804-2245.tar.bz2/encoder/ratecontrol.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * ratecontrol.h: ratecontrol ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Laurent Aimar <fenrir@via.ecp.fr>
View file
x264-snapshot-20141218-2245.tar.bz2/encoder/rdo.c -> x264-snapshot-20150804-2245.tar.bz2/encoder/rdo.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * rdo.c: rate-distortion optimization ***************************************************************************** - * Copyright (C) 2005-2014 x264 project + * Copyright (C) 2005-2015 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Fiona Glaser <fiona@x264.com> @@ -180,7 +180,7 @@ else { x264_macroblock_size_cavlc( h ); - i_bits = ( h->out.bs.i_bits_encoded * i_lambda2 + 128 ) >> 8; + i_bits = ( (uint64_t)h->out.bs.i_bits_encoded * i_lambda2 + 128 ) >> 8; } h->mb.b_transform_8x8 = b_transform_bak; @@ -261,7 +261,7 @@ i_bits = ( (uint64_t)cabac_tmp.f8_bits_encoded * i_lambda2 + 128 ) >> 8; } else - i_bits = x264_partition_size_cavlc( h, i8, i_pixel ) * i_lambda2; + i_bits = (uint64_t)x264_partition_size_cavlc( h, i8, i_pixel ) * i_lambda2; return (i_ssd<<8) + i_bits; } @@ -297,7 +297,7 @@ i_bits = ( (uint64_t)cabac_tmp.f8_bits_encoded * i_lambda2 + 128 ) >> 8; } else - i_bits = x264_partition_i8x8_size_cavlc( h, i8, i_mode ) * i_lambda2; + i_bits = (uint64_t)x264_partition_i8x8_size_cavlc( h, i8, i_mode ) * i_lambda2; return (i_ssd<<8) + i_bits; } @@ -331,7 +331,7 @@ i_bits = ( (uint64_t)cabac_tmp.f8_bits_encoded * i_lambda2 + 128 ) >> 8; } else - i_bits = x264_partition_i4x4_size_cavlc( h, i4, i_mode ) * i_lambda2; + i_bits = (uint64_t)x264_partition_i4x4_size_cavlc( h, i4, i_mode ) * i_lambda2; return (i_ssd<<8) + i_bits; } @@ -357,7 +357,7 @@ i_bits = ( (uint64_t)cabac_tmp.f8_bits_encoded * i_lambda2 + 128 ) >> 8; } else - i_bits = x264_chroma_size_cavlc( h ) * i_lambda2; + i_bits = (uint64_t)x264_chroma_size_cavlc( h ) * i_lambda2; return (i_ssd<<8) + i_bits; }
View file
x264-snapshot-20141218-2245.tar.bz2/encoder/set.c -> x264-snapshot-20150804-2245.tar.bz2/encoder/set.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * set: header writing ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu> @@ -166,7 +166,7 @@ while( (1 << sps->i_log2_max_frame_num) <= max_frame_num ) sps->i_log2_max_frame_num++; - sps->i_poc_type = param->i_bframe || param->b_interlaced ? 0 : 2; + sps->i_poc_type = param->i_bframe || param->b_interlaced || param->i_avcintra_class ? 0 : 2; if( sps->i_poc_type == 0 ) { int max_delta_poc = (param->i_bframe + 2) * (!!param->i_bframe_pyramid + 1) * 2; @@ -578,7 +578,7 @@ memcpy( payload, uuid, 16 ); sprintf( payload+16, "x264 - core %d%s - H.264/MPEG-4 AVC codec - " - "Copy%s 2003-2014 - http://www.videolan.org/x264.html - options: %s", + "Copy%s 2003-2015 - http://www.videolan.org/x264.html - options: %s", X264_BUILD, X264_VERSION, HAVE_GPL?"left":"right", opts ); length = strlen(payload)+1; @@ -663,7 +663,7 @@ bs_write1( &q, quincunx_sampling_flag ); // quincunx_sampling_flag // 0: views are unrelated, 1: left view is on the left, 2: left view is on the right - bs_write ( &q, 6, 1 ); // content_interpretation_type + bs_write ( &q, 6, h->param.i_frame_packing != 6 ); // content_interpretation_type bs_write1( &q, 0 ); // spatial_flipping_flag bs_write1( &q, 0 ); // frame0_flipped_flag
View file
x264-snapshot-20141218-2245.tar.bz2/encoder/set.h -> x264-snapshot-20150804-2245.tar.bz2/encoder/set.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * set.h: header writing ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu>
View file
x264-snapshot-20141218-2245.tar.bz2/encoder/slicetype-cl.c -> x264-snapshot-20150804-2245.tar.bz2/encoder/slicetype-cl.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * slicetype-cl.c: OpenCL slicetype decision code (lowres lookahead) ***************************************************************************** - * Copyright (C) 2012-2014 x264 project + * Copyright (C) 2012-2015 x264 project * * Authors: Steve Borho <sborho@multicorewareinc.com> *
View file
x264-snapshot-20141218-2245.tar.bz2/encoder/slicetype.c -> x264-snapshot-20150804-2245.tar.bz2/encoder/slicetype.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * slicetype.c: lookahead analysis ***************************************************************************** - * Copyright (C) 2005-2014 x264 project + * Copyright (C) 2005-2015 x264 project * * Authors: Fiona Glaser <fiona@x264.com> * Loren Merritt <lorenm@u.washington.edu> @@ -612,7 +612,6 @@ if( b_bidir ) { - int16_t *mvr = fref1->lowres_mvs[0][p1-p0-1][i_mb_xy]; ALIGNED_ARRAY_8( int16_t, dmv,[2],[2] ); m[1].i_pixel = PIXEL_8x8; @@ -624,14 +623,20 @@ LOAD_HPELS_LUMA( m[1].p_fref, fref1->lowres ); m[1].p_fref_w = m[1].p_fref[0]; - dmv[0][0] = ( mvr[0] * dist_scale_factor + 128 ) >> 8; - dmv[0][1] = ( mvr[1] * dist_scale_factor + 128 ) >> 8; - dmv[1][0] = dmv[0][0] - mvr[0]; - dmv[1][1] = dmv[0][1] - mvr[1]; - CLIP_MV( dmv[0] ); - CLIP_MV( dmv[1] ); - if( h->param.analyse.i_subpel_refine <= 1 ) - M64( dmv ) &= ~0x0001000100010001ULL; /* mv & ~1 */ + if( fref1->lowres_mvs[0][p1-p0-1][0][0] != 0x7FFF ) + { + int16_t *mvr = fref1->lowres_mvs[0][p1-p0-1][i_mb_xy]; + dmv[0][0] = ( mvr[0] * dist_scale_factor + 128 ) >> 8; + dmv[0][1] = ( mvr[1] * dist_scale_factor + 128 ) >> 8; + dmv[1][0] = dmv[0][0] - mvr[0]; + dmv[1][1] = dmv[0][1] - mvr[1]; + CLIP_MV( dmv[0] ); + CLIP_MV( dmv[1] ); + if( h->param.analyse.i_subpel_refine <= 1 ) + M64( dmv ) &= ~0x0001000100010001ULL; /* mv & ~1 */ + } + else + M64( dmv ) = 0; TRY_BIDIR( dmv[0], dmv[1], 0 ); if( M64( dmv ) ) @@ -1104,7 +1109,7 @@ if( b_intra ) x264_slicetype_frame_cost( h, a, frames, 0, 0, 0, 0 ); - while( i > 0 && frames[i]->i_type == X264_TYPE_B ) + while( i > 0 && IS_X264_TYPE_B( frames[i]->i_type ) ) i--; last_nonb = i; @@ -1132,7 +1137,7 @@ while( i-- > idx ) { cur_nonb = i; - while( frames[cur_nonb]->i_type == X264_TYPE_B && cur_nonb > 0 ) + while( IS_X264_TYPE_B( frames[cur_nonb]->i_type ) && cur_nonb > 0 ) cur_nonb--; if( cur_nonb < idx ) break; @@ -1226,7 +1231,7 @@ int last_nonb = 0, cur_nonb = 1, idx = 0; 
x264_frame_t *prev_frame = NULL; int prev_frame_idx = 0; - while( cur_nonb < num_frames && frames[cur_nonb]->i_type == X264_TYPE_B ) + while( cur_nonb < num_frames && IS_X264_TYPE_B( frames[cur_nonb]->i_type ) ) cur_nonb++; int next_nonb = keyframe ? last_nonb : cur_nonb; @@ -1278,7 +1283,7 @@ } last_nonb = cur_nonb; cur_nonb++; - while( cur_nonb <= num_frames && frames[cur_nonb]->i_type == X264_TYPE_B ) + while( cur_nonb <= num_frames && IS_X264_TYPE_B( frames[cur_nonb]->i_type ) ) cur_nonb++; } frames[next_nonb]->i_planned_type[idx] = X264_TYPE_AUTO; @@ -1288,36 +1293,39 @@ { int loc = 1; int cost = 0; - int cur_p = 0; + int cur_nonb = 0; path--; /* Since the 1st path element is really the second frame */ while( path[loc] ) { - int next_p = loc; - /* Find the location of the next P-frame. */ - while( path[next_p] != 'P' ) - next_p++; - - /* Add the cost of the P-frame found above */ - cost += x264_slicetype_frame_cost( h, a, frames, cur_p, next_p, next_p, 0 ); + int next_nonb = loc; + /* Find the location of the next non-B-frame. 
*/ + while( path[next_nonb] == 'B' ) + next_nonb++; + + /* Add the cost of the non-B-frame found above */ + if( path[next_nonb] == 'P' ) + cost += x264_slicetype_frame_cost( h, a, frames, cur_nonb, next_nonb, next_nonb, 0 ); + else /* I-frame */ + cost += x264_slicetype_frame_cost( h, a, frames, next_nonb, next_nonb, next_nonb, 0 ); /* Early terminate if the cost we have found is larger than the best path cost so far */ if( cost > threshold ) break; - if( h->param.i_bframe_pyramid && next_p - cur_p > 2 ) + if( h->param.i_bframe_pyramid && next_nonb - cur_nonb > 2 ) { - int middle = cur_p + (next_p - cur_p)/2; - cost += x264_slicetype_frame_cost( h, a, frames, cur_p, next_p, middle, 0 ); + int middle = cur_nonb + (next_nonb - cur_nonb)/2; + cost += x264_slicetype_frame_cost( h, a, frames, cur_nonb, next_nonb, middle, 0 ); for( int next_b = loc; next_b < middle && cost < threshold; next_b++ ) - cost += x264_slicetype_frame_cost( h, a, frames, cur_p, middle, next_b, 0 ); - for( int next_b = middle+1; next_b < next_p && cost < threshold; next_b++ ) - cost += x264_slicetype_frame_cost( h, a, frames, middle, next_p, next_b, 0 ); + cost += x264_slicetype_frame_cost( h, a, frames, cur_nonb, middle, next_b, 0 ); + for( int next_b = middle+1; next_b < next_nonb && cost < threshold; next_b++ ) + cost += x264_slicetype_frame_cost( h, a, frames, middle, next_nonb, next_b, 0 ); } else - for( int next_b = loc; next_b < next_p && cost < threshold; next_b++ ) - cost += x264_slicetype_frame_cost( h, a, frames, cur_p, next_p, next_b, 0 ); + for( int next_b = loc; next_b < next_nonb && cost < threshold; next_b++ ) + cost += x264_slicetype_frame_cost( h, a, frames, cur_nonb, next_nonb, next_b, 0 ); - loc = next_p + 1; - cur_p = next_p; + loc = next_nonb + 1; + cur_nonb = next_nonb; } return cost; } @@ -1331,6 +1339,7 @@ char paths[2][X264_LOOKAHEAD_MAX+1]; int num_paths = X264_MIN( h->param.i_bframe+1, length ); int best_cost = COST_MAX; + int best_possible = 0; int idx = 0; /* Iterate 
over all currently possible paths */ @@ -1342,12 +1351,33 @@ memset( paths[idx]+len, 'B', path ); strcpy( paths[idx]+len+path, "P" ); - /* Calculate the actual cost of the current path */ - int cost = x264_slicetype_path_cost( h, a, frames, paths[idx], best_cost ); - if( cost < best_cost ) + int possible = 1; + for( int i = 1; i <= length; i++ ) { - best_cost = cost; - idx ^= 1; + int i_type = frames[i]->i_type; + if( i_type == X264_TYPE_AUTO ) + continue; + if( IS_X264_TYPE_B( i_type ) ) + possible = possible && (i < len || i == length || paths[idx][i-1] == 'B'); + else + { + possible = possible && (i < len || paths[idx][i-1] != 'B'); + paths[idx][i-1] = IS_X264_TYPE_I( i_type ) ? 'I' : 'P'; + } + } + + if( possible || !best_possible ) + { + if( possible && !best_possible ) + best_cost = COST_MAX; + /* Calculate the actual cost of the current path */ + int cost = x264_slicetype_path_cost( h, a, frames, paths[idx], best_cost ); + if( cost < best_cost ) + { + best_cost = cost; + best_possible = possible; + idx ^= 1; + } } } @@ -1441,13 +1471,15 @@ return scenecut_internal( h, a, frames, p0, p1, real_scenecut ); } +#define IS_X264_TYPE_AUTO_OR_I(x) ((x)==X264_TYPE_AUTO || IS_X264_TYPE_I(x)) +#define IS_X264_TYPE_AUTO_OR_B(x) ((x)==X264_TYPE_AUTO || IS_X264_TYPE_B(x)) + void x264_slicetype_analyse( x264_t *h, int intra_minigop ) { x264_mb_analysis_t a; x264_frame_t *frames[X264_LOOKAHEAD_MAX+3] = { NULL, }; int num_frames, orig_num_frames, keyint_limit, framecnt; int i_mb_count = NUM_MBS;
View file
x264-snapshot-20141218-2245.tar.bz2/example.c -> x264-snapshot-20150804-2245.tar.bz2/example.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * example.c: libx264 API usage example ***************************************************************************** - * Copyright (C) 2014 x264 project + * Copyright (C) 2014-2015 x264 project * * Authors: Anton Mitrofanov <BugMaster@narod.ru> * @@ -24,26 +24,14 @@ *****************************************************************************/ #ifdef _WIN32 -/* The following two defines must be located before the inclusion of any system header files. */ -#define WINVER 0x0500 -#define _WIN32_WINNT 0x0500 -#include <windows.h> #include <io.h> /* _setmode() */ #include <fcntl.h> /* _O_BINARY */ #endif #include <stdint.h> #include <stdio.h> -#include <signal.h> #include <x264.h> -/* Ctrl-C handler */ -static volatile int b_ctrl_c = 0; -static void sigint_handler( int a ) -{ - b_ctrl_c = 1; -} - #define FAIL_IF_ERROR( cond, ... )\ do\ {\ @@ -72,9 +60,6 @@ _setmode( _fileno( stderr ), _O_BINARY ); #endif - /* Control-C handler */ - signal( SIGINT, sigint_handler ); - FAIL_IF_ERROR( !(argc > 1), "Example usage: example 352x288 <input.yuv >output.h264\n" ); FAIL_IF_ERROR( 2 != sscanf( argv[1], "%dx%d", &width, &height ), "resolution not specified or incorrect\n" ); @@ -105,17 +90,17 @@ #undef fail #define fail fail3 + int luma_size = width * height; + int chroma_size = luma_size / 4; /* Encode frames */ - for( ; !b_ctrl_c; i_frame++ ) + for( ;; i_frame++ ) { /* Read input frame */ - int plane_size = width * height; - if( fread( pic.img.plane[0], 1, plane_size, stdin ) != plane_size ) + if( fread( pic.img.plane[0], 1, luma_size, stdin ) != luma_size ) break; - plane_size = ((width + 1) >> 1) * ((height + 1) >> 1); - if( fread( pic.img.plane[1], 1, plane_size, stdin ) != plane_size ) + if( fread( pic.img.plane[1], 1, chroma_size, stdin ) != chroma_size ) break; - if( fread( pic.img.plane[2], 1, plane_size, stdin ) != plane_size ) + if( fread( pic.img.plane[2], 1, chroma_size, 
stdin ) != chroma_size ) break; pic.i_pts = i_frame; @@ -129,7 +114,7 @@ } } /* Flush delayed frames */ - while( !b_ctrl_c && x264_encoder_delayed_frames( h ) ) + while( x264_encoder_delayed_frames( h ) ) { i_frame_size = x264_encoder_encode( h, &nal, &i_nal, NULL, &pic_out ); if( i_frame_size < 0 )
View file
x264-snapshot-20141218-2245.tar.bz2/extras/avxsynth_c.h -> x264-snapshot-20150804-2245.tar.bz2/extras/avxsynth_c.h
Changed
@@ -33,8 +33,12 @@ #ifndef __AVXSYNTH_C__ #define __AVXSYNTH_C__ -#include "windowsPorts/windows2linux.h" #include <stdarg.h> +#include <stdint.h> + +typedef int64_t INT64; +#define __stdcall +#define __declspec(x) #ifdef __cplusplus # define EXTERN_C extern "C" @@ -64,12 +68,6 @@ # endif #endif -#ifdef __GNUC__ -typedef long long int INT64; -#else -typedef __int64 INT64; -#endif - ///////////////////////////////////////////////////////////////////// //
View file
x264-snapshot-20150804-2245.tar.bz2/extras/intel_dispatcher.h
Added
@@ -0,0 +1,46 @@ +/***************************************************************************** + * intel_dispatcher.h: intel compiler cpu dispatcher override + ***************************************************************************** + * Copyright (C) 2014-2015 x264 project + * + * Authors: Anton Mitrofanov <BugMaster@narod.ru> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. + *****************************************************************************/ + +#ifndef X264_INTEL_DISPATCHER_H +#define X264_INTEL_DISPATCHER_H + +/* Feature flags using _FEATURE_* defines from immintrin.h */ +extern unsigned long long __intel_cpu_feature_indicator; +extern unsigned long long __intel_cpu_feature_indicator_x; + +/* CPU vendor independent version of dispatcher */ +void __intel_cpu_features_init_x( void ); + +static void x264_intel_dispatcher_override( void ) +{ + if( __intel_cpu_feature_indicator & ~1ULL ) + return; + __intel_cpu_feature_indicator = 0; + __intel_cpu_feature_indicator_x = 0; + __intel_cpu_features_init_x(); + __intel_cpu_feature_indicator = __intel_cpu_feature_indicator_x; +} + +#endif
View file
x264-snapshot-20141218-2245.tar.bz2/filters/filters.c -> x264-snapshot-20150804-2245.tar.bz2/filters/filters.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * filters.c: common filter functions ***************************************************************************** - * Copyright (C) 2010-2014 x264 project + * Copyright (C) 2010-2015 x264 project * * Authors: Diogo Franco <diogomfranco@gmail.com> * Steven Walters <kemuri9@gmail.com>
View file
x264-snapshot-20141218-2245.tar.bz2/filters/filters.h -> x264-snapshot-20150804-2245.tar.bz2/filters/filters.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * filters.h: common filter functions ***************************************************************************** - * Copyright (C) 2010-2014 x264 project + * Copyright (C) 2010-2015 x264 project * * Authors: Diogo Franco <diogomfranco@gmail.com> * Steven Walters <kemuri9@gmail.com>
View file
x264-snapshot-20141218-2245.tar.bz2/filters/video/cache.c -> x264-snapshot-20150804-2245.tar.bz2/filters/video/cache.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * cache.c: cache video filter ***************************************************************************** - * Copyright (C) 2010-2014 x264 project + * Copyright (C) 2010-2015 x264 project * * Authors: Steven Walters <kemuri9@gmail.com> *
View file
x264-snapshot-20141218-2245.tar.bz2/filters/video/crop.c -> x264-snapshot-20150804-2245.tar.bz2/filters/video/crop.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * crop.c: crop video filter ***************************************************************************** - * Copyright (C) 2010-2014 x264 project + * Copyright (C) 2010-2015 x264 project * * Authors: Steven Walters <kemuri9@gmail.com> * James Darnley <james.darnley@gmail.com>
View file
x264-snapshot-20141218-2245.tar.bz2/filters/video/depth.c -> x264-snapshot-20150804-2245.tar.bz2/filters/video/depth.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * depth.c: bit-depth conversion video filter ***************************************************************************** - * Copyright (C) 2010-2014 x264 project + * Copyright (C) 2010-2015 x264 project * * Authors: Oskar Arvidsson <oskar@irock.se> * @@ -50,6 +50,7 @@ csp_mask == X264_CSP_YV16 || csp_mask == X264_CSP_YV24 || csp_mask == X264_CSP_NV12 || + csp_mask == X264_CSP_NV21 || csp_mask == X264_CSP_NV16 || csp_mask == X264_CSP_BGR || csp_mask == X264_CSP_RGB || @@ -59,7 +60,7 @@ static int csp_num_interleaved( int csp, int plane ) { int csp_mask = csp & X264_CSP_MASK; - return (csp_mask == X264_CSP_NV12 || csp_mask == X264_CSP_NV16) && plane == 1 ? 2 : + return (csp_mask == X264_CSP_NV12 || csp_mask == X264_CSP_NV21 || csp_mask == X264_CSP_NV16) && plane == 1 ? 2 : csp_mask == X264_CSP_BGR || csp_mask == X264_CSP_RGB ? 3 : csp_mask == X264_CSP_BGRA ? 4 : 1; @@ -73,10 +74,10 @@ static void dither_plane_##pitch( pixel *dst, int dst_stride, uint16_t *src, int src_stride, \ int width, int height, int16_t *errors ) \ { \ - const int lshift = 16-BIT_DEPTH; \ - const int rshift = 16-BIT_DEPTH+2; \ - const int half = 1 << (16-BIT_DEPTH+1); \ - const int pixel_max = (1 << BIT_DEPTH)-1; \ + const int lshift = 16-X264_BIT_DEPTH; \ + const int rshift = 16-X264_BIT_DEPTH+2; \ + const int half = 1 << (16-X264_BIT_DEPTH+1); \ + const int pixel_max = (1 << X264_BIT_DEPTH)-1; \ memset( errors, 0, (width+1) * sizeof(int16_t) ); \ for( int y = 0; y < height; y++, src += src_stride, dst += dst_stride ) \ { \ @@ -136,7 +137,7 @@ static void scale_image( cli_image_t *output, cli_image_t *img ) { int csp_mask = img->csp & X264_CSP_MASK; - const int shift = BIT_DEPTH - 8; + const int shift = X264_BIT_DEPTH - 8; for( int i = 0; i < img->planes; i++ ) { uint8_t *src = img->plane[i]; @@ -216,7 +217,7 @@ ret = 1; } - FAIL_IF_ERROR( bit_depth != BIT_DEPTH, "this build supports only bit depth 
%d\n", BIT_DEPTH ) + FAIL_IF_ERROR( bit_depth != X264_BIT_DEPTH, "this build supports only bit depth %d\n", X264_BIT_DEPTH ) FAIL_IF_ERROR( ret, "unsupported bit depth conversion.\n" ) /* only add the filter to the chain if it's needed */
View file
x264-snapshot-20141218-2245.tar.bz2/filters/video/fix_vfr_pts.c -> x264-snapshot-20150804-2245.tar.bz2/filters/video/fix_vfr_pts.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * fix_vfr_pts.c: vfr pts fixing video filter ***************************************************************************** - * Copyright (C) 2010-2014 x264 project + * Copyright (C) 2010-2015 x264 project * * Authors: Steven Walters <kemuri9@gmail.com> *
View file
x264-snapshot-20141218-2245.tar.bz2/filters/video/internal.c -> x264-snapshot-20150804-2245.tar.bz2/filters/video/internal.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * internal.c: video filter utilities ***************************************************************************** - * Copyright (C) 2010-2014 x264 project + * Copyright (C) 2010-2015 x264 project * * Authors: Steven Walters <kemuri9@gmail.com> *
View file
x264-snapshot-20141218-2245.tar.bz2/filters/video/internal.h -> x264-snapshot-20150804-2245.tar.bz2/filters/video/internal.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * internal.h: video filter utilities ***************************************************************************** - * Copyright (C) 2010-2014 x264 project + * Copyright (C) 2010-2015 x264 project * * Authors: Steven Walters <kemuri9@gmail.com> *
View file
x264-snapshot-20141218-2245.tar.bz2/filters/video/resize.c -> x264-snapshot-20150804-2245.tar.bz2/filters/video/resize.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * resize.c: resize video filter ***************************************************************************** - * Copyright (C) 2010-2014 x264 project + * Copyright (C) 2010-2015 x264 project * * Authors: Steven Walters <kemuri9@gmail.com> * @@ -156,6 +156,7 @@ case X264_CSP_BGRA: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_BGRA64 : AV_PIX_FMT_BGRA; /* the next csp has no equivalent 16bit depth in swscale */ case X264_CSP_NV12: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_NONE : AV_PIX_FMT_NV12; + case X264_CSP_NV21: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_NONE : AV_PIX_FMT_NV21; /* the next csp is no supported by swscale at all */ case X264_CSP_NV16: default: return AV_PIX_FMT_NONE;
View file
x264-snapshot-20141218-2245.tar.bz2/filters/video/select_every.c -> x264-snapshot-20150804-2245.tar.bz2/filters/video/select_every.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * select_every.c: select-every video filter ***************************************************************************** - * Copyright (C) 2010-2014 x264 project + * Copyright (C) 2010-2015 x264 project * * Authors: Steven Walters <kemuri9@gmail.com> *
View file
x264-snapshot-20141218-2245.tar.bz2/filters/video/source.c -> x264-snapshot-20150804-2245.tar.bz2/filters/video/source.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * source.c: source video filter ***************************************************************************** - * Copyright (C) 2010-2014 x264 project + * Copyright (C) 2010-2015 x264 project * * Authors: Steven Walters <kemuri9@gmail.com> *
View file
x264-snapshot-20141218-2245.tar.bz2/filters/video/video.c -> x264-snapshot-20150804-2245.tar.bz2/filters/video/video.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * video.c: video filters ***************************************************************************** - * Copyright (C) 2010-2014 x264 project + * Copyright (C) 2010-2015 x264 project * * Authors: Steven Walters <kemuri9@gmail.com> *
View file
x264-snapshot-20141218-2245.tar.bz2/filters/video/video.h -> x264-snapshot-20150804-2245.tar.bz2/filters/video/video.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * video.h: video filters ***************************************************************************** - * Copyright (C) 2010-2014 x264 project + * Copyright (C) 2010-2015 x264 project * * Authors: Steven Walters <kemuri9@gmail.com> *
View file
x264-snapshot-20141218-2245.tar.bz2/input/avs.c -> x264-snapshot-20150804-2245.tar.bz2/input/avs.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * avs.c: avisynth input ***************************************************************************** - * Copyright (C) 2009-2014 x264 project + * Copyright (C) 2009-2015 x264 project * * Authors: Steven Walters <kemuri9@gmail.com> * @@ -27,15 +27,15 @@ #if USE_AVXSYNTH #include <dlfcn.h> #if SYS_MACOSX -#define avs_open dlopen( "libavxsynth.dylib", RTLD_NOW ) +#define avs_open() dlopen( "libavxsynth.dylib", RTLD_NOW ) #else -#define avs_open dlopen( "libavxsynth.so", RTLD_NOW ) +#define avs_open() dlopen( "libavxsynth.so", RTLD_NOW ) #endif #define avs_close dlclose #define avs_address dlsym #else #include <windows.h> -#define avs_open LoadLibraryW( L"avisynth" ) +#define avs_open() LoadLibraryW( L"avisynth" ) #define avs_close FreeLibrary #define avs_address GetProcAddress #endif @@ -80,7 +80,7 @@ { AVS_Clip *clip; AVS_ScriptEnvironment *env; - HMODULE library; + void *library; int num_frames; struct { @@ -102,7 +102,7 @@ /* load the library and functions we require from it */ static int x264_avs_load_library( avs_hnd_t *h ) { - h->library = avs_open; + h->library = avs_open(); if( !h->library ) return -1; LOAD_AVS_FUNC( avs_clip_get_error, 0 ); @@ -175,8 +175,9 @@ FILE *fh = x264_fopen( psz_filename, "r" ); if( !fh ) return -1; - FAIL_IF_ERROR( !x264_is_regular_file( fh ), "AVS input is incompatible with non-regular file `%s'\n", psz_filename ); + int b_regular = x264_is_regular_file( fh ); fclose( fh ); + FAIL_IF_ERROR( !b_regular, "AVS input is incompatible with non-regular file `%s'\n", psz_filename ); avs_hnd_t *h = malloc( sizeof(avs_hnd_t) ); if( !h )
View file
x264-snapshot-20141218-2245.tar.bz2/input/ffms.c -> x264-snapshot-20150804-2245.tar.bz2/input/ffms.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * ffms.c: ffmpegsource input ***************************************************************************** - * Copyright (C) 2009-2014 x264 project + * Copyright (C) 2009-2015 x264 project * * Authors: Mike Gurlitz <mike.gurlitz@gmail.com> * Steven Walters <kemuri9@gmail.com>
View file
x264-snapshot-20141218-2245.tar.bz2/input/input.c -> x264-snapshot-20150804-2245.tar.bz2/input/input.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * input.c: common input functions ***************************************************************************** - * Copyright (C) 2010-2014 x264 project + * Copyright (C) 2010-2015 x264 project * * Authors: Steven Walters <kemuri9@gmail.com> * @@ -33,6 +33,7 @@ [X264_CSP_YV16] = { "yv16", 3, { 1, .5, .5 }, { 1, 1, 1 }, 2, 1 }, [X264_CSP_YV24] = { "yv24", 3, { 1, 1, 1 }, { 1, 1, 1 }, 1, 1 }, [X264_CSP_NV12] = { "nv12", 2, { 1, 1 }, { 1, .5 }, 2, 2 }, + [X264_CSP_NV21] = { "nv21", 2, { 1, 1 }, { 1, .5 }, 2, 2 }, [X264_CSP_NV16] = { "nv16", 2, { 1, 1 }, { 1, 1 }, 2, 1 }, [X264_CSP_BGR] = { "bgr", 1, { 3 }, { 1 }, 1, 1 }, [X264_CSP_BGRA] = { "bgra", 1, { 4 }, { 1 }, 1, 1 },
View file
x264-snapshot-20141218-2245.tar.bz2/input/input.h -> x264-snapshot-20150804-2245.tar.bz2/input/input.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * input.h: file input ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu>
View file
x264-snapshot-20141218-2245.tar.bz2/input/lavf.c -> x264-snapshot-20150804-2245.tar.bz2/input/lavf.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * lavf.c: libavformat input ***************************************************************************** - * Copyright (C) 2009-2014 x264 project + * Copyright (C) 2009-2015 x264 project * * Authors: Mike Gurlitz <mike.gurlitz@gmail.com> * Steven Walters <kemuri9@gmail.com>
View file
x264-snapshot-20141218-2245.tar.bz2/input/raw.c -> x264-snapshot-20150804-2245.tar.bz2/input/raw.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * raw.c: raw input ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu>
View file
x264-snapshot-20141218-2245.tar.bz2/input/thread.c -> x264-snapshot-20150804-2245.tar.bz2/input/thread.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * thread.c: threaded input ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu>
View file
x264-snapshot-20141218-2245.tar.bz2/input/timecode.c -> x264-snapshot-20150804-2245.tar.bz2/input/timecode.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * timecode.c: timecode file input ***************************************************************************** - * Copyright (C) 2010-2014 x264 project + * Copyright (C) 2010-2015 x264 project * * Authors: Yusuke Nakamura <muken.the.vfrmaniac@gmail.com> *
View file
x264-snapshot-20141218-2245.tar.bz2/input/y4m.c -> x264-snapshot-20150804-2245.tar.bz2/input/y4m.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * y4m.c: y4m input ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu>
View file
x264-snapshot-20141218-2245.tar.bz2/output/flv.c -> x264-snapshot-20150804-2245.tar.bz2/output/flv.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * flv.c: flv muxer ***************************************************************************** - * Copyright (C) 2009-2014 x264 project + * Copyright (C) 2009-2015 x264 project * * Authors: Kieran Kunhya <kieran@kunhya.com> * @@ -75,21 +75,29 @@ static int open_file( char *psz_filename, hnd_t *p_handle, cli_output_opt_t *opt ) { - *p_handle = NULL; flv_hnd_t *p_flv = calloc( 1, sizeof(flv_hnd_t) ); - if( !p_flv ) - return -1; - - p_flv->b_dts_compress = opt->use_dts_compress; - - p_flv->c = flv_create_writer( psz_filename ); - if( !p_flv->c ) - return -1; - - CHECK( write_header( p_flv->c ) ); - *p_handle = p_flv; + if( p_flv ) + { + flv_buffer *c = flv_create_writer( psz_filename ); + if( c ) + { + if( !write_header( c ) ) + { + p_flv->c = c; + p_flv->b_dts_compress = opt->use_dts_compress; + *p_handle = p_flv; + return 0; + } + + fclose( c->fp ); + free( c->data ); + free( c ); + } + free( p_flv ); + } - return 0; + *p_handle = NULL; + return -1; } static int set_param( hnd_t handle, x264_param_t *p_param ) @@ -293,15 +301,22 @@ return i_size; } -static void rewrite_amf_double( FILE *fp, uint64_t position, double value ) +static int rewrite_amf_double( FILE *fp, uint64_t position, double value ) { uint64_t x = endian_fix64( flv_dbl2int( value ) ); - fseek( fp, position, SEEK_SET ); - fwrite( &x, 8, 1, fp ); + return !fseek( fp, position, SEEK_SET ) && fwrite( &x, 8, 1, fp ) == 1 ? 
0 : -1; } +#undef CHECK +#define CHECK(x)\ +do {\ + if( (x) < 0 )\ + goto error;\ +} while( 0 ) + static int close_file( hnd_t handle, int64_t largest_pts, int64_t second_largest_pts ) { + int ret = -1; flv_hnd_t *p_flv = handle; flv_buffer *c = p_flv->c; @@ -317,19 +332,22 @@ if( p_flv->i_framerate_pos ) { framerate = (double)p_flv->i_framenum / total_duration; - rewrite_amf_double( c->fp, p_flv->i_framerate_pos, framerate ); + CHECK( rewrite_amf_double( c->fp, p_flv->i_framerate_pos, framerate ) ); } - rewrite_amf_double( c->fp, p_flv->i_duration_pos, total_duration ); - rewrite_amf_double( c->fp, p_flv->i_filesize_pos, filesize ); - rewrite_amf_double( c->fp, p_flv->i_bitrate_pos, filesize * 8 / ( total_duration * 1000 ) ); + CHECK( rewrite_amf_double( c->fp, p_flv->i_duration_pos, total_duration ) ); + CHECK( rewrite_amf_double( c->fp, p_flv->i_filesize_pos, filesize ) ); + CHECK( rewrite_amf_double( c->fp, p_flv->i_bitrate_pos, filesize * 8 / ( total_duration * 1000 ) ) ); } + ret = 0; +error: fclose( c->fp ); - free( p_flv ); + free( c->data ); free( c ); + free( p_flv ); - return 0; + return ret; } const cli_output_t flv_output = { open_file, set_param, write_headers, write_frame, close_file };
View file
x264-snapshot-20141218-2245.tar.bz2/output/flv_bytestream.c -> x264-snapshot-20150804-2245.tar.bz2/output/flv_bytestream.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * flv_bytestream.c: flv muxer utilities ***************************************************************************** - * Copyright (C) 2009-2014 x264 project + * Copyright (C) 2009-2015 x264 project * * Authors: Kieran Kunhya <kieran@kunhya.com> *
View file
x264-snapshot-20141218-2245.tar.bz2/output/flv_bytestream.h -> x264-snapshot-20150804-2245.tar.bz2/output/flv_bytestream.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * flv_bytestream.h: flv muxer utilities ***************************************************************************** - * Copyright (C) 2009-2014 x264 project + * Copyright (C) 2009-2015 x264 project * * Authors: Kieran Kunhya <kieran@kunhya.com> *
View file
x264-snapshot-20141218-2245.tar.bz2/output/matroska.c -> x264-snapshot-20150804-2245.tar.bz2/output/matroska.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * matroska.c: matroska muxer ***************************************************************************** - * Copyright (C) 2005-2014 x264 project + * Copyright (C) 2005-2015 x264 project * * Authors: Mike Matsnev <mike@haali.su> * @@ -62,9 +62,14 @@ return 0; } +#define STEREO_COUNT 7 +static const uint8_t stereo_modes[STEREO_COUNT] = {5,9,7,1,3,13,0}; +static const uint8_t stereo_w_div[STEREO_COUNT] = {1,2,1,2,1,1,1}; +static const uint8_t stereo_h_div[STEREO_COUNT] = {1,1,2,1,2,1,1}; + static int set_param( hnd_t handle, x264_param_t *p_param ) { - mkv_hnd_t *p_mkv = handle; + mkv_hnd_t *p_mkv = handle; int64_t dw, dh; if( p_param->i_fps_num > 0 && !p_param->b_vfr_input ) @@ -77,25 +82,27 @@ p_mkv->frame_duration = 0; } - p_mkv->width = p_mkv->d_width = p_param->i_width; - p_mkv->height = p_mkv->d_height = p_param->i_height; + dw = p_mkv->width = p_param->i_width; + dh = p_mkv->height = p_param->i_height; p_mkv->display_size_units = DS_PIXELS; - p_mkv->stereo_mode = p_param->i_frame_packing; - + p_mkv->stereo_mode = -1; + if( p_param->i_frame_packing >= 0 && p_param->i_frame_packing < STEREO_COUNT ) + { + p_mkv->stereo_mode = stereo_modes[p_param->i_frame_packing]; + dw /= stereo_w_div[p_param->i_frame_packing]; + dh /= stereo_h_div[p_param->i_frame_packing]; + } if( p_param->vui.i_sar_width && p_param->vui.i_sar_height && p_param->vui.i_sar_width != p_param->vui.i_sar_height ) { if ( p_param->vui.i_sar_width > p_param->vui.i_sar_height ) { - dw = (int64_t)p_param->i_width * p_param->vui.i_sar_width / p_param->vui.i_sar_height; - dh = p_param->i_height; + dw = dw * p_param->vui.i_sar_width / p_param->vui.i_sar_height; } else { - dw = p_param->i_width; - dh = (int64_t)p_param->i_height * p_param->vui.i_sar_height / p_param->vui.i_sar_width; + dh = dh * p_param->vui.i_sar_height / p_param->vui.i_sar_width; } - - p_mkv->d_width = (int)dw; - p_mkv->d_height = (int)dh; } + 
p_mkv->d_width = (int)dw; + p_mkv->d_height = (int)dh; p_mkv->i_timebase_num = p_param->i_timebase_num; p_mkv->i_timebase_den = p_param->i_timebase_den; @@ -150,11 +157,11 @@ avcC, avcC_len, p_mkv->frame_duration, 50000, p_mkv->width, p_mkv->height, p_mkv->d_width, p_mkv->d_height, p_mkv->display_size_units, p_mkv->stereo_mode ); + free( avcC ); + if( ret < 0 ) return ret; - free( avcC ); - // SEI if( !p_mkv->b_writing_frame )
View file
x264-snapshot-20141218-2245.tar.bz2/output/matroska_ebml.c -> x264-snapshot-20150804-2245.tar.bz2/output/matroska_ebml.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * matroska_ebml.c: matroska muxer utilities ***************************************************************************** - * Copyright (C) 2005-2014 x264 project + * Copyright (C) 2005-2015 x264 project * * Authors: Mike Matsnev <mike@haali.su> * @@ -317,8 +317,6 @@ return w; } -static const uint8_t mk_stereo_modes[6] = {5,9,7,1,3,13}; - int mk_write_header( mk_writer *w, const char *writing_app, const char *codec_id, const void *codec_private, unsigned codec_private_size, @@ -342,7 +340,7 @@ CHECK( mk_write_uint( c, 0x42f2, 4 ) ); // EBMLMaxIDLength CHECK( mk_write_uint( c, 0x42f3, 8 ) ); // EBMLMaxSizeLength CHECK( mk_write_string( c, 0x4282, "matroska") ); // DocType - CHECK( mk_write_uint( c, 0x4287, 2 ) ); // DocTypeVersion + CHECK( mk_write_uint( c, 0x4287, stereo_mode >= 0 ? 3 : 2 ) ); // DocTypeVersion CHECK( mk_write_uint( c, 0x4285, 2 ) ); // DocTypeReadversion CHECK( mk_close_context( c, 0 ) ); @@ -381,8 +379,8 @@ CHECK( mk_write_uint( v, 0x54b2, display_size_units ) ); CHECK( mk_write_uint( v, 0x54b0, d_width ) ); CHECK( mk_write_uint( v, 0x54ba, d_height ) ); - if( stereo_mode >= 0 && stereo_mode <= 5 ) - CHECK( mk_write_uint( v, 0x53b8, mk_stereo_modes[stereo_mode] ) ); + if( stereo_mode >= 0 ) + CHECK( mk_write_uint( v, 0x53b8, stereo_mode ) ); CHECK( mk_close_context( v, 0 ) ); CHECK( mk_close_context( ti, 0 ) );
View file
x264-snapshot-20141218-2245.tar.bz2/output/matroska_ebml.h -> x264-snapshot-20150804-2245.tar.bz2/output/matroska_ebml.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * matroska_ebml.h: matroska muxer utilities ***************************************************************************** - * Copyright (C) 2005-2014 x264 project + * Copyright (C) 2005-2015 x264 project * * Authors: Mike Matsnev <mike@haali.su> * @@ -27,10 +27,10 @@ #define X264_MATROSKA_EBML_H /* Matroska display size units from the spec */ -#define DS_PIXELS 0 -#define DS_CM 1 -#define DS_INCHES 2 -#define DS_ASPECT_RATIO 3 +#define DS_PIXELS 0 +#define DS_CM 1 +#define DS_INCHES 2 +#define DS_ASPECT_RATIO 3 typedef struct mk_writer mk_writer;
View file
x264-snapshot-20141218-2245.tar.bz2/output/mp4.c -> x264-snapshot-20150804-2245.tar.bz2/output/mp4.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * mp4.c: mp4 muxer ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu> @@ -169,8 +169,9 @@ FILE *fh = x264_fopen( psz_filename, "w" ); if( !fh ) return -1; - FAIL_IF_ERR( !x264_is_regular_file( fh ), "mp4", "MP4 output is incompatible with non-regular file `%s'\n", psz_filename ) + int b_regular = x264_is_regular_file( fh ); fclose( fh ); + FAIL_IF_ERR( !b_regular, "mp4", "MP4 output is incompatible with non-regular file `%s'\n", psz_filename ) mp4_hnd_t *p_mp4 = calloc( 1, sizeof(mp4_hnd_t) ); if( !p_mp4 )
View file
x264-snapshot-20141218-2245.tar.bz2/output/mp4_lsmash.c -> x264-snapshot-20150804-2245.tar.bz2/output/mp4_lsmash.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * mp4_lsmash.c: mp4 muxer using L-SMASH ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu>
View file
x264-snapshot-20141218-2245.tar.bz2/output/output.h -> x264-snapshot-20150804-2245.tar.bz2/output/output.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * output.h: x264 file output modules ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu>
View file
x264-snapshot-20141218-2245.tar.bz2/output/raw.c -> x264-snapshot-20150804-2245.tar.bz2/output/raw.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * raw.c: raw muxer ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu>
View file
x264-snapshot-20141218-2245.tar.bz2/tools/checkasm-a.asm -> x264-snapshot-20150804-2245.tar.bz2/tools/checkasm-a.asm
Changed
@@ -1,7 +1,7 @@ ;***************************************************************************** ;* checkasm-a.asm: assembly check tool ;***************************************************************************** -;* Copyright (C) 2008-2014 x264 project +;* Copyright (C) 2008-2015 x264 project ;* ;* Authors: Loren Merritt <lorenm@u.washington.edu> ;* Henrik Gramner <henrik@gramner.com> @@ -33,24 +33,24 @@ %if ARCH_X86_64 ; just random numbers to reduce the chance of incidental match ALIGN 16 -x6: ddq 0x79445c159ce790641a1b2550a612b48c -x7: ddq 0x86b2536fcd8cf6362eed899d5a28ddcd -x8: ddq 0x3f2bf84fc0fcca4eb0856806085e7943 -x9: ddq 0xd229e1f5b281303facbd382dcf5b8de2 -x10: ddq 0xab63e2e11fa38ed971aeaff20b095fd9 -x11: ddq 0x77d410d5c42c882d89b0c0765892729a -x12: ddq 0x24b3c1d2a024048bc45ea11a955d8dd5 -x13: ddq 0xdd7b8919edd427862e8ec680de14b47c -x14: ddq 0x11e53e2b2ac655ef135ce6888fa02cbf -x15: ddq 0x6de8f4c914c334d5011ff554472a7a10 -n7: dq 0x21f86d66c8ca00ce -n8: dq 0x75b6ba21077c48ad -n9: dq 0xed56bb2dcb3c7736 -n10: dq 0x8bda43d3fd1a7e06 -n11: dq 0xb64a9c9e5d318408 -n12: dq 0xdf9a54b303f1d3a3 -n13: dq 0x4a75479abd64e097 -n14: dq 0x249214109d5d1c88 +x6: dq 0x1a1b2550a612b48c,0x79445c159ce79064 +x7: dq 0x2eed899d5a28ddcd,0x86b2536fcd8cf636 +x8: dq 0xb0856806085e7943,0x3f2bf84fc0fcca4e +x9: dq 0xacbd382dcf5b8de2,0xd229e1f5b281303f +x10: dq 0x71aeaff20b095fd9,0xab63e2e11fa38ed9 +x11: dq 0x89b0c0765892729a,0x77d410d5c42c882d +x12: dq 0xc45ea11a955d8dd5,0x24b3c1d2a024048b +x13: dq 0x2e8ec680de14b47c,0xdd7b8919edd42786 +x14: dq 0x135ce6888fa02cbf,0x11e53e2b2ac655ef +x15: dq 0x011ff554472a7a10,0x6de8f4c914c334d5 +n7: dq 0x21f86d66c8ca00ce +n8: dq 0x75b6ba21077c48ad +n9: dq 0xed56bb2dcb3c7736 +n10: dq 0x8bda43d3fd1a7e06 +n11: dq 0xb64a9c9e5d318408 +n12: dq 0xdf9a54b303f1d3a3 +n13: dq 0x4a75479abd64e097 +n14: dq 0x249214109d5d1c88 %endif SECTION .text
View file
x264-snapshot-20141218-2245.tar.bz2/tools/checkasm.c -> x264-snapshot-20150804-2245.tar.bz2/tools/checkasm.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * checkasm.c: assembly check tool ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Laurent Aimar <fenrir@via.ecp.fr> @@ -97,6 +97,12 @@ asm volatile( "mftb %0" : "=r"(a) :: "memory" ); #elif ARCH_ARM // ARMv7 only asm volatile( "mrc p15, 0, %0, c9, c13, 0" : "=r"(a) :: "memory" ); +#elif ARCH_AARCH64 + uint64_t b = 0; + asm volatile( "mrs %0, pmccntr_el0" : "=r"(b) :: "memory" ); + a = b; +#elif ARCH_MIPS + asm volatile( "rdhwr %0, $2" : "=r"(a) :: "memory" ); #endif return a; } @@ -167,12 +173,12 @@ continue; printf( "%s_%s%s: %"PRId64"\n", benchs[i].name, #if HAVE_MMX - b->cpu&X264_CPU_AVX2 && b->cpu&X264_CPU_FMA3 ? "avx2_fma3" : b->cpu&X264_CPU_AVX2 ? "avx2" : b->cpu&X264_CPU_FMA3 ? "fma3" : b->cpu&X264_CPU_FMA4 ? "fma4" : b->cpu&X264_CPU_XOP ? "xop" : b->cpu&X264_CPU_AVX ? "avx" : + b->cpu&X264_CPU_SSE42 ? "sse42" : b->cpu&X264_CPU_SSE4 ? "sse4" : b->cpu&X264_CPU_SSSE3 ? "ssse3" : b->cpu&X264_CPU_SSE3 ? "sse3" : @@ -189,6 +195,8 @@ #elif ARCH_AARCH64 b->cpu&X264_CPU_NEON ? "neon" : b->cpu&X264_CPU_ARMV8 ? "armv8" : +#elif ARCH_MIPS + b->cpu&X264_CPU_MSA ? 
"msa" : #endif "c", #if HAVE_MMX @@ -637,7 +645,7 @@ } \ predict_8x8[res_c>>16]( fdec1, edge ); \ int res_a = call_a( pixel_asm.name, fenc, fdec2, edge, bitcosts+8-pred_mode, satds_a ); \ - if( res_c != res_a || memcmp(satds_c, satds_a, sizeof(satds_c)) ) \ + if( res_c != res_a || memcmp(satds_c, satds_a, 16 * sizeof(*satds_c)) ) \ { \ ok = 0; \ fprintf( stderr, #name": %d,%d != %d,%d [FAILED]\n", res_c>>16, res_c&0xffff, res_a>>16, res_a&0xffff ); \ @@ -1409,6 +1417,32 @@ } } + if( mc_a.plane_copy_swap != mc_ref.plane_copy_swap ) + { + set_func_name( "plane_copy_swap" ); + used_asm = 1; + for( int i = 0; i < sizeof(plane_specs)/sizeof(*plane_specs); i++ ) + { + int w = (plane_specs[i].w + 1) >> 1; + int h = plane_specs[i].h; + intptr_t src_stride = plane_specs[i].src_stride; + intptr_t dst_stride = (2*w + 127) & ~63; + assert( dst_stride * h <= 0x1000 ); + pixel *src1 = pbuf1 + X264_MAX(0, -src_stride) * (h-1); + memset( pbuf3, 0, 0x1000*sizeof(pixel) ); + memset( pbuf4, 0, 0x1000*sizeof(pixel) ); + call_c( mc_c.plane_copy_swap, pbuf3, dst_stride, src1, src_stride, w, h ); + call_a( mc_a.plane_copy_swap, pbuf4, dst_stride, src1, src_stride, w, h ); + for( int y = 0; y < h; y++ ) + if( memcmp( pbuf3+y*dst_stride, pbuf4+y*dst_stride, 2*w*sizeof(pixel) ) ) + { + ok = 0; + fprintf( stderr, "plane_copy_swap FAILED: w=%d h=%d stride=%d\n", w, h, (int)src_stride ); + break; + } + } + } + if( mc_a.plane_copy_interleave != mc_ref.plane_copy_interleave ) { set_func_name( "plane_copy_interleave" ); @@ -1496,7 +1530,7 @@ if( mc_a.plane_copy_deinterleave_v210 != mc_ref.plane_copy_deinterleave_v210 ) { set_func_name( "plane_copy_deinterleave_v210" ); - used_asm = 1; + ok = 1; used_asm = 1; for( int i = 0; i < sizeof(plane_specs)/sizeof(*plane_specs); i++ ) { int w = (plane_specs[i].w + 1) >> 1; @@ -1517,8 +1551,8 @@ break; } } + report( "v210 :" ); } - report( "v210 :" ); if( mc_a.hpel_filter != mc_ref.hpel_filter ) { @@ -2311,12 +2345,16 @@ {\ fprintf( stderr, #name "[%d] : 
[FAILED]\n", dir );\ ok = 0;\ - for( int k = -1; k < 16; k++ )\ - printf( "%2x ", edge[16+k] );\ - printf( "\n" );\ + if( ip_c.name == (void *)ip_c.predict_8x8 )\ + {\ + for( int k = -1; k < 16; k++ )\ + printf( "%2x ", edge[16+k] );\ + printf( "\n" );\ + }\ for( int j = 0; j < h; j++ )\ {\ - printf( "%2x ", edge[14-j] );\ + if( ip_c.name == (void *)ip_c.predict_8x8 )\ + printf( "%2x ", edge[14-j] );\ for( int k = 0; k < w; k++ )\ printf( "%2x ", pbuf4[48+k+j*FDEC_STRIDE] );\ printf( "\n" );\ @@ -2324,7 +2362,8 @@ printf( "\n" );\ for( int j = 0; j < h; j++ )\ {\ - printf( " " );\ + if( ip_c.name == (void *)ip_c.predict_8x8 )\ + printf( " " );\ for( int k = 0; k < w; k++ )\ printf( "%2x ", pbuf3[48+k+j*FDEC_STRIDE] );\ printf( "\n" );\ @@ -2428,6 +2467,8 @@ DECL_CABAC(c) #if HAVE_MMX DECL_CABAC(asm) +#elif defined(ARCH_AARCH64) +DECL_CABAC(asm) #else #define run_cabac_decision_asm run_cabac_decision_c #define run_cabac_bypass_asm run_cabac_bypass_c @@ -2646,7 +2687,7 @@ #endif if( cpu_detect & X264_CPU_LZCNT ) { - ret |= add_flags( &cpu0, &cpu1, X264_CPU_LZCNT, "MMX_LZCNT" ); + ret |= add_flags( &cpu0, &cpu1, X264_CPU_LZCNT, "MMX LZCNT" ); cpu1 &= ~X264_CPU_LZCNT; } ret |= add_flags( &cpu0, &cpu1, X264_CPU_SLOW_CTZ, "MMX SlowCTZ" ); @@ -2664,11 +2705,11 @@ cpu1 &= ~X264_CPU_SLOW_SHUFFLE; ret |= add_flags( &cpu0, &cpu1, X264_CPU_SLOW_CTZ, "SSE2 SlowCTZ" ); cpu1 &= ~X264_CPU_SLOW_CTZ; - } - if( cpu_detect & X264_CPU_LZCNT ) - { - ret |= add_flags( &cpu0, &cpu1, X264_CPU_LZCNT, "SSE_LZCNT" ); - cpu1 &= ~X264_CPU_LZCNT; + if( cpu_detect & X264_CPU_LZCNT ) + { + ret |= add_flags( &cpu0, &cpu1, X264_CPU_LZCNT, "SSE2 LZCNT" ); + cpu1 &= ~X264_CPU_LZCNT; + } } if( cpu_detect & X264_CPU_SSE3 ) { @@ -2688,9 +2729,16 @@ ret |= add_flags( &cpu0, &cpu1, X264_CPU_CACHELINE_64, "SSSE3 Cache64 SlowAtom" ); cpu1 &= ~X264_CPU_CACHELINE_64; cpu1 &= ~X264_CPU_SLOW_ATOM; + if( cpu_detect & X264_CPU_LZCNT ) + { + ret |= add_flags( &cpu0, &cpu1, X264_CPU_LZCNT, "SSSE3 LZCNT" ); + cpu1 &= 
~X264_CPU_LZCNT; + } } if( cpu_detect & X264_CPU_SSE4 ) ret |= add_flags( &cpu0, &cpu1, X264_CPU_SSE4, "SSE4" ); + if( cpu_detect & X264_CPU_SSE42 ) + ret |= add_flags( &cpu0, &cpu1, X264_CPU_SSE42, "SSE4.2" ); if( cpu_detect & X264_CPU_AVX ) ret |= add_flags( &cpu0, &cpu1, X264_CPU_AVX, "AVX" ); if( cpu_detect & X264_CPU_XOP ) @@ -2700,30 +2748,30 @@ ret |= add_flags( &cpu0, &cpu1, X264_CPU_FMA4, "FMA4" ); cpu1 &= ~X264_CPU_FMA4; } - if( cpu_detect & X264_CPU_BMI1 ) + if( cpu_detect & X264_CPU_FMA3 ) { - ret |= add_flags( &cpu0, &cpu1, X264_CPU_BMI1, "BMI1" ); - cpu1 &= ~X264_CPU_BMI1; + ret |= add_flags( &cpu0, &cpu1, X264_CPU_FMA3, "FMA3" ); + cpu1 &= ~X264_CPU_FMA3;
View file
x264-snapshot-20150804-2245.tar.bz2/tools/gas-preprocessor.pl
Added
@@ -0,0 +1,1033 @@ +#!/usr/bin/env perl +# by David Conrad +# This code is licensed under GPLv2 or later; go to gnu.org to read it +# (not that it much matters for an asm preprocessor) +# usage: set your assembler to be something like "perl gas-preprocessor.pl gcc" +use strict; + +# Apple's gas is ancient and doesn't support modern preprocessing features like +# .rept and has ugly macro syntax, among other things. Thus, this script +# implements the subset of the gas preprocessor used by x264 and ffmpeg +# that isn't supported by Apple's gas. + +my %canonical_arch = ("aarch64" => "aarch64", "arm64" => "aarch64", + "arm" => "arm", + "powerpc" => "powerpc", "ppc" => "powerpc"); + +my %comments = ("aarch64" => '//', + "arm" => '@', + "powerpc" => '#'); + +my @gcc_cmd; +my @preprocess_c_cmd; + +my $comm; +my $arch; +my $as_type = "apple-gas"; + +my $fix_unreq = $^O eq "darwin"; +my $force_thumb = 0; + +my $arm_cond_codes = "eq|ne|cs|cc|mi|pl|vs|vc|hi|ls|ge|lt|gt|le|al|hs|lo"; + +my $usage_str = " +$0\n +Gas-preprocessor.pl converts assembler files using modern GNU as syntax for +Apple's ancient gas version or clang's incompatible integrated assembler. The +conversion is regularly tested for Libav, x264 and vlc. Other projects might +use different features which are not correctly handled. + +Options for this program needs to be separated with ' -- ' from the assembler +command. 
Following options are currently supported: + + -help - this usage text + -arch - target architecture + -as-type - one value out of {{,apple-}{gas,clang},armasm} + -fix-unreq + -no-fix-unreq + -force-thumb - assemble as thumb regardless of the input source + (note, this is incomplete and only works for sources + it explicitly was tested with) +"; + +sub usage() { + print $usage_str; +} + +while (@ARGV) { + my $opt = shift; + + if ($opt =~ /^-(no-)?fix-unreq$/) { + $fix_unreq = $1 ne "no-"; + } elsif ($opt eq "-force-thumb") { + $force_thumb = 1; + } elsif ($opt eq "-arch") { + $arch = shift; + die "unknown arch: '$arch'\n" if not exists $comments{$arch}; + } elsif ($opt eq "-as-type") { + $as_type = shift; + die "unknown as type: '$as_type'\n" if $as_type !~ /^((apple-)?(gas|clang)|armasm)$/; + } elsif ($opt eq "-help") { + usage(); + exit 0; + } elsif ($opt eq "--" ) { + @gcc_cmd = @ARGV; + } elsif ($opt =~ /^-/) { + die "option '$opt' is not known. See '$0 -help' for usage information\n"; + } else { + push @gcc_cmd, $opt, @ARGV; + } + last if (@gcc_cmd); +} + +if (grep /\.c$/, @gcc_cmd) { + # C file (inline asm?) - compile + @preprocess_c_cmd = (@gcc_cmd, "-S"); +} elsif (grep /\.[sS]$/, @gcc_cmd) { + # asm file, just do C preprocessor + @preprocess_c_cmd = (@gcc_cmd, "-E"); +} elsif (grep /-(v|h|-version|dumpversion)/, @gcc_cmd) { + # pass -v/--version along, used during probing. Matching '-v' might have + # uninteded results but it doesn't matter much if gas-preprocessor or + # the compiler fails. + exec(@gcc_cmd); +} else { + die "Unrecognized input filetype"; +} +if ($as_type eq "armasm") { + + $preprocess_c_cmd[0] = "cpp"; + push(@preprocess_c_cmd, "-U__ELF__"); + push(@preprocess_c_cmd, "-U__MACH__"); + + @preprocess_c_cmd = grep ! 
/^-nologo$/, @preprocess_c_cmd; + # Remove -ignore XX parameter pairs from preprocess_c_cmd + my $index = 1; + while ($index < $#preprocess_c_cmd) { + if ($preprocess_c_cmd[$index] eq "-ignore" and $index + 1 < $#preprocess_c_cmd) { + splice(@preprocess_c_cmd, $index, 2); + next; + } + $index++; + } + if (grep /^-MM$/, @preprocess_c_cmd) { + system(@preprocess_c_cmd) == 0 or die "Error running preprocessor"; + exit 0; + } +} + +# if compiling, avoid creating an output file named '-.o' +if ((grep /^-c$/, @gcc_cmd) && !(grep /^-o/, @gcc_cmd)) { + foreach my $i (@gcc_cmd) { + if ($i =~ /\.[csS]$/) { + my $outputfile = $i; + $outputfile =~ s/\.[csS]$/.o/; + push(@gcc_cmd, "-o"); + push(@gcc_cmd, $outputfile); + last; + } + } +} +# replace only the '-o' argument with '-', avoids rewriting the make dependency +# target specified with -MT to '-' +my $index = 1; +while ($index < $#preprocess_c_cmd) { + if ($preprocess_c_cmd[$index] eq "-o") { + $index++; + $preprocess_c_cmd[$index] = "-"; + } + $index++; +} + +my $tempfile; +if ($as_type ne "armasm") { + @gcc_cmd = map { /\.[csS]$/ ? qw(-x assembler -) : $_ } @gcc_cmd; +} else { + @preprocess_c_cmd = grep ! /^-c$/, @preprocess_c_cmd; + @preprocess_c_cmd = grep ! /^-m/, @preprocess_c_cmd; + + @preprocess_c_cmd = grep ! /^-G/, @preprocess_c_cmd; + @preprocess_c_cmd = grep ! /^-W/, @preprocess_c_cmd; + @preprocess_c_cmd = grep ! /^-Z/, @preprocess_c_cmd; + @preprocess_c_cmd = grep ! /^-fp/, @preprocess_c_cmd; + @preprocess_c_cmd = grep ! /^-EHsc$/, @preprocess_c_cmd; + @preprocess_c_cmd = grep ! /^-O/, @preprocess_c_cmd; + + @gcc_cmd = grep ! /^-G/, @gcc_cmd; + @gcc_cmd = grep ! /^-W/, @gcc_cmd; + @gcc_cmd = grep ! /^-Z/, @gcc_cmd; + @gcc_cmd = grep ! /^-fp/, @gcc_cmd; + @gcc_cmd = grep ! /^-EHsc$/, @gcc_cmd; + @gcc_cmd = grep ! 
/^-O/, @gcc_cmd; + + my @outfiles = grep /\.(o|obj)$/, @gcc_cmd; + $tempfile = $outfiles[0].".asm"; + + # Remove most parameters from gcc_cmd, which actually is the armasm command, + # which doesn't support any of the common compiler/preprocessor options. + @gcc_cmd = grep ! /^-D/, @gcc_cmd; + @gcc_cmd = grep ! /^-U/, @gcc_cmd; + @gcc_cmd = grep ! /^-m/, @gcc_cmd; + @gcc_cmd = grep ! /^-M/, @gcc_cmd; + @gcc_cmd = grep ! /^-c$/, @gcc_cmd; + @gcc_cmd = grep ! /^-I/, @gcc_cmd; + @gcc_cmd = map { /\.S$/ ? $tempfile : $_ } @gcc_cmd; +} + +# detect architecture from gcc binary name +if (!$arch) { + if ($gcc_cmd[0] =~ /(arm64|aarch64|arm|powerpc|ppc)/) { + $arch = $1; + } else { + # look for -arch flag + foreach my $i (1 .. $#gcc_cmd-1) { + if ($gcc_cmd[$i] eq "-arch" and + $gcc_cmd[$i+1] =~ /(arm64|aarch64|arm|powerpc|ppc)/) { + $arch = $1; + } + } + } +} + +# assume we're not cross-compiling if no -arch or the binary doesn't have the arch name +$arch = qx/arch/ if (!$arch); + +die "Unknown target architecture '$arch'" if not exists $canonical_arch{$arch}; + +$arch = $canonical_arch{$arch}; +$comm = $comments{$arch}; +my $inputcomm = $comm;
View file
x264-snapshot-20141218-2245.tar.bz2/x264.c -> x264-snapshot-20150804-2245.tar.bz2/x264.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * x264: top-level x264cli functions ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * Laurent Aimar <fenrir@via.ecp.fr> @@ -209,6 +209,13 @@ #endif 0 }; +static const char * const chroma_format_names[] = +{ + [0] = "all", + [X264_CSP_I420] = "i420", + [X264_CSP_I422] = "i422", + [X264_CSP_I444] = "i444" +}; static const char * const range_names[] = { "auto", "tv", "pc", 0 }; @@ -325,7 +332,8 @@ #else printf( "using an unknown compiler\n" ); #endif - printf( "configuration: --bit-depth=%d --chroma-format=%s\n", x264_bit_depth, X264_CHROMA_FORMAT ? (output_csp_names[0]+1) : "all" ); + printf( "x264 configuration: --bit-depth=%d --chroma-format=%s\n", X264_BIT_DEPTH, chroma_format_names[X264_CHROMA_FORMAT] ); + printf( "libx264 configuration: --bit-depth=%d --chroma-format=%s\n", x264_bit_depth, chroma_format_names[x264_chroma_format] ); printf( "x264 license: " ); #if HAVE_GPL printf( "GPL version 2 or later\n" ); @@ -533,7 +541,7 @@ " Overrides all settings.\n" ); H2( #if X264_CHROMA_FORMAT <= X264_CSP_I420 -#if BIT_DEPTH==8 +#if X264_BIT_DEPTH==8 " - baseline:\n" " --no-8x8dct --bframes 0 --no-cabac\n" " --cqm flat --weightp 0\n" @@ -561,7 +569,7 @@ else H0( " - " #if X264_CHROMA_FORMAT <= X264_CSP_I420 -#if BIT_DEPTH==8 +#if X264_BIT_DEPTH==8 "baseline,main,high," #endif "high10," @@ -703,7 +711,9 @@ " - 2: row alternation - L and R are interlaced by row\n" " - 3: side by side - L is on the left, R on the right\n" " - 4: top bottom - L is on top, R on bottom\n" - " - 5: frame alternation - one view per frame\n" ); + " - 5: frame alternation - one view per frame\n" + " - 6: mono - 2D frame without any frame packing\n" + " - 7: tile format - L is on top-left, R split across\n" ); H0( "\n" ); H0( 
"Ratecontrol:\n" ); H0( "\n" ); @@ -726,7 +736,8 @@ H2( " --aq-mode <integer> AQ method [%d]\n" " - 0: Disabled\n" " - 1: Variance AQ (complexity mask)\n" - " - 2: Auto-variance AQ (experimental)\n", defaults->rc.i_aq_mode ); + " - 2: Auto-variance AQ\n" + " - 3: Auto-variance AQ with bias to dark scenes\n", defaults->rc.i_aq_mode ); H1( " --aq-strength <float> Reduces blocking and blurring in flat and\n" " textured areas. [%.1f]\n", defaults->rc.f_aq_strength ); H1( "\n" ); @@ -1286,11 +1297,11 @@ /* force the output csp to what the user specified (or the default) */ param->i_csp = info->csp; int csp = info->csp & X264_CSP_MASK; - if( output_csp == X264_CSP_I420 && (csp < X264_CSP_I420 || csp > X264_CSP_NV12) ) + if( output_csp == X264_CSP_I420 && (csp < X264_CSP_I420 || csp >= X264_CSP_I422) ) param->i_csp = X264_CSP_I420; - else if( output_csp == X264_CSP_I422 && (csp < X264_CSP_I422 || csp > X264_CSP_V210) ) + else if( output_csp == X264_CSP_I422 && (csp < X264_CSP_I422 || csp >= X264_CSP_I444) ) param->i_csp = X264_CSP_I422; - else if( output_csp == X264_CSP_I444 && (csp < X264_CSP_I444 || csp > X264_CSP_YV24) ) + else if( output_csp == X264_CSP_I444 && (csp < X264_CSP_I444 || csp >= X264_CSP_BGR) ) param->i_csp = X264_CSP_I444; else if( output_csp == X264_CSP_RGB && (csp < X264_CSP_BGR || csp > X264_CSP_RGB) ) param->i_csp = X264_CSP_RGB;
View file
x264-snapshot-20141218-2245.tar.bz2/x264.h -> x264-snapshot-20150804-2245.tar.bz2/x264.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * x264.h: x264 public header ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu> @@ -29,7 +29,7 @@ #define X264_X264_H #if !defined(_STDINT_H) && !defined(_STDINT_H_) && !defined(_STDINT_H_INCLUDED) && !defined(_STDINT) &&\ - !defined(_INTTYPES_H) && !defined(_INTTYPES_H_) && !defined(_INTTYPES) + !defined(_SYS_STDINT_H_) && !defined(_INTTYPES_H) && !defined(_INTTYPES_H_) && !defined(_INTTYPES) # ifdef _MSC_VER # pragma message("You must include stdint.h or inttypes.h before x264.h") # else @@ -41,7 +41,7 @@ #include "x264_config.h" -#define X264_BUILD 142 +#define X264_BUILD 148 /* Application developers planning to link against a shared library version of * libx264 from a Microsoft Visual Studio or similar development environment @@ -129,8 +129,8 @@ #define X264_CPU_AVX 0x0000400 /* AVX support: requires OS support even if YMM registers aren't used. 
*/ #define X264_CPU_XOP 0x0000800 /* AMD XOP */ #define X264_CPU_FMA4 0x0001000 /* AMD FMA4 */ -#define X264_CPU_AVX2 0x0002000 /* AVX2 */ -#define X264_CPU_FMA3 0x0004000 /* Intel FMA3 */ +#define X264_CPU_FMA3 0x0002000 /* FMA3 */ +#define X264_CPU_AVX2 0x0004000 /* AVX2 */ #define X264_CPU_BMI1 0x0008000 /* BMI1 */ #define X264_CPU_BMI2 0x0010000 /* BMI2 */ /* x86 modifiers */ @@ -158,6 +158,9 @@ #define X264_CPU_FAST_NEON_MRC 0x0000004 /* Transfer from NEON to ARM register is fast (Cortex-A9) */ #define X264_CPU_ARMV8 0x0000008 +/* MIPS */ +#define X264_CPU_MSA 0x0000001 /* MIPS MSA */ + /* Analyse flags */ #define X264_ANALYSE_I4x4 0x0001 /* Analyse i4x4 */ #define X264_ANALYSE_I8x8 0x0002 /* Analyse i8x8 (requires 8x8 transform) */ @@ -183,6 +186,7 @@ #define X264_AQ_NONE 0 #define X264_AQ_VARIANCE 1 #define X264_AQ_AUTOVARIANCE 2 +#define X264_AQ_AUTOVARIANCE_BIASED 3 #define X264_B_ADAPT_NONE 0 #define X264_B_ADAPT_FAST 1 #define X264_B_ADAPT_TRELLIS 2 @@ -213,16 +217,17 @@ #define X264_CSP_I420 0x0001 /* yuv 4:2:0 planar */ #define X264_CSP_YV12 0x0002 /* yvu 4:2:0 planar */ #define X264_CSP_NV12 0x0003 /* yuv 4:2:0, with one y plane and one packed u+v */ -#define X264_CSP_I422 0x0004 /* yuv 4:2:2 planar */ -#define X264_CSP_YV16 0x0005 /* yvu 4:2:2 planar */ -#define X264_CSP_NV16 0x0006 /* yuv 4:2:2, with one y plane and one packed u+v */ -#define X264_CSP_V210 0x0007 /* 10-bit yuv 4:2:2 packed in 32 */ -#define X264_CSP_I444 0x0008 /* yuv 4:4:4 planar */ -#define X264_CSP_YV24 0x0009 /* yvu 4:4:4 planar */ -#define X264_CSP_BGR 0x000a /* packed bgr 24bits */ -#define X264_CSP_BGRA 0x000b /* packed bgr 32bits */ -#define X264_CSP_RGB 0x000c /* packed rgb 24bits */ -#define X264_CSP_MAX 0x000d /* end of list */ +#define X264_CSP_NV21 0x0004 /* yuv 4:2:0, with one y plane and one packed v+u */ +#define X264_CSP_I422 0x0005 /* yuv 4:2:2 planar */ +#define X264_CSP_YV16 0x0006 /* yvu 4:2:2 planar */ +#define X264_CSP_NV16 0x0007 /* yuv 4:2:2, with one y 
plane and one packed u+v */ +#define X264_CSP_V210 0x0008 /* 10-bit yuv 4:2:2 packed in 32 */ +#define X264_CSP_I444 0x0009 /* yuv 4:4:4 planar */ +#define X264_CSP_YV24 0x000a /* yvu 4:4:4 planar */ +#define X264_CSP_BGR 0x000b /* packed bgr 24bits */ +#define X264_CSP_BGRA 0x000c /* packed bgr 32bits */ +#define X264_CSP_RGB 0x000d /* packed rgb 24bits */ +#define X264_CSP_MAX 0x000e /* end of list */ #define X264_CSP_VFLIP 0x1000 /* the csp is vertically flipped */ #define X264_CSP_HIGH_DEPTH 0x2000 /* the csp has a depth of 16 bits per pixel component */ @@ -234,7 +239,7 @@ #define X264_TYPE_BREF 0x0004 /* Non-disposable B-frame */ #define X264_TYPE_B 0x0005 #define X264_TYPE_KEYFRAME 0x0006 /* IDR or I depending on b_open_gop option */ -#define IS_X264_TYPE_I(x) ((x)==X264_TYPE_I || (x)==X264_TYPE_IDR) +#define IS_X264_TYPE_I(x) ((x)==X264_TYPE_I || (x)==X264_TYPE_IDR || (x)==X264_TYPE_KEYFRAME) #define IS_X264_TYPE_B(x) ((x)==X264_TYPE_B || (x)==X264_TYPE_BREF) /* Log level */ @@ -789,8 +794,6 @@ /* In: force picture type (if not auto) * If x264 encoding parameters are violated in the forcing of picture types, * x264 will correct the input picture type and log a warning. - * The quality of frametype decisions may suffer if a great deal of fine-grained - * mixing of auto and forced frametypes is done. * Out: type of the picture encoded */ int i_type; /* In: force quantizer for != X264_QP_AUTO */
View file
x264-snapshot-20141218-2245.tar.bz2/x264cli.h -> x264-snapshot-20150804-2245.tar.bz2/x264cli.h
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * x264cli.h: x264cli common ***************************************************************************** - * Copyright (C) 2003-2014 x264 project + * Copyright (C) 2003-2015 x264 project * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu>
View file
x264-snapshot-20141218-2245.tar.bz2/x264dll.c -> x264-snapshot-20150804-2245.tar.bz2/x264dll.c
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * x264dll: x264 DLLMain for win32 ***************************************************************************** - * Copyright (C) 2009-2014 x264 project + * Copyright (C) 2009-2015 x264 project * * Authors: Anton Mitrofanov <BugMaster@narod.ru> *
View file
x264-snapshot-20141218-2245.tar.bz2/x264res.rc -> x264-snapshot-20150804-2245.tar.bz2/x264res.rc
Changed
@@ -1,7 +1,7 @@ /***************************************************************************** * x264res.rc: windows resource file ***************************************************************************** - * Copyright (C) 2012-2014 x264 project + * Copyright (C) 2012-2015 x264 project * * Authors: Henrik Gramner <henrik@gramner.com> * @@ -60,7 +60,7 @@ #endif VALUE "FileVersion", X264_POINTVER VALUE "InternalName", "x264" - VALUE "LegalCopyright", "Copyright (C) 2003-2014 x264 project" + VALUE "LegalCopyright", "Copyright (C) 2003-2015 x264 project" #ifdef DLL VALUE "OriginalFilename", "libx264-" xstr(X264_BUILD) ".dll" #else
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS)
is an
openSUSE project
.