Projects
Staging
x265
Sign Up
Log In
Username
Password
We truncated the diff of some files because they were too big. If you want to see the full diff for every file, click here.
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
Expand all
Collapse all
Changes of Revision 12
View file
x265.changes
Changed
@@ -1,4 +1,40 @@ ------------------------------------------------------------------- +Mon Jun 1 17:51:22 UTC 2020 - Luigi Baldoni <aloisio@gmx.com> + +- Update to version 3.4 + New features: + * Edge-aware quadtree partitioning to terminate CU depth + recursion based on edge information. --rskip level 2 enables + the feature and --rskip-edge-threshold denotes the minimum + expected edge-density percentage within the CU, below which + the recursion is skipped. Experimental feature. + * Application-level feature --abr-ladder for automating + efficient ABR ladder generation. Shows ~65% savings in the + over-all turn-around time required for the generation of a + typical Apple HLS ladder in Intel(R) Xeon(R) Platinum 8280 + CPU @ 2.70GHz over a sequential ABR-ladder generation + approach that leverages save-load architecture. + Enhancements to existing features: + * Improved efficiency in 2-pass rate-control algorithm. The + savings in the bitrate is ~1.72% with visual improvement in + quality in the initial 1-2 secs. + Encoder enhancements: + * Faster ARM64 encodes enabled by ASM contributions from + Huawei. The speed-up over no-asm version for 1080p encodes @ + medium preset is ~15% in a 16 core H/W. + * Strict VBV conformance in zone encoding. + Bug fixes: + * Multi-pass encode failures with --frame-dup. + * Corrupted bitstreams with --hist-scenecut when input depth + and internal bit-depth differ. + * Incorrect analysis propagation in multi-level save-load + architecture. + * Failure in detecting NUMA packages installed in non-standard + directories. + +- Refreshed arm.patch + +------------------------------------------------------------------- Sat Mar 28 14:28:56 UTC 2020 - Luigi Baldoni <aloisio@gmx.com> - Update to version 3.3
View file
x265.spec
Changed
@@ -17,11 +17,11 @@ # -%define sover 188 +%define sover 192 %define libname lib%{name} %define libsoname %{libname}-%{sover} Name: x265 -Version: 3.3 +Version: 3.4 Release: 0 Summary: A free h265/HEVC encoder - encoder binary License: GPL-2.0-or-later @@ -67,7 +67,6 @@ %patch0 -p1 %patch1 -p1 %patch2 -p1 - sed -i -e "s/0.0/%{sover}.0/g" source/cmake/version.cmake
View file
arm.patch
Changed
@@ -1,8 +1,8 @@ -Index: x265_2.2/source/CMakeLists.txt +Index: x265_3.4/source/CMakeLists.txt =================================================================== ---- x265_2.2.orig/source/CMakeLists.txt -+++ x265_2.2/source/CMakeLists.txt -@@ -65,15 +65,22 @@ elseif(POWERMATCH GREATER "-1") +--- x265_3.4.orig/source/CMakeLists.txt ++++ x265_3.4/source/CMakeLists.txt +@@ -64,26 +64,26 @@ elseif(POWERMATCH GREATER "-1") add_definitions(-DPPC64=1) message(STATUS "Detected POWER PPC64 target processor") endif() @@ -12,41 +12,62 @@ - else() - set(CROSS_COMPILE_ARM 0) - endif() -- message(STATUS "Detected ARM target processor") - set(ARM 1) -- add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=1) +- if("${CMAKE_SIZEOF_VOID_P}" MATCHES 8) +- message(STATUS "Detected ARM64 target processor") +- set(ARM64 1) +- add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=1 -DHAVE_ARMV6=0) +- else() +- message(STATUS "Detected ARM target processor") +- add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=0 -DHAVE_ARMV6=1) +- endif() +elseif(${SYSPROC} MATCHES "armv5.*") + message(STATUS "Detected ARMV5 system processor") + set(ARMV5 1) -+ add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=0 -DHAVE_NEON=0) ++ add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=0 -DHAVE_ARMV6=0 -DHAVE_NEON=0) +elseif(${SYSPROC} STREQUAL "armv6l") + message(STATUS "Detected ARMV6 system processor") + set(ARMV6 1) -+ add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=1 -DHAVE_NEON=0) ++ add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=0 -DHAVE_ARMV6=1 -DHAVE_NEON=0) +elseif(${SYSPROC} STREQUAL "armv7l") + message(STATUS "Detected ARMV7 system processor") + set(ARMV7 1) -+ add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=1 -DHAVE_NEON=0) ++ add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=0 -DHAVE_ARMV6=1 -DHAVE_NEON=0) +elseif(${SYSPROC} STREQUAL "aarch64") + message(STATUS "Detected AArch64 system processor") + set(ARMV7 1) -+ add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=1 -DHAVE_NEON=0) ++ 
add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=1 -DHAVE_ARMV6=0 -DHAVE_NEON=0) else() message(STATUS "CMAKE_SYSTEM_PROCESSOR value `${CMAKE_SYSTEM_PROCESSOR}` is unknown") message(STATUS "Please add this value near ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE}") -@@ -208,18 +215,9 @@ if(GCC) + endif() +- + if(UNIX) + list(APPEND PLATFORM_LIBS pthread) + find_library(LIBRT rt) +@@ -238,28 +238,9 @@ if(GCC) endif() endif() endif() - if(ARM AND CROSS_COMPILE_ARM) -- set(ARM_ARGS -march=armv6 -mfloat-abi=soft -mfpu=vfp -marm -fPIC) +- if(ARM64) +- set(ARM_ARGS -fPIC) +- else() +- set(ARM_ARGS -march=armv6 -mfloat-abi=soft -mfpu=vfp -marm -fPIC) +- endif() +- message(STATUS "cross compile arm") - elseif(ARM) -- find_package(Neon) -- if(CPU_HAS_NEON) -- set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=neon -marm -fPIC) +- if(ARM64) +- set(ARM_ARGS -fPIC) - add_definitions(-DHAVE_NEON) - else() -- set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=vfp -marm) +- find_package(Neon) +- if(CPU_HAS_NEON) +- set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=neon -marm -fPIC) +- add_definitions(-DHAVE_NEON) +- else() +- set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=vfp -marm) +- endif() - endif() + if(ARMV7) + add_definitions(-fPIC) @@ -55,11 +76,11 @@ if(FPROFILE_GENERATE) if(INTEL_CXX) add_definitions(-prof-gen -prof-dir="${CMAKE_CURRENT_BINARY_DIR}") -Index: x265_2.2/source/common/cpu.cpp +Index: x265_3.4/source/common/cpu.cpp =================================================================== ---- x265_2.2.orig/source/common/cpu.cpp -+++ x265_2.2/source/common/cpu.cpp -@@ -37,7 +37,7 @@ +--- x265_3.4.orig/source/common/cpu.cpp ++++ x265_3.4/source/common/cpu.cpp +@@ -39,7 +39,7 @@ #include <machine/cpu.h> #endif @@ -68,7 +89,7 @@ #include <signal.h> #include <setjmp.h> static sigjmp_buf jmpbuf; -@@ -344,7 +344,6 @@ uint32_t cpu_detect(void) +@@ -350,7 +350,6 @@ uint32_t cpu_detect(bool benableavx512) } canjump = 1; @@ -76,7 +97,7 @@ canjump = 0; signal(SIGILL, 
oldsig); #endif // if !HAVE_NEON -@@ -360,7 +359,7 @@ uint32_t cpu_detect(void) +@@ -366,7 +365,7 @@ uint32_t cpu_detect(bool benableavx512) // which may result in incorrect detection and the counters stuck enabled. // right now Apple does not seem to support performance counters for this test #ifndef __MACH__ @@ -84,4 +105,4 @@ + //flags |= PFX(cpu_fast_neon_mrc_test)() ? X265_CPU_FAST_NEON_MRC : 0; #endif // TODO: write dual issue test? currently it's A8 (dual issue) vs. A9 (fast mrc) - #endif // if HAVE_ARMV6 + #elif X265_ARCH_ARM64
View file
baselibs.conf
Changed
@@ -1,1 +1,1 @@ -libx265-179 +libx265-192
View file
x265_3.3.tar.gz/.hg_archival.txt -> x265_3.4.tar.gz/.hg_archival.txt
Changed
@@ -1,5 +1,4 @@ repo: 09fe40627f03a0f9c3e6ac78b22ac93da23f9fdf -node: f94b0d32737d40b2b9a9d74df57fee45e6be5cb0 -branch: Release_3.3 -latesttag: 3.3 -latesttagdistance: 1 +node: 2a65b720985096bcb1664f7cb05c3d04aeb576f5 +branch: Release_3.4 +tag: 3.4
View file
x265_3.3.tar.gz/.hgtags -> x265_3.4.tar.gz/.hgtags
Changed
@@ -40,3 +40,4 @@ 5ee3593ebd82b4d8957909bbc1b68b99b59ba773 3.3_RC1 96a10df63c0b778b480330bdf3be8da7db8a5fb1 3.3_RC2 057215961bc4b51b6260a584ff3d506e6d65cfd6 3.3 +ee92f36782800f145970131e01c79955a3ed5c10 3.4_RC1
View file
x265_3.4.tar.gz/build/aarch64-linux/crosscompile.cmake
Added
@@ -0,0 +1,15 @@ +# CMake toolchain file for cross compiling x265 for aarch64 +# This feature is only supported as experimental. Use with caution. +# Please report bugs on bitbucket +# Run cmake with: cmake -DCMAKE_TOOLCHAIN_FILE=crosscompile.cmake -G "Unix Makefiles" ../../source && ccmake ../../source + +set(CROSS_COMPILE_ARM 1) +set(CMAKE_SYSTEM_NAME Linux) +set(CMAKE_SYSTEM_PROCESSOR aarch64) + +# specify the cross compiler +set(CMAKE_C_COMPILER aarch64-linux-gnu-gcc) +set(CMAKE_CXX_COMPILER aarch64-linux-gnu-g++) + +# specify the target environment +SET(CMAKE_FIND_ROOT_PATH /usr/aarch64-linux-gnu)
View file
x265_3.4.tar.gz/build/aarch64-linux/make-Makefiles.bash
Added
@@ -0,0 +1,4 @@ +#!/bin/bash +# Run this from within a bash shell + +cmake -DCMAKE_TOOLCHAIN_FILE="crosscompile.cmake" -G "Unix Makefiles" ../../source && ccmake ../../source
View file
x265_3.3.tar.gz/doc/reST/cli.rst -> x265_3.4.tar.gz/doc/reST/cli.rst
Changed
@@ -107,6 +107,9 @@ **BufferFillFinal** Buffer bits available after removing the frame out of CPB. + **UnclippedBufferFillFinal** Unclipped buffer bits available after removing the frame + out of CPB only used for csv logging purpose. + **Latency** Latency in terms of number of frames between when the frame was given in and when the frame is given out. @@ -842,15 +845,31 @@ Measure 2Nx2N merge candidates first; if no residual is found, additional modes at that depth are not analysed. Default disabled -.. option:: --rskip, --no-rskip +.. option:: --rskip <0|1|2> + + This option determines early exit from CU depth recursion in modes 1 and 2. When a skip CU is + found, additional heuristics (depending on the RD level and rskip mode) are used to decide whether + to terminate recursion. The following table summarizes the behavior. + + +----------+------------+----------------------------------------------------------------+ + | RD Level | Rskip Mode | Skip Recursion Heuristic | + +==========+============+================================================================+ + | 0 - 4 | 1 | Neighbour costs and CU homogenity. | + +----------+------------+----------------------------------------------------------------+ + | 5 - 6 | 1 | Comparison with inter2Nx2N. | + +----------+------------+----------------------------------------------------------------+ + | 0 - 6 | 2 | CU edge density. | + +----------+------------+----------------------------------------------------------------+ + + Provides minimal quality degradation at good performance gains for non-zero modes. + :option:`--rskip mode 0` means disabled. Default: 1, disabled when :option:`--tune grain` is used. + This is a integer value representing the edge-density percentage within the CU. Internally normalized to a number between 0.0 to 1.0 in x265. + Recommended low thresholds for slow encodes and high for fast encodes. - This option determines early exit from CU depth recursion. 
When a skip CU is - found, additional heuristics (depending on rd-level) are used to decide whether - to terminate recursion. In rdlevels 5 and 6, comparison with inter2Nx2N is used, - while at rdlevels 4 and neighbour costs are used to skip recursion. - Provides minimal quality degradation at good performance gains when enabled. +.. option:: --rskip-edge-threshold <0..100> - Default: enabled, disabled for :option:`--tune grain` + Denotes the minimum expected edge-density percentage within the CU, below which the recursion is skipped. + Default: 5, requires :option:`--rskip mode 2` to be enabled. .. option:: --splitrd-skip, --no-splitrd-skip @@ -2501,6 +2520,28 @@ --recon-y4m-exec "ffplay -i pipe:0 -autoexit" **CLI ONLY** + +ABR-ladder Options +================== + +.. option:: --abr-ladder <filename> + + File containing the encoder configurations to generate ABR ladder. + The format of each line is: + + **<encID:reuse-level:refID> <CLI>** + + where, encID indicates the unique name given to the encode, refID indicates + the name of the encode from which analysis info has to be re-used ( set to 'nil' + if analysis reuse isn't preferred ), and reuse-level indicates the level ( :option:`--analysis-load-reuse-level`) + at which analysis info has to be reused. + + A sample config file is available in `the downloads page <https://bitbucket.org/multicoreware/x265/downloads/Sample_ABR_ladder_config>`_ + + Default: Disabled ( Conventional single encode generation ). Experimental feature. + + **CLI ONLY** + SVT-HEVC Encoder Options ========================
View file
x265_3.3.tar.gz/doc/reST/releasenotes.rst -> x265_3.4.tar.gz/doc/reST/releasenotes.rst
Changed
@@ -2,6 +2,32 @@ Release Notes ************* +Version 3.4 +=========== + +Release date - 29th May, 2020. + +New features +------------ +1. **Edge-aware quadtree partitioning** to terminate CU depth recursion based on edge information. :option:`--rskip` level 2 enables the feature and :option:`--rskip-edge-threshold` denotes the minimum expected edge-density percentage within the CU, below which the recursion is skipped. Experimental feature. +2. Application-level feature :option:`--abr-ladder` for automating efficient ABR ladder generation. Shows ~65% savings in the over-all turn-around time required for the generation of a typical Apple HLS ladder in Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz over a sequential ABR-ladder generation approach that leverages save-load architecture. + +Enhancements to existing features +--------------------------------- +1. Improved efficiency in 2-pass rate-control algorithm. The savings in the bitrate is ~1.72% with visual improvement in quality in the initial 1-2 secs. + +Encoder enhancements +-------------------- +1. Faster ARM64 encodes enabled by ASM contributions from Huawei. The speed-up over no-asm version for 1080p encodes @ medium preset is ~15% in a 16 core H/W. +2. Strict VBV conformance in zone encoding. + +Bug fixes +--------- +1. Multi-pass encode failures with :option:`--frame-dup`. +2. Corrupted bitstreams with :option:`--hist-scenecut` when input depth and internal bit-depth differ. +3. Incorrect analysis propagation in multi-level save-load architecture. +4. Failure in detecting NUMA packages installed in non-standard directories. + Version 3.3 ===========
View file
x265_3.3.tar.gz/source/CMakeLists.txt -> x265_3.4.tar.gz/source/CMakeLists.txt
Changed
@@ -29,7 +29,7 @@ option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF) mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD) # X265_BUILD must be incremented each time the public API is changed -set(X265_BUILD 188) +set(X265_BUILD 192) configure_file("${PROJECT_SOURCE_DIR}/x265.def.in" "${PROJECT_BINARY_DIR}/x265.def") configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in" @@ -40,7 +40,7 @@ # System architecture detection string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" SYSPROC) set(X86_ALIASES x86 i386 i686 x86_64 amd64) -set(ARM_ALIASES armv6l armv7l) +set(ARM_ALIASES armv6l armv7l aarch64) list(FIND X86_ALIASES "${SYSPROC}" X86MATCH) list(FIND ARM_ALIASES "${SYSPROC}" ARMMATCH) set(POWER_ALIASES ppc64 ppc64le) @@ -70,9 +70,15 @@ else() set(CROSS_COMPILE_ARM 0) endif() - message(STATUS "Detected ARM target processor") set(ARM 1) - add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=1) + if("${CMAKE_SIZEOF_VOID_P}" MATCHES 8) + message(STATUS "Detected ARM64 target processor") + set(ARM64 1) + add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=1 -DHAVE_ARMV6=0) + else() + message(STATUS "Detected ARM target processor") + add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=0 -DHAVE_ARMV6=1) + endif() else() message(STATUS "CMAKE_SYSTEM_PROCESSOR value `${CMAKE_SYSTEM_PROCESSOR}` is unknown") message(STATUS "Please add this value near ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE}") @@ -95,6 +101,8 @@ if(NUMA_FOUND) link_directories(${NUMA_LIBRARY_DIR}) list(APPEND CMAKE_REQUIRED_LIBRARIES numa) + list(APPEND CMAKE_REQUIRED_INCLUDES ${NUMA_INCLUDE_DIR}) + list(APPEND CMAKE_REQUIRED_LINK_OPTIONS "-L${NUMA_LIBRARY_DIR}") check_symbol_exists(numa_node_of_cpu numa.h NUMA_V2) if(NUMA_V2) add_definitions(-DHAVE_LIBNUMA) @@ -231,14 +239,24 @@ endif() endif() if(ARM AND CROSS_COMPILE_ARM) - set(ARM_ARGS -march=armv6 -mfloat-abi=soft -mfpu=vfp -marm -fPIC) + if(ARM64) + set(ARM_ARGS -fPIC) + else() + set(ARM_ARGS -march=armv6 -mfloat-abi=soft 
-mfpu=vfp -marm -fPIC) + endif() + message(STATUS "cross compile arm") elseif(ARM) - find_package(Neon) - if(CPU_HAS_NEON) - set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=neon -marm -fPIC) + if(ARM64) + set(ARM_ARGS -fPIC) add_definitions(-DHAVE_NEON) else() - set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=vfp -marm) + find_package(Neon) + if(CPU_HAS_NEON) + set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=neon -marm -fPIC) + add_definitions(-DHAVE_NEON) + else() + set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=vfp -marm) + endif() endif() endif() add_definitions(${ARM_ARGS}) @@ -518,7 +536,11 @@ # compile ARM arch asm files here enable_language(ASM) foreach(ASM ${ARM_ASMS}) - set(ASM_SRC ${CMAKE_CURRENT_SOURCE_DIR}/common/arm/${ASM}) + if(ARM64) + set(ASM_SRC ${CMAKE_CURRENT_SOURCE_DIR}/common/aarch64/${ASM}) + else() + set(ASM_SRC ${CMAKE_CURRENT_SOURCE_DIR}/common/arm/${ASM}) + endif() list(APPEND ASM_SRCS ${ASM_SRC}) list(APPEND ASM_OBJS ${ASM}.${SUFFIX}) add_custom_command( @@ -725,16 +747,16 @@ # Xcode seems unable to link the CLI with libs, so link as one targget if(ENABLE_HDR10_PLUS) add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${GETOPT} - x265.cpp x265.h x265cli.h + x265.cpp x265.h x265cli.cpp x265cli.h abrEncApp.cpp abrEncApp.h $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> $<TARGET_OBJECTS:dynamicHDR10> ${ASM_OBJS}) else() add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${GETOPT} - x265.cpp x265.h x265cli.h + x265.cpp x265.h x265cli.cpp x265cli.h abrEncApp.cpp abrEncApp.h $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> ${ASM_OBJS}) endif() else() add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${GETOPT} ${X265_RC_FILE} - ${ExportDefs} x265.cpp x265.h x265cli.h) + ${ExportDefs} x265.cpp x265.h x265cli.cpp x265cli.h abrEncApp.cpp abrEncApp.h) if(WIN32 OR NOT ENABLE_SHARED OR INTEL_CXX) # The CLI cannot link to the shared library on Windows, it # requires internal APIs not exported from the DLL
View file
x265_3.4.tar.gz/source/abrEncApp.cpp
Added
@@ -0,0 +1,1108 @@ +/***************************************************************************** +* Copyright (C) 2013-2020 MulticoreWare, Inc +* +* Authors: Pooja Venkatesan <pooja@multicorewareinc.com> +* Aruna Matheswaran <aruna@multicorewareinc.com> +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software +* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. +* +* This program is also available under a commercial proprietary license. +* For more information, contact us at license @ x265.com. +*****************************************************************************/ + +#include "abrEncApp.h" +#include "mv.h" +#include "slice.h" +#include "param.h" + +#include <signal.h> +#include <errno.h> + +#include <queue> + +using namespace X265_NS; + +/* Ctrl-C handler */ +static volatile sig_atomic_t b_ctrl_c /* = 0 */; +static void sigint_handler(int) +{ + b_ctrl_c = 1; +} + +namespace X265_NS { + // private namespace +#define X265_INPUT_QUEUE_SIZE 250 + + AbrEncoder::AbrEncoder(CLIOptions cliopt[], uint8_t numEncodes, int &ret) + { + m_numEncodes = numEncodes; + m_numActiveEncodes.set(numEncodes); + m_queueSize = (numEncodes > 1) ? 
X265_INPUT_QUEUE_SIZE : 1; + m_passEnc = X265_MALLOC(PassEncoder*, m_numEncodes); + + for (uint8_t i = 0; i < m_numEncodes; i++) + { + m_passEnc[i] = new PassEncoder(i, cliopt[i], this); + if (!m_passEnc[i]) + { + x265_log(NULL, X265_LOG_ERROR, "Unable to allocate memory for passEncoder\n"); + ret = 4; + } + m_passEnc[i]->init(ret); + } + + if (!allocBuffers()) + { + x265_log(NULL, X265_LOG_ERROR, "Unable to allocate memory for buffers\n"); + ret = 4; + } + + /* start passEncoder worker threads */ + for (uint8_t pass = 0; pass < m_numEncodes; pass++) + m_passEnc[pass]->startThreads(); + } + + bool AbrEncoder::allocBuffers() + { + m_inputPicBuffer = X265_MALLOC(x265_picture**, m_numEncodes); + m_analysisBuffer = X265_MALLOC(x265_analysis_data*, m_numEncodes); + + m_picWriteCnt = new ThreadSafeInteger[m_numEncodes]; + m_picReadCnt = new ThreadSafeInteger[m_numEncodes]; + m_analysisWriteCnt = new ThreadSafeInteger[m_numEncodes]; + m_analysisReadCnt = new ThreadSafeInteger[m_numEncodes]; + + m_picIdxReadCnt = X265_MALLOC(ThreadSafeInteger*, m_numEncodes); + m_analysisWrite = X265_MALLOC(ThreadSafeInteger*, m_numEncodes); + m_analysisRead = X265_MALLOC(ThreadSafeInteger*, m_numEncodes); + m_readFlag = X265_MALLOC(int*, m_numEncodes); + + for (uint8_t pass = 0; pass < m_numEncodes; pass++) + { + m_inputPicBuffer[pass] = X265_MALLOC(x265_picture*, m_queueSize); + for (uint32_t idx = 0; idx < m_queueSize; idx++) + { + m_inputPicBuffer[pass][idx] = x265_picture_alloc(); + x265_picture_init(m_passEnc[pass]->m_param, m_inputPicBuffer[pass][idx]); + } + + m_analysisBuffer[pass] = X265_MALLOC(x265_analysis_data, m_queueSize); + m_picIdxReadCnt[pass] = new ThreadSafeInteger[m_queueSize]; + m_analysisWrite[pass] = new ThreadSafeInteger[m_queueSize]; + m_analysisRead[pass] = new ThreadSafeInteger[m_queueSize]; + m_readFlag[pass] = X265_MALLOC(int, m_queueSize); + } + return true; + } + + void AbrEncoder::destroy() + { + x265_cleanup(); /* Free library singletons */ + for (uint8_t 
pass = 0; pass < m_numEncodes; pass++) + { + for (uint32_t index = 0; index < m_queueSize; index++) + { + X265_FREE(m_inputPicBuffer[pass][index]->planes[0]); + x265_picture_free(m_inputPicBuffer[pass][index]); + } + + X265_FREE(m_inputPicBuffer[pass]); + X265_FREE(m_analysisBuffer[pass]); + X265_FREE(m_readFlag[pass]); + delete[] m_picIdxReadCnt[pass]; + delete[] m_analysisWrite[pass]; + delete[] m_analysisRead[pass]; + m_passEnc[pass]->destroy(); + delete m_passEnc[pass]; + } + X265_FREE(m_inputPicBuffer); + X265_FREE(m_analysisBuffer); + X265_FREE(m_readFlag); + + delete[] m_picWriteCnt; + delete[] m_picReadCnt; + delete[] m_analysisWriteCnt; + delete[] m_analysisReadCnt; + + X265_FREE(m_picIdxReadCnt); + X265_FREE(m_analysisWrite); + X265_FREE(m_analysisRead); + + X265_FREE(m_passEnc); + } + + PassEncoder::PassEncoder(uint32_t id, CLIOptions cliopt, AbrEncoder *parent) + { + m_id = id; + m_cliopt = cliopt; + m_parent = parent; + if(!(m_cliopt.enableScaler && m_id)) + m_input = m_cliopt.input; + m_param = cliopt.param; + m_inputOver = false; + m_lastIdx = -1; + m_encoder = NULL; + m_scaler = NULL; + m_reader = NULL; + m_ret = 0; + } + + int PassEncoder::init(int &result) + { + if (m_parent->m_numEncodes > 1) + setReuseLevel(); + + if (!(m_cliopt.enableScaler && m_id)) + m_reader = new Reader(m_id, this); + else + { + VideoDesc *src = NULL, *dst = NULL; + dst = new VideoDesc(m_param->sourceWidth, m_param->sourceHeight, m_param->internalCsp, m_param->internalBitDepth); + int dstW = m_parent->m_passEnc[m_id - 1]->m_param->sourceWidth; + int dstH = m_parent->m_passEnc[m_id - 1]->m_param->sourceHeight; + src = new VideoDesc(dstW, dstH, m_param->internalCsp, m_param->internalBitDepth); + if (src != NULL && dst != NULL) + { + m_scaler = new Scaler(0, 1, m_id, src, dst, this); + if (!m_scaler) + { + x265_log(m_param, X265_LOG_ERROR, "\n MALLOC failure in Scaler"); + result = 4; + } + } + } + + /* note: we could try to acquire a different libx265 API here based on + * 
the profile found during option parsing, but it must be done before + * opening an encoder */ + + if (m_param) + m_encoder = m_cliopt.api->encoder_open(m_param); + if (!m_encoder) + { + x265_log(NULL, X265_LOG_ERROR, "x265_encoder_open() failed for Enc, \n"); + m_ret = 2; + return -1; + } +
View file
x265_3.4.tar.gz/source/abrEncApp.h
Added
@@ -0,0 +1,153 @@ +/***************************************************************************** +* Copyright (C) 2013-2020 MulticoreWare, Inc +* +* Authors: Pooja Venkatesan <pooja@multicorewareinc.com> +* Aruna Matheswaran <aruna@multicorewareinc.com> +* +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software +* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. +* +* This program is also available under a commercial proprietary license. +* For more information, contact us at license @ x265.com. 
+*****************************************************************************/ + +#ifndef ABR_ENCODE_H +#define ABR_ENCODE_H + +#include "x265.h" +#include "scaler.h" +#include "threading.h" +#include "x265cli.h" + +namespace X265_NS { + // private namespace + + class PassEncoder; + class Scaler; + class Reader; + + class AbrEncoder + { + public: + uint8_t m_numEncodes; + PassEncoder **m_passEnc; + uint32_t m_queueSize; + ThreadSafeInteger m_numActiveEncodes; + + x265_picture ***m_inputPicBuffer; //[numEncodes][queueSize] + x265_analysis_data **m_analysisBuffer; //[numEncodes][queueSize] + int **m_readFlag; + + ThreadSafeInteger *m_picWriteCnt; + ThreadSafeInteger *m_picReadCnt; + ThreadSafeInteger **m_picIdxReadCnt; + ThreadSafeInteger *m_analysisWriteCnt; //[numEncodes][queueSize] + ThreadSafeInteger *m_analysisReadCnt; //[numEncodes][queueSize] + ThreadSafeInteger **m_analysisWrite; //[numEncodes][queueSize] + ThreadSafeInteger **m_analysisRead; //[numEncodes][queueSize] + + AbrEncoder(CLIOptions cliopt[], uint8_t numEncodes, int& ret); + bool allocBuffers(); + void destroy(); + + }; + + class PassEncoder : public Thread + { + public: + + uint32_t m_id; + x265_param *m_param; + AbrEncoder *m_parent; + x265_encoder *m_encoder; + Reader *m_reader; + Scaler *m_scaler; + bool m_inputOver; + + int m_threadActive; + int m_lastIdx; + uint32_t m_outputNalsCount; + + x265_picture **m_inputPicBuffer; + x265_analysis_data **m_analysisBuffer; + x265_nal **m_outputNals; + x265_picture **m_outputRecon; + + CLIOptions m_cliopt; + InputFile* m_input; + const char* m_reconPlayCmd; + FILE* m_qpfile; + FILE* m_zoneFile; + FILE* m_dolbyVisionRpu;/* File containing Dolby Vision BL RPU metadata */ + + int m_ret; + + PassEncoder(uint32_t id, CLIOptions cliopt, AbrEncoder *parent); + int init(int &result); + void setReuseLevel(); + + void startThreads(); + void copyInfo(x265_analysis_data *src); + + bool readPicture(x265_picture*); + void destroy(); + + private: + void threadMain(); + 
}; + + class Scaler : public Thread + { + public: + PassEncoder *m_parentEnc; + int m_id; + int m_scalePlanes[3]; + int m_scaleFrameSize; + uint32_t m_threadId; + uint32_t m_threadTotal; + ThreadSafeInteger m_scaledWriteCnt; + VideoDesc* m_srcFormat; + VideoDesc* m_dstFormat; + int m_threadActive; + ScalerFilterManager* m_filterManager; + + Scaler(int threadId, int threadNum, int id, VideoDesc *src, VideoDesc * dst, PassEncoder *parentEnc); + bool scalePic(x265_picture *destination, x265_picture *source); + void threadMain(); + void destroy() + { + if (m_filterManager) + { + delete m_filterManager; + m_filterManager = NULL; + } + } + }; + + class Reader : public Thread + { + public: + PassEncoder *m_parentEnc; + int m_id; + InputFile* m_input; + int m_threadActive; + + Reader(int id, PassEncoder *parentEnc); + void threadMain(); + }; +} + +#endif // ifndef ABR_ENCODE_H +#pragma once
View file
x265_3.3.tar.gz/source/common/CMakeLists.txt -> x265_3.4.tar.gz/source/common/CMakeLists.txt
Changed
@@ -14,7 +14,7 @@ endif(EXTRA_LIB) if(ENABLE_ASSEMBLY) - set_source_files_properties(threading.cpp primitives.cpp PROPERTIES COMPILE_FLAGS -DENABLE_ASSEMBLY=1) + set_source_files_properties(threading.cpp primitives.cpp pixel.cpp PROPERTIES COMPILE_FLAGS -DENABLE_ASSEMBLY=1) list(APPEND VFLAGS "-DENABLE_ASSEMBLY=1") endif(ENABLE_ASSEMBLY) @@ -84,16 +84,33 @@ endif(ENABLE_ASSEMBLY AND X86) if(ENABLE_ASSEMBLY AND (ARM OR CROSS_COMPILE_ARM)) - set(C_SRCS asm-primitives.cpp pixel.h mc.h ipfilter8.h blockcopy8.h dct8.h loopfilter.h) + if(ARM64) + if(GCC AND (CMAKE_CXX_FLAGS_RELEASE MATCHES "-O3")) + message(STATUS "Detected CXX compiler using -O3 optimization level") + add_definitions(-DAUTO_VECTORIZE=1) + endif() + set(C_SRCS asm-primitives.cpp pixel.h ipfilter8.h) - # add ARM assembly/intrinsic files here - set(A_SRCS asm.S cpu-a.S mc-a.S sad-a.S pixel-util.S ssd-a.S blockcopy8.S ipfilter8.S dct-a.S) - set(VEC_PRIMITIVES) + # add ARM assembly/intrinsic files here + set(A_SRCS asm.S mc-a.S sad-a.S pixel-util.S ipfilter8.S) + set(VEC_PRIMITIVES) - set(ARM_ASMS "${A_SRCS}" CACHE INTERNAL "ARM Assembly Sources") - foreach(SRC ${C_SRCS}) - set(ASM_PRIMITIVES ${ASM_PRIMITIVES} arm/${SRC}) - endforeach() + set(ARM_ASMS "${A_SRCS}" CACHE INTERNAL "ARM Assembly Sources") + foreach(SRC ${C_SRCS}) + set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC}) + endforeach() + else() + set(C_SRCS asm-primitives.cpp pixel.h mc.h ipfilter8.h blockcopy8.h dct8.h loopfilter.h) + + # add ARM assembly/intrinsic files here + set(A_SRCS asm.S cpu-a.S mc-a.S sad-a.S pixel-util.S ssd-a.S blockcopy8.S ipfilter8.S dct-a.S) + set(VEC_PRIMITIVES) + + set(ARM_ASMS "${A_SRCS}" CACHE INTERNAL "ARM Assembly Sources") + foreach(SRC ${C_SRCS}) + set(ASM_PRIMITIVES ${ASM_PRIMITIVES} arm/${SRC}) + endforeach() + endif() source_group(Assembly FILES ${ASM_PRIMITIVES}) endif(ENABLE_ASSEMBLY AND (ARM OR CROSS_COMPILE_ARM)) @@ -151,4 +168,5 @@ predict.cpp predict.h scalinglist.cpp scalinglist.h quant.cpp quant.h 
contexts.h - deblock.cpp deblock.h) + deblock.cpp deblock.h + scaler.cpp scaler.h)
View file
x265_3.4.tar.gz/source/common/aarch64/asm-primitives.cpp
Added
@@ -0,0 +1,219 @@ +/***************************************************************************** + * Copyright (C) 2020 MulticoreWare, Inc + * + * Authors: Hongbin Liu <liuhongbin1@huawei.com> + * Yimeng Su <yimeng.su@huawei.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at license @ x265.com. 
+ *****************************************************************************/ + +#include "common.h" +#include "primitives.h" +#include "x265.h" +#include "cpu.h" + + +#if defined(__GNUC__) +#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) +#endif + +#define GCC_4_9_0 40900 +#define GCC_5_1_0 50100 + +extern "C" { +#include "pixel.h" +#include "pixel-util.h" +#include "ipfilter8.h" +} + +namespace X265_NS { +// private x265 namespace + + +template<int size> +void interp_8tap_hv_pp_cpu(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int idxX, int idxY) +{ + ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_LUMA - 1)]); + const int halfFilterSize = NTAPS_LUMA >> 1; + const int immedStride = MAX_CU_SIZE; + + primitives.pu[size].luma_hps(src, srcStride, immed, immedStride, idxX, 1); + primitives.pu[size].luma_vsp(immed + (halfFilterSize - 1) * immedStride, immedStride, dst, dstStride, idxY); +} + + +/* Temporary workaround because luma_vsp assembly primitive has not been completed + * but interp_8tap_hv_pp_cpu uses mixed C primitive and assembly primitive. + * Otherwise, segment fault occurs. 
*/ +void setupAliasCPrimitives(EncoderPrimitives &cp, EncoderPrimitives &asmp, int cpuMask) +{ + if (cpuMask & X265_CPU_NEON) + { + asmp.pu[LUMA_8x4].luma_vsp = cp.pu[LUMA_8x4].luma_vsp; + asmp.pu[LUMA_8x8].luma_vsp = cp.pu[LUMA_8x8].luma_vsp; + asmp.pu[LUMA_8x16].luma_vsp = cp.pu[LUMA_8x16].luma_vsp; + asmp.pu[LUMA_8x32].luma_vsp = cp.pu[LUMA_8x32].luma_vsp; + asmp.pu[LUMA_12x16].luma_vsp = cp.pu[LUMA_12x16].luma_vsp; +#if !AUTO_VECTORIZE || GCC_VERSION < GCC_5_1_0 /* gcc_version < gcc-5.1.0 */ + asmp.pu[LUMA_16x4].luma_vsp = cp.pu[LUMA_16x4].luma_vsp; + asmp.pu[LUMA_16x8].luma_vsp = cp.pu[LUMA_16x8].luma_vsp; + asmp.pu[LUMA_16x12].luma_vsp = cp.pu[LUMA_16x12].luma_vsp; + asmp.pu[LUMA_16x16].luma_vsp = cp.pu[LUMA_16x16].luma_vsp; + asmp.pu[LUMA_16x32].luma_vsp = cp.pu[LUMA_16x32].luma_vsp; + asmp.pu[LUMA_16x64].luma_vsp = cp.pu[LUMA_16x64].luma_vsp; + asmp.pu[LUMA_32x16].luma_vsp = cp.pu[LUMA_32x16].luma_vsp; + asmp.pu[LUMA_32x24].luma_vsp = cp.pu[LUMA_32x24].luma_vsp; + asmp.pu[LUMA_32x32].luma_vsp = cp.pu[LUMA_32x32].luma_vsp; + asmp.pu[LUMA_32x64].luma_vsp = cp.pu[LUMA_32x64].luma_vsp; + asmp.pu[LUMA_48x64].luma_vsp = cp.pu[LUMA_48x64].luma_vsp; + asmp.pu[LUMA_64x16].luma_vsp = cp.pu[LUMA_64x16].luma_vsp; + asmp.pu[LUMA_64x32].luma_vsp = cp.pu[LUMA_64x32].luma_vsp; + asmp.pu[LUMA_64x48].luma_vsp = cp.pu[LUMA_64x48].luma_vsp; + asmp.pu[LUMA_64x64].luma_vsp = cp.pu[LUMA_64x64].luma_vsp; +#if !AUTO_VECTORIZE || GCC_VERSION < GCC_4_9_0 /* gcc_version < gcc-4.9.0 */ + asmp.pu[LUMA_4x4].luma_vsp = cp.pu[LUMA_4x4].luma_vsp; + asmp.pu[LUMA_4x8].luma_vsp = cp.pu[LUMA_4x8].luma_vsp; + asmp.pu[LUMA_4x16].luma_vsp = cp.pu[LUMA_4x16].luma_vsp; + asmp.pu[LUMA_24x32].luma_vsp = cp.pu[LUMA_24x32].luma_vsp; + asmp.pu[LUMA_32x8].luma_vsp = cp.pu[LUMA_32x8].luma_vsp; +#endif +#endif + } +} + + +void setupAssemblyPrimitives(EncoderPrimitives &p, int cpuMask) +{ + if (cpuMask & X265_CPU_NEON) + { + p.pu[LUMA_4x4].satd = PFX(pixel_satd_4x4_neon); + p.pu[LUMA_4x8].satd = 
PFX(pixel_satd_4x8_neon); + p.pu[LUMA_4x16].satd = PFX(pixel_satd_4x16_neon); + p.pu[LUMA_8x4].satd = PFX(pixel_satd_8x4_neon); + p.pu[LUMA_8x8].satd = PFX(pixel_satd_8x8_neon); + p.pu[LUMA_12x16].satd = PFX(pixel_satd_12x16_neon); + + p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].satd = PFX(pixel_satd_4x4_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].satd = PFX(pixel_satd_4x8_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_4x16].satd = PFX(pixel_satd_4x16_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].satd = PFX(pixel_satd_8x4_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].satd = PFX(pixel_satd_8x8_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_12x16].satd = PFX(pixel_satd_12x16_neon); + + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x4].satd = PFX(pixel_satd_4x4_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].satd = PFX(pixel_satd_4x8_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].satd = PFX(pixel_satd_4x16_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].satd = PFX(pixel_satd_4x32_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].satd = PFX(pixel_satd_8x4_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].satd = PFX(pixel_satd_8x8_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].satd = PFX(pixel_satd_12x32_neon); + + p.pu[LUMA_4x4].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_4x4_neon); + p.pu[LUMA_4x8].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_4x8_neon); + p.pu[LUMA_4x16].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_4x16_neon); + p.pu[LUMA_8x4].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_8x4_neon); + p.pu[LUMA_8x8].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_8x8_neon); + p.pu[LUMA_8x16].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_8x16_neon); + p.pu[LUMA_8x32].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_8x32_neon); + + p.pu[LUMA_4x4].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_4x4_neon); + p.pu[LUMA_4x8].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_4x8_neon); + p.pu[LUMA_4x16].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_4x16_neon); + 
p.pu[LUMA_8x4].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_8x4_neon); + p.pu[LUMA_8x8].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_8x8_neon); + p.pu[LUMA_8x16].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_8x16_neon); + p.pu[LUMA_8x32].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_8x32_neon); + + p.pu[LUMA_8x4].sad_x3 = PFX(sad_x3_8x4_neon); + p.pu[LUMA_8x8].sad_x3 = PFX(sad_x3_8x8_neon); + p.pu[LUMA_8x16].sad_x3 = PFX(sad_x3_8x16_neon); + p.pu[LUMA_8x32].sad_x3 = PFX(sad_x3_8x32_neon); + + p.pu[LUMA_8x4].sad_x4 = PFX(sad_x4_8x4_neon); + p.pu[LUMA_8x8].sad_x4 = PFX(sad_x4_8x8_neon); + p.pu[LUMA_8x16].sad_x4 = PFX(sad_x4_8x16_neon); + p.pu[LUMA_8x32].sad_x4 = PFX(sad_x4_8x32_neon); + + // quant + p.quant = PFX(quant_neon); + // luma_hps + p.pu[LUMA_4x4].luma_hps = PFX(interp_8tap_horiz_ps_4x4_neon); + p.pu[LUMA_4x8].luma_hps = PFX(interp_8tap_horiz_ps_4x8_neon); + p.pu[LUMA_4x16].luma_hps = PFX(interp_8tap_horiz_ps_4x16_neon); + p.pu[LUMA_8x4].luma_hps = PFX(interp_8tap_horiz_ps_8x4_neon); + p.pu[LUMA_8x8].luma_hps = PFX(interp_8tap_horiz_ps_8x8_neon); + p.pu[LUMA_8x16].luma_hps = PFX(interp_8tap_horiz_ps_8x16_neon); + p.pu[LUMA_8x32].luma_hps = PFX(interp_8tap_horiz_ps_8x32_neon); + p.pu[LUMA_12x16].luma_hps = PFX(interp_8tap_horiz_ps_12x16_neon); + p.pu[LUMA_24x32].luma_hps = PFX(interp_8tap_horiz_ps_24x32_neon); +#if !AUTO_VECTORIZE || GCC_VERSION < GCC_5_1_0 /* gcc_version < gcc-5.1.0 */ + p.pu[LUMA_16x4].luma_hps = PFX(interp_8tap_horiz_ps_16x4_neon); + p.pu[LUMA_16x8].luma_hps = PFX(interp_8tap_horiz_ps_16x8_neon); + p.pu[LUMA_16x12].luma_hps = PFX(interp_8tap_horiz_ps_16x12_neon); + p.pu[LUMA_16x16].luma_hps = PFX(interp_8tap_horiz_ps_16x16_neon); + p.pu[LUMA_16x32].luma_hps = PFX(interp_8tap_horiz_ps_16x32_neon); + p.pu[LUMA_16x64].luma_hps = PFX(interp_8tap_horiz_ps_16x64_neon); + p.pu[LUMA_32x8].luma_hps = PFX(interp_8tap_horiz_ps_32x8_neon); + p.pu[LUMA_32x16].luma_hps = PFX(interp_8tap_horiz_ps_32x16_neon); + p.pu[LUMA_32x24].luma_hps = PFX(interp_8tap_horiz_ps_32x24_neon); 
+ p.pu[LUMA_32x32].luma_hps = PFX(interp_8tap_horiz_ps_32x32_neon); + p.pu[LUMA_32x64].luma_hps = PFX(interp_8tap_horiz_ps_32x64_neon); + p.pu[LUMA_48x64].luma_hps = PFX(interp_8tap_horiz_ps_48x64_neon); + p.pu[LUMA_64x16].luma_hps = PFX(interp_8tap_horiz_ps_64x16_neon); + p.pu[LUMA_64x32].luma_hps = PFX(interp_8tap_horiz_ps_64x32_neon); + p.pu[LUMA_64x48].luma_hps = PFX(interp_8tap_horiz_ps_64x48_neon); + p.pu[LUMA_64x64].luma_hps = PFX(interp_8tap_horiz_ps_64x64_neon); +#endif + + p.pu[LUMA_8x4].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_8x4>; + p.pu[LUMA_8x8].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_8x8>; + p.pu[LUMA_8x16].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_8x16>; + p.pu[LUMA_8x32].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_8x32>; + p.pu[LUMA_12x16].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_12x16>; +#if !AUTO_VECTORIZE || GCC_VERSION < GCC_5_1_0 /* gcc_version < gcc-5.1.0 */ + p.pu[LUMA_16x4].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_16x4>; + p.pu[LUMA_16x8].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_16x8>; + p.pu[LUMA_16x12].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_16x12>; + p.pu[LUMA_16x16].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_16x16>; + p.pu[LUMA_16x32].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_16x32>; + p.pu[LUMA_16x64].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_16x64>; + p.pu[LUMA_32x16].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_32x16>; + p.pu[LUMA_32x24].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_32x24>; + p.pu[LUMA_32x32].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_32x32>; + p.pu[LUMA_32x64].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_32x64>; + p.pu[LUMA_48x64].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_48x64>;
View file
x265_3.4.tar.gz/source/common/aarch64/asm.S
Added
@@ -0,0 +1,69 @@ +/***************************************************************************** + * Copyright (C) 2020 MulticoreWare, Inc + * + * Authors: Hongbin Liu <liuhongbin1@huawei.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at license @ x265.com. + *****************************************************************************/ + +.arch armv8-a + +#ifdef PREFIX +#define EXTERN_ASM _ +#else +#define EXTERN_ASM +#endif + +#ifdef __ELF__ +#define ELF +#else +#define ELF @ +#endif + +#define HAVE_AS_FUNC 1 + +#if HAVE_AS_FUNC +#define FUNC +#else +#define FUNC @ +#endif + +.macro function name, export=1 + .macro endfunc +ELF .size \name, . - \name +FUNC .endfunc + .purgem endfunc + .endm + .align 2 +.if \export == 1 + .global EXTERN_ASM\name +ELF .hidden EXTERN_ASM\name +ELF .type EXTERN_ASM\name, %function +FUNC .func EXTERN_ASM\name +EXTERN_ASM\name: +.else +ELF .hidden \name +ELF .type \name, %function +FUNC .func \name +\name: +.endif +.endm + + +#define FENC_STRIDE 64 +#define FDEC_STRIDE 32
View file
x265_3.4.tar.gz/source/common/aarch64/ipfilter8.S
Added
@@ -0,0 +1,414 @@ +/***************************************************************************** + * Copyright (C) 2020 MulticoreWare, Inc + * + * Authors: Yimeng Su <yimeng.su@huawei.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at license @ x265.com. 
+ *****************************************************************************/ + +#include "asm.S" + +.section .rodata + +.align 4 + +.text + + + +.macro qpel_filter_0_32b + movi v24.8h, #64 + uxtl v19.8h, v5.8b + smull v17.4s, v19.4h, v24.4h + smull2 v18.4s, v19.8h, v24.8h +.endm + +.macro qpel_filter_1_32b + movi v16.8h, #58 + uxtl v19.8h, v5.8b + smull v17.4s, v19.4h, v16.4h + smull2 v18.4s, v19.8h, v16.8h + + movi v24.8h, #10 + uxtl v21.8h, v1.8b + smull v19.4s, v21.4h, v24.4h + smull2 v20.4s, v21.8h, v24.8h + + movi v16.8h, #17 + uxtl v23.8h, v2.8b + smull v21.4s, v23.4h, v16.4h + smull2 v22.4s, v23.8h, v16.8h + + movi v24.8h, #5 + uxtl v1.8h, v6.8b + smull v23.4s, v1.4h, v24.4h + smull2 v16.4s, v1.8h, v24.8h + + sub v17.4s, v17.4s, v19.4s + sub v18.4s, v18.4s, v20.4s + + uxtl v1.8h, v4.8b + sshll v19.4s, v1.4h, #2 + sshll2 v20.4s, v1.8h, #2 + + add v17.4s, v17.4s, v21.4s + add v18.4s, v18.4s, v22.4s + + uxtl v1.8h, v0.8b + uxtl v2.8h, v3.8b + ssubl v21.4s, v2.4h, v1.4h + ssubl2 v22.4s, v2.8h, v1.8h + + add v17.4s, v17.4s, v19.4s + add v18.4s, v18.4s, v20.4s + sub v21.4s, v21.4s, v23.4s + sub v22.4s, v22.4s, v16.4s + add v17.4s, v17.4s, v21.4s + add v18.4s, v18.4s, v22.4s +.endm + +.macro qpel_filter_2_32b + movi v16.4s, #11 + uxtl v19.8h, v5.8b + uxtl v20.8h, v2.8b + saddl v17.4s, v19.4h, v20.4h + saddl2 v18.4s, v19.8h, v20.8h + + uxtl v21.8h, v1.8b + uxtl v22.8h, v6.8b + saddl v19.4s, v21.4h, v22.4h + saddl2 v20.4s, v21.8h, v22.8h + + mul v19.4s, v19.4s, v16.4s + mul v20.4s, v20.4s, v16.4s + + movi v16.4s, #40 + mul v17.4s, v17.4s, v16.4s + mul v18.4s, v18.4s, v16.4s + + uxtl v21.8h, v4.8b + uxtl v22.8h, v3.8b + saddl v23.4s, v21.4h, v22.4h + saddl2 v16.4s, v21.8h, v22.8h + + uxtl v1.8h, v0.8b + uxtl v2.8h, v7.8b + saddl v21.4s, v1.4h, v2.4h + saddl2 v22.4s, v1.8h, v2.8h + + shl v23.4s, v23.4s, #2 + shl v16.4s, v16.4s, #2 + + add v19.4s, v19.4s, v21.4s + add v20.4s, v20.4s, v22.4s + add v17.4s, v17.4s, v23.4s + add v18.4s, v18.4s, v16.4s + sub v17.4s, 
v17.4s, v19.4s + sub v18.4s, v18.4s, v20.4s +.endm + +.macro qpel_filter_3_32b + movi v16.8h, #17 + movi v24.8h, #5 + + uxtl v19.8h, v5.8b + smull v17.4s, v19.4h, v16.4h + smull2 v18.4s, v19.8h, v16.8h + + uxtl v21.8h, v1.8b + smull v19.4s, v21.4h, v24.4h + smull2 v20.4s, v21.8h, v24.8h + + movi v16.8h, #58 + uxtl v23.8h, v2.8b + smull v21.4s, v23.4h, v16.4h + smull2 v22.4s, v23.8h, v16.8h + + movi v24.8h, #10 + uxtl v1.8h, v6.8b + smull v23.4s, v1.4h, v24.4h + smull2 v16.4s, v1.8h, v24.8h + + sub v17.4s, v17.4s, v19.4s + sub v18.4s, v18.4s, v20.4s + + uxtl v1.8h, v3.8b + sshll v19.4s, v1.4h, #2 + sshll2 v20.4s, v1.8h, #2 + + add v17.4s, v17.4s, v21.4s + add v18.4s, v18.4s, v22.4s + + uxtl v1.8h, v4.8b + uxtl v2.8h, v7.8b + ssubl v21.4s, v1.4h, v2.4h + ssubl2 v22.4s, v1.8h, v2.8h + + add v17.4s, v17.4s, v19.4s + add v18.4s, v18.4s, v20.4s + sub v21.4s, v21.4s, v23.4s + sub v22.4s, v22.4s, v16.4s + add v17.4s, v17.4s, v21.4s + add v18.4s, v18.4s, v22.4s +.endm + + + + +.macro vextin8 + ld1 {v3.16b}, [x11], #16 + mov v7.d[0], v3.d[1] + ext v0.8b, v3.8b, v7.8b, #1 + ext v4.8b, v3.8b, v7.8b, #2 + ext v1.8b, v3.8b, v7.8b, #3 + ext v5.8b, v3.8b, v7.8b, #4 + ext v2.8b, v3.8b, v7.8b, #5 + ext v6.8b, v3.8b, v7.8b, #6 + ext v3.8b, v3.8b, v7.8b, #7 +.endm + + + +// void interp_horiz_ps_c(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt) +.macro HPS_FILTER a b filterhps + mov w12, #8192 + mov w6, w10 + sub x3, x3, #\a + lsl x3, x3, #1 + mov w9, #\a + cmp w9, #4 + b.eq 14f + cmp w9, #12 + b.eq 15f + b 7f +14:
View file
x265_3.4.tar.gz/source/common/aarch64/ipfilter8.h
Added
@@ -0,0 +1,55 @@ +/***************************************************************************** + * Copyright (C) 2020 MulticoreWare, Inc + * + * Authors: Yimeng Su <yimeng.su@huawei.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at license @ x265.com. 
+ *****************************************************************************/ + +#ifndef X265_IPFILTER8_AARCH64_H +#define X265_IPFILTER8_AARCH64_H + + +void x265_interp_8tap_horiz_ps_4x4_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_4x8_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_4x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_8x4_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_8x8_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_8x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_8x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_12x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_16x4_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_16x8_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_16x12_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_16x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_16x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, 
int isRowExt); +void x265_interp_8tap_horiz_ps_16x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_24x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_32x8_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_32x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_32x24_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_32x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_32x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_48x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_64x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_64x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_64x48_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_64x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); + + +#endif // ifndef X265_IPFILTER8_AARCH64_H
View file
x265_3.4.tar.gz/source/common/aarch64/mc-a.S
Added
@@ -0,0 +1,63 @@ +/***************************************************************************** + * Copyright (C) 2020 MulticoreWare, Inc + * + * Authors: Hongbin Liu <liuhongbin1@huawei.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at license @ x265.com. + *****************************************************************************/ + +#include "asm.S" + +.section .rodata + +.align 4 + +.text + +.macro pixel_avg_pp_4xN_neon h +function x265_pixel_avg_pp_4x\h\()_neon +.rept \h + ld1 {v0.s}[0], [x2], x3 + ld1 {v1.s}[0], [x4], x5 + urhadd v2.8b, v0.8b, v1.8b + st1 {v2.s}[0], [x0], x1 +.endr + ret +endfunc +.endm + +pixel_avg_pp_4xN_neon 4 +pixel_avg_pp_4xN_neon 8 +pixel_avg_pp_4xN_neon 16 + +.macro pixel_avg_pp_8xN_neon h +function x265_pixel_avg_pp_8x\h\()_neon +.rept \h + ld1 {v0.8b}, [x2], x3 + ld1 {v1.8b}, [x4], x5 + urhadd v2.8b, v0.8b, v1.8b + st1 {v2.8b}, [x0], x1 +.endr + ret +endfunc +.endm + +pixel_avg_pp_8xN_neon 4 +pixel_avg_pp_8xN_neon 8 +pixel_avg_pp_8xN_neon 16 +pixel_avg_pp_8xN_neon 32
View file
x265_3.4.tar.gz/source/common/aarch64/pixel-util.S
Added
@@ -0,0 +1,419 @@ +/***************************************************************************** + * Copyright (C) 2020 MulticoreWare, Inc + * + * Authors: Yimeng Su <yimeng.su@huawei.com> + * Hongbin Liu <liuhongbin1@huawei.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at license @ x265.com. 
+ *****************************************************************************/ + +#include "asm.S" + +.section .rodata + +.align 4 + +.text + +.macro x265_satd_4x8_8x4_end_neon + add v0.8h, v4.8h, v6.8h + add v1.8h, v5.8h, v7.8h + sub v2.8h, v4.8h, v6.8h + sub v3.8h, v5.8h, v7.8h + + trn1 v16.8h, v0.8h, v1.8h + trn2 v17.8h, v0.8h, v1.8h + add v4.8h, v16.8h, v17.8h + trn1 v18.8h, v2.8h, v3.8h + trn2 v19.8h, v2.8h, v3.8h + sub v5.8h, v16.8h, v17.8h + add v6.8h, v18.8h, v19.8h + sub v7.8h, v18.8h, v19.8h + trn1 v0.4s, v4.4s, v6.4s + trn2 v2.4s, v4.4s, v6.4s + abs v0.8h, v0.8h + trn1 v1.4s, v5.4s, v7.4s + trn2 v3.4s, v5.4s, v7.4s + abs v2.8h, v2.8h + abs v1.8h, v1.8h + abs v3.8h, v3.8h + umax v0.8h, v0.8h, v2.8h + umax v1.8h, v1.8h, v3.8h + add v0.8h, v0.8h, v1.8h + uaddlv s0, v0.8h +.endm + +.macro pixel_satd_4x8_neon + ld1r {v1.2s}, [x2], x3 + ld1r {v0.2s}, [x0], x1 + ld1r {v3.2s}, [x2], x3 + ld1r {v2.2s}, [x0], x1 + ld1r {v5.2s}, [x2], x3 + ld1r {v4.2s}, [x0], x1 + ld1r {v7.2s}, [x2], x3 + ld1r {v6.2s}, [x0], x1 + + ld1 {v1.s}[1], [x2], x3 + ld1 {v0.s}[1], [x0], x1 + usubl v0.8h, v0.8b, v1.8b + ld1 {v3.s}[1], [x2], x3 + ld1 {v2.s}[1], [x0], x1 + usubl v1.8h, v2.8b, v3.8b + ld1 {v5.s}[1], [x2], x3 + ld1 {v4.s}[1], [x0], x1 + usubl v2.8h, v4.8b, v5.8b + ld1 {v7.s}[1], [x2], x3 + add v4.8h, v0.8h, v1.8h + sub v5.8h, v0.8h, v1.8h + ld1 {v6.s}[1], [x0], x1 + usubl v3.8h, v6.8b, v7.8b + add v6.8h, v2.8h, v3.8h + sub v7.8h, v2.8h, v3.8h + x265_satd_4x8_8x4_end_neon +.endm + +// template<int w, int h> +// int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2) +function x265_pixel_satd_4x8_neon + pixel_satd_4x8_neon + mov w0, v0.s[0] + ret +endfunc + +// template<int w, int h> +// int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2) +function x265_pixel_satd_4x16_neon + eor w4, w4, w4 + pixel_satd_4x8_neon + mov w5, v0.s[0] + add w4, w4, w5 + pixel_satd_4x8_neon + mov w5, v0.s[0] + add w0, w5, w4 + 
ret +endfunc + +// template<int w, int h> +// int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2) +function x265_pixel_satd_4x32_neon + eor w4, w4, w4 +.rept 4 + pixel_satd_4x8_neon + mov w5, v0.s[0] + add w4, w4, w5 +.endr + mov w0, w4 + ret +endfunc + +// template<int w, int h> +// int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2) +function x265_pixel_satd_12x16_neon + mov x4, x0 + mov x5, x2 + eor w7, w7, w7 + pixel_satd_4x8_neon + mov w6, v0.s[0] + add w7, w7, w6 + pixel_satd_4x8_neon + mov w6, v0.s[0] + add w7, w7, w6 + + add x0, x4, #4 + add x2, x5, #4 + pixel_satd_4x8_neon + mov w6, v0.s[0] + add w7, w7, w6 + pixel_satd_4x8_neon + mov w6, v0.s[0] + add w7, w7, w6 + + add x0, x4, #8 + add x2, x5, #8 + pixel_satd_4x8_neon + mov w6, v0.s[0] + add w7, w7, w6 + pixel_satd_4x8_neon + mov w6, v0.s[0] + add w0, w7, w6 + ret +endfunc + +// template<int w, int h> +// int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2) +function x265_pixel_satd_12x32_neon + mov x4, x0 + mov x5, x2 + eor w7, w7, w7 +.rept 4 + pixel_satd_4x8_neon + mov w6, v0.s[0] + add w7, w7, w6 +.endr + + add x0, x4, #4 + add x2, x5, #4 +.rept 4 + pixel_satd_4x8_neon + mov w6, v0.s[0] + add w7, w7, w6 +.endr + + add x0, x4, #8 + add x2, x5, #8 +.rept 4 + pixel_satd_4x8_neon + mov w6, v0.s[0] + add w7, w7, w6 +.endr + + mov w0, w7 + ret +endfunc + +// template<int w, int h> +// int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2) +function x265_pixel_satd_8x8_neon + eor w4, w4, w4 + mov x6, x0 + mov x7, x2 + pixel_satd_4x8_neon + mov w5, v0.s[0] + add w4, w4, w5 + add x0, x6, #4 + add x2, x7, #4
View file
x265_3.4.tar.gz/source/common/aarch64/pixel-util.h
Added
@@ -0,0 +1,40 @@ +/***************************************************************************** + * Copyright (C) 2020 MulticoreWare, Inc + * + * Authors: Yimeng Su <yimeng.su@huawei.com> + * Hongbin Liu <liuhongbin1@huawei.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at license @ x265.com. 
+ *****************************************************************************/ + +#ifndef X265_PIXEL_UTIL_AARCH64_H +#define X265_PIXEL_UTIL_AARCH64_H + +int x265_pixel_satd_4x4_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2); +int x265_pixel_satd_4x8_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2); +int x265_pixel_satd_4x16_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2); +int x265_pixel_satd_4x32_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2); +int x265_pixel_satd_8x4_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2); +int x265_pixel_satd_8x8_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2); +int x265_pixel_satd_12x16_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2); +int x265_pixel_satd_12x32_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2); + +uint32_t x265_quant_neon(const int16_t* coef, const int32_t* quantCoeff, int32_t* deltaU, int16_t* qCoef, int qBits, int add, int numCoeff); +int PFX(psyCost_4x4_neon)(const pixel* source, intptr_t sstride, const pixel* recon, intptr_t rstride); + +#endif // ifndef X265_PIXEL_UTIL_AARCH64_H
View file
x265_3.4.tar.gz/source/common/aarch64/pixel.h
Added
@@ -0,0 +1,105 @@ +/***************************************************************************** + * Copyright (C) 2020 MulticoreWare, Inc + * + * Authors: Hongbin Liu <liuhongbin1@huawei.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at license @ x265.com. 
+ *****************************************************************************/ + +#ifndef X265_I386_PIXEL_AARCH64_H +#define X265_I386_PIXEL_AARCH64_H + +void x265_pixel_avg_pp_4x4_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_4x8_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_4x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_8x4_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_8x8_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_8x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_8x32_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_12x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_16x4_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_16x8_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_16x12_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_16x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_16x32_neon (pixel* dst, intptr_t dstride, const pixel* 
src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_16x64_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_24x32_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_32x8_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_32x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_32x24_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_32x32_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_32x64_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_48x64_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_64x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_64x32_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_64x48_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_64x64_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); + +void x265_sad_x3_4x4_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void 
x265_sad_x3_4x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_4x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_8x4_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_8x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_8x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_8x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_12x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_16x4_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_16x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_16x12_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_16x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_16x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_16x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_24x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); 
+void x265_sad_x3_32x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_32x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_32x24_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_32x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_32x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_48x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_64x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_64x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_64x48_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_64x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); + +void x265_sad_x4_4x4_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_4x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_4x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_8x4_neon(const pixel* fenc, const pixel* fref0, const 
pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_8x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_8x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_8x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_12x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_16x4_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_16x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_16x12_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_16x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_16x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_16x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_24x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_32x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, 
const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_32x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_32x24_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_32x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_32x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_48x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_64x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_64x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_64x48_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_64x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); + +#endif // ifndef X265_I386_PIXEL_AARCH64_H
View file
x265_3.4.tar.gz/source/common/aarch64/sad-a.S
Added
@@ -0,0 +1,105 @@ +/***************************************************************************** + * Copyright (C) 2020 MulticoreWare, Inc + * + * Authors: Hongbin Liu <liuhongbin1@huawei.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at license @ x265.com. 
+ *****************************************************************************/ + +#include "asm.S" + +.section .rodata + +.align 4 + +.text + +.macro SAD_X_START_8 x + ld1 {v0.8b}, [x0], x9 +.if \x == 3 + ld1 {v1.8b}, [x1], x4 + ld1 {v2.8b}, [x2], x4 + ld1 {v3.8b}, [x3], x4 +.elseif \x == 4 + ld1 {v1.8b}, [x1], x5 + ld1 {v2.8b}, [x2], x5 + ld1 {v3.8b}, [x3], x5 + ld1 {v4.8b}, [x4], x5 +.endif + uabdl v16.8h, v0.8b, v1.8b + uabdl v17.8h, v0.8b, v2.8b + uabdl v18.8h, v0.8b, v3.8b +.if \x == 4 + uabdl v19.8h, v0.8b, v4.8b +.endif +.endm + +.macro SAD_X_8 x + ld1 {v0.8b}, [x0], x9 +.if \x == 3 + ld1 {v1.8b}, [x1], x4 + ld1 {v2.8b}, [x2], x4 + ld1 {v3.8b}, [x3], x4 +.elseif \x == 4 + ld1 {v1.8b}, [x1], x5 + ld1 {v2.8b}, [x2], x5 + ld1 {v3.8b}, [x3], x5 + ld1 {v4.8b}, [x4], x5 +.endif + uabal v16.8h, v0.8b, v1.8b + uabal v17.8h, v0.8b, v2.8b + uabal v18.8h, v0.8b, v3.8b +.if \x == 4 + uabal v19.8h, v0.8b, v4.8b +.endif +.endm + +.macro SAD_X_8xN x, h +function x265_sad_x\x\()_8x\h\()_neon + mov x9, #FENC_STRIDE + SAD_X_START_8 \x +.rept \h - 1 + SAD_X_8 \x +.endr + uaddlv s0, v16.8h + uaddlv s1, v17.8h + uaddlv s2, v18.8h +.if \x == 4 + uaddlv s3, v19.8h +.endif + +.if \x == 3 + stp s0, s1, [x5] + str s2, [x5, #8] +.elseif \x == 4 + stp s0, s1, [x6] + stp s2, s3, [x6, #8] +.endif + ret +endfunc +.endm + +SAD_X_8xN 3 4 +SAD_X_8xN 3 8 +SAD_X_8xN 3 16 +SAD_X_8xN 3 32 + +SAD_X_8xN 4 4 +SAD_X_8xN 4 8 +SAD_X_8xN 4 16 +SAD_X_8xN 4 32
View file
x265_3.3.tar.gz/source/common/arm/asm-primitives.cpp -> x265_3.4.tar.gz/source/common/arm/asm-primitives.cpp
Changed
@@ -5,6 +5,7 @@ * Praveen Kumar Tiwari <praveen@multicorewareinc.com> * Min Chen <chenm003@163.com> <min.chen@multicorewareinc.com> * Dnyaneshwar Gorade <dnyaneshwar@multicorewareinc.com> + * Hongbin Liu<liuhongbin1@huawei.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -48,77 +49,77 @@ p.ssim_4x4x2_core = PFX(ssim_4x4x2_core_neon); // addAvg - p.pu[LUMA_4x4].addAvg = PFX(addAvg_4x4_neon); - p.pu[LUMA_4x8].addAvg = PFX(addAvg_4x8_neon); - p.pu[LUMA_4x16].addAvg = PFX(addAvg_4x16_neon); - p.pu[LUMA_8x4].addAvg = PFX(addAvg_8x4_neon); - p.pu[LUMA_8x8].addAvg = PFX(addAvg_8x8_neon); - p.pu[LUMA_8x16].addAvg = PFX(addAvg_8x16_neon); - p.pu[LUMA_8x32].addAvg = PFX(addAvg_8x32_neon); - p.pu[LUMA_12x16].addAvg = PFX(addAvg_12x16_neon); - p.pu[LUMA_16x4].addAvg = PFX(addAvg_16x4_neon); - p.pu[LUMA_16x8].addAvg = PFX(addAvg_16x8_neon); - p.pu[LUMA_16x12].addAvg = PFX(addAvg_16x12_neon); - p.pu[LUMA_16x16].addAvg = PFX(addAvg_16x16_neon); - p.pu[LUMA_16x32].addAvg = PFX(addAvg_16x32_neon); - p.pu[LUMA_16x64].addAvg = PFX(addAvg_16x64_neon); - p.pu[LUMA_24x32].addAvg = PFX(addAvg_24x32_neon); - p.pu[LUMA_32x8].addAvg = PFX(addAvg_32x8_neon); - p.pu[LUMA_32x16].addAvg = PFX(addAvg_32x16_neon); - p.pu[LUMA_32x24].addAvg = PFX(addAvg_32x24_neon); - p.pu[LUMA_32x32].addAvg = PFX(addAvg_32x32_neon); - p.pu[LUMA_32x64].addAvg = PFX(addAvg_32x64_neon); - p.pu[LUMA_48x64].addAvg = PFX(addAvg_48x64_neon); - p.pu[LUMA_64x16].addAvg = PFX(addAvg_64x16_neon); - p.pu[LUMA_64x32].addAvg = PFX(addAvg_64x32_neon); - p.pu[LUMA_64x48].addAvg = PFX(addAvg_64x48_neon); - p.pu[LUMA_64x64].addAvg = PFX(addAvg_64x64_neon); + p.pu[LUMA_4x4].addAvg[NONALIGNED] = PFX(addAvg_4x4_neon); + p.pu[LUMA_4x8].addAvg[NONALIGNED] = PFX(addAvg_4x8_neon); + p.pu[LUMA_4x16].addAvg[NONALIGNED] = PFX(addAvg_4x16_neon); + p.pu[LUMA_8x4].addAvg[NONALIGNED] = PFX(addAvg_8x4_neon); + p.pu[LUMA_8x8].addAvg[NONALIGNED] 
= PFX(addAvg_8x8_neon); + p.pu[LUMA_8x16].addAvg[NONALIGNED] = PFX(addAvg_8x16_neon); + p.pu[LUMA_8x32].addAvg[NONALIGNED] = PFX(addAvg_8x32_neon); + p.pu[LUMA_12x16].addAvg[NONALIGNED] = PFX(addAvg_12x16_neon); + p.pu[LUMA_16x4].addAvg[NONALIGNED] = PFX(addAvg_16x4_neon); + p.pu[LUMA_16x8].addAvg[NONALIGNED] = PFX(addAvg_16x8_neon); + p.pu[LUMA_16x12].addAvg[NONALIGNED] = PFX(addAvg_16x12_neon); + p.pu[LUMA_16x16].addAvg[NONALIGNED] = PFX(addAvg_16x16_neon); + p.pu[LUMA_16x32].addAvg[NONALIGNED] = PFX(addAvg_16x32_neon); + p.pu[LUMA_16x64].addAvg[NONALIGNED] = PFX(addAvg_16x64_neon); + p.pu[LUMA_24x32].addAvg[NONALIGNED] = PFX(addAvg_24x32_neon); + p.pu[LUMA_32x8].addAvg[NONALIGNED] = PFX(addAvg_32x8_neon); + p.pu[LUMA_32x16].addAvg[NONALIGNED] = PFX(addAvg_32x16_neon); + p.pu[LUMA_32x24].addAvg[NONALIGNED] = PFX(addAvg_32x24_neon); + p.pu[LUMA_32x32].addAvg[NONALIGNED] = PFX(addAvg_32x32_neon); + p.pu[LUMA_32x64].addAvg[NONALIGNED] = PFX(addAvg_32x64_neon); + p.pu[LUMA_48x64].addAvg[NONALIGNED] = PFX(addAvg_48x64_neon); + p.pu[LUMA_64x16].addAvg[NONALIGNED] = PFX(addAvg_64x16_neon); + p.pu[LUMA_64x32].addAvg[NONALIGNED] = PFX(addAvg_64x32_neon); + p.pu[LUMA_64x48].addAvg[NONALIGNED] = PFX(addAvg_64x48_neon); + p.pu[LUMA_64x64].addAvg[NONALIGNED] = PFX(addAvg_64x64_neon); // chroma addAvg - p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].addAvg = PFX(addAvg_4x2_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].addAvg = PFX(addAvg_4x4_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].addAvg = PFX(addAvg_4x8_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_4x16].addAvg = PFX(addAvg_4x16_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_6x8].addAvg = PFX(addAvg_6x8_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_8x2].addAvg = PFX(addAvg_8x2_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].addAvg = PFX(addAvg_8x4_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_8x6].addAvg = PFX(addAvg_8x6_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].addAvg = PFX(addAvg_8x8_neon); - 
p.chroma[X265_CSP_I420].pu[CHROMA_420_8x16].addAvg = PFX(addAvg_8x16_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_8x32].addAvg = PFX(addAvg_8x32_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_12x16].addAvg = PFX(addAvg_12x16_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_16x4].addAvg = PFX(addAvg_16x4_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_16x8].addAvg = PFX(addAvg_16x8_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_16x12].addAvg = PFX(addAvg_16x12_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].addAvg = PFX(addAvg_16x16_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_16x32].addAvg = PFX(addAvg_16x32_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_24x32].addAvg = PFX(addAvg_24x32_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_32x8].addAvg = PFX(addAvg_32x8_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_32x16].addAvg = PFX(addAvg_32x16_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].addAvg = PFX(addAvg_32x24_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].addAvg = PFX(addAvg_32x32_neon); - - p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].addAvg = PFX(addAvg_4x8_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].addAvg = PFX(addAvg_4x16_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].addAvg = PFX(addAvg_4x32_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_6x16].addAvg = PFX(addAvg_6x16_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].addAvg = PFX(addAvg_8x4_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].addAvg = PFX(addAvg_8x8_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].addAvg = PFX(addAvg_8x12_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_8x16].addAvg = PFX(addAvg_8x16_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_8x32].addAvg = PFX(addAvg_8x32_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].addAvg = PFX(addAvg_8x64_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].addAvg = PFX(addAvg_12x32_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].addAvg = PFX(addAvg_16x8_neon); - 
p.chroma[X265_CSP_I422].pu[CHROMA_422_16x16].addAvg = PFX(addAvg_16x16_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].addAvg = PFX(addAvg_16x24_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_16x32].addAvg = PFX(addAvg_16x32_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].addAvg = PFX(addAvg_16x64_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].addAvg = PFX(addAvg_24x64_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].addAvg = PFX(addAvg_32x16_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].addAvg = PFX(addAvg_32x32_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].addAvg = PFX(addAvg_32x48_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].addAvg = PFX(addAvg_32x64_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].addAvg[NONALIGNED] = PFX(addAvg_4x2_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].addAvg[NONALIGNED] = PFX(addAvg_4x4_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].addAvg[NONALIGNED] = PFX(addAvg_4x8_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_4x16].addAvg[NONALIGNED] = PFX(addAvg_4x16_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_6x8].addAvg[NONALIGNED] = PFX(addAvg_6x8_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_8x2].addAvg[NONALIGNED] = PFX(addAvg_8x2_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].addAvg[NONALIGNED] = PFX(addAvg_8x4_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_8x6].addAvg[NONALIGNED] = PFX(addAvg_8x6_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].addAvg[NONALIGNED] = PFX(addAvg_8x8_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_8x16].addAvg[NONALIGNED] = PFX(addAvg_8x16_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_8x32].addAvg[NONALIGNED] = PFX(addAvg_8x32_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_12x16].addAvg[NONALIGNED] = PFX(addAvg_12x16_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_16x4].addAvg[NONALIGNED] = PFX(addAvg_16x4_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_16x8].addAvg[NONALIGNED] = PFX(addAvg_16x8_neon); + 
p.chroma[X265_CSP_I420].pu[CHROMA_420_16x12].addAvg[NONALIGNED] = PFX(addAvg_16x12_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].addAvg[NONALIGNED] = PFX(addAvg_16x16_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_16x32].addAvg[NONALIGNED] = PFX(addAvg_16x32_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_24x32].addAvg[NONALIGNED] = PFX(addAvg_24x32_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_32x8].addAvg[NONALIGNED] = PFX(addAvg_32x8_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_32x16].addAvg[NONALIGNED] = PFX(addAvg_32x16_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].addAvg[NONALIGNED] = PFX(addAvg_32x24_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].addAvg[NONALIGNED] = PFX(addAvg_32x32_neon); + + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].addAvg[NONALIGNED] = PFX(addAvg_4x8_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].addAvg[NONALIGNED] = PFX(addAvg_4x16_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].addAvg[NONALIGNED] = PFX(addAvg_4x32_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_6x16].addAvg[NONALIGNED] = PFX(addAvg_6x16_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].addAvg[NONALIGNED] = PFX(addAvg_8x4_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].addAvg[NONALIGNED] = PFX(addAvg_8x8_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].addAvg[NONALIGNED] = PFX(addAvg_8x12_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_8x16].addAvg[NONALIGNED] = PFX(addAvg_8x16_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_8x32].addAvg[NONALIGNED] = PFX(addAvg_8x32_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].addAvg[NONALIGNED] = PFX(addAvg_8x64_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].addAvg[NONALIGNED] = PFX(addAvg_12x32_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].addAvg[NONALIGNED] = PFX(addAvg_16x8_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_16x16].addAvg[NONALIGNED] = PFX(addAvg_16x16_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].addAvg[NONALIGNED] = PFX(addAvg_16x24_neon); + 
p.chroma[X265_CSP_I422].pu[CHROMA_422_16x32].addAvg[NONALIGNED] = PFX(addAvg_16x32_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].addAvg[NONALIGNED] = PFX(addAvg_16x64_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].addAvg[NONALIGNED] = PFX(addAvg_24x64_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].addAvg[NONALIGNED] = PFX(addAvg_32x16_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].addAvg[NONALIGNED] = PFX(addAvg_32x32_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].addAvg[NONALIGNED] = PFX(addAvg_32x48_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].addAvg[NONALIGNED] = PFX(addAvg_32x64_neon); // quant p.quant = PFX(quant_neon); @@ -402,7 +403,7 @@ p.scale2D_64to32 = PFX(scale2D_64to32_neon); // scale1D_128to64 - p.scale1D_128to64 = PFX(scale1D_128to64_neon); + p.scale1D_128to64[NONALIGNED] = PFX(scale1D_128to64_neon); // copy_count p.cu[BLOCK_4x4].copy_cnt = PFX(copy_cnt_4_neon); @@ -411,37 +412,37 @@ p.cu[BLOCK_32x32].copy_cnt = PFX(copy_cnt_32_neon); // filterPixelToShort - p.pu[LUMA_4x4].convert_p2s = PFX(filterPixelToShort_4x4_neon); - p.pu[LUMA_4x8].convert_p2s = PFX(filterPixelToShort_4x8_neon); - p.pu[LUMA_4x16].convert_p2s = PFX(filterPixelToShort_4x16_neon); - p.pu[LUMA_8x4].convert_p2s = PFX(filterPixelToShort_8x4_neon); - p.pu[LUMA_8x8].convert_p2s = PFX(filterPixelToShort_8x8_neon); - p.pu[LUMA_8x16].convert_p2s = PFX(filterPixelToShort_8x16_neon); - p.pu[LUMA_8x32].convert_p2s = PFX(filterPixelToShort_8x32_neon); - p.pu[LUMA_12x16].convert_p2s = PFX(filterPixelToShort_12x16_neon); - p.pu[LUMA_16x4].convert_p2s = PFX(filterPixelToShort_16x4_neon); - p.pu[LUMA_16x8].convert_p2s = PFX(filterPixelToShort_16x8_neon); - p.pu[LUMA_16x12].convert_p2s = PFX(filterPixelToShort_16x12_neon); - p.pu[LUMA_16x16].convert_p2s = PFX(filterPixelToShort_16x16_neon); - p.pu[LUMA_16x32].convert_p2s = PFX(filterPixelToShort_16x32_neon); - p.pu[LUMA_16x64].convert_p2s = PFX(filterPixelToShort_16x64_neon); - 
p.pu[LUMA_24x32].convert_p2s = PFX(filterPixelToShort_24x32_neon); - p.pu[LUMA_32x8].convert_p2s = PFX(filterPixelToShort_32x8_neon); - p.pu[LUMA_32x16].convert_p2s = PFX(filterPixelToShort_32x16_neon); - p.pu[LUMA_32x24].convert_p2s = PFX(filterPixelToShort_32x24_neon); - p.pu[LUMA_32x32].convert_p2s = PFX(filterPixelToShort_32x32_neon); - p.pu[LUMA_32x64].convert_p2s = PFX(filterPixelToShort_32x64_neon); - p.pu[LUMA_48x64].convert_p2s = PFX(filterPixelToShort_48x64_neon); - p.pu[LUMA_64x16].convert_p2s = PFX(filterPixelToShort_64x16_neon); - p.pu[LUMA_64x32].convert_p2s = PFX(filterPixelToShort_64x32_neon); - p.pu[LUMA_64x48].convert_p2s = PFX(filterPixelToShort_64x48_neon); - p.pu[LUMA_64x64].convert_p2s = PFX(filterPixelToShort_64x64_neon); + p.pu[LUMA_4x4].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_4x4_neon); + p.pu[LUMA_4x8].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_4x8_neon); + p.pu[LUMA_4x16].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_4x16_neon); + p.pu[LUMA_8x4].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_8x4_neon); + p.pu[LUMA_8x8].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_8x8_neon); + p.pu[LUMA_8x16].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_8x16_neon); + p.pu[LUMA_8x32].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_8x32_neon);
View file
x265_3.3.tar.gz/source/common/common.h -> x265_3.4.tar.gz/source/common/common.h
Changed
@@ -129,6 +129,7 @@ typedef uint64_t sum2_t; typedef uint64_t pixel4; typedef int64_t ssum2_t; +#define SHIFT_TO_BITPLANE 9 #define HISTOGRAM_BINS 1024 #else typedef uint8_t pixel; @@ -136,6 +137,7 @@ typedef uint32_t sum2_t; typedef uint32_t pixel4; typedef int32_t ssum2_t; // Signed sum +#define SHIFT_TO_BITPLANE 7 #define HISTOGRAM_BINS 256 #endif // if HIGH_BIT_DEPTH @@ -270,6 +272,9 @@ #define MAX_TR_SIZE (1 << MAX_LOG2_TR_SIZE) #define MAX_TS_SIZE (1 << MAX_LOG2_TS_SIZE) +#define RDCOST_BASED_RSKIP 1 +#define EDGE_BASED_RSKIP 2 + #define COEF_REMAIN_BIN_REDUCTION 3 // indicates the level at which the VLC // transitions from Golomb-Rice to TU+EG(k)
View file
x265_3.3.tar.gz/source/common/cpu.cpp -> x265_3.4.tar.gz/source/common/cpu.cpp
Changed
@@ -5,6 +5,8 @@ * Laurent Aimar <fenrir@via.ecp.fr> * Fiona Glaser <fiona@x264.com> * Steve Borho <steve@borho.org> + * Hongbin Liu <liuhongbin1@huawei.com> + * Yimeng Su <yimeng.su@huawei.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -367,6 +369,8 @@ flags |= PFX(cpu_fast_neon_mrc_test)() ? X265_CPU_FAST_NEON_MRC : 0; #endif // TODO: write dual issue test? currently it's A8 (dual issue) vs. A9 (fast mrc) +#elif X265_ARCH_ARM64 + flags |= X265_CPU_NEON; #endif // if HAVE_ARMV6 return flags; }
View file
x265_3.3.tar.gz/source/common/frame.cpp -> x265_3.4.tar.gz/source/common/frame.cpp
Changed
@@ -61,6 +61,8 @@ m_edgePic = NULL; m_gaussianPic = NULL; m_thetaPic = NULL; + m_edgeBitPlane = NULL; + m_edgeBitPic = NULL; } bool Frame::create(x265_param *param, float* quantOffsets) @@ -115,6 +117,19 @@ m_thetaPic = X265_MALLOC(pixel, m_stride * (maxHeight + (m_lumaMarginY * 2))); } + if (param->recursionSkipMode == EDGE_BASED_RSKIP) + { + uint32_t numCuInWidth = (param->sourceWidth + param->maxCUSize - 1) / param->maxCUSize; + uint32_t numCuInHeight = (param->sourceHeight + param->maxCUSize - 1) / param->maxCUSize; + uint32_t lumaMarginX = param->maxCUSize + 32; + uint32_t lumaMarginY = param->maxCUSize + 16; + uint32_t stride = (numCuInWidth * param->maxCUSize) + (lumaMarginX << 1); + uint32_t maxHeight = numCuInHeight * param->maxCUSize; + uint32_t bitPlaneSize = stride * (maxHeight + (lumaMarginY * 2)); + CHECKED_MALLOC_ZERO(m_edgeBitPlane, pixel, bitPlaneSize); + m_edgeBitPic = m_edgeBitPlane + lumaMarginY * stride + lumaMarginX; + } + if (m_fencPic->create(param, !!m_param->bCopyPicToFrame) && m_lowres.create(param, m_fencPic, param->rc.qgSize)) { X265_CHECK((m_reconColCount == NULL), "m_reconColCount was initialized"); @@ -267,4 +282,10 @@ X265_FREE(m_gaussianPic); X265_FREE(m_thetaPic); } + + if (m_param->recursionSkipMode == EDGE_BASED_RSKIP) + { + X265_FREE_ZERO(m_edgeBitPlane); + m_edgeBitPic = NULL; + } }
View file
x265_3.3.tar.gz/source/common/frame.h -> x265_3.4.tar.gz/source/common/frame.h
Changed
@@ -99,7 +99,7 @@ float* m_quantOffsets; // points to quantOffsets in x265_picture x265_sei m_userSEI; uint32_t m_picStruct; // picture structure SEI message - x265_dolby_vision_rpu m_rpu; + x265_dolby_vision_rpu m_rpu; /* Frame Parallelism - notification between FrameEncoders of available motion reference rows */ ThreadSafeInteger* m_reconRowFlag; // flag of CTU rows completely reconstructed and extended for motion reference @@ -137,6 +137,10 @@ pixel* m_gaussianPic; pixel* m_thetaPic; + /* edge bit plane for rskips 2 and 3 */ + pixel* m_edgeBitPlane; + pixel* m_edgeBitPic; + Frame(); bool create(x265_param *param, float* quantOffsets);
View file
x265_3.3.tar.gz/source/common/param.cpp -> x265_3.4.tar.gz/source/common/param.cpp
Changed
@@ -198,7 +198,8 @@ param->bEnableWeightedPred = 1; param->bEnableWeightedBiPred = 0; param->bEnableEarlySkip = 1; - param->bEnableRecursionSkip = 1; + param->recursionSkipMode = 1; + param->edgeVarThreshold = 0.05f; param->bEnableAMP = 0; param->bEnableRectInter = 0; param->rdLevel = 3; @@ -285,6 +286,7 @@ param->rc.bEnableConstVbv = 0; param->bResetZoneConfig = 1; param->reconfigWindowSize = 0; + param->decoderVbvMaxRate = 0; /* Video Usability Information (VUI) */ param->vui.aspectRatioIdc = 0; @@ -546,7 +548,7 @@ param->maxNumMergeCand = 5; param->searchMethod = X265_STAR_SEARCH; param->bEnableTransformSkip = 1; - param->bEnableRecursionSkip = 0; + param->recursionSkipMode = 0; param->maxNumReferences = 5; param->limitReferences = 0; param->lookaheadSlices = 0; // disabled for best quality @@ -598,7 +600,7 @@ param->rc.hevcAq = 0; param->rc.qpStep = 1; param->rc.bEnableGrain = 1; - param->bEnableRecursionSkip = 0; + param->recursionSkipMode = 0; param->psyRd = 4.0; param->psyRdoq = 10.0; param->bEnableSAO = 0; @@ -702,8 +704,9 @@ OPT("ref") p->maxNumReferences = atoi(value); OPT("fast-intra") p->bEnableFastIntra = atobool(value); OPT("early-skip") p->bEnableEarlySkip = atobool(value); - OPT("rskip") p->bEnableRecursionSkip = atobool(value); - OPT("me")p->searchMethod = parseName(value, x265_motion_est_names, bError); + OPT("rskip") p->recursionSkipMode = atoi(value); + OPT("rskip-edge-threshold") p->edgeVarThreshold = atoi(value)/100.0f; + OPT("me") p->searchMethod = parseName(value, x265_motion_est_names, bError); OPT("subme") p->subpelRefine = atoi(value); OPT("merange") p->searchRange = atoi(value); OPT("rect") p->bEnableRectInter = atobool(value); @@ -919,7 +922,7 @@ OPT("max-merge") p->maxNumMergeCand = (uint32_t)atoi(value); OPT("temporal-mvp") p->bEnableTemporalMvp = atobool(value); OPT("early-skip") p->bEnableEarlySkip = atobool(value); - OPT("rskip") p->bEnableRecursionSkip = atobool(value); + OPT("rskip") p->recursionSkipMode = atoi(value); 
OPT("rdpenalty") p->rdPenalty = atoi(value); OPT("tskip") p->bEnableTransformSkip = atobool(value); OPT("no-tskip-fast") p->bEnableTSkipFast = atobool(value); @@ -1221,6 +1224,7 @@ } } OPT("hist-threshold") p->edgeTransitionThreshold = atof(value); + OPT("rskip-edge-threshold") p->edgeVarThreshold = atoi(value)/100.0f; OPT("lookahead-threads") p->lookaheadThreads = atoi(value); OPT("opt-cu-delta-qp") p->bOptCUDeltaQP = atobool(value); OPT("multi-pass-opt-analysis") p->analysisMultiPassRefine = atobool(value); @@ -1596,9 +1600,16 @@ CHECK(param->rdLevel < 1 || param->rdLevel > 6, "RD Level is out of range"); CHECK(param->rdoqLevel < 0 || param->rdoqLevel > 2, - "RDOQ Level is out of range"); + "RDOQ Level is out of range"); CHECK(param->dynamicRd < 0 || param->dynamicRd > x265_ADAPT_RD_STRENGTH, - "Dynamic RD strength must be between 0 and 4"); + "Dynamic RD strength must be between 0 and 4"); + CHECK(param->recursionSkipMode > 2 || param->recursionSkipMode < 0, + "Invalid Recursion skip mode. Valid modes 0,1,2"); + if (param->recursionSkipMode == EDGE_BASED_RSKIP) + { + CHECK(param->edgeVarThreshold < 0.0f || param->edgeVarThreshold > 1.0f, + "Minimum edge density percentage for a CU should be an integer between 0 to 100"); + } CHECK(param->bframes && param->bframes >= param->lookaheadDepth && !param->rc.bStatRead, "Lookahead depth must be greater than the max consecutive bframe count"); CHECK(param->bframes < 0, @@ -1789,6 +1800,7 @@ } CHECK(param->confWinRightOffset < 0, "Conformance Window Right Offset must be 0 or greater"); CHECK(param->confWinBottomOffset < 0, "Conformance Window Bottom Offset must be 0 or greater"); + CHECK(param->decoderVbvMaxRate < 0, "Invalid Decoder Vbv Maxrate. 
Value can not be less than zero"); return check_failed; } @@ -1908,7 +1920,9 @@ TOOLVAL(param->psyRdoq, "psy-rdoq=%.2lf"); TOOLOPT(param->bEnableRdRefine, "rd-refine"); TOOLOPT(param->bEnableEarlySkip, "early-skip"); - TOOLOPT(param->bEnableRecursionSkip, "rskip"); + TOOLVAL(param->recursionSkipMode, "rskip mode=%d"); + if (param->recursionSkipMode == EDGE_BASED_RSKIP) + TOOLVAL(param->edgeVarThreshold, "rskip-edge-threshold=%.2f"); TOOLOPT(param->bEnableSplitRdSkip, "splitrd-skip"); TOOLVAL(param->noiseReductionIntra, "nr-intra=%d"); TOOLVAL(param->noiseReductionInter, "nr-inter=%d"); @@ -2066,7 +2080,10 @@ s += sprintf(s, " rd=%d", p->rdLevel); s += sprintf(s, " selective-sao=%d", p->selectiveSAO); BOOL(p->bEnableEarlySkip, "early-skip"); - BOOL(p->bEnableRecursionSkip, "rskip"); + BOOL(p->recursionSkipMode, "rskip"); + if (p->recursionSkipMode == EDGE_BASED_RSKIP) + s += sprintf(s, " rskip-edge-threshold=%f", p->edgeVarThreshold); + BOOL(p->bEnableFastIntra, "fast-intra"); BOOL(p->bEnableTSkipFast, "tskip-fast"); BOOL(p->bCULossless, "cu-lossless"); @@ -2204,6 +2221,7 @@ if (p->bEnableSceneCutAwareQp) s += sprintf(s, " scenecut-window=%d max-qp-delta=%d", p->scenecutWindow, p->maxQpDelta); s += sprintf(s, "conformance-window-offsets right=%d bottom=%d", p->confWinRightOffset, p->confWinBottomOffset); + s += sprintf(s, " decoder-max-rate=%d", p->decoderVbvMaxRate); #undef BOOL return buf; } @@ -2373,7 +2391,8 @@ dst->bSaoNonDeblocked = src->bSaoNonDeblocked; dst->rdLevel = src->rdLevel; dst->bEnableEarlySkip = src->bEnableEarlySkip; - dst->bEnableRecursionSkip = src->bEnableRecursionSkip; + dst->recursionSkipMode = src->recursionSkipMode; + dst->edgeVarThreshold = src->edgeVarThreshold; dst->bEnableFastIntra = src->bEnableFastIntra; dst->bEnableTSkipFast = src->bEnableTSkipFast; dst->bCULossless = src->bCULossless; @@ -2419,8 +2438,9 @@ dst->rc.zonefileCount = src->rc.zonefileCount; dst->reconfigWindowSize = src->reconfigWindowSize; dst->bResetZoneConfig = 
src->bResetZoneConfig; + dst->decoderVbvMaxRate = src->decoderVbvMaxRate; - if (src->rc.zonefileCount && src->rc.zones) + if (src->rc.zonefileCount && src->rc.zones && src->bResetZoneConfig) { for (int i = 0; i < src->rc.zonefileCount; i++) {
View file
x265_3.3.tar.gz/source/common/pixel.cpp -> x265_3.4.tar.gz/source/common/pixel.cpp
Changed
@@ -5,6 +5,7 @@ * Mandar Gurav <mandar@multicorewareinc.com> * Mahesh Pittala <mahesh@multicorewareinc.com> * Min Chen <min.chen@multicorewareinc.com> + * Hongbin Liu<liuhongbin1@huawei.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -265,6 +266,10 @@ { int satd = 0; +#if ENABLE_ASSEMBLY && X265_ARCH_ARM64 + pixelcmp_t satd_4x4 = x265_pixel_satd_4x4_neon; +#endif + for (int row = 0; row < h; row += 4) for (int col = 0; col < w; col += 4) satd += satd_4x4(pix1 + row * stride_pix1 + col, stride_pix1, @@ -279,6 +284,10 @@ { int satd = 0; +#if ENABLE_ASSEMBLY && X265_ARCH_ARM64 + pixelcmp_t satd_8x4 = x265_pixel_satd_8x4_neon; +#endif + for (int row = 0; row < h; row += 4) for (int col = 0; col < w; col += 8) satd += satd_8x4(pix1 + row * stride_pix1 + col, stride_pix1, @@ -876,6 +885,18 @@ } } +static void planecopy_pp_shr_c(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift) +{ + for (int r = 0; r < height; r++) + { + for (int c = 0; c < width; c++) + dst[c] = (pixel)((src[c] >> shift)); + + dst += dstStride; + src += srcStride; + } +} + static void planecopy_sp_shl_c(const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask) { for (int r = 0; r < height; r++) @@ -1316,6 +1337,7 @@ p.planecopy_cp = planecopy_cp_c; p.planecopy_sp = planecopy_sp_c; p.planecopy_sp_shl = planecopy_sp_shl_c; + p.planecopy_pp_shr = planecopy_pp_shr_c; #if HIGH_BIT_DEPTH p.planeClipAndMax = planeClipAndMax_c; #endif
View file
x265_3.3.tar.gz/source/common/primitives.h -> x265_3.4.tar.gz/source/common/primitives.h
Changed
@@ -8,6 +8,8 @@ * Rajesh Paulraj <rajesh@multicorewareinc.com> * Praveen Kumar Tiwari <praveen@multicorewareinc.com> * Min Chen <chenm003@163.com> + * Hongbin Liu<liuhongbin1@huawei.com> + * Yimeng Su <yimeng.su@huawei.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -204,6 +206,7 @@ typedef void (*sign_t)(int8_t *dst, const pixel *src1, const pixel *src2, const int endX); typedef void (*planecopy_cp_t) (const uint8_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift); typedef void (*planecopy_sp_t) (const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask); +typedef void (*planecopy_pp_t) (const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift); typedef pixel (*planeClipAndMax_t)(pixel *src, intptr_t stride, int width, int height, uint64_t *outsum, const pixel minPix, const pixel maxPix); typedef void (*cutree_propagate_cost) (int* dst, const uint16_t* propagateIn, const int32_t* intraCosts, const uint16_t* interCosts, const int32_t* invQscales, const double* fpsFactor, int len); @@ -358,6 +361,7 @@ planecopy_cp_t planecopy_cp; planecopy_sp_t planecopy_sp; planecopy_sp_t planecopy_sp_shl; + planecopy_pp_t planecopy_pp_shr; planeClipAndMax_t planeClipAndMax; weightp_sp_t weight_sp; @@ -465,6 +469,9 @@ void setupInstrinsicPrimitives(EncoderPrimitives &p, int cpuMask); void setupAssemblyPrimitives(EncoderPrimitives &p, int cpuMask); void setupAliasPrimitives(EncoderPrimitives &p); +#if X265_ARCH_ARM64 +void setupAliasCPrimitives(EncoderPrimitives &cp, EncoderPrimitives &asmp, int cpuMask); +#endif #if HAVE_ALTIVEC void setupPixelPrimitives_altivec(EncoderPrimitives &p); void setupDCTPrimitives_altivec(EncoderPrimitives &p); @@ -479,4 +486,10 @@ extern const char* PFX(build_info_str); #endif +#if ENABLE_ASSEMBLY && 
X265_ARCH_ARM64 +extern "C" { +#include "aarch64/pixel-util.h" +} +#endif + #endif // ifndef X265_PRIMITIVES_H
View file
x265_3.4.tar.gz/source/common/scaler.cpp
Added
@@ -0,0 +1,1110 @@ +/***************************************************************************** +* Copyright (C) 2013-2020 MulticoreWare, Inc +* +* Authors: Pooja Venkatesan <pooja@multicorewareinc.com> +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software +* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. +* +* This program is also available under a commercial proprietary license. +* For more information, contact us at license @ x265.com. 
+*****************************************************************************/ + +#include "scaler.h" + +#if _MSC_VER +#pragma warning(disable: 4706) // assignment within conditional +#pragma warning(disable: 4244) // '=' : possible loss of data +#endif + +#define SHORT_MIN (-(1 << 15)) +#define SHORT_MAX ((1 << 15) - 1) +#define SHORT_MAX_10 ((1 << 10) - 1) + +namespace X265_NS{ + +ScalerFilterManager::ScalerFilterManager() : + m_bitDepth(0), + m_algorithmFlags(0), + m_srcW(0), + m_srcH(0), + m_dstW(0), + m_dstH(0), + m_crSrcW(0), + m_crSrcH(0), + m_crDstW(0), + m_crDstH(0), + m_crSrcHSubSample(0), + m_crSrcVSubSample(0), + m_crDstHSubSample(0), + m_crDstVSubSample(0) +{ + for (int i = 0; i < m_numSlice; i++) + m_slices[i] = NULL; + for (int i = 0; i < m_numFilter; i++) + m_ScalerFilters[i] = NULL; +} + +inline static void filter_copy_c(int64_t* filter, int64_t* filter2, int size) +{ + for (int i = 0; i < size; i++) + filter2[i] = filter[i]; +} + +#if X265_DEPTH == 8 +static void doScaling_c(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize) +{ + for (int i = 0; i < dstW; i++) + { + int val = 0; + int sourcePos = filterPos[i]; + for (int j = 0; j < filterSize; j++) + val += ((int)src[sourcePos + j]) * filter[filterSize * i + j]; + // the cubic equation does overflow ... 
+ dst[i] = x265_clip3(SHORT_MIN, SHORT_MAX, val >> 7); + } +} +static uint8_t clipUint8(int a) +{ + if (a&(~0xFF)) + return (-a) >> 31; + else + return a; +} + +static void yuv2PlaneX_c(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW) +{ + for (int i = 0; i < dstW; i++) + { + int val = 64 << 12; + for (int j = 0; j < filterSize; j++) + val += src[j][i] * filter[j]; + dest[i] = clipUint8(val >> 19); + } +} +#else +static void yuv2PlaneX_c_h(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW) +{ + for (int i = 0; i < dstW; i++) + { + int val = 1 << 16; + uint16_t* dst16bit = (uint16_t *)dest; + for (int j = 0; j < filterSize; j++) + val += src[j][i] * filter[j]; + uint16_t d = x265_clip3(0, SHORT_MAX_10, val >> 17); + ((uint8_t*)(&dst16bit[i]))[0] = (d); + ((uint8_t*)(&dst16bit[i]))[1] = (d) >> 8; + } +} +static void doScaling_c_h(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize) +{ + const uint16_t *srcLocal = (const uint16_t *)src; + for (int i = 0; i < dstW; i++) + { + int val = 0; + int sourcePos = filterPos[i]; + for (int j = 0; j < filterSize; j++) + val += ((int)srcLocal[sourcePos + j]) * filter[filterSize * i + j]; + // the cubic equation does overflow + dst[i] = x265_clip3(SHORT_MIN, SHORT_MAX, val >> 9); + } +} +#endif + +ScalerFilter::ScalerFilter() : + m_filtLen(0), + m_filtPos(NULL), + m_filt(NULL), + m_sourceSlice(NULL), + m_destSlice(NULL) +{ +} + +ScalerFilter::~ScalerFilter() +{ + if (m_filtPos) { + delete[] m_filtPos; m_filtPos = NULL; + } + if (m_filt) { + delete[] m_filt; m_filt = NULL; + } +} + +void ScalerHLumFilter::process(int sliceVer, int sliceHor) +{ + uint8_t ** src = m_sourceSlice->m_plane[0].lineBuf; + uint8_t ** dst = m_destSlice->m_plane[0].lineBuf; + int sourcePos = sliceVer - m_sourceSlice->m_plane[0].sliceVer; + int destPos = sliceVer - m_destSlice->m_plane[0].sliceVer; + int dstW = m_destSlice->m_width; + 
for (int i = 0; i < sliceHor; ++i) + { + m_hFilterScaler->doScaling((int16_t*)dst[destPos + i], dstW, (const uint8_t *)src[sourcePos + i], m_filt, m_filtPos, m_filtLen); + m_destSlice->m_plane[0].sliceHor += 1; + } +} + +void ScalerHCrFilter::process(int sliceVer, int sliceHor) +{ + uint8_t ** src1 = m_sourceSlice->m_plane[1].lineBuf; + uint8_t ** dst1 = m_destSlice->m_plane[1].lineBuf; + uint8_t ** src2 = m_sourceSlice->m_plane[2].lineBuf; + uint8_t ** dst2 = m_destSlice->m_plane[2].lineBuf; + + int sourcePos1 = sliceVer - m_sourceSlice->m_plane[1].sliceVer; + int destPos1 = sliceVer - m_destSlice->m_plane[1].sliceVer; + int sourcePos2 = sliceVer - m_sourceSlice->m_plane[2].sliceVer; + int destPos2 = sliceVer - m_destSlice->m_plane[2].sliceVer; + + int dstW = m_destSlice->m_width >> m_destSlice->m_hCrSubSample; + + for (int i = 0; i < sliceHor; ++i) + { + m_hFilterScaler->doScaling((int16_t*)dst1[destPos1 + i], dstW, src1[sourcePos1 + i], m_filt, m_filtPos, m_filtLen); + m_hFilterScaler->doScaling((int16_t*)dst2[destPos2 + i], dstW, src2[sourcePos2 + i], m_filt, m_filtPos, m_filtLen); + m_destSlice->m_plane[1].sliceHor += 1; + m_destSlice->m_plane[2].sliceHor += 1; + } +} + +void VFilterScaler8Bit::yuv2PlaneX(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW) +{ + int IdxW = FACTOR_4; + int IdxF = FIL_DEF; + + (dstW % 4 == 0) && (filterSize == 6) && (IdxF = FIL_6) && (IdxW = FACTOR_4); + (dstW % 4 == 0) && (filterSize == 8) && (IdxF = FIL_8) && (IdxW = FACTOR_4); + +#if X265_DEPTH == 8 + yuv2PlaneX_c(filter, filterSize, src, dest, dstW); +#else + yuv2PlaneX_c_h(filter, filterSize, src, dest, dstW); +#endif +} + +void VFilterScaler10Bit::yuv2PlaneX(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW) +{ + int IdxW = FACTOR_4; + int IdxF = FIL_DEF;
View file
x265_3.4.tar.gz/source/common/scaler.h
Added
@@ -0,0 +1,254 @@ +/***************************************************************************** + * Copyright (C) 2013-2020 MulticoreWare, Inc + * + * Authors: Pooja Venkatesan <pooja@multicorewareinc.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at license @ x265.com. + *****************************************************************************/ + +#ifndef X265_SCALER_H +#define X265_SCALER_H + +#include "common.h" + +namespace X265_NS { +//x265 private namespace + +class ScalerSlice; +class VideoDesc; + +#define MAX_NUM_LINES_AHEAD 4 +#define SCALER_ALIGN(x, j) (((x)+(j)-1)&~((j)-1)) +#define X265_ABS(j) ((j) >= 0 ? (j) : (-(j))) +#define SCALER_MAX_REDUCE_CUTOFF 0.002 +#define SCALER_BITEXACT 0x80000 +#define ROUNDED_DIVISION(i,j) (((i)>0 ? (i) + ((j)>>1) : (i) - ((j)>>1))/(j)) +#define UH_CEIL_SHIFTR(i,j) (!scale_builtin_constant_p(j) ? 
-((-(i)) >> (j)) \ + : ((i) + (1<<(j)) - 1) >> (j)) + +#if defined(__GNUC__) || defined(__clang__) +# define scale_builtin_constant_p __builtin_constant_p +#else +# define scale_builtin_constant_p(x) 0 +#endif + +enum ResFactor +{ + RES_FACTOR_64, RES_FACTOR_32, RES_FACTOR_16, RES_FACTOR_8, + RES_FACTOR_4, RES_FACTOR_DEF, NUM_RES_FACTOR +}; + +enum ScalerFactor +{ + FACTOR_4, FACTOR_8, NUM_FACTOR +}; + +enum FilterSize +{ + FIL_4, FIL_6, FIL_8, FIL_9, FIL_10, FIL_11, FIL_13, FIL_15, + FIL_16, FIL_17, FIL_19, FIL_22, FIL_24, FIL_DEF, NUM_FIL +}; + +class ScalerFilter { +public: + int m_filtLen; + int32_t* m_filtPos; // Array of horizontal/vertical starting pos for each dst for luma / chroma planes. + int16_t* m_filt; // Array of horizontal/vertical filter coefficients for luma / chroma planes. + ScalerSlice* m_sourceSlice; // Source slice + ScalerSlice* m_destSlice; // Output slice + ScalerFilter(); + virtual ~ScalerFilter(); + virtual void process(int sliceVer, int sliceHor) = 0; + int initCoeff(int flag, int inc, int srcW, int dstW, int filtAlign, int one, int sourcePos, int destPos); + void setSlice(ScalerSlice* source, ScalerSlice* dest) { m_sourceSlice = source; m_destSlice = dest; } +}; + +class VideoDesc { +public: + int m_width; + int m_height; + int m_csp; + int m_inputDepth; + + VideoDesc(int w, int h, int csp, int bitDepth) + { + m_width = w; + m_height = h; + m_csp = csp; + m_inputDepth = bitDepth; + } +}; + +typedef struct ScalerPlane +{ + int availLines; // max number of lines that can be held by this plane + int sliceVer; // index of first line + int sliceHor; // number of lines + uint8_t** lineBuf; // line buffer +} ScalerPlane; + +// Assist horizontal filtering, base class +class HFilterScaler { +public: + int m_bitDepth; +public: + HFilterScaler() :m_bitDepth(0) {}; + virtual ~HFilterScaler() {}; + virtual void doScaling(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize) = 0; +}; + +// 
Assist vertical filtering, base class +class VFilterScaler { +public: + int m_bitDepth; +public: + VFilterScaler() :m_bitDepth(0) {}; + virtual ~VFilterScaler() {}; + virtual void yuv2PlaneX(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW) = 0; +}; + +// Assist horizontal filtering, process 8 bit case +class HFilterScaler8Bit : public HFilterScaler { +public: + HFilterScaler8Bit() { m_bitDepth = 8; } + virtual void doScaling(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize); +}; + +// Assist horizontal filtering, process 10 bit case +class HFilterScaler10Bit : public HFilterScaler { +public: + HFilterScaler10Bit() { m_bitDepth = 10; } + virtual void doScaling(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize); +}; + +// Assist vertical filtering, process 8 bit case +class VFilterScaler8Bit : public VFilterScaler { +public: + VFilterScaler8Bit() { m_bitDepth = 8; } + virtual void yuv2PlaneX(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW); +}; + +// Assist vertical filtering, process 10 bit case +class VFilterScaler10Bit : public VFilterScaler { +public: + VFilterScaler10Bit() { m_bitDepth = 10; } + virtual void yuv2PlaneX(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW); +}; + +// Horizontal filter for luma +class ScalerHLumFilter : public ScalerFilter { +private: + HFilterScaler* m_hFilterScaler; +public: + ScalerHLumFilter(int bitDepth) { bitDepth == 8 ? m_hFilterScaler = new HFilterScaler8Bit : bitDepth == 10 ? 
m_hFilterScaler = new HFilterScaler10Bit : NULL;} + ~ScalerHLumFilter() { if (m_hFilterScaler) X265_FREE(m_hFilterScaler); } + virtual void process(int sliceVer, int sliceHor); +}; + +// Horizontal filter for chroma +class ScalerHCrFilter : public ScalerFilter { +private: + HFilterScaler* m_hFilterScaler; +public: + ScalerHCrFilter(int bitDepth) { bitDepth == 8 ? m_hFilterScaler = new HFilterScaler8Bit : bitDepth == 10 ? m_hFilterScaler = new HFilterScaler10Bit : NULL;} + ~ScalerHCrFilter() { if (m_hFilterScaler) X265_FREE(m_hFilterScaler); } + virtual void process(int sliceVer, int sliceHor); +}; + +// Vertical filter for luma +class ScalerVLumFilter : public ScalerFilter { +private: + VFilterScaler* m_vFilterScaler; +public: + ScalerVLumFilter(int bitDepth) { bitDepth == 8 ? m_vFilterScaler = new VFilterScaler8Bit : bitDepth == 10 ? m_vFilterScaler = new VFilterScaler10Bit : NULL;} + ~ScalerVLumFilter() { if (m_vFilterScaler) X265_FREE(m_vFilterScaler); } + virtual void process(int sliceVer, int sliceHor); +}; + +// Vertical filter for chroma +class ScalerVCrFilter : public ScalerFilter { +private: + VFilterScaler* m_vFilterScaler; +public: + ScalerVCrFilter(int bitDepth) { bitDepth == 8 ? m_vFilterScaler = new VFilterScaler8Bit : bitDepth == 10 ? m_vFilterScaler = new VFilterScaler10Bit : NULL;} + ~ScalerVCrFilter() { if (m_vFilterScaler) X265_FREE(m_vFilterScaler); } + virtual void process(int sliceVer, int sliceHor); +}; + +class ScalerSlice +{ +private: + enum ScalerSlicePlaneNum { m_numSlicePlane = 4 }; +public: + int m_width; // Slice line width + int m_hCrSubSample; // horizontal Chroma subsampling factor
View file
x265_3.3.tar.gz/source/common/threading.h -> x265_3.4.tar.gz/source/common/threading.h
Changed
@@ -238,6 +238,14 @@ LeaveCriticalSection(&m_cs); } + void decr() + { + EnterCriticalSection(&m_cs); + m_val--; + WakeAllConditionVariable(&m_cv); + LeaveCriticalSection(&m_cs); + } + protected: CRITICAL_SECTION m_cs; @@ -436,6 +444,14 @@ pthread_mutex_unlock(&m_mutex); } + void decr() + { + pthread_mutex_lock(&m_mutex); + m_val--; + pthread_cond_broadcast(&m_cond); + pthread_mutex_unlock(&m_mutex); + } + protected: pthread_mutex_t m_mutex;
View file
x265_3.3.tar.gz/source/encoder/analysis.cpp -> x265_3.4.tar.gz/source/encoder/analysis.cpp
Changed
@@ -1272,7 +1272,7 @@ md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp); checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom); - skipRecursion = !!m_param->bEnableRecursionSkip && md.bestMode; + skipRecursion = !!m_param->recursionSkipMode && md.bestMode; if (m_param->rdLevel) skipModes = m_param->bEnableEarlySkip && md.bestMode; } @@ -1296,7 +1296,7 @@ md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp); checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom); - skipRecursion = !!m_param->bEnableRecursionSkip && md.bestMode; + skipRecursion = !!m_param->recursionSkipMode && md.bestMode; if (m_param->rdLevel) skipModes = m_param->bEnableEarlySkip && md.bestMode; } @@ -1314,15 +1314,23 @@ skipModes = (m_param->bEnableEarlySkip || m_refineLevel == 2) && md.bestMode && md.bestMode->cu.isSkipped(0); // TODO: sa8d threshold per depth } - if (md.bestMode && m_param->bEnableRecursionSkip && !bCtuInfoCheck && !(m_param->bAnalysisType == AVC_INFO && m_param->analysisLoadReuseLevel == 7 && (m_modeFlag[0] || m_modeFlag[1]))) + if (md.bestMode && m_param->recursionSkipMode && !bCtuInfoCheck && !(m_param->bAnalysisType == AVC_INFO && m_param->analysisLoadReuseLevel == 7 && (m_modeFlag[0] || m_modeFlag[1]))) { skipRecursion = md.bestMode->cu.isSkipped(0); - if (mightSplit && depth >= minDepth && !skipRecursion) + if (mightSplit && !skipRecursion) { - if (depth) - skipRecursion = recursionDepthCheck(parentCTU, cuGeom, *md.bestMode); - if (m_bHD && !skipRecursion && m_param->rdLevel == 2 && md.fencYuv.m_size != MAX_CU_SIZE) + if (depth >= minDepth && m_param->recursionSkipMode == RDCOST_BASED_RSKIP) + { + if (depth) + skipRecursion = recursionDepthCheck(parentCTU, cuGeom, *md.bestMode); + if (m_bHD && !skipRecursion && m_param->rdLevel == 2 && md.fencYuv.m_size != MAX_CU_SIZE) + skipRecursion = complexityCheckCU(*md.bestMode); + } + else if (cuGeom.log2CUSize >= MAX_LOG2_CU_SIZE - 1 && m_param->recursionSkipMode == EDGE_BASED_RSKIP) + { 
skipRecursion = complexityCheckCU(*md.bestMode); + } + } } if (m_param->bAnalysisType == AVC_INFO && md.bestMode && cuGeom.numPartitions <= 16 && m_param->analysisLoadReuseLevel == 7) @@ -1972,7 +1980,7 @@ checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks); checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth); - if (m_param->bEnableRecursionSkip && depth && m_modeDepth[depth - 1].bestMode) + if (m_param->recursionSkipMode && depth && m_modeDepth[depth - 1].bestMode) skipRecursion = md.bestMode && !md.bestMode->cu.getQtRootCbf(0); } if (m_param->analysisLoadReuseLevel > 4 && m_reusePartSize[cuGeom.absPartIdx] == SIZE_2Nx2N) @@ -1996,7 +2004,7 @@ checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks); checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth); - if (m_param->bEnableRecursionSkip && depth && m_modeDepth[depth - 1].bestMode) + if (m_param->recursionSkipMode && depth && m_modeDepth[depth - 1].bestMode) skipRecursion = md.bestMode && !md.bestMode->cu.getQtRootCbf(0); } } @@ -2015,8 +2023,10 @@ checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks); checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth); - if (m_param->bEnableRecursionSkip && depth && m_modeDepth[depth - 1].bestMode) + if (m_param->recursionSkipMode == RDCOST_BASED_RSKIP && depth && m_modeDepth[depth - 1].bestMode) skipRecursion = md.bestMode && !md.bestMode->cu.getQtRootCbf(0); + else if (cuGeom.log2CUSize >= MAX_LOG2_CU_SIZE - 1 && m_param->recursionSkipMode == EDGE_BASED_RSKIP) + skipRecursion = md.bestMode && complexityCheckCU(*md.bestMode); } if (m_param->bAnalysisType == AVC_INFO && md.bestMode && cuGeom.numPartitions <= 16 && m_param->analysisLoadReuseLevel == 7) skipRecursion = true; @@ -3525,27 +3535,47 @@ bool Analysis::complexityCheckCU(const Mode& bestMode) { - uint32_t mean = 0; - uint32_t homo = 0; - uint32_t cuSize = bestMode.fencYuv->m_size; - for (uint32_t y = 0; y < cuSize; y++) { - for (uint32_t x = 0; x < cuSize; x++) { - mean += 
(bestMode.fencYuv->m_buf[0][y * cuSize + x]); + if (m_param->recursionSkipMode == RDCOST_BASED_RSKIP) + { + uint32_t mean = 0; + uint32_t homo = 0; + uint32_t cuSize = bestMode.fencYuv->m_size; + for (uint32_t y = 0; y < cuSize; y++) { + for (uint32_t x = 0; x < cuSize; x++) { + mean += (bestMode.fencYuv->m_buf[0][y * cuSize + x]); + } } - } - mean = mean / (cuSize * cuSize); - for (uint32_t y = 0 ; y < cuSize; y++){ - for (uint32_t x = 0 ; x < cuSize; x++){ - homo += abs(int(bestMode.fencYuv->m_buf[0][y * cuSize + x] - mean)); + mean = mean / (cuSize * cuSize); + for (uint32_t y = 0; y < cuSize; y++) { + for (uint32_t x = 0; x < cuSize; x++) { + homo += abs(int(bestMode.fencYuv->m_buf[0][y * cuSize + x] - mean)); + } } - } - homo = homo / (cuSize * cuSize); + homo = homo / (cuSize * cuSize); - if (homo < (.1 * mean)) - return true; + if (homo < (.1 * mean)) + return true; - return false; -} + return false; + } + else + { + int blockType = bestMode.cu.m_log2CUSize[0] - LOG2_UNIT_SIZE; + int shift = bestMode.cu.m_log2CUSize[0] * LOG2_UNIT_SIZE; + intptr_t stride = m_frame->m_fencPic->m_stride; + intptr_t blockOffsetLuma = bestMode.cu.m_cuPelX + bestMode.cu.m_cuPelY * stride; + uint64_t sum_ss = primitives.cu[blockType].var(m_frame->m_edgeBitPic + blockOffsetLuma, stride); + uint32_t sum = (uint32_t)sum_ss; + uint32_t ss = (uint32_t)(sum_ss >> 32); + uint32_t pixelCount = 1 << shift; + double cuEdgeVariance = (ss - ((double)sum * sum / pixelCount)) / pixelCount; + + if (cuEdgeVariance > (double)m_param->edgeVarThreshold) + return false; + else + return true; + } + } uint32_t Analysis::calculateCUVariance(const CUData& ctu, const CUGeom& cuGeom) { @@ -3570,7 +3600,6 @@ cnt++; } } - return cuVariance / cnt; }
View file
x265_3.3.tar.gz/source/encoder/analysis.h -> x265_3.4.tar.gz/source/encoder/analysis.h
Changed
@@ -52,7 +52,7 @@ splitRefs = 0; mvCost[0] = 0; // L0 mvCost[1] = 0; // L1 - sa8dCost = 0; + sa8dCost = 0; } }; @@ -120,7 +120,6 @@ Mode& compressCTU(CUData& ctu, Frame& frame, const CUGeom& cuGeom, const Entropy& initialContext); int32_t loadTUDepth(CUGeom cuGeom, CUData parentCTU); - protected: /* Analysis data for save/load mode, writes/reads data based on absPartIdx */ x265_analysis_inter_data* m_reuseInterDataCTU;
View file
x265_3.3.tar.gz/source/encoder/api.cpp -> x265_3.4.tar.gz/source/encoder/api.cpp
Changed
@@ -1016,12 +1016,12 @@ void x265_zone_free(x265_param *param) { - if (param && param->rc.zonefileCount) { + if (param && param->rc.zones && (param->rc.zoneCount || param->rc.zonefileCount)) + { for (int i = 0; i < param->rc.zonefileCount; i++) x265_free(param->rc.zones[i].zoneParam); - } - if (param && (param->rc.zoneCount || param->rc.zonefileCount)) x265_free(param->rc.zones); + } } static const x265_api libapi = @@ -1294,6 +1294,8 @@ fprintf(csvfp, "RateFactor, "); if (param->rc.vbvBufferSize) fprintf(csvfp, "BufferFill, BufferFillFinal, "); + if (param->rc.vbvBufferSize && param->csvLogLevel >= 2) + fprintf(csvfp, "UnclippedBufferFillFinal, "); if (param->bEnablePsnr) fprintf(csvfp, "Y PSNR, U PSNR, V PSNR, YUV PSNR, "); if (param->bEnableSsim) @@ -1405,6 +1407,8 @@ fprintf(param->csvfpt, "%.3lf,", frameStats->rateFactor); if (param->rc.vbvBufferSize) fprintf(param->csvfpt, "%.3lf, %.3lf,", frameStats->bufferFill, frameStats->bufferFillFinal); + if (param->rc.vbvBufferSize && param->csvLogLevel >= 2) + fprintf(param->csvfpt, "%.3lf,", frameStats->unclippedBufferFillFinal); if (param->bEnablePsnr) fprintf(param->csvfpt, "%.3lf, %.3lf, %.3lf, %.3lf,", frameStats->psnrY, frameStats->psnrU, frameStats->psnrV, frameStats->psnr); if (param->bEnableSsim)
View file
x265_3.3.tar.gz/source/encoder/encoder.cpp -> x265_3.4.tar.gz/source/encoder/encoder.cpp
Changed
@@ -218,10 +218,7 @@ if (m_param->bHistBasedSceneCut) { - for (int i = 0; i < x265_cli_csps[m_param->internalCsp].planes; i++) - { - m_planeSizes[i] = (m_param->sourceWidth >> x265_cli_csps[p->internalCsp].width[i]) * (m_param->sourceHeight >> x265_cli_csps[m_param->internalCsp].height[i]); - } + m_planeSizes[0] = (m_param->sourceWidth >> x265_cli_csps[p->internalCsp].width[0]) * (m_param->sourceHeight >> x265_cli_csps[m_param->internalCsp].height[0]); uint32_t pixelbytes = m_param->internalBitDepth > 8 ? 2 : 1; m_edgePic = X265_MALLOC(pixel, m_planeSizes[0] * pixelbytes); m_edgeHistThreshold = m_param->edgeTransitionThreshold; @@ -1443,9 +1440,9 @@ int32_t planeCount = x265_cli_csps[m_param->internalCsp].planes; memset(m_edgePic, 0, bufSize); - if (!computeEdge(m_edgePic, src, NULL, pic->width, pic->height, pic->width, false)) + if (!computeEdge(m_edgePic, src, NULL, pic->width, pic->height, pic->width, false, 1)) { - x265_log(m_param, X265_LOG_ERROR, "Failed edge computation!"); + x265_log(m_param, X265_LOG_ERROR, "Failed to compute edge!"); return false; } @@ -1605,6 +1602,14 @@ if (m_param->bHistBasedSceneCut && pic_in) { x265_picture *pic = (x265_picture *) pic_in; + + if (pic->poc == 0) + { + /* for entire encode compute the chroma plane sizes only once */ + for (int i = 1; i < x265_cli_csps[m_param->internalCsp].planes; i++) + m_planeSizes[i] = (pic->width >> x265_cli_csps[m_param->internalCsp].width[i]) * (pic->height >> x265_cli_csps[m_param->internalCsp].height[i]); + } + if (computeHistograms(pic)) { double maxUVSad = 0.0, edgeSad = 0.0; @@ -1752,6 +1757,12 @@ } } } + if (m_param->recursionSkipMode == EDGE_BASED_RSKIP && m_param->bHistBasedSceneCut) + { + pixel* src = m_edgePic; + primitives.planecopy_pp_shr(src, inFrame->m_fencPic->m_picWidth, inFrame->m_edgeBitPic, inFrame->m_fencPic->m_stride, + inFrame->m_fencPic->m_picWidth, inFrame->m_fencPic->m_picHeight, 0); + } } else { @@ -2414,7 +2425,7 @@ encParam->maxNumReferences = param->maxNumReferences; 
// never uses more refs than specified in stream headers encParam->bEnableFastIntra = param->bEnableFastIntra; encParam->bEnableEarlySkip = param->bEnableEarlySkip; - encParam->bEnableRecursionSkip = param->bEnableRecursionSkip; + encParam->recursionSkipMode = param->recursionSkipMode; encParam->searchMethod = param->searchMethod; /* Scratch buffer prevents me_range from being increased for esa/tesa */ if (param->searchRange < encParam->searchRange) @@ -3006,6 +3017,8 @@ frameStats->ipCostRatio = curFrame->m_lowres.ipCostRatio; frameStats->bufferFill = m_rateControl->m_bufferFillActual; frameStats->bufferFillFinal = m_rateControl->m_bufferFillFinal; + if (m_param->csvLogLevel >= 2) + frameStats->unclippedBufferFillFinal = m_rateControl->m_unclippedBufferFillFinal; frameStats->frameLatency = inPoc - poc; if (m_param->rc.rateControlMode == X265_RC_CRF) frameStats->rateFactor = curEncData.m_rateFactor; @@ -3400,7 +3413,7 @@ p->maxNumReferences = zone->maxNumReferences; p->bEnableFastIntra = zone->bEnableFastIntra; p->bEnableEarlySkip = zone->bEnableEarlySkip; - p->bEnableRecursionSkip = zone->bEnableRecursionSkip; + p->recursionSkipMode = zone->recursionSkipMode; p->searchMethod = zone->searchMethod; p->searchRange = zone->searchRange; p->subpelRefine = zone->subpelRefine; @@ -3681,20 +3694,6 @@ if (p->analysisLoad && !p->analysisLoadReuseLevel) p->analysisLoadReuseLevel = 5; - if ((p->bAnalysisType == DEFAULT) && p->rc.cuTree) - { - if (p->analysisSaveReuseLevel && p->analysisSaveReuseLevel < 10) - { - x265_log(p, X265_LOG_WARNING, "cu-tree works only with analysis-save-reuse-level 10, Disabling cu-tree\n"); - p->rc.cuTree = 0; - } - if (p->analysisLoadReuseLevel && p->analysisLoadReuseLevel < 10) - { - x265_log(p, X265_LOG_WARNING, "cu-tree works only with analysis-load-reuse-level 10, Disabling cu-tree\n"); - p->rc.cuTree = 0; - } - } - if ((p->analysisLoad || p->analysisSave) && (p->bDistributeModeAnalysis || p->bDistributeMotionEstimation)) { x265_log(p, 
X265_LOG_WARNING, "Analysis load/save options incompatible with pmode/pme, Disabling pmode/pme\n"); @@ -3867,29 +3866,30 @@ } else { - if (fread(&m_conformanceWindow.rightOffset, sizeof(int), 1, m_analysisFileIn) != 1) + int rightOffset, bottomOffset; + if (fread(&rightOffset, sizeof(int), 1, m_analysisFileIn) != 1) { x265_log(NULL, X265_LOG_ERROR, "Error reading analysis data. Conformance window right offset missing\n"); m_aborted = true; } - else if (m_conformanceWindow.rightOffset && p->analysisLoadReuseLevel > 1) + else if (rightOffset && p->analysisLoadReuseLevel > 1) { int scaleFactor = p->scaleFactor < 2 ? 1 : p->scaleFactor; - padsize = m_conformanceWindow.rightOffset * scaleFactor; + padsize = rightOffset * scaleFactor; p->sourceWidth += padsize; m_conformanceWindow.bEnabled = true; m_conformanceWindow.rightOffset = padsize; } - if (fread(&m_conformanceWindow.bottomOffset, sizeof(int), 1, m_analysisFileIn) != 1) + if (fread(&bottomOffset, sizeof(int), 1, m_analysisFileIn) != 1) { x265_log(NULL, X265_LOG_ERROR, "Error reading analysis data. Conformance window bottom offset missing\n"); m_aborted = true; } - else if (m_conformanceWindow.bottomOffset && p->analysisLoadReuseLevel > 1) + else if (bottomOffset && p->analysisLoadReuseLevel > 1) { int scaleFactor = p->scaleFactor < 2 ? 1 : p->scaleFactor; - padsize = m_conformanceWindow.bottomOffset * scaleFactor; + padsize = bottomOffset * scaleFactor; p->sourceHeight += padsize; m_conformanceWindow.bEnabled = true; m_conformanceWindow.bottomOffset = padsize; @@ -4196,7 +4196,7 @@ x265_log(p, X265_LOG_WARNING, "Radl requires fixed gop-length (keyint == min-keyint). Disabling radl.\n"); } - if ((p->chunkStart || p->chunkEnd) && p->bOpenGOP) + if ((p->chunkStart || p->chunkEnd) && p->bOpenGOP && m_param->bResetZoneConfig) { p->chunkStart = p->chunkEnd = 0; x265_log(p, X265_LOG_WARNING, "Chunking requires closed gop structure. 
Disabling chunking.\n"); @@ -4229,12 +4229,6 @@ x265_log(p, X265_LOG_WARNING, "Turning on repeat - headers for zone encoding\n"); } - if (!m_param->bResetZoneConfig && (p->keyframeMax != p->keyframeMin)) - x265_log(p, X265_LOG_WARNING, "External zone reconfiguration requires a fixed GOP size to enable appropriate signaling of HRD info\n"); - - if (!m_param->bResetZoneConfig && (p->reconfigWindowSize != (uint64_t)p->keyframeMax)) - x265_log(p, X265_LOG_WARNING, "Zone size must be multiple of GOP size to enable appropriate signaling of HRD info\n"); - if (m_param->bEnableHME) { if (m_param->sourceHeight < 540) @@ -4311,18 +4305,27 @@ } } + uint32_t numCUsLoad, numCUsInHeightLoad; + /* Now arrived at the right frame, read the record */ analysis->poc = poc; analysis->frameRecordSize = frameRecordSize; X265_FREAD(&analysis->sliceType, sizeof(int), 1, m_analysisFileIn, &(picData->sliceType)); X265_FREAD(&analysis->bScenecut, sizeof(int), 1, m_analysisFileIn, &(picData->bScenecut)); X265_FREAD(&analysis->satdCost, sizeof(int64_t), 1, m_analysisFileIn, &(picData->satdCost)); - X265_FREAD(&analysis->numCUsInFrame, sizeof(int), 1, m_analysisFileIn, &(picData->numCUsInFrame)); + X265_FREAD(&numCUsLoad, sizeof(int), 1, m_analysisFileIn, &(picData->numCUsInFrame)); X265_FREAD(&analysis->numPartitions, sizeof(int), 1, m_analysisFileIn, &(picData->numPartitions)); + /* Update analysis info to save current settings */ + uint32_t widthInCU = (m_param->sourceWidth + m_param->maxCUSize - 1) >> m_param->maxLog2CUSize; + uint32_t heightInCU = (m_param->sourceHeight + m_param->maxCUSize - 1) >> m_param->maxLog2CUSize; + uint32_t numCUsInFrame = widthInCU * heightInCU; + analysis->numCUsInFrame = numCUsInFrame; + analysis->numCuInHeight = heightInCU; + if (m_param->bDisableLookahead) { - X265_FREAD(&analysis->numCuInHeight, sizeof(uint32_t), 1, m_analysisFileIn, &(picData->numCuInHeight)); + X265_FREAD(&numCUsInHeightLoad, sizeof(uint32_t), 1, m_analysisFileIn, 
&(picData->numCuInHeight)); X265_FREAD(&analysis->lookahead, sizeof(x265_lookahead_data), 1, m_analysisFileIn, &(picData->lookahead)); } int scaledNumPartition = analysis->numPartitions; @@ -4335,16 +4338,16 @@ if (m_param->ctuDistortionRefine == CTU_DISTORTION_INTERNAL) { - X265_FREAD((analysis->distortionData)->ctuDistortion, sizeof(sse_t), analysis->numCUsInFrame, m_analysisFileIn, picDistortion); + X265_FREAD((analysis->distortionData)->ctuDistortion, sizeof(sse_t), numCUsLoad, m_analysisFileIn, picDistortion); computeDistortionOffset(analysis); } if (m_param->bDisableLookahead && m_rateControl->m_isVbv) { size_t vbvCount = m_param->lookaheadDepth + m_param->bframes + 2;
View file
x265_3.3.tar.gz/source/encoder/frameencoder.cpp -> x265_3.4.tar.gz/source/encoder/frameencoder.cpp
Changed
@@ -130,7 +130,7 @@ { rowSum += sliceGroupSizeAccu; m_sliceBaseRow[++sidx] = i; - } + } } X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!"); m_sliceBaseRow[0] = 0; @@ -448,6 +448,18 @@ m_ssimCnt = 0; memset(&(m_frame->m_encData->m_frameStats), 0, sizeof(m_frame->m_encData->m_frameStats)); + if (!m_param->bHistBasedSceneCut && m_param->rc.aqMode != X265_AQ_EDGE && m_param->recursionSkipMode == EDGE_BASED_RSKIP) + { + int height = m_frame->m_fencPic->m_picHeight; + int width = m_frame->m_fencPic->m_picWidth; + intptr_t stride = m_frame->m_fencPic->m_stride; + + if (!computeEdge(m_frame->m_edgeBitPic, m_frame->m_fencPic->m_picOrg[0], NULL, stride, height, width, false, 1)) + { + x265_log(m_param, X265_LOG_ERROR, " Failed to compute edge !"); + } + } + /* Emit access unit delimiter unless this is the first frame and the user is * not repeating headers (since AUD is supposed to be the first NAL in the access * unit) */
View file
x265_3.3.tar.gz/source/encoder/ratecontrol.cpp -> x265_3.4.tar.gz/source/encoder/ratecontrol.cpp
Changed
@@ -269,7 +269,7 @@ x265_log(m_param, X265_LOG_WARNING, "NAL HRD parameters require VBV parameters, ignored\n"); m_param->bEmitHRDSEI = 0; } - m_isCbr = m_param->rc.rateControlMode == X265_RC_ABR && m_isVbv && !m_2pass && m_param->rc.vbvMaxBitrate <= m_param->rc.bitrate; + m_isCbr = m_param->rc.rateControlMode == X265_RC_ABR && m_isVbv && m_param->rc.vbvMaxBitrate <= m_param->rc.bitrate; if (m_param->rc.bStrictCbr && !m_isCbr) { x265_log(m_param, X265_LOG_WARNING, "strict CBR set without CBR mode, ignored\n"); @@ -335,7 +335,7 @@ int vbvBufferSize = m_param->rc.vbvBufferSize * 1000; int vbvMaxBitrate = m_param->rc.vbvMaxBitrate * 1000; - if (m_param->bEmitHRDSEI) + if (m_param->bEmitHRDSEI && !m_param->decoderVbvMaxRate) { const HRDInfo* hrd = &sps.vuiParameters.hrdParameters; vbvBufferSize = hrd->cpbSizeValue << (hrd->cpbSizeScale + CPB_SHIFT); @@ -509,6 +509,7 @@ CMP_OPT_FIRST_PASS(" keyint", m_param->keyframeMax); CMP_OPT_FIRST_PASS("scenecut", m_param->scenecutThreshold); CMP_OPT_FIRST_PASS("intra-refresh", m_param->bIntraRefresh); + CMP_OPT_FIRST_PASS("frame-dup", m_param->bEnableFrameDuplication); if (m_param->bMultiPassOptRPS) { CMP_OPT_FIRST_PASS("multi-pass-opt-rps", m_param->bMultiPassOptRPS); @@ -546,7 +547,7 @@ x265_log(m_param, X265_LOG_WARNING, "2nd pass has fewer frames than 1st pass (%d vs %d)\n", m_param->totalFrames, m_numEntries); } - if (m_param->totalFrames > m_numEntries) + if (m_param->totalFrames > m_numEntries && !m_param->bEnableFrameDuplication) { x265_log(m_param, X265_LOG_ERROR, "2nd pass has more frames than 1st pass (%d vs %d)\n", m_param->totalFrames, m_numEntries); @@ -781,6 +782,10 @@ // Init HRD HRDInfo* hrd = &sps.vuiParameters.hrdParameters; hrd->cbrFlag = m_isCbr; + if (m_param->reconfigWindowSize) { + hrd->cbrFlag = 0; + vbvMaxBitrate = m_param->decoderVbvMaxRate * 1000; + } // normalize HRD size and rate to the value / scale notation hrd->bitRateScale = x265_clip3(0, 15, calcScale(vbvMaxBitrate) - BR_SHIFT); @@ -829,7 +834,7 
@@ /* weighted average of cplx of future frames */ for (int j = 1; j < cplxBlur * 2 && j < m_numEntries - i; j++) { - int index = m_encOrder[i + j]; + int index = i+j; RateControlEntry *rcj = &m_rce2Pass[index]; weight *= 1 - pow(rcj->iCuCount / m_ncu, 2); if (weight < 0.0001) @@ -842,7 +847,7 @@ weight = 1.0; for (int j = 0; j <= cplxBlur * 2 && j <= i; j++) { - int index = m_encOrder[i - j]; + int index = i-j; RateControlEntry *rcj = &m_rce2Pass[index]; gaussianWeight = weight * exp(-j * j / 200.0); weightSum += gaussianWeight; @@ -851,7 +856,7 @@ if (weight < .0001) break; } - m_rce2Pass[m_encOrder[i]].blurredComplexity = cplxSum / weightSum; + m_rce2Pass[i].blurredComplexity= cplxSum / weightSum; } CHECKED_MALLOC(qScale, double, m_numEntries); if (filterSize > 1) @@ -870,7 +875,7 @@ expectedBits = 1; for (int i = 0; i < m_numEntries; i++) { - RateControlEntry* rce = &m_rce2Pass[m_encOrder[i]]; + RateControlEntry* rce = &m_rce2Pass[i]; double q = getQScale(rce, 1.0); expectedBits += qScale2bits(rce, q); m_lastQScaleFor[rce->sliceType] = q; @@ -893,15 +898,15 @@ /* find qscale */ for (int i = 0; i < m_numEntries; i++) { - RateControlEntry *rce = &m_rce2Pass[m_encOrder[i]]; + RateControlEntry *rce = &m_rce2Pass[i]; qScale[i] = getQScale(rce, rateFactor); m_lastQScaleFor[rce->sliceType] = qScale[i]; } /* fixed I/B qscale relative to P */ - for (int i = m_numEntries - 1; i >= 0; i--) + for (int i = 0; i < m_numEntries; i++) { - qScale[i] = getDiffLimitedQScale(&m_rce2Pass[m_encOrder[i]], qScale[i]); + qScale[i] = getDiffLimitedQScale(&m_rce2Pass[i], qScale[i]); X265_CHECK(qScale[i] >= 0, "qScale became negative\n"); } @@ -912,7 +917,6 @@ for (int i = 0; i < m_numEntries; i++) { double q = 0.0, sum = 0.0; - for (int j = 0; j < filterSize; j++) { int idx = i + j - filterSize / 2; @@ -920,7 +924,7 @@ double coeff = qBlur == 0 ? 
1.0 : exp(-d * d / (qBlur * qBlur)); if (idx < 0 || idx >= m_numEntries) continue; - if (m_rce2Pass[m_encOrder[i]].sliceType != m_rce2Pass[m_encOrder[idx]].sliceType) + if (m_rce2Pass[i].sliceType != m_rce2Pass[idx].sliceType) continue; q += qScale[idx] * coeff; sum += coeff; @@ -932,7 +936,7 @@ /* find expected bits */ for (int i = 0; i < m_numEntries; i++) { - RateControlEntry *rce = &m_rce2Pass[m_encOrder[i]]; + RateControlEntry *rce = &m_rce2Pass[i]; rce->newQScale = clipQscale(NULL, rce, blurredQscale[i]); // check if needed X265_CHECK(rce->newQScale >= 0, "new Qscale is negative\n"); expectedBits += qScale2bits(rce, rce->newQScale); @@ -1279,6 +1283,7 @@ m_param->rc.vbvMaxBitrate = m_param->rc.zones[i].zoneParam->rc.vbvMaxBitrate; memcpy(m_relativeComplexity, m_param->rc.zones[i].relativeComplexity, sizeof(double) * m_param->reconfigWindowSize); reconfigureRC(); + m_isCbr = 1; /* Always vbvmaxrate == bitrate here*/ m_top->zoneReadCount[i].incr(); } } @@ -1951,7 +1956,7 @@ /* Adjust quant based on the difference between * achieved and expected bitrate so far */ double curTime = (double)rce->encodeOrder / m_numEntries; - double w = x265_clip3(0.0, 1.0, curTime * 100); + double w = x265_clip3(0.0, 1.0, curTime); q *= pow((double)m_totalBits / m_expectedBitsSum, w); } if (m_framesDone == 0 && m_param->rc.rateControlMode == X265_RC_ABR && m_isGrainEnabled) @@ -2742,7 +2747,9 @@ x265_log(m_param, X265_LOG_WARNING, "poc:%d, VBV underflow (%.0f bits)\n", rce->poc, m_bufferFillFinal); m_bufferFillFinal = X265_MAX(m_bufferFillFinal, 0); - m_bufferFillFinal += m_bufferRate; + m_bufferFillFinal += rce->bufferRate; + if (m_param->csvLogLevel >= 2) + m_unclippedBufferFillFinal = m_bufferFillFinal; if (m_param->rc.bStrictCbr) { @@ -2752,14 +2759,14 @@ filler += FILLER_OVERHEAD * 8; } m_bufferFillFinal -= filler; - bufferBits = X265_MIN(bits + filler + m_bufferExcess, m_bufferRate); + bufferBits = X265_MIN(bits + filler + m_bufferExcess, rce->bufferRate); m_bufferExcess = 
X265_MAX(m_bufferExcess - bufferBits + bits + filler, 0); m_bufferFillActual += bufferBits - bits - filler; } else { m_bufferFillFinal = X265_MIN(m_bufferFillFinal, m_bufferSize); - bufferBits = X265_MIN(bits + m_bufferExcess, m_bufferRate); + bufferBits = X265_MIN(bits + m_bufferExcess, rce->bufferRate); m_bufferExcess = X265_MAX(m_bufferExcess - bufferBits + bits, 0); m_bufferFillActual += bufferBits - bits; m_bufferFillActual = X265_MIN(m_bufferFillActual, m_bufferSize);
View file
x265_3.3.tar.gz/source/encoder/ratecontrol.h -> x265_3.4.tar.gz/source/encoder/ratecontrol.h
Changed
@@ -157,6 +157,7 @@ double m_rateFactorConstant; double m_bufferSize; double m_bufferFillFinal; /* real buffer as of the last finished frame */ + double m_unclippedBufferFillFinal; /* real unclipped buffer as of the last finished frame used to log in CSV*/ double m_bufferFill; /* planned buffer, if all in-progress frames hit their bit budget */ double m_bufferRate; /* # of bits added to buffer_fill after each frame */ double m_vbvMaxRate; /* in kbps */
View file
x265_3.3.tar.gz/source/encoder/slicetype.cpp -> x265_3.4.tar.gz/source/encoder/slicetype.cpp
Changed
@@ -87,7 +87,7 @@ namespace X265_NS { -bool computeEdge(pixel *edgePic, pixel *refPic, pixel *edgeTheta, intptr_t stride, int height, int width, bool bcalcTheta) +bool computeEdge(pixel* edgePic, pixel* refPic, pixel* edgeTheta, intptr_t stride, int height, int width, bool bcalcTheta, pixel whitePixel) { intptr_t rowOne = 0, rowTwo = 0, rowThree = 0, colOne = 0, colTwo = 0, colThree = 0; intptr_t middle = 0, topLeft = 0, topRight = 0, bottomLeft = 0, bottomRight = 0; @@ -141,7 +141,7 @@ theta = 180 + theta; edgeTheta[middle] = (pixel)theta; } - edgePic[middle] = (pixel)(gradientMagnitude >= edgeThreshold ? edgeThreshold : blackPixel); + edgePic[middle] = (pixel)(gradientMagnitude >= EDGE_THRESHOLD ? whitePixel : blackPixel); } } return true; @@ -519,6 +519,13 @@ if (param->rc.aqMode == X265_AQ_EDGE) edgeFilter(curFrame, param); + if (param->rc.aqMode == X265_AQ_EDGE && !param->bHistBasedSceneCut && param->recursionSkipMode == EDGE_BASED_RSKIP) + { + pixel* src = curFrame->m_edgePic + curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride + curFrame->m_fencPic->m_lumaMarginX; + primitives.planecopy_pp_shr(src, curFrame->m_fencPic->m_stride, curFrame->m_edgeBitPic, + curFrame->m_fencPic->m_stride, curFrame->m_fencPic->m_picWidth, curFrame->m_fencPic->m_picHeight, SHIFT_TO_BITPLANE); + } + if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE || param->rc.aqMode == X265_AQ_AUTO_VARIANCE_BIASED || param->rc.aqMode == X265_AQ_EDGE) { double bit_depth_correction = 1.f / (1 << (2 * (X265_DEPTH - 8)));
View file
x265_3.3.tar.gz/source/encoder/slicetype.h -> x265_3.4.tar.gz/source/encoder/slicetype.h
Changed
@@ -44,9 +44,9 @@ #define EDGE_INCLINATION 45 #if HIGH_BIT_DEPTH -#define edgeThreshold 1023.0 +#define EDGE_THRESHOLD 1023.0 #else -#define edgeThreshold 255.0 +#define EDGE_THRESHOLD 255.0 #endif #define PI 3.14159265 @@ -101,7 +101,7 @@ protected: uint32_t acEnergyCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, int csp, uint32_t qgSize); - uint32_t edgeDensityCu(Frame*curFrame, uint32_t &avgAngle, uint32_t blockX, uint32_t blockY, uint32_t qgSize); + uint32_t edgeDensityCu(Frame* curFrame, uint32_t &avgAngle, uint32_t blockX, uint32_t blockY, uint32_t qgSize); uint32_t lumaSumCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, uint32_t qgSize); uint32_t weightCostLuma(Lowres& fenc, Lowres& ref, WeightParam& wp); bool allocWeightedRef(Lowres& fenc); @@ -265,7 +265,6 @@ CostEstimateGroup& operator=(const CostEstimateGroup&); }; -bool computeEdge(pixel *edgePic, pixel *refPic, pixel *edgeTheta, intptr_t stride, int height, int width, bool bcalcTheta); - +bool computeEdge(pixel* edgePic, pixel* refPic, pixel* edgeTheta, intptr_t stride, int height, int width, bool bcalcTheta, pixel whitePixel = EDGE_THRESHOLD); } #endif // ifndef X265_SLICETYPE_H
View file
x265_3.3.tar.gz/source/test/CMakeLists.txt -> x265_3.4.tar.gz/source/test/CMakeLists.txt
Changed
@@ -23,13 +23,15 @@ # add ARM assembly files if(ARM OR CROSS_COMPILE_ARM) - enable_language(ASM) - set(NASM_SRC checkasm-arm.S) - add_custom_command( - OUTPUT checkasm-arm.obj - COMMAND ${CMAKE_CXX_COMPILER} - ARGS ${NASM_FLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/checkasm-arm.S -o checkasm-arm.obj - DEPENDS checkasm-arm.S) + if(NOT ARM64) + enable_language(ASM) + set(NASM_SRC checkasm-arm.S) + add_custom_command( + OUTPUT checkasm-arm.obj + COMMAND ${CMAKE_CXX_COMPILER} + ARGS ${NASM_FLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/checkasm-arm.S -o checkasm-arm.obj + DEPENDS checkasm-arm.S) + endif() endif(ARM OR CROSS_COMPILE_ARM) # add PowerPC assembly files
View file
x265_3.3.tar.gz/source/test/regression-tests.txt -> x265_3.4.tar.gz/source/test/regression-tests.txt
Changed
@@ -75,7 +75,7 @@ News-4k.y4m,--preset superfast --lookahead-slices 6 --aq-mode 0 News-4k.y4m,--preset superfast --slices 4 --aq-mode 0 News-4k.y4m,--preset medium --tune ssim --no-sao --qg-size 16 -News-4k.y4m,--preset veryslow --no-rskip +News-4k.y4m,--preset veryslow --rskip 0 News-4k.y4m,--preset veryslow --pme --crf 40 OldTownCross_1920x1080_50_10bit_422.yuv,--preset superfast --weightp OldTownCross_1920x1080_50_10bit_422.yuv,--preset medium --no-weightp @@ -162,7 +162,11 @@ sintel_trailer_2k_1920x1080_24.yuv, --preset medium --hist-scenecut --hist-threshold 0.02 --frame-dup --dup-threshold 60 --hrd --bitrate 10000 --vbv-bufsize 15000 --vbv-maxrate 12000 sintel_trailer_2k_1920x1080_24.yuv, --preset medium --hist-scenecut --hist-threshold 0.02 sintel_trailer_2k_1920x1080_24.yuv, --preset ultrafast --hist-scenecut --hist-threshold 0.02 - +crowd_run_1920x1080_50.yuv, --preset faster --ctu 32 --rskip 2 --rskip-edge-threshold 5 +crowd_run_1920x1080_50.yuv, --preset fast --ctu 64 --rskip 2 --rskip-edge-threshold 5 --aq-mode 4 +crowd_run_1920x1080_50.yuv, --preset slow --ctu 32 --rskip 2 --rskip-edge-threshold 5 --hist-scenecut --hist-threshold 0.1 +crowd_run_1920x1080_50.yuv, --preset slower --ctu 16 --rskip 2 --rskip-edge-threshold 5 --hist-scenecut --hist-threshold 0.1 --aq-mode 4 + # Main12 intraCost overflow bug test 720p50_parkrun_ter.y4m,--preset medium
View file
x265_3.3.tar.gz/source/test/save-load-tests.txt -> x265_3.4.tar.gz/source/test/save-load-tests.txt
Changed
@@ -18,3 +18,4 @@ RaceHorses_416x240_30.y4m, --preset slow --no-cutree --ctu 16 --analysis-save x265_analysis.dat --analysis-save-reuse-level 10 --scale-factor 2 --crf 22 --vbv-maxrate 1000 --vbv-bufsize 1000::RaceHorses_832x480_30.y4m, --preset slow --no-cutree --ctu 32 --analysis-load x265_analysis.dat --analysis-save x265_analysis_2.dat --analysis-load-reuse-level 10 --analysis-save-reuse-level 10 --scale-factor 2 --crf 16 --vbv-maxrate 4000 --vbv-bufsize 4000 --refine-intra 0 --refine-inter 1::RaceHorses_1664x960_30.y4m, --preset slow --no-cutree --ctu 64 --analysis-load x265_analysis_2.dat --analysis-load-reuse-level 10 --scale-factor 2 --crf 12 --vbv-maxrate 7000 --vbv-bufsize 7000 --refine-intra 2 --refine-inter 2 crowd_run_540p50.y4m, --preset veryslow --no-cutree --analysis-save x265_analysis_540.dat --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 5000 --vbv-bufsize 15000 --vbv-maxrate 9000::crowd_run_1080p50.y4m, --preset veryslow --no-cutree --analysis-save x265_analysis_1080.dat --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 10000 --vbv-bufsize 30000 --vbv-maxrate 17500::crowd_run_1080p50.y4m, --preset veryslow --no-cutree --analysis-save x265_analysis_1080.dat --analysis-load x265_analysis_540.dat --refine-intra 4 --dynamic-refine --analysis-load-reuse-level 10 --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 10000 --vbv-bufsize 30000 --vbv-maxrate 17500::crowd_run_2160p50.y4m, --preset veryslow --no-cutree --analysis-save x265_analysis_2160.dat --analysis-load x265_analysis_1080.dat --refine-intra 3 --dynamic-refine --analysis-load-reuse-level 10 --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 24000 --vbv-bufsize 84000 --vbv-maxrate 49000::crowd_run_2160p50.y4m, --preset veryslow --no-cutree --analysis-load x265_analysis_2160.dat --refine-intra 2 --dynamic-refine --analysis-load-reuse-level 10 --scale-factor 1 --bitrate 24000 --vbv-bufsize 84000 --vbv-maxrate 49000 crowd_run_540p50.y4m, --preset medium 
--no-cutree --analysis-save x265_analysis_540.dat --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 5000 --vbv-bufsize 15000 --vbv-maxrate 9000::crowd_run_1080p50.y4m, --preset medium --no-cutree --analysis-save x265_analysis_1080.dat --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 10000 --vbv-bufsize 30000 --vbv-maxrate 17500::crowd_run_1080p50.y4m, --preset medium --no-cutree --analysis-save x265_analysis_1080.dat --analysis-load x265_analysis_540.dat --refine-intra 4 --dynamic-refine --analysis-load-reuse-level 10 --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 10000 --vbv-bufsize 30000 --vbv-maxrate 17500::crowd_run_2160p50.y4m, --preset medium --no-cutree --analysis-save x265_analysis_2160.dat --analysis-load x265_analysis_1080.dat --refine-intra 3 --dynamic-refine --analysis-load-reuse-level 10 --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 24000 --vbv-bufsize 84000 --vbv-maxrate 49000::crowd_run_2160p50.y4m, --preset medium --no-cutree --analysis-load x265_analysis_2160.dat --refine-intra 2 --dynamic-refine --analysis-load-reuse-level 10 --scale-factor 1 --bitrate 24000 --vbv-bufsize 84000 --vbv-maxrate 49000 +News-4k.y4m, --preset medium --analysis-save x265_analysis_fdup.dat --frame-dup --hrd --bitrate 10000 --vbv-bufsize 15000 --vbv-maxrate 12000::News-4k.y4m, --analysis-load x265_analysis_fdup.dat --frame-dup --hrd --bitrate 10000 --vbv-bufsize 15000 --vbv-maxrate 12000
View file
x265_3.3.tar.gz/source/test/testbench.cpp -> x265_3.4.tar.gz/source/test/testbench.cpp
Changed
@@ -5,6 +5,7 @@ * Mandar Gurav <mandar@multicorewareinc.com> * Mahesh Pittala <mahesh@multicorewareinc.com> * Min Chen <chenm003@163.com> + * Yimeng Su <yimeng.su@huawei.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -208,6 +209,14 @@ EncoderPrimitives asmprim; memset(&asmprim, 0, sizeof(asmprim)); setupAssemblyPrimitives(asmprim, test_arch[i].flag); + +#if X265_ARCH_ARM64 + /* Temporary workaround because luma_vsp assembly primitive has not been completed + * but interp_8tap_hv_pp_cpu uses mixed C primitive and assembly primitive. + * Otherwise, segment fault occurs. */ + setupAliasCPrimitives(cprim, asmprim, test_arch[i].flag); +#endif + setupAliasPrimitives(asmprim); memcpy(&primitives, &asmprim, sizeof(EncoderPrimitives)); for (size_t h = 0; h < sizeof(harness) / sizeof(TestHarness*); h++) @@ -232,6 +241,13 @@ #endif setupAssemblyPrimitives(optprim, cpuid); +#if X265_ARCH_ARM64 + /* Temporary workaround because luma_vsp assembly primitive has not been completed + * but interp_8tap_hv_pp_cpu uses mixed C primitive and assembly primitive. + * Otherwise, segment fault occurs. */ + setupAliasCPrimitives(cprim, optprim, cpuid); +#endif + /* Note that we do not setup aliases for performance tests, that would be * redundant. The testbench only verifies they are correctly aliased */
View file
x265_3.3.tar.gz/source/test/testharness.h -> x265_3.4.tar.gz/source/test/testharness.h
Changed
@@ -3,6 +3,7 @@ * * Authors: Steve Borho <steve@borho.org> * Min Chen <chenm003@163.com> + * Yimeng Su <yimeng.su@huawei.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -81,12 +82,16 @@ #if X265_ARCH_X86 asm volatile("rdtsc" : "=a" (a) ::"edx"); #elif X265_ARCH_ARM +#if X265_ARCH_ARM64 + asm volatile("mrs %0, cntvct_el0" : "=r"(a)); +#else // TOD-DO: verify following inline asm to get cpu Timestamp Counter for ARM arch // asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(a)); // TO-DO: replace clock() function with appropriate ARM cpu instructions a = clock(); #endif +#endif return a; } #endif // ifdef _MSC_VER
View file
x265_3.3.tar.gz/source/x265.cpp -> x265_3.4.tar.gz/source/x265.cpp
Changed
@@ -27,11 +27,7 @@ #include "x265.h" #include "x265cli.h" - -#include "input/input.h" -#include "output/output.h" -#include "output/reconplay.h" -#include "svt.h" +#include "abrEncApp.h" #if HAVE_VLD /* Visual Leak Detector */ @@ -47,191 +43,59 @@ #include <fstream> #include <queue> -#define CONSOLE_TITLE_SIZE 200 -#ifdef _WIN32 -#include <windows.h> -#define SetThreadExecutionState(es) -static char orgConsoleTitle[CONSOLE_TITLE_SIZE] = ""; -#else -#define GetConsoleTitle(t, n) -#define SetConsoleTitle(t) -#define SetThreadExecutionState(es) -#endif - using namespace X265_NS; -/* Ctrl-C handler */ -static volatile sig_atomic_t b_ctrl_c /* = 0 */; -static void sigint_handler(int) -{ - b_ctrl_c = 1; -} -#define START_CODE 0x00000001 -#define START_CODE_BYTES 4 - -struct CLIOptions -{ - InputFile* input; - ReconFile* recon; - OutputFile* output; - FILE* qpfile; - FILE* zoneFile; - FILE* dolbyVisionRpu; /* File containing Dolby Vision BL RPU metadata */ - const char* reconPlayCmd; - const x265_api* api; - x265_param* param; - x265_vmaf_data* vmafData; - bool bProgress; - bool bForceY4m; - bool bDither; - uint32_t seek; // number of frames to skip from the beginning - uint32_t framesToBeEncoded; // number of frames to encode - uint64_t totalbytes; - int64_t startTime; - int64_t prevUpdateTime; - - /* in microseconds */ - static const int UPDATE_INTERVAL = 250000; - - CLIOptions() - { - input = NULL; - recon = NULL; - output = NULL; - qpfile = NULL; - zoneFile = NULL; - dolbyVisionRpu = NULL; - reconPlayCmd = NULL; - api = NULL; - param = NULL; - vmafData = NULL; - framesToBeEncoded = seek = 0; - totalbytes = 0; - bProgress = true; - bForceY4m = false; - startTime = x265_mdate(); - prevUpdateTime = 0; - bDither = false; - } +#define X265_HEAD_ENTRIES 3 - void destroy(); - void printStatus(uint32_t frameNum); - bool parse(int argc, char **argv); - bool parseZoneParam(int argc, char **argv, x265_param* globalParam, int zonefileCount); - bool parseQPFile(x265_picture 
&pic_org); - bool parseZoneFile(); -}; - -void CLIOptions::destroy() -{ - if (input) - input->release(); - input = NULL; - if (recon) - recon->release(); - recon = NULL; - if (qpfile) - fclose(qpfile); - qpfile = NULL; - if (zoneFile) - fclose(zoneFile); - zoneFile = NULL; - if (dolbyVisionRpu) - fclose(dolbyVisionRpu); - dolbyVisionRpu = NULL; - if (output) - output->release(); - output = NULL; -} - -void CLIOptions::printStatus(uint32_t frameNum) -{ - char buf[200]; - int64_t time = x265_mdate(); - - if (!bProgress || !frameNum || (prevUpdateTime && time - prevUpdateTime < UPDATE_INTERVAL)) - return; - - int64_t elapsed = time - startTime; - double fps = elapsed > 0 ? frameNum * 1000000. / elapsed : 0; - float bitrate = 0.008f * totalbytes * (param->fpsNum / param->fpsDenom) / ((float)frameNum); - if (framesToBeEncoded) - { - int eta = (int)(elapsed * (framesToBeEncoded - frameNum) / ((int64_t)frameNum * 1000000)); - sprintf(buf, "x265 [%.1f%%] %d/%d frames, %.2f fps, %.2f kb/s, eta %d:%02d:%02d", - 100. * frameNum / (param->chunkEnd ? param->chunkEnd : param->totalFrames), frameNum, (param->chunkEnd ? param->chunkEnd : param->totalFrames), fps, bitrate, - eta / 3600, (eta / 60) % 60, eta % 60); - } - else - sprintf(buf, "x265 %d frames: %.2f fps, %.2f kb/s", frameNum, fps, bitrate); - - fprintf(stderr, "%s \r", buf + 5); - SetConsoleTitle(buf); - fflush(stderr); // needed in windows - prevUpdateTime = time; -} +#ifdef _WIN32 +#define strdup _strdup +#endif -bool CLIOptions::parseZoneParam(int argc, char **argv, x265_param* globalParam, int zonefileCount) +#ifdef _WIN32 +/* Copy of x264 code, which allows for Unicode characters in the command line. + * Retrieve command line arguments as UTF-8. */ +static int get_argv_utf8(int *argc_ptr, char ***argv_ptr) { - bool bError = false; - int bShowHelp = false; - int outputBitDepth = 0; - const char *profile = NULL; - - /* Presets are applied before all other options. 
*/ - for (optind = 0;;) - { - int c = getopt_long(argc, argv, short_options, long_options, NULL); - if (c == -1) - break; - else if (c == 'D') - outputBitDepth = atoi(optarg); - else if (c == 'P') - profile = optarg; - else if (c == '?') - bShowHelp = true; - } - - if (!outputBitDepth && profile) - { - /* try to derive the output bit depth from the requested profile */ - if (strstr(profile, "10")) - outputBitDepth = 10; - else if (strstr(profile, "12")) - outputBitDepth = 12; - else - outputBitDepth = 8; - } - - api = x265_api_get(outputBitDepth); - if (!api) + int ret = 0; + wchar_t **argv_utf16 = CommandLineToArgvW(GetCommandLineW(), argc_ptr); + if (argv_utf16) { - x265_log(NULL, X265_LOG_WARNING, "falling back to default bit-depth\n"); - api = x265_api_get(0); - } + int argc = *argc_ptr; + int offset = (argc + 1) * sizeof(char*); + int size = offset; - if (bShowHelp) - { - printVersion(globalParam, api); - showHelp(globalParam); - } + for (int i = 0; i < argc; i++)
View file
x265_3.3.tar.gz/source/x265.h -> x265_3.4.tar.gz/source/x265.h
Changed
@@ -134,6 +134,7 @@ int ctuDistortionRefine; int rightOffset; int bottomOffset; + int frameDuplication; }x265_analysis_validate; /* Stores intra analysis data for a single frame. This struct needs better packing */ @@ -304,6 +305,7 @@ double totalFrameTime; double vmafFrameScore; double bufferFillFinal; + double unclippedBufferFillFinal; } x265_frame_stats; typedef struct x265_ctu_info_t @@ -1255,9 +1257,9 @@ * skip blocks. Default is disabled */ int bEnableEarlySkip; - /* Enable early CU size decisions to avoid recursing to higher depths. + /* Enable early CU size decisions to avoid recursing to higher depths. * Default is enabled */ - int bEnableRecursionSkip; + int recursionSkipMode; /* Use a faster search method to find the best intra mode. Default is 0 */ int bEnableFastIntra; @@ -1857,7 +1859,7 @@ double edgeTransitionThreshold; /* Enables histogram based scenecut detection algorithm to detect scenecuts. Default disabled */ - int bHistBasedSceneCut; + int bHistBasedSceneCut; /* Enable HME search ranges for L0, L1 and L2 respectively. */ int hmeRange[3]; @@ -1874,7 +1876,7 @@ * analysis information stored in analysis-save. Higher the refine level higher * the information stored. Default is 5 */ int analysisSaveReuseLevel; - + /* A value between 1 and 10 (both inclusive) determines the level of * analysis information reused in analysis-load. Higher the refine level higher * the information reused. Default is 5 */ @@ -1901,6 +1903,12 @@ * info is available from the corresponding analysis-save. */ int confWinBottomOffset; + + /* Edge variance threshold for quad tree establishment. */ + float edgeVarThreshold; + + /* Maxrate that could be signaled to the decoder. Default 0. API only. */ + int decoderVbvMaxRate; } x265_param; /* x265_param_alloc:
View file
x265_3.4.tar.gz/source/x265cli.cpp
Added
@@ -0,0 +1,1062 @@ +/***************************************************************************** + * Copyright (C) 2013-2020 MulticoreWare, Inc + * + * Authors: Steve Borho <steve@borho.org> + * Min Chen <chenm003@163.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at license @ x265.com. + *****************************************************************************/ +#if _MSC_VER +#pragma warning(disable: 4127) // conditional expression is constant, yes I know +#endif + +#include "x265cli.h" +#include "svt.h" + +#define START_CODE 0x00000001 +#define START_CODE_BYTES 4 + +#ifdef __cplusplus +namespace X265_NS { +#endif + + static void printVersion(x265_param *param, const x265_api* api) + { + x265_log(param, X265_LOG_INFO, "HEVC encoder version %s\n", api->version_str); + x265_log(param, X265_LOG_INFO, "build info %s\n", api->build_info_str); + } + + static void showHelp(x265_param *param) + { + int level = param->logLevel; + +#define OPT(value) (value ? 
"enabled" : "disabled") +#define H0 printf +#define H1 if (level >= X265_LOG_DEBUG) printf + + H0("\nSyntax: x265 [options] infile [-o] outfile\n"); + H0(" infile can be YUV or Y4M\n"); + H0(" outfile is raw HEVC bitstream\n"); + H0("\nExecutable Options:\n"); + H0("-h/--help Show this help text and exit\n"); + H0(" --fullhelp Show all options and exit\n"); + H0("-V/--version Show version info and exit\n"); + H0("\nOutput Options:\n"); + H0("-o/--output <filename> Bitstream output file name\n"); + H0("-D/--output-depth 8|10|12 Output bit depth (also internal bit depth). Default %d\n", param->internalBitDepth); + H0(" --log-level <string> Logging level: none error warning info debug full. Default %s\n", X265_NS::logLevelNames[param->logLevel + 1]); + H0(" --no-progress Disable CLI progress reports\n"); + H0(" --csv <filename> Comma separated log file, if csv-log-level > 0 frame level statistics, else one line per run\n"); + H0(" --csv-log-level <integer> Level of csv logging, if csv-log-level > 0 frame level statistics, else one line per run: 0-2\n"); + H0("\nInput Options:\n"); + H0(" --input <filename> Raw YUV or Y4M input file name. `-` for stdin\n"); + H1(" --y4m Force parsing of input stream as YUV4MPEG2 regardless of file extension\n"); + H0(" --fps <float|rational> Source frame rate (float or num/denom), auto-detected if Y4M\n"); + H0(" --input-res WxH Source picture size [w x h], auto-detected if Y4M\n"); + H1(" --input-depth <integer> Bit-depth of input file. Default 8\n"); + H1(" --input-csp <string> Chroma subsampling, auto-detected if Y4M\n"); + H1(" 0 - i400 (4:0:0 monochrome)\n"); + H1(" 1 - i420 (4:2:0 default)\n"); + H1(" 2 - i422 (4:2:2)\n"); + H1(" 3 - i444 (4:4:4)\n"); +#if ENABLE_HDR10_PLUS + H0(" --dhdr10-info <filename> JSON file containing the Creative Intent Metadata to be encoded as Dynamic Tone Mapping\n"); + H0(" --[no-]dhdr10-opt Insert tone mapping SEI only for IDR frames and when the tone mapping information changes. 
Default disabled\n"); +#endif + H0(" --dolby-vision-profile <float|integer> Specifies Dolby Vision profile ID. Currently only profile 5, profile 8.1 and profile 8.2 enabled. Specified as '5' or '50'. Default 0 (disabled).\n"); + H0(" --dolby-vision-rpu <filename> File containing Dolby Vision RPU metadata.\n" + " If given, x265's Dolby Vision metadata parser will fill the RPU field of input pictures with the metadata read from the file. Default NULL(disabled).\n"); + H0(" --nalu-file <filename> Text file containing SEI messages in the following format : <POC><space><PREFIX><space><NAL UNIT TYPE>/<SEI TYPE><space><SEI Payload>\n"); + H0("-f/--frames <integer> Maximum number of frames to encode. Default all\n"); + H0(" --seek <integer> First frame to encode\n"); + H1(" --[no-]interlace <bff|tff> Indicate input pictures are interlace fields in temporal order. Default progressive\n"); + H0(" --[no-]field Enable or disable field coding. Default %s\n", OPT(param->bField)); + H1(" --dither Enable dither if downscaling to 8 bit pixels. Default disabled\n"); + H0(" --[no-]copy-pic Copy buffers of input picture in frame. Default %s\n", OPT(param->bCopyPicToFrame)); + H0("\nQuality reporting metrics:\n"); + H0(" --[no-]ssim Enable reporting SSIM metric scores. Default %s\n", OPT(param->bEnableSsim)); + H0(" --[no-]psnr Enable reporting PSNR metric scores. Default %s\n", OPT(param->bEnablePsnr)); + H0("\nProfile, Level, Tier:\n"); + H0("-P/--profile <string> Enforce an encode profile: main, main10, mainstillpicture\n"); + H0(" --level-idc <integer|float> Force a minimum required decoder level (as '5.0' or '50')\n"); + H0(" --[no-]high-tier If a decoder level is specified, this modifier selects High tier of that level\n"); + H0(" --uhd-bd Enable UHD Bluray compatibility support\n"); + H0(" --[no-]allow-non-conformance Allow the encoder to generate profile NONE bitstreams. 
Default %s\n", OPT(param->bAllowNonConformance)); + H0("\nThreading, performance:\n"); + H0(" --pools <integer,...> Comma separated thread count per thread pool (pool per NUMA node)\n"); + H0(" '-' implies no threads on node, '+' implies one thread per core on node\n"); + H0("-F/--frame-threads <integer> Number of concurrently encoded frames. 0: auto-determined by core count\n"); + H0(" --[no-]wpp Enable Wavefront Parallel Processing. Default %s\n", OPT(param->bEnableWavefront)); + H0(" --[no-]slices <integer> Enable Multiple Slices feature. Default %d\n", param->maxSlices); + H0(" --[no-]pmode Parallel mode analysis. Default %s\n", OPT(param->bDistributeModeAnalysis)); + H0(" --[no-]pme Parallel motion estimation. Default %s\n", OPT(param->bDistributeMotionEstimation)); + H0(" --[no-]asm <bool|int|string> Override CPU detection. Default: auto\n"); + H0("\nPresets:\n"); + H0("-p/--preset <string> Trade off performance for compression efficiency. Default medium\n"); + H0(" ultrafast, superfast, veryfast, faster, fast, medium, slow, slower, veryslow, or placebo\n"); + H0("-t/--tune <string> Tune the settings for a particular type of source or situation:\n"); + H0(" psnr, ssim, grain, zerolatency, fastdecode\n"); + H0("\nQuad-Tree size and depth:\n"); + H0("-s/--ctu <64|32|16> Maximum CU size (WxH). Default %d\n", param->maxCUSize); + H0(" --min-cu-size <64|32|16|8> Minimum CU size (WxH). Default %d\n", param->minCUSize); + H0(" --max-tu-size <32|16|8|4> Maximum TU size (WxH). Default %d\n", param->maxTUSize); + H0(" --tu-intra-depth <integer> Max TU recursive depth for intra CUs. Default %d\n", param->tuQTMaxIntraDepth); + H0(" --tu-inter-depth <integer> Max TU recursive depth for inter CUs. Default %d\n", param->tuQTMaxInterDepth); + H0(" --limit-tu <0..4> Enable early exit from TU recursion for inter coded blocks. Default %d\n", param->limitTU); + H0("\nAnalysis:\n"); + H0(" --rd <1..6> Level of RDO in mode decision 1:least....6:full RDO. 
Default %d\n", param->rdLevel); + H0(" --[no-]psy-rd <0..5.0> Strength of psycho-visual rate distortion optimization, 0 to disable. Default %.1f\n", param->psyRd); + H0(" --[no-]rdoq-level <0|1|2> Level of RDO in quantization 0:none, 1:levels, 2:levels & coding groups. Default %d\n", param->rdoqLevel); + H0(" --[no-]psy-rdoq <0..50.0> Strength of psycho-visual optimization in RDO quantization, 0 to disable. Default %.1f\n", param->psyRdoq); + H0(" --dynamic-rd <0..4.0> Strength of dynamic RD, 0 to disable. Default %.2f\n", param->dynamicRd); + H0(" --[no-]ssim-rd Enable ssim rate distortion optimization, 0 to disable. Default %s\n", OPT(param->bSsimRd)); + H0(" --[no-]rd-refine Enable QP based RD refinement for rd levels 5 and 6. Default %s\n", OPT(param->bEnableRdRefine)); + H0(" --[no-]early-skip Enable early SKIP detection. Default %s\n", OPT(param->bEnableEarlySkip)); + H0(" --rskip <mode> Set mode for early exit from recursion. Mode 1: exit using rdcost & CU homogenity. Mode 2: exit using CU edge density.\n" + " Mode 0: disabled. Default %d\n", param->recursionSkipMode); + H1(" --rskip-edge-threshold Threshold in terms of percentage (integer of range [0,100]) for minimum edge density in CUs used to prun the recursion depth. Applicable only for rskip mode 2. Value is preset dependent. Default: %.f\n", param->edgeVarThreshold*100.0f); + H1(" --[no-]tskip-fast Enable fast intra transform skipping. Default %s\n", OPT(param->bEnableTSkipFast)); + H1(" --[no-]splitrd-skip Enable skipping split RD analysis when sum of split CU rdCost larger than one split CU rdCost for Intra CU. Default %s\n", OPT(param->bEnableSplitRdSkip)); + H1(" --nr-intra <integer> An integer value in range of 0 to 2000, which denotes strength of noise reduction in intra CUs. Default 0\n"); + H1(" --nr-inter <integer> An integer value in range of 0 to 2000, which denotes strength of noise reduction in inter CUs. 
Default 0\n"); + H0(" --ctu-info <integer> Enable receiving ctu information asynchronously and determine reaction to the CTU information (0, 1, 2, 4, 6) Default 0\n" + " - 1: force the partitions if CTU information is present\n" + " - 2: functionality of (1) and reduce qp if CTU information has changed\n" + " - 4: functionality of (1) and force Inter modes when CTU Information has changed, merge/skip otherwise\n" + " Enable this option only when planning to invoke the API function x265_encoder_ctu_info to copy ctu-info asynchronously\n"); + H0("\nCoding tools:\n"); + H0("-w/--[no-]weightp Enable weighted prediction in P slices. Default %s\n", OPT(param->bEnableWeightedPred)); + H0(" --[no-]weightb Enable weighted prediction in B slices. Default %s\n", OPT(param->bEnableWeightedBiPred)); + H0(" --[no-]cu-lossless Consider lossless mode in CU RDO decisions. Default %s\n", OPT(param->bCULossless)); + H0(" --[no-]signhide Hide sign bit of one coeff per TU (rdo). Default %s\n", OPT(param->bEnableSignHiding)); + H1(" --[no-]tskip Enable intra 4x4 transform skipping. Default %s\n", OPT(param->bEnableTransformSkip)); + H0("\nTemporal / motion search options:\n"); + H0(" --max-merge <1..5> Maximum number of merge candidates. Default %d\n", param->maxNumMergeCand); + H0(" --ref <integer> max number of L0 references to be allowed (1 .. 16) Default %d\n", param->maxNumReferences); + H0(" --limit-refs <0|1|2|3> Limit references per depth (1) or CU (2) or both (3). Default %d\n", param->limitReferences); + H0(" --me <string> Motion search method dia hex umh star full. Default %d\n", param->searchMethod); + H0("-m/--subme <integer> Amount of subpel refinement to perform (0:least .. 7:most). Default %d \n", param->subpelRefine); + H0(" --merange <integer> Motion search range. Default %d\n", param->searchRange); + H0(" --[no-]rect Enable rectangular motion partitions Nx2N and 2NxN. 
Default %s\n", OPT(param->bEnableRectInter)); + H0(" --[no-]amp Enable asymmetric motion partitions, requires --rect. Default %s\n", OPT(param->bEnableAMP)); + H0(" --[no-]limit-modes Limit rectangular and asymmetric motion predictions. Default %d\n", param->limitModes); + H1(" --[no-]temporal-mvp Enable temporal MV predictors. Default %s\n", OPT(param->bEnableTemporalMvp)); + H1(" --[no-]hme Enable Hierarchical Motion Estimation. Default %s\n", OPT(param->bEnableHME)); + H1(" --hme-search <string> Motion search-method for HME L0,L1 and L2. Default(L0,L1,L2) is %d,%d,%d\n", param->hmeSearchMethod[0], param->hmeSearchMethod[1], param->hmeSearchMethod[2]); + H1(" --hme-range <int>,<int>,<int> Motion search-range for HME L0,L1 and L2. Default(L0,L1,L2) is %d,%d,%d\n", param->hmeRange[0], param->hmeRange[1], param->hmeRange[2]); + H0("\nSpatial / intra options:\n"); + H0(" --[no-]strong-intra-smoothing Enable strong intra smoothing for 32x32 blocks. Default %s\n", OPT(param->bEnableStrongIntraSmoothing)); + H0(" --[no-]constrained-intra Constrained intra prediction (use only intra coded reference pixels) Default %s\n", OPT(param->bEnableConstrainedIntra)); + H0(" --[no-]b-intra Enable intra in B frames in veryslow presets. Default %s\n", OPT(param->bIntraInBFrames)); + H0(" --[no-]fast-intra Enable faster search method for angular intra predictions. Default %s\n", OPT(param->bEnableFastIntra)); + H0(" --rdpenalty <0..2> penalty for 32x32 intra TU in non-I slices. 0:disabled 1:RD-penalty 2:maximum. Default %d\n", param->rdPenalty); + H0("\nSlice decision options:\n"); + H0(" --[no-]open-gop Enable open-GOP, allows I slices to be non-IDR. Default %s\n", OPT(param->bOpenGOP)); + H0("-I/--keyint <integer> Max IDR period in frames. -1 for infinite-gop. Default %d\n", param->keyframeMax); + H0("-i/--min-keyint <integer> Scenecuts closer together than this are coded as I, not IDR. 
Default: auto\n"); + H0(" --gop-lookahead <integer> Extends gop boundary if a scenecut is found within this from keyint boundary. Default 0\n"); + H0(" --no-scenecut Disable adaptive I-frame decision\n"); + H0(" --scenecut <integer> How aggressively to insert extra I-frames. Default %d\n", param->scenecutThreshold); + H1(" --scenecut-bias <0..100.0> Bias for scenecut detection. Default %.2f\n", param->scenecutBias); + H0(" --hist-scenecut Enables histogram based scene-cut detection using histogram based algorithm.\n"); + H0(" --no-hist-scenecut Disables histogram based scene-cut detection using histogram based algorithm.\n"); + H1(" --hist-threshold <0.0..2.0> Luma Edge histogram's Normalized SAD threshold for histogram based scenecut detection Default %.2f\n", param->edgeTransitionThreshold); + H0(" --[no-]fades Enable detection and handling of fade-in regions. Default %s\n", OPT(param->bEnableFades)); + H1(" --[no-]scenecut-aware-qp Enable increasing QP for frames inside the scenecut window after scenecut. Default %s\n", OPT(param->bEnableSceneCutAwareQp)); + H1(" --scenecut-window <0..1000> QP incremental duration(in milliseconds) when scenecut-aware-qp is enabled. Default %d\n", param->scenecutWindow); + H1(" --max-qp-delta <0..10> QP offset to increment with base QP for inter-frames. Default %d\n", param->maxQpDelta); + H0(" --radl <integer> Number of RADL pictures allowed in front of IDR. Default %d\n", param->radl); + H0(" --intra-refresh Use Periodic Intra Refresh instead of IDR frames\n"); + H0(" --rc-lookahead <integer> Number of frames for frame-type lookahead (determines encoder latency) Default %d\n", param->lookaheadDepth); + H1(" --lookahead-slices <0..16> Number of slices to use per lookahead cost estimate. Default %d\n", param->lookaheadSlices); + H0(" --lookahead-threads <integer> Number of threads to be dedicated to perform lookahead only. Default %d\n", param->lookaheadThreads); + H0("-b/--bframes <0..16> Maximum number of consecutive b-frames. 
Default %d\n", param->bframes); + H1(" --bframe-bias <integer> Bias towards B frame decisions. Default %d\n", param->bFrameBias); + H0(" --b-adapt <0..2> 0 - none, 1 - fast, 2 - full (trellis) adaptive B frame scheduling. Default %d\n", param->bFrameAdaptive); + H0(" --[no-]b-pyramid Use B-frames as references. Default %s\n", OPT(param->bBPyramid)); + H1(" --qpfile <string> Force frametypes and QPs for some or all frames\n"); + H1(" Format of each line: framenumber frametype QP\n"); + H1(" QP is optional (none lets x265 choose). Frametypes: I,i,K,P,B,b.\n"); + H1(" QPs are restricted by qpmin/qpmax.\n"); + H1(" --force-flush <integer> Force the encoder to flush frames. Default %d\n", param->forceFlush); + H1(" 0 - flush the encoder only when all the input pictures are over.\n"); + H1(" 1 - flush all the frames even when the input is not over. Slicetype decision may change with this option.\n"); + H1(" 2 - flush the slicetype decided frames only.\n");
View file
x265_3.3.tar.gz/source/x265cli.h -> x265_3.4.tar.gz/source/x265cli.h
Changed
@@ -27,9 +27,23 @@ #include "common.h" #include "param.h" +#include "input/input.h" +#include "output/output.h" +#include "output/reconplay.h" #include <getopt.h> +#define CONSOLE_TITLE_SIZE 200 +#ifdef _WIN32 +#include <windows.h> +#define SetThreadExecutionState(es) +static char orgConsoleTitle[CONSOLE_TITLE_SIZE] = ""; +#else +#define GetConsoleTitle(t, n) +#define SetConsoleTitle(t) +#define SetThreadExecutionState(es) +#endif + #ifdef __cplusplus namespace X265_NS { #endif @@ -105,8 +119,8 @@ { "amp", no_argument, NULL, 0 }, { "no-early-skip", no_argument, NULL, 0 }, { "early-skip", no_argument, NULL, 0 }, - { "no-rskip", no_argument, NULL, 0 }, - { "rskip", no_argument, NULL, 0 }, + { "rskip", required_argument, NULL, 0 }, + { "rskip-edge-threshold", required_argument, NULL, 0 }, { "no-fast-cbf", no_argument, NULL, 0 }, { "fast-cbf", no_argument, NULL, 0 }, { "no-tskip", no_argument, NULL, 0 }, @@ -358,6 +372,7 @@ { "cll", no_argument, NULL, 0 }, { "no-cll", no_argument, NULL, 0 }, { "hme-range", required_argument, NULL, 0 }, + { "abr-ladder", required_argument, NULL, 0 }, { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, @@ -365,336 +380,82 @@ { 0, 0, 0, 0 } }; -static void printVersion(x265_param *param, const x265_api* api) -{ - x265_log(param, X265_LOG_INFO, "HEVC encoder version %s\n", api->version_str); - x265_log(param, X265_LOG_INFO, "build info %s\n", api->build_info_str); -} + struct CLIOptions + { + InputFile* input; + ReconFile* recon; + OutputFile* output; + FILE* qpfile; + FILE* zoneFile; + FILE* dolbyVisionRpu; /* File containing Dolby Vision BL RPU metadata */ + const char* reconPlayCmd; + const x265_api* api; + x265_param* param; + x265_vmaf_data* vmafData; + bool bProgress; + bool bForceY4m; + bool bDither; + uint32_t seek; // number of frames to skip from the beginning + uint32_t framesToBeEncoded; // number of frames to encode + uint64_t totalbytes; + int64_t startTime; + int64_t prevUpdateTime; -static void showHelp(x265_param *param) -{ - 
int level = param->logLevel; + int argCnt; + char** argString; -#define OPT(value) (value ? "enabled" : "disabled") -#define H0 printf -#define H1 if (level >= X265_LOG_DEBUG) printf + /* ABR ladder settings */ + bool isAbrLadderConfig; + bool enableScaler; + char* encName; + char* reuseName; + uint32_t encId; + int refId; + uint32_t loadLevel; + uint32_t saveLevel; + uint32_t numRefs; - H0("\nSyntax: x265 [options] infile [-o] outfile\n"); - H0(" infile can be YUV or Y4M\n"); - H0(" outfile is raw HEVC bitstream\n"); - H0("\nExecutable Options:\n"); - H0("-h/--help Show this help text and exit\n"); - H0(" --fullhelp Show all options and exit\n"); - H0("-V/--version Show version info and exit\n"); - H0("\nOutput Options:\n"); - H0("-o/--output <filename> Bitstream output file name\n"); - H0("-D/--output-depth 8|10|12 Output bit depth (also internal bit depth). Default %d\n", param->internalBitDepth); - H0(" --log-level <string> Logging level: none error warning info debug full. Default %s\n", X265_NS::logLevelNames[param->logLevel + 1]); - H0(" --no-progress Disable CLI progress reports\n"); - H0(" --csv <filename> Comma separated log file, if csv-log-level > 0 frame level statistics, else one line per run\n"); - H0(" --csv-log-level <integer> Level of csv logging, if csv-log-level > 0 frame level statistics, else one line per run: 0-2\n"); - H0("\nInput Options:\n"); - H0(" --input <filename> Raw YUV or Y4M input file name. `-` for stdin\n"); - H1(" --y4m Force parsing of input stream as YUV4MPEG2 regardless of file extension\n"); - H0(" --fps <float|rational> Source frame rate (float or num/denom), auto-detected if Y4M\n"); - H0(" --input-res WxH Source picture size [w x h], auto-detected if Y4M\n"); - H1(" --input-depth <integer> Bit-depth of input file. 
Default 8\n"); - H1(" --input-csp <string> Chroma subsampling, auto-detected if Y4M\n"); - H1(" 0 - i400 (4:0:0 monochrome)\n"); - H1(" 1 - i420 (4:2:0 default)\n"); - H1(" 2 - i422 (4:2:2)\n"); - H1(" 3 - i444 (4:4:4)\n"); -#if ENABLE_HDR10_PLUS - H0(" --dhdr10-info <filename> JSON file containing the Creative Intent Metadata to be encoded as Dynamic Tone Mapping\n"); - H0(" --[no-]dhdr10-opt Insert tone mapping SEI only for IDR frames and when the tone mapping information changes. Default disabled\n"); -#endif - H0(" --dolby-vision-profile <float|integer> Specifies Dolby Vision profile ID. Currently only profile 5, profile 8.1 and profile 8.2 enabled. Specified as '5' or '50'. Default 0 (disabled).\n"); - H0(" --dolby-vision-rpu <filename> File containing Dolby Vision RPU metadata.\n" - " If given, x265's Dolby Vision metadata parser will fill the RPU field of input pictures with the metadata read from the file. Default NULL(disabled).\n"); - H0(" --nalu-file <filename> Text file containing SEI messages in the following format : <POC><space><PREFIX><space><NAL UNIT TYPE>/<SEI TYPE><space><SEI Payload>\n"); - H0("-f/--frames <integer> Maximum number of frames to encode. Default all\n"); - H0(" --seek <integer> First frame to encode\n"); - H1(" --[no-]interlace <bff|tff> Indicate input pictures are interlace fields in temporal order. Default progressive\n"); - H0(" --[no-]field Enable or disable field coding. Default %s\n", OPT( param->bField)); - H1(" --dither Enable dither if downscaling to 8 bit pixels. Default disabled\n"); - H0(" --[no-]copy-pic Copy buffers of input picture in frame. Default %s\n", OPT(param->bCopyPicToFrame)); - H0("\nQuality reporting metrics:\n"); - H0(" --[no-]ssim Enable reporting SSIM metric scores. Default %s\n", OPT(param->bEnableSsim)); - H0(" --[no-]psnr Enable reporting PSNR metric scores. 
Default %s\n", OPT(param->bEnablePsnr)); - H0("\nProfile, Level, Tier:\n"); - H0("-P/--profile <string> Enforce an encode profile: main, main10, mainstillpicture\n"); - H0(" --level-idc <integer|float> Force a minimum required decoder level (as '5.0' or '50')\n"); - H0(" --[no-]high-tier If a decoder level is specified, this modifier selects High tier of that level\n"); - H0(" --uhd-bd Enable UHD Bluray compatibility support\n"); - H0(" --[no-]allow-non-conformance Allow the encoder to generate profile NONE bitstreams. Default %s\n", OPT(param->bAllowNonConformance)); - H0("\nThreading, performance:\n"); - H0(" --pools <integer,...> Comma separated thread count per thread pool (pool per NUMA node)\n"); - H0(" '-' implies no threads on node, '+' implies one thread per core on node\n"); - H0("-F/--frame-threads <integer> Number of concurrently encoded frames. 0: auto-determined by core count\n"); - H0(" --[no-]wpp Enable Wavefront Parallel Processing. Default %s\n", OPT(param->bEnableWavefront)); - H0(" --[no-]slices <integer> Enable Multiple Slices feature. Default %d\n", param->maxSlices); - H0(" --[no-]pmode Parallel mode analysis. Default %s\n", OPT(param->bDistributeModeAnalysis)); - H0(" --[no-]pme Parallel motion estimation. Default %s\n", OPT(param->bDistributeMotionEstimation)); - H0(" --[no-]asm <bool|int|string> Override CPU detection. Default: auto\n"); - H0("\nPresets:\n"); - H0("-p/--preset <string> Trade off performance for compression efficiency. Default medium\n"); - H0(" ultrafast, superfast, veryfast, faster, fast, medium, slow, slower, veryslow, or placebo\n"); - H0("-t/--tune <string> Tune the settings for a particular type of source or situation:\n"); - H0(" psnr, ssim, grain, zerolatency, fastdecode\n"); - H0("\nQuad-Tree size and depth:\n"); - H0("-s/--ctu <64|32|16> Maximum CU size (WxH). Default %d\n", param->maxCUSize); - H0(" --min-cu-size <64|32|16|8> Minimum CU size (WxH). 
Default %d\n", param->minCUSize); - H0(" --max-tu-size <32|16|8|4> Maximum TU size (WxH). Default %d\n", param->maxTUSize); - H0(" --tu-intra-depth <integer> Max TU recursive depth for intra CUs. Default %d\n", param->tuQTMaxIntraDepth); - H0(" --tu-inter-depth <integer> Max TU recursive depth for inter CUs. Default %d\n", param->tuQTMaxInterDepth); - H0(" --limit-tu <0..4> Enable early exit from TU recursion for inter coded blocks. Default %d\n", param->limitTU); - H0("\nAnalysis:\n"); - H0(" --rd <1..6> Level of RDO in mode decision 1:least....6:full RDO. Default %d\n", param->rdLevel); - H0(" --[no-]psy-rd <0..5.0> Strength of psycho-visual rate distortion optimization, 0 to disable. Default %.1f\n", param->psyRd); - H0(" --[no-]rdoq-level <0|1|2> Level of RDO in quantization 0:none, 1:levels, 2:levels & coding groups. Default %d\n", param->rdoqLevel); - H0(" --[no-]psy-rdoq <0..50.0> Strength of psycho-visual optimization in RDO quantization, 0 to disable. Default %.1f\n", param->psyRdoq); - H0(" --dynamic-rd <0..4.0> Strength of dynamic RD, 0 to disable. Default %.2f\n", param->dynamicRd); - H0(" --[no-]ssim-rd Enable ssim rate distortion optimization, 0 to disable. Default %s\n", OPT(param->bSsimRd)); - H0(" --[no-]rd-refine Enable QP based RD refinement for rd levels 5 and 6. Default %s\n", OPT(param->bEnableRdRefine)); - H0(" --[no-]early-skip Enable early SKIP detection. Default %s\n", OPT(param->bEnableEarlySkip)); - H0(" --[no-]rskip Enable early exit from recursion. Default %s\n", OPT(param->bEnableRecursionSkip)); - H1(" --[no-]tskip-fast Enable fast intra transform skipping. Default %s\n", OPT(param->bEnableTSkipFast)); - H1(" --[no-]splitrd-skip Enable skipping split RD analysis when sum of split CU rdCost larger than one split CU rdCost for Intra CU. Default %s\n", OPT(param->bEnableSplitRdSkip)); - H1(" --nr-intra <integer> An integer value in range of 0 to 2000, which denotes strength of noise reduction in intra CUs. 
Default 0\n"); - H1(" --nr-inter <integer> An integer value in range of 0 to 2000, which denotes strength of noise reduction in inter CUs. Default 0\n"); - H0(" --ctu-info <integer> Enable receiving ctu information asynchronously and determine reaction to the CTU information (0, 1, 2, 4, 6) Default 0\n" - " - 1: force the partitions if CTU information is present\n" - " - 2: functionality of (1) and reduce qp if CTU information has changed\n" - " - 4: functionality of (1) and force Inter modes when CTU Information has changed, merge/skip otherwise\n" - " Enable this option only when planning to invoke the API function x265_encoder_ctu_info to copy ctu-info asynchronously\n"); - H0("\nCoding tools:\n"); - H0("-w/--[no-]weightp Enable weighted prediction in P slices. Default %s\n", OPT(param->bEnableWeightedPred)); - H0(" --[no-]weightb Enable weighted prediction in B slices. Default %s\n", OPT(param->bEnableWeightedBiPred)); - H0(" --[no-]cu-lossless Consider lossless mode in CU RDO decisions. Default %s\n", OPT(param->bCULossless)); - H0(" --[no-]signhide Hide sign bit of one coeff per TU (rdo). Default %s\n", OPT(param->bEnableSignHiding)); - H1(" --[no-]tskip Enable intra 4x4 transform skipping. Default %s\n", OPT(param->bEnableTransformSkip)); - H0("\nTemporal / motion search options:\n"); - H0(" --max-merge <1..5> Maximum number of merge candidates. Default %d\n", param->maxNumMergeCand); - H0(" --ref <integer> max number of L0 references to be allowed (1 .. 16) Default %d\n", param->maxNumReferences); - H0(" --limit-refs <0|1|2|3> Limit references per depth (1) or CU (2) or both (3). Default %d\n", param->limitReferences); - H0(" --me <string> Motion search method dia hex umh star full. Default %d\n", param->searchMethod); - H0("-m/--subme <integer> Amount of subpel refinement to perform (0:least .. 7:most). Default %d \n", param->subpelRefine); - H0(" --merange <integer> Motion search range. 
Default %d\n", param->searchRange); - H0(" --[no-]rect Enable rectangular motion partitions Nx2N and 2NxN. Default %s\n", OPT(param->bEnableRectInter)); - H0(" --[no-]amp Enable asymmetric motion partitions, requires --rect. Default %s\n", OPT(param->bEnableAMP)); - H0(" --[no-]limit-modes Limit rectangular and asymmetric motion predictions. Default %d\n", param->limitModes); - H1(" --[no-]temporal-mvp Enable temporal MV predictors. Default %s\n", OPT(param->bEnableTemporalMvp)); - H1(" --[no-]hme Enable Hierarchical Motion Estimation. Default %s\n", OPT(param->bEnableHME)); - H1(" --hme-search <string> Motion search-method for HME L0,L1 and L2. Default(L0,L1,L2) is %d,%d,%d\n", param->hmeSearchMethod[0], param->hmeSearchMethod[1], param->hmeSearchMethod[2]);
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS)
is an
openSUSE project
.