Projects
Essentials
x265
Sign Up
Log In
Username
Password
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
Expand all
Collapse all
Changes of Revision 39
View file
x265.changes
Changed
@@ -1,4 +1,40 @@ ------------------------------------------------------------------- +Mon Jun 1 17:51:22 UTC 2020 - Luigi Baldoni <aloisio@gmx.com> + +- Update to version 3.4 + New features: + * Edge-aware quadtree partitioning to terminate CU depth + recursion based on edge information. --rskip level 2 enables + the feature and --rskip-edge-threshold denotes the minimum + expected edge-density percentage within the CU, below which + the recursion is skipped. Experimental feature. + * Application-level feature --abr-ladder for automating + efficient ABR ladder generation. Shows ~65% savings in the + over-all turn-around time required for the generation of a + typical Apple HLS ladder in Intel(R) Xeon(R) Platinum 8280 + CPU @ 2.70GHz over a sequential ABR-ladder generation + approach that leverages save-load architecture. + Enhancements to existing features: + * Improved efficiency in 2-pass rate-control algorithm. The + savings in the bitrate is ~1.72% with visual improvement in + quality in the initial 1-2 secs. + Encoder enhancements: + * Faster ARM64 encodes enabled by ASM contributions from + Huawei. The speed-up over no-asm version for 1080p encodes @ + medium preset is ~15% in a 16 core H/W. + * Strict VBV conformance in zone encoding. + Bug fixes: + * Multi-pass encode failures with --frame-dup. + * Corrupted bitstreams with --hist-scenecut when input depth + and internal bit-depth differ. + * Incorrect analysis propagation in multi-level save-load + architecture. + * Failure in detecting NUMA packages installed in non-standard + directories. + +- Refreshed arm.patch + +------------------------------------------------------------------- Sat Mar 28 14:28:56 UTC 2020 - Luigi Baldoni <aloisio@gmx.com> - Update to version 3.3
View file
x265.spec
Changed
@@ -17,11 +17,11 @@ # -%define sover 188 +%define sover 192 %define libname lib%{name} %define libsoname %{libname}-%{sover} Name: x265 -Version: 3.3 +Version: 3.4 Release: 0 Summary: A free h265/HEVC encoder - encoder binary License: GPL-2.0-or-later @@ -67,7 +67,6 @@ %patch0 -p1 %patch1 -p1 %patch2 -p1 - sed -i -e "s/0.0/%{sover}.0/g" source/cmake/version.cmake
View file
arm.patch
Changed
@@ -1,8 +1,8 @@ -Index: x265_2.2/source/CMakeLists.txt +Index: x265_3.4/source/CMakeLists.txt =================================================================== ---- x265_2.2.orig/source/CMakeLists.txt -+++ x265_2.2/source/CMakeLists.txt -@@ -65,15 +65,22 @@ elseif(POWERMATCH GREATER "-1") +--- x265_3.4.orig/source/CMakeLists.txt ++++ x265_3.4/source/CMakeLists.txt +@@ -64,26 +64,26 @@ elseif(POWERMATCH GREATER "-1") add_definitions(-DPPC64=1) message(STATUS "Detected POWER PPC64 target processor") endif() @@ -12,41 +12,62 @@ - else() - set(CROSS_COMPILE_ARM 0) - endif() -- message(STATUS "Detected ARM target processor") - set(ARM 1) -- add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=1) +- if("${CMAKE_SIZEOF_VOID_P}" MATCHES 8) +- message(STATUS "Detected ARM64 target processor") +- set(ARM64 1) +- add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=1 -DHAVE_ARMV6=0) +- else() +- message(STATUS "Detected ARM target processor") +- add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=0 -DHAVE_ARMV6=1) +- endif() +elseif(${SYSPROC} MATCHES "armv5.*") + message(STATUS "Detected ARMV5 system processor") + set(ARMV5 1) -+ add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=0 -DHAVE_NEON=0) ++ add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=0 -DHAVE_ARMV6=0 -DHAVE_NEON=0) +elseif(${SYSPROC} STREQUAL "armv6l") + message(STATUS "Detected ARMV6 system processor") + set(ARMV6 1) -+ add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=1 -DHAVE_NEON=0) ++ add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=0 -DHAVE_ARMV6=1 -DHAVE_NEON=0) +elseif(${SYSPROC} STREQUAL "armv7l") + message(STATUS "Detected ARMV7 system processor") + set(ARMV7 1) -+ add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=1 -DHAVE_NEON=0) ++ add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=0 -DHAVE_ARMV6=1 -DHAVE_NEON=0) +elseif(${SYSPROC} STREQUAL "aarch64") + message(STATUS "Detected AArch64 system processor") + set(ARMV7 1) -+ add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=1 -DHAVE_NEON=0) ++ 
add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=1 -DHAVE_ARMV6=0 -DHAVE_NEON=0) else() message(STATUS "CMAKE_SYSTEM_PROCESSOR value `${CMAKE_SYSTEM_PROCESSOR}` is unknown") message(STATUS "Please add this value near ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE}") -@@ -208,18 +215,9 @@ if(GCC) + endif() +- + if(UNIX) + list(APPEND PLATFORM_LIBS pthread) + find_library(LIBRT rt) +@@ -238,28 +238,9 @@ if(GCC) endif() endif() endif() - if(ARM AND CROSS_COMPILE_ARM) -- set(ARM_ARGS -march=armv6 -mfloat-abi=soft -mfpu=vfp -marm -fPIC) +- if(ARM64) +- set(ARM_ARGS -fPIC) +- else() +- set(ARM_ARGS -march=armv6 -mfloat-abi=soft -mfpu=vfp -marm -fPIC) +- endif() +- message(STATUS "cross compile arm") - elseif(ARM) -- find_package(Neon) -- if(CPU_HAS_NEON) -- set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=neon -marm -fPIC) +- if(ARM64) +- set(ARM_ARGS -fPIC) - add_definitions(-DHAVE_NEON) - else() -- set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=vfp -marm) +- find_package(Neon) +- if(CPU_HAS_NEON) +- set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=neon -marm -fPIC) +- add_definitions(-DHAVE_NEON) +- else() +- set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=vfp -marm) +- endif() - endif() + if(ARMV7) + add_definitions(-fPIC) @@ -55,11 +76,11 @@ if(FPROFILE_GENERATE) if(INTEL_CXX) add_definitions(-prof-gen -prof-dir="${CMAKE_CURRENT_BINARY_DIR}") -Index: x265_2.2/source/common/cpu.cpp +Index: x265_3.4/source/common/cpu.cpp =================================================================== ---- x265_2.2.orig/source/common/cpu.cpp -+++ x265_2.2/source/common/cpu.cpp -@@ -37,7 +37,7 @@ +--- x265_3.4.orig/source/common/cpu.cpp ++++ x265_3.4/source/common/cpu.cpp +@@ -39,7 +39,7 @@ #include <machine/cpu.h> #endif @@ -68,7 +89,7 @@ #include <signal.h> #include <setjmp.h> static sigjmp_buf jmpbuf; -@@ -344,7 +344,6 @@ uint32_t cpu_detect(void) +@@ -350,7 +350,6 @@ uint32_t cpu_detect(bool benableavx512) } canjump = 1; @@ -76,7 +97,7 @@ canjump = 0; signal(SIGILL, 
oldsig); #endif // if !HAVE_NEON -@@ -360,7 +359,7 @@ uint32_t cpu_detect(void) +@@ -366,7 +365,7 @@ uint32_t cpu_detect(bool benableavx512) // which may result in incorrect detection and the counters stuck enabled. // right now Apple does not seem to support performance counters for this test #ifndef __MACH__ @@ -84,4 +105,4 @@ + //flags |= PFX(cpu_fast_neon_mrc_test)() ? X265_CPU_FAST_NEON_MRC : 0; #endif // TODO: write dual issue test? currently it's A8 (dual issue) vs. A9 (fast mrc) - #endif // if HAVE_ARMV6 + #elif X265_ARCH_ARM64
View file
baselibs.conf
Changed
@@ -1,1 +1,1 @@ -libx265-179 +libx265-192
View file
x265_3.3.tar.gz/.hg_archival.txt -> x265_3.4.tar.gz/.hg_archival.txt
Changed
@@ -1,5 +1,4 @@ repo: 09fe40627f03a0f9c3e6ac78b22ac93da23f9fdf -node: f94b0d32737d40b2b9a9d74df57fee45e6be5cb0 -branch: Release_3.3 -latesttag: 3.3 -latesttagdistance: 1 +node: 2a65b720985096bcb1664f7cb05c3d04aeb576f5 +branch: Release_3.4 +tag: 3.4
View file
x265_3.3.tar.gz/.hgtags -> x265_3.4.tar.gz/.hgtags
Changed
@@ -40,3 +40,4 @@ 5ee3593ebd82b4d8957909bbc1b68b99b59ba773 3.3_RC1 96a10df63c0b778b480330bdf3be8da7db8a5fb1 3.3_RC2 057215961bc4b51b6260a584ff3d506e6d65cfd6 3.3 +ee92f36782800f145970131e01c79955a3ed5c10 3.4_RC1
View file
x265_3.4.tar.gz/build/aarch64-linux/crosscompile.cmake
Added
@@ -0,0 +1,15 @@ +# CMake toolchain file for cross compiling x265 for aarch64 +# This feature is only supported as experimental. Use with caution. +# Please report bugs on bitbucket +# Run cmake with: cmake -DCMAKE_TOOLCHAIN_FILE=crosscompile.cmake -G "Unix Makefiles" ../../source && ccmake ../../source + +set(CROSS_COMPILE_ARM 1) +set(CMAKE_SYSTEM_NAME Linux) +set(CMAKE_SYSTEM_PROCESSOR aarch64) + +# specify the cross compiler +set(CMAKE_C_COMPILER aarch64-linux-gnu-gcc) +set(CMAKE_CXX_COMPILER aarch64-linux-gnu-g++) + +# specify the target environment +SET(CMAKE_FIND_ROOT_PATH /usr/aarch64-linux-gnu)
View file
x265_3.4.tar.gz/build/aarch64-linux/make-Makefiles.bash
Added
@@ -0,0 +1,4 @@ +#!/bin/bash +# Run this from within a bash shell + +cmake -DCMAKE_TOOLCHAIN_FILE="crosscompile.cmake" -G "Unix Makefiles" ../../source && ccmake ../../source
View file
x265_3.3.tar.gz/doc/reST/cli.rst -> x265_3.4.tar.gz/doc/reST/cli.rst
Changed
@@ -107,6 +107,9 @@ **BufferFillFinal** Buffer bits available after removing the frame out of CPB. + **UnclippedBufferFillFinal** Unclipped buffer bits available after removing the frame + out of CPB only used for csv logging purpose. + **Latency** Latency in terms of number of frames between when the frame was given in and when the frame is given out. @@ -842,15 +845,31 @@ Measure 2Nx2N merge candidates first; if no residual is found, additional modes at that depth are not analysed. Default disabled -.. option:: --rskip, --no-rskip +.. option:: --rskip <0|1|2> + + This option determines early exit from CU depth recursion in modes 1 and 2. When a skip CU is + found, additional heuristics (depending on the RD level and rskip mode) are used to decide whether + to terminate recursion. The following table summarizes the behavior. + + +----------+------------+----------------------------------------------------------------+ + | RD Level | Rskip Mode | Skip Recursion Heuristic | + +==========+============+================================================================+ + | 0 - 4 | 1 | Neighbour costs and CU homogenity. | + +----------+------------+----------------------------------------------------------------+ + | 5 - 6 | 1 | Comparison with inter2Nx2N. | + +----------+------------+----------------------------------------------------------------+ + | 0 - 6 | 2 | CU edge density. | + +----------+------------+----------------------------------------------------------------+ + + Provides minimal quality degradation at good performance gains for non-zero modes. + :option:`--rskip mode 0` means disabled. Default: 1, disabled when :option:`--tune grain` is used. + This is a integer value representing the edge-density percentage within the CU. Internally normalized to a number between 0.0 to 1.0 in x265. + Recommended low thresholds for slow encodes and high for fast encodes. - This option determines early exit from CU depth recursion. 
When a skip CU is - found, additional heuristics (depending on rd-level) are used to decide whether - to terminate recursion. In rdlevels 5 and 6, comparison with inter2Nx2N is used, - while at rdlevels 4 and neighbour costs are used to skip recursion. - Provides minimal quality degradation at good performance gains when enabled. +.. option:: --rskip-edge-threshold <0..100> - Default: enabled, disabled for :option:`--tune grain` + Denotes the minimum expected edge-density percentage within the CU, below which the recursion is skipped. + Default: 5, requires :option:`--rskip mode 2` to be enabled. .. option:: --splitrd-skip, --no-splitrd-skip @@ -2501,6 +2520,28 @@ --recon-y4m-exec "ffplay -i pipe:0 -autoexit" **CLI ONLY** + +ABR-ladder Options +================== + +.. option:: --abr-ladder <filename> + + File containing the encoder configurations to generate ABR ladder. + The format of each line is: + + **<encID:reuse-level:refID> <CLI>** + + where, encID indicates the unique name given to the encode, refID indicates + the name of the encode from which analysis info has to be re-used ( set to 'nil' + if analysis reuse isn't preferred ), and reuse-level indicates the level ( :option:`--analysis-load-reuse-level`) + at which analysis info has to be reused. + + A sample config file is available in `the downloads page <https://bitbucket.org/multicoreware/x265/downloads/Sample_ABR_ladder_config>`_ + + Default: Disabled ( Conventional single encode generation ). Experimental feature. + + **CLI ONLY** + SVT-HEVC Encoder Options ========================
View file
x265_3.3.tar.gz/doc/reST/releasenotes.rst -> x265_3.4.tar.gz/doc/reST/releasenotes.rst
Changed
@@ -2,6 +2,32 @@ Release Notes ************* +Version 3.4 +=========== + +Release date - 29th May, 2020. + +New features +------------ +1. **Edge-aware quadtree partitioning** to terminate CU depth recursion based on edge information. :option:`--rskip` level 2 enables the feature and :option:`--rskip-edge-threshold` denotes the minimum expected edge-density percentage within the CU, below which the recursion is skipped. Experimental feature. +2. Application-level feature :option:`--abr-ladder` for automating efficient ABR ladder generation. Shows ~65% savings in the over-all turn-around time required for the generation of a typical Apple HLS ladder in Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz over a sequential ABR-ladder generation approach that leverages save-load architecture. + +Enhancements to existing features +--------------------------------- +1. Improved efficiency in 2-pass rate-control algorithm. The savings in the bitrate is ~1.72% with visual improvement in quality in the initial 1-2 secs. + +Encoder enhancements +-------------------- +1. Faster ARM64 encodes enabled by ASM contributions from Huawei. The speed-up over no-asm version for 1080p encodes @ medium preset is ~15% in a 16 core H/W. +2. Strict VBV conformance in zone encoding. + +Bug fixes +--------- +1. Multi-pass encode failures with :option:`--frame-dup`. +2. Corrupted bitstreams with :option:`--hist-scenecut` when input depth and internal bit-depth differ. +3. Incorrect analysis propagation in multi-level save-load architecture. +4. Failure in detecting NUMA packages installed in non-standard directories. + Version 3.3 ===========
View file
x265_3.3.tar.gz/source/CMakeLists.txt -> x265_3.4.tar.gz/source/CMakeLists.txt
Changed
@@ -29,7 +29,7 @@ option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF) mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD) # X265_BUILD must be incremented each time the public API is changed -set(X265_BUILD 188) +set(X265_BUILD 192) configure_file("${PROJECT_SOURCE_DIR}/x265.def.in" "${PROJECT_BINARY_DIR}/x265.def") configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in" @@ -40,7 +40,7 @@ # System architecture detection string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" SYSPROC) set(X86_ALIASES x86 i386 i686 x86_64 amd64) -set(ARM_ALIASES armv6l armv7l) +set(ARM_ALIASES armv6l armv7l aarch64) list(FIND X86_ALIASES "${SYSPROC}" X86MATCH) list(FIND ARM_ALIASES "${SYSPROC}" ARMMATCH) set(POWER_ALIASES ppc64 ppc64le) @@ -70,9 +70,15 @@ else() set(CROSS_COMPILE_ARM 0) endif() - message(STATUS "Detected ARM target processor") set(ARM 1) - add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=1) + if("${CMAKE_SIZEOF_VOID_P}" MATCHES 8) + message(STATUS "Detected ARM64 target processor") + set(ARM64 1) + add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=1 -DHAVE_ARMV6=0) + else() + message(STATUS "Detected ARM target processor") + add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=0 -DHAVE_ARMV6=1) + endif() else() message(STATUS "CMAKE_SYSTEM_PROCESSOR value `${CMAKE_SYSTEM_PROCESSOR}` is unknown") message(STATUS "Please add this value near ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE}") @@ -95,6 +101,8 @@ if(NUMA_FOUND) link_directories(${NUMA_LIBRARY_DIR}) list(APPEND CMAKE_REQUIRED_LIBRARIES numa) + list(APPEND CMAKE_REQUIRED_INCLUDES ${NUMA_INCLUDE_DIR}) + list(APPEND CMAKE_REQUIRED_LINK_OPTIONS "-L${NUMA_LIBRARY_DIR}") check_symbol_exists(numa_node_of_cpu numa.h NUMA_V2) if(NUMA_V2) add_definitions(-DHAVE_LIBNUMA) @@ -231,14 +239,24 @@ endif() endif() if(ARM AND CROSS_COMPILE_ARM) - set(ARM_ARGS -march=armv6 -mfloat-abi=soft -mfpu=vfp -marm -fPIC) + if(ARM64) + set(ARM_ARGS -fPIC) + else() + set(ARM_ARGS -march=armv6 -mfloat-abi=soft 
-mfpu=vfp -marm -fPIC) + endif() + message(STATUS "cross compile arm") elseif(ARM) - find_package(Neon) - if(CPU_HAS_NEON) - set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=neon -marm -fPIC) + if(ARM64) + set(ARM_ARGS -fPIC) add_definitions(-DHAVE_NEON) else() - set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=vfp -marm) + find_package(Neon) + if(CPU_HAS_NEON) + set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=neon -marm -fPIC) + add_definitions(-DHAVE_NEON) + else() + set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=vfp -marm) + endif() endif() endif() add_definitions(${ARM_ARGS}) @@ -518,7 +536,11 @@ # compile ARM arch asm files here enable_language(ASM) foreach(ASM ${ARM_ASMS}) - set(ASM_SRC ${CMAKE_CURRENT_SOURCE_DIR}/common/arm/${ASM}) + if(ARM64) + set(ASM_SRC ${CMAKE_CURRENT_SOURCE_DIR}/common/aarch64/${ASM}) + else() + set(ASM_SRC ${CMAKE_CURRENT_SOURCE_DIR}/common/arm/${ASM}) + endif() list(APPEND ASM_SRCS ${ASM_SRC}) list(APPEND ASM_OBJS ${ASM}.${SUFFIX}) add_custom_command( @@ -725,16 +747,16 @@ # Xcode seems unable to link the CLI with libs, so link as one targget if(ENABLE_HDR10_PLUS) add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${GETOPT} - x265.cpp x265.h x265cli.h + x265.cpp x265.h x265cli.cpp x265cli.h abrEncApp.cpp abrEncApp.h $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> $<TARGET_OBJECTS:dynamicHDR10> ${ASM_OBJS}) else() add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${GETOPT} - x265.cpp x265.h x265cli.h + x265.cpp x265.h x265cli.cpp x265cli.h abrEncApp.cpp abrEncApp.h $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> ${ASM_OBJS}) endif() else() add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${GETOPT} ${X265_RC_FILE} - ${ExportDefs} x265.cpp x265.h x265cli.h) + ${ExportDefs} x265.cpp x265.h x265cli.cpp x265cli.h abrEncApp.cpp abrEncApp.h) if(WIN32 OR NOT ENABLE_SHARED OR INTEL_CXX) # The CLI cannot link to the shared library on Windows, it # requires internal APIs not exported from the DLL
View file
x265_3.4.tar.gz/source/abrEncApp.cpp
Added
@@ -0,0 +1,1108 @@ +/***************************************************************************** +* Copyright (C) 2013-2020 MulticoreWare, Inc +* +* Authors: Pooja Venkatesan <pooja@multicorewareinc.com> +* Aruna Matheswaran <aruna@multicorewareinc.com> +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software +* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. +* +* This program is also available under a commercial proprietary license. +* For more information, contact us at license @ x265.com. +*****************************************************************************/ + +#include "abrEncApp.h" +#include "mv.h" +#include "slice.h" +#include "param.h" + +#include <signal.h> +#include <errno.h> + +#include <queue> + +using namespace X265_NS; + +/* Ctrl-C handler */ +static volatile sig_atomic_t b_ctrl_c /* = 0 */; +static void sigint_handler(int) +{ + b_ctrl_c = 1; +} + +namespace X265_NS { + // private namespace +#define X265_INPUT_QUEUE_SIZE 250 + + AbrEncoder::AbrEncoder(CLIOptions cliopt[], uint8_t numEncodes, int &ret) + { + m_numEncodes = numEncodes; + m_numActiveEncodes.set(numEncodes); + m_queueSize = (numEncodes > 1) ? 
X265_INPUT_QUEUE_SIZE : 1; + m_passEnc = X265_MALLOC(PassEncoder*, m_numEncodes); + + for (uint8_t i = 0; i < m_numEncodes; i++) + { + m_passEnc[i] = new PassEncoder(i, cliopt[i], this); + if (!m_passEnc[i]) + { + x265_log(NULL, X265_LOG_ERROR, "Unable to allocate memory for passEncoder\n"); + ret = 4; + } + m_passEnc[i]->init(ret); + } + + if (!allocBuffers()) + { + x265_log(NULL, X265_LOG_ERROR, "Unable to allocate memory for buffers\n"); + ret = 4; + } + + /* start passEncoder worker threads */ + for (uint8_t pass = 0; pass < m_numEncodes; pass++) + m_passEnc[pass]->startThreads(); + } + + bool AbrEncoder::allocBuffers() + { + m_inputPicBuffer = X265_MALLOC(x265_picture**, m_numEncodes); + m_analysisBuffer = X265_MALLOC(x265_analysis_data*, m_numEncodes); + + m_picWriteCnt = new ThreadSafeInteger[m_numEncodes]; + m_picReadCnt = new ThreadSafeInteger[m_numEncodes]; + m_analysisWriteCnt = new ThreadSafeInteger[m_numEncodes]; + m_analysisReadCnt = new ThreadSafeInteger[m_numEncodes]; + + m_picIdxReadCnt = X265_MALLOC(ThreadSafeInteger*, m_numEncodes); + m_analysisWrite = X265_MALLOC(ThreadSafeInteger*, m_numEncodes); + m_analysisRead = X265_MALLOC(ThreadSafeInteger*, m_numEncodes); + m_readFlag = X265_MALLOC(int*, m_numEncodes); + + for (uint8_t pass = 0; pass < m_numEncodes; pass++) + { + m_inputPicBuffer[pass] = X265_MALLOC(x265_picture*, m_queueSize); + for (uint32_t idx = 0; idx < m_queueSize; idx++) + { + m_inputPicBuffer[pass][idx] = x265_picture_alloc(); + x265_picture_init(m_passEnc[pass]->m_param, m_inputPicBuffer[pass][idx]); + } + + m_analysisBuffer[pass] = X265_MALLOC(x265_analysis_data, m_queueSize); + m_picIdxReadCnt[pass] = new ThreadSafeInteger[m_queueSize]; + m_analysisWrite[pass] = new ThreadSafeInteger[m_queueSize]; + m_analysisRead[pass] = new ThreadSafeInteger[m_queueSize]; + m_readFlag[pass] = X265_MALLOC(int, m_queueSize); + } + return true; + } + + void AbrEncoder::destroy() + { + x265_cleanup(); /* Free library singletons */ + for (uint8_t 
pass = 0; pass < m_numEncodes; pass++) + { + for (uint32_t index = 0; index < m_queueSize; index++) + { + X265_FREE(m_inputPicBuffer[pass][index]->planes[0]); + x265_picture_free(m_inputPicBuffer[pass][index]); + } + + X265_FREE(m_inputPicBuffer[pass]); + X265_FREE(m_analysisBuffer[pass]); + X265_FREE(m_readFlag[pass]); + delete[] m_picIdxReadCnt[pass]; + delete[] m_analysisWrite[pass]; + delete[] m_analysisRead[pass]; + m_passEnc[pass]->destroy(); + delete m_passEnc[pass]; + } + X265_FREE(m_inputPicBuffer); + X265_FREE(m_analysisBuffer); + X265_FREE(m_readFlag); + + delete[] m_picWriteCnt; + delete[] m_picReadCnt; + delete[] m_analysisWriteCnt; + delete[] m_analysisReadCnt; + + X265_FREE(m_picIdxReadCnt); + X265_FREE(m_analysisWrite); + X265_FREE(m_analysisRead); + + X265_FREE(m_passEnc); + } + + PassEncoder::PassEncoder(uint32_t id, CLIOptions cliopt, AbrEncoder *parent) + { + m_id = id; + m_cliopt = cliopt; + m_parent = parent; + if(!(m_cliopt.enableScaler && m_id)) + m_input = m_cliopt.input; + m_param = cliopt.param; + m_inputOver = false; + m_lastIdx = -1; + m_encoder = NULL; + m_scaler = NULL; + m_reader = NULL; + m_ret = 0; + } + + int PassEncoder::init(int &result) + { + if (m_parent->m_numEncodes > 1) + setReuseLevel(); + + if (!(m_cliopt.enableScaler && m_id)) + m_reader = new Reader(m_id, this); + else + { + VideoDesc *src = NULL, *dst = NULL; + dst = new VideoDesc(m_param->sourceWidth, m_param->sourceHeight, m_param->internalCsp, m_param->internalBitDepth); + int dstW = m_parent->m_passEnc[m_id - 1]->m_param->sourceWidth; + int dstH = m_parent->m_passEnc[m_id - 1]->m_param->sourceHeight; + src = new VideoDesc(dstW, dstH, m_param->internalCsp, m_param->internalBitDepth); + if (src != NULL && dst != NULL) + { + m_scaler = new Scaler(0, 1, m_id, src, dst, this); + if (!m_scaler) + { + x265_log(m_param, X265_LOG_ERROR, "\n MALLOC failure in Scaler"); + result = 4; + } + } + } + + /* note: we could try to acquire a different libx265 API here based on + * 
the profile found during option parsing, but it must be done before + * opening an encoder */ + + if (m_param) + m_encoder = m_cliopt.api->encoder_open(m_param); + if (!m_encoder) + { + x265_log(NULL, X265_LOG_ERROR, "x265_encoder_open() failed for Enc, \n"); + m_ret = 2; + return -1; + } + + /* get the encoder parameters post-initialization */ + m_cliopt.api->encoder_parameters(m_encoder, m_param); + + return 1; + } + + void PassEncoder::setReuseLevel() + { + uint32_t r, padh = 0, padw = 0; + + m_param->confWinBottomOffset = m_param->confWinRightOffset = 0; + + m_param->analysisLoadReuseLevel = m_cliopt.loadLevel; + m_param->analysisSaveReuseLevel = m_cliopt.saveLevel; + m_param->analysisSave = m_cliopt.saveLevel ? "save.dat" : NULL; + m_param->analysisLoad = m_cliopt.loadLevel ? "load.dat" : NULL; + m_param->bUseAnalysisFile = 0; + + if (m_cliopt.loadLevel) + { + x265_param *refParam = m_parent->m_passEnc[m_cliopt.refId]->m_param; + + if (m_param->sourceHeight == (refParam->sourceHeight - refParam->confWinBottomOffset) && + m_param->sourceWidth == (refParam->sourceWidth - refParam->confWinRightOffset)) + { + m_parent->m_passEnc[m_id]->m_param->confWinBottomOffset = refParam->confWinBottomOffset; + m_parent->m_passEnc[m_id]->m_param->confWinRightOffset = refParam->confWinRightOffset; + } + else + { + int srcH = refParam->sourceHeight - refParam->confWinBottomOffset; + int srcW = refParam->sourceWidth - refParam->confWinRightOffset; + + double scaleFactorH = double(m_param->sourceHeight / srcH); + double scaleFactorW = double(m_param->sourceWidth / srcW); + + int absScaleFactorH = (int)(10 * scaleFactorH + 0.5); + int absScaleFactorW = (int)(10 * scaleFactorW + 0.5); + + if (absScaleFactorH == 20 && absScaleFactorW == 20) + { + m_param->scaleFactor = 2; + + m_parent->m_passEnc[m_id]->m_param->confWinBottomOffset = refParam->confWinBottomOffset * 2; + m_parent->m_passEnc[m_id]->m_param->confWinRightOffset = refParam->confWinRightOffset * 2; + + } + } + } + + int h = 
m_param->sourceHeight + m_param->confWinBottomOffset; + int w = m_param->sourceWidth + m_param->confWinRightOffset; + if (h & (m_param->minCUSize - 1)) + { + r = h & (m_param->minCUSize - 1); + padh = m_param->minCUSize - r; + m_param->confWinBottomOffset += padh; + + } + + if (w & (m_param->minCUSize - 1)) + { + r = w & (m_param->minCUSize - 1); + padw = m_param->minCUSize - r; + m_param->confWinRightOffset += padw; + } + } + + void PassEncoder::startThreads() + { + /* Start slave worker threads */ + m_threadActive = true; + start(); + /* Start reader threads*/ + if (m_reader != NULL) + { + m_reader->m_threadActive = true; + m_reader->start(); + } + /* Start scaling worker threads */ + if (m_scaler != NULL) + { + m_scaler->m_threadActive = true; + m_scaler->start(); + } + } + + void PassEncoder::copyInfo(x265_analysis_data * src) + { + + uint32_t written = m_parent->m_analysisWriteCnt[m_id].get(); + + int index = written % m_parent->m_queueSize; + //If all streams have read analysis data, reuse that position in Queue + + int read = m_parent->m_analysisRead[m_id][index].get(); + int write = m_parent->m_analysisWrite[m_id][index].get(); + + int overwrite = written / m_parent->m_queueSize; + bool emptyIdxFound = 0; + while (!emptyIdxFound && overwrite) + { + for (uint32_t i = 0; i < m_parent->m_queueSize; i++) + { + read = m_parent->m_analysisRead[m_id][i].get(); + write = m_parent->m_analysisWrite[m_id][i].get(); + write *= m_cliopt.numRefs; + + if (read == write) + { + index = i; + emptyIdxFound = 1; + } + } + } + + x265_analysis_data *m_analysisInfo = &m_parent->m_analysisBuffer[m_id][index]; + + memcpy(m_analysisInfo, src, sizeof(x265_analysis_data)); + x265_alloc_analysis_data(m_param, m_analysisInfo); + + bool isVbv = m_param->rc.vbvBufferSize && m_param->rc.vbvMaxBitrate; + if (m_param->bDisableLookahead && isVbv) + { + memcpy(m_analysisInfo->lookahead.intraSatdForVbv, src->lookahead.intraSatdForVbv, src->numCuInHeight * sizeof(uint32_t)); + 
memcpy(m_analysisInfo->lookahead.satdForVbv, src->lookahead.satdForVbv, src->numCuInHeight * sizeof(uint32_t)); + memcpy(m_analysisInfo->lookahead.intraVbvCost, src->lookahead.intraVbvCost, src->numCUsInFrame * sizeof(uint32_t)); + memcpy(m_analysisInfo->lookahead.vbvCost, src->lookahead.vbvCost, src->numCUsInFrame * sizeof(uint32_t)); + } + + if (src->sliceType == X265_TYPE_IDR || src->sliceType == X265_TYPE_I) + { + if (m_param->analysisSaveReuseLevel < 2) + goto ret; + x265_analysis_intra_data *intraDst, *intraSrc; + intraDst = (x265_analysis_intra_data*)m_analysisInfo->intraData; + intraSrc = (x265_analysis_intra_data*)src->intraData; + memcpy(intraDst->depth, intraSrc->depth, sizeof(uint8_t) * src->depthBytes); + memcpy(intraDst->modes, intraSrc->modes, sizeof(uint8_t) * src->numCUsInFrame * src->numPartitions); + memcpy(intraDst->partSizes, intraSrc->partSizes, sizeof(char) * src->depthBytes); + memcpy(intraDst->chromaModes, intraSrc->chromaModes, sizeof(uint8_t) * src->depthBytes); + if (m_param->rc.cuTree) + memcpy(intraDst->cuQPOff, intraSrc->cuQPOff, sizeof(int8_t) * src->depthBytes); + } + else + { + bool bIntraInInter = (src->sliceType == X265_TYPE_P || m_param->bIntraInBFrames); + int numDir = src->sliceType == X265_TYPE_P ? 
1 : 2; + memcpy(m_analysisInfo->wt, src->wt, sizeof(WeightParam) * 3 * numDir); + if (m_param->analysisSaveReuseLevel < 2) + goto ret; + x265_analysis_inter_data *interDst, *interSrc; + interDst = (x265_analysis_inter_data*)m_analysisInfo->interData; + interSrc = (x265_analysis_inter_data*)src->interData; + memcpy(interDst->depth, interSrc->depth, sizeof(uint8_t) * src->depthBytes); + memcpy(interDst->modes, interSrc->modes, sizeof(uint8_t) * src->depthBytes); + if (m_param->rc.cuTree) + memcpy(interDst->cuQPOff, interSrc->cuQPOff, sizeof(int8_t) * src->depthBytes); + if (m_param->analysisSaveReuseLevel > 4) + { + memcpy(interDst->partSize, interSrc->partSize, sizeof(uint8_t) * src->depthBytes); + memcpy(interDst->mergeFlag, interSrc->mergeFlag, sizeof(uint8_t) * src->depthBytes); + if (m_param->analysisSaveReuseLevel == 10) + { + memcpy(interDst->interDir, interSrc->interDir, sizeof(uint8_t) * src->depthBytes); + for (int dir = 0; dir < numDir; dir++) + { + memcpy(interDst->mvpIdx[dir], interSrc->mvpIdx[dir], sizeof(uint8_t) * src->depthBytes); + memcpy(interDst->refIdx[dir], interSrc->refIdx[dir], sizeof(int8_t) * src->depthBytes); + memcpy(interDst->mv[dir], interSrc->mv[dir], sizeof(MV) * src->depthBytes); + } + if (bIntraInInter) + { + x265_analysis_intra_data *intraDst = (x265_analysis_intra_data*)m_analysisInfo->intraData; + x265_analysis_intra_data *intraSrc = (x265_analysis_intra_data*)src->intraData; + memcpy(intraDst->modes, intraSrc->modes, sizeof(uint8_t) * src->numPartitions * src->numCUsInFrame); + memcpy(intraDst->chromaModes, intraSrc->chromaModes, sizeof(uint8_t) * src->depthBytes); + } + } + } + if (m_param->analysisSaveReuseLevel != 10) + memcpy(interDst->ref, interSrc->ref, sizeof(int32_t) * src->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir); + } + +ret: + //increment analysis Write counter + m_parent->m_analysisWriteCnt[m_id].incr(); + m_parent->m_analysisWrite[m_id][index].incr(); + return; + } + + + bool 
PassEncoder::readPicture(x265_picture *dstPic) + { + /*Check and wait if there any input frames to read*/ + int ipread = m_parent->m_picReadCnt[m_id].get(); + int ipwrite = m_parent->m_picWriteCnt[m_id].get(); + + bool isAbrLoad = m_cliopt.loadLevel && (m_parent->m_numEncodes > 1); + while (!m_inputOver && (ipread == ipwrite)) + { + ipwrite = m_parent->m_picWriteCnt[m_id].waitForChange(ipwrite); + } + + if (m_threadActive && ipread < ipwrite) + { + /*Get input index to read from inputQueue. If doesn't need analysis info, it need not wait to fetch poc from analysisQueue*/ + int readPos = ipread % m_parent->m_queueSize; + x265_analysis_data* analysisData = 0; + + if (isAbrLoad) + { + /*If stream is master of each slave pass, then fetch analysis data from prev pass*/ + int analysisQId = m_cliopt.refId; + /*Check and wait if there any analysis Data to read*/ + int analysisWrite = m_parent->m_analysisWriteCnt[analysisQId].get(); + int written = analysisWrite * m_parent->m_passEnc[analysisQId]->m_cliopt.numRefs; + int analysisRead = m_parent->m_analysisReadCnt[analysisQId].get(); + + while (m_threadActive && written == analysisRead) + { + analysisWrite = m_parent->m_analysisWriteCnt[analysisQId].waitForChange(analysisWrite); + written = analysisWrite * m_parent->m_passEnc[analysisQId]->m_cliopt.numRefs; + } + + if (analysisRead < written) + { + int analysisIdx = 0; + if (!m_param->bDisableLookahead) + { + bool analysisdRead = false; + while ((analysisRead < written) && !analysisdRead) + { + while (analysisWrite < ipread) + { + analysisWrite = m_parent->m_analysisWriteCnt[analysisQId].waitForChange(analysisWrite); + written = analysisWrite * m_parent->m_passEnc[analysisQId]->m_cliopt.numRefs; + } + for (uint32_t i = 0; i < m_parent->m_queueSize; i++) + { + analysisData = &m_parent->m_analysisBuffer[analysisQId][i]; + int read = m_parent->m_analysisRead[analysisQId][i].get(); + int write = m_parent->m_analysisWrite[analysisQId][i].get() * 
m_parent->m_passEnc[analysisQId]->m_cliopt.numRefs; + if ((analysisData->poc == (uint32_t)(ipread)) && (read < write)) + { + analysisIdx = i; + analysisdRead = true; + break; + } + } + } + } + else + { + analysisIdx = analysisRead % m_parent->m_queueSize; + analysisData = &m_parent->m_analysisBuffer[analysisQId][analysisIdx]; + readPos = analysisData->poc % m_parent->m_queueSize; + while ((ipwrite < readPos) || ((ipwrite - 1) < (int)analysisData->poc)) + { + ipwrite = m_parent->m_picWriteCnt[m_id].waitForChange(ipwrite); + } + } + + m_lastIdx = analysisIdx; + } + else + return false; + } + + + x265_picture *srcPic = (x265_picture*)(m_parent->m_inputPicBuffer[m_id][readPos]); + + x265_picture *pic = (x265_picture*)(dstPic); + pic->colorSpace = srcPic->colorSpace; + pic->bitDepth = srcPic->bitDepth; + pic->framesize = srcPic->framesize; + pic->height = srcPic->height; + pic->pts = srcPic->pts; + pic->dts = srcPic->dts; + pic->reorderedPts = srcPic->reorderedPts; + pic->width = srcPic->width; + pic->analysisData = srcPic->analysisData; + pic->userSEI = srcPic->userSEI; + pic->stride[0] = srcPic->stride[0]; + pic->stride[1] = srcPic->stride[1]; + pic->stride[2] = srcPic->stride[2]; + pic->planes[0] = srcPic->planes[0]; + pic->planes[1] = srcPic->planes[1]; + pic->planes[2] = srcPic->planes[2]; + if (isAbrLoad) + pic->analysisData = *analysisData; + return true; + } + else + return false; + } + + void PassEncoder::threadMain() + { + THREAD_NAME("PassEncoder", m_id); + + while (m_threadActive) + { + +#if ENABLE_LIBVMAF + x265_vmaf_data* vmafdata = m_cliopt.vmafData; +#endif + /* This allows muxers to modify bitstream format */ + m_cliopt.output->setParam(m_param); + const x265_api* api = m_cliopt.api; + ReconPlay* reconPlay = NULL; + if (m_cliopt.reconPlayCmd) + reconPlay = new ReconPlay(m_cliopt.reconPlayCmd, *m_param); + char* profileName = m_cliopt.encName ? 
m_cliopt.encName : (char *)"x265"; + + if (m_cliopt.zoneFile) + { + if (!m_cliopt.parseZoneFile()) + { + x265_log(NULL, X265_LOG_ERROR, "Unable to parse zonefile in %s\n", profileName); + fclose(m_cliopt.zoneFile); + m_cliopt.zoneFile = NULL; + } + } + + if (signal(SIGINT, sigint_handler) == SIG_ERR) + x265_log(m_param, X265_LOG_ERROR, "Unable to register CTRL+C handler: %s in %s\n", + strerror(errno), profileName); + + x265_picture pic_orig, pic_out; + x265_picture *pic_in = &pic_orig; + /* Allocate recon picture if analysis save/load is enabled */ + std::priority_queue<int64_t>* pts_queue = m_cliopt.output->needPTS() ? new std::priority_queue<int64_t>() : NULL; + x265_picture *pic_recon = (m_cliopt.recon || m_param->analysisSave || m_param->analysisLoad || pts_queue || reconPlay || m_param->csvLogLevel) ? &pic_out : NULL; + uint32_t inFrameCount = 0; + uint32_t outFrameCount = 0; + x265_nal *p_nal; + x265_stats stats; + uint32_t nal; + int16_t *errorBuf = NULL; + bool bDolbyVisionRPU = false; + uint8_t *rpuPayload = NULL; + int inputPicNum = 1; + x265_picture picField1, picField2; + x265_analysis_data* analysisInfo = (x265_analysis_data*)(&pic_out.analysisData); + bool isAbrSave = m_cliopt.saveLevel && (m_parent->m_numEncodes > 1); + + if (!m_param->bRepeatHeaders && !m_param->bEnableSvtHevc) + { + if (api->encoder_headers(m_encoder, &p_nal, &nal) < 0) + { + x265_log(m_param, X265_LOG_ERROR, "Failure generating stream headers in %s\n", profileName); + m_ret = 3; + goto fail; + } + else + m_cliopt.totalbytes += m_cliopt.output->writeHeaders(p_nal, nal); + } + + if (m_param->bField && m_param->interlaceMode) + { + api->picture_init(m_param, &picField1); + api->picture_init(m_param, &picField2); + // return back the original height of input + m_param->sourceHeight *= 2; + api->picture_init(m_param, &pic_orig); + } + else + api->picture_init(m_param, &pic_orig); + + if (m_param->dolbyProfile && m_cliopt.dolbyVisionRpu) + { + rpuPayload = X265_MALLOC(uint8_t, 1024); + 
pic_in->rpu.payload = rpuPayload; + if (pic_in->rpu.payload) + bDolbyVisionRPU = true; + } + + if (m_cliopt.bDither) + { + errorBuf = X265_MALLOC(int16_t, m_param->sourceWidth + 1); + if (errorBuf) + memset(errorBuf, 0, (m_param->sourceWidth + 1) * sizeof(int16_t)); + else + m_cliopt.bDither = false; + } + + // main encoder loop + while (pic_in && !b_ctrl_c) + { + pic_orig.poc = (m_param->bField && m_param->interlaceMode) ? inFrameCount * 2 : inFrameCount; + if (m_cliopt.qpfile) + { + if (!m_cliopt.parseQPFile(pic_orig)) + { + x265_log(NULL, X265_LOG_ERROR, "can't parse qpfile for frame %d in %s\n", + pic_in->poc, profileName); + fclose(m_cliopt.qpfile); + m_cliopt.qpfile = NULL; + } + } + + if (m_cliopt.framesToBeEncoded && inFrameCount >= m_cliopt.framesToBeEncoded) + pic_in = NULL; + else if (readPicture(pic_in)) + inFrameCount++; + else + pic_in = NULL; + + if (pic_in) + { + if (pic_in->bitDepth > m_param->internalBitDepth && m_cliopt.bDither) + { + x265_dither_image(pic_in, m_cliopt.input->getWidth(), m_cliopt.input->getHeight(), errorBuf, m_param->internalBitDepth); + pic_in->bitDepth = m_param->internalBitDepth; + } + /* Overwrite PTS */ + pic_in->pts = pic_in->poc; + + // convert to field + if (m_param->bField && m_param->interlaceMode) + { + int height = pic_in->height >> 1; + + int static bCreated = 0; + if (bCreated == 0) + { + bCreated = 1; + inputPicNum = 2; + picField1.fieldNum = 1; + picField2.fieldNum = 2; + + picField1.bitDepth = picField2.bitDepth = pic_in->bitDepth; + picField1.colorSpace = picField2.colorSpace = pic_in->colorSpace; + picField1.height = picField2.height = pic_in->height >> 1; + picField1.framesize = picField2.framesize = pic_in->framesize >> 1; + + size_t fieldFrameSize = (size_t)pic_in->framesize >> 1; + char* field1Buf = X265_MALLOC(char, fieldFrameSize); + char* field2Buf = X265_MALLOC(char, fieldFrameSize); + + int stride = picField1.stride[0] = picField2.stride[0] = pic_in->stride[0]; + uint64_t framesize = stride * (height 
>> x265_cli_csps[pic_in->colorSpace].height[0]); + picField1.planes[0] = field1Buf; + picField2.planes[0] = field2Buf; + for (int i = 1; i < x265_cli_csps[pic_in->colorSpace].planes; i++) + { + picField1.planes[i] = field1Buf + framesize; + picField2.planes[i] = field2Buf + framesize; + + stride = picField1.stride[i] = picField2.stride[i] = pic_in->stride[i]; + framesize += (stride * (height >> x265_cli_csps[pic_in->colorSpace].height[i])); + } + assert(framesize == picField1.framesize); + } + + picField1.pts = picField1.poc = pic_in->poc; + picField2.pts = picField2.poc = pic_in->poc + 1; + + picField1.userSEI = picField2.userSEI = pic_in->userSEI; + + //if (pic_in->userData) + //{ + // // Have to handle userData here + //} + + if (pic_in->framesize) + { + for (int i = 0; i < x265_cli_csps[pic_in->colorSpace].planes; i++) + { + char* srcP1 = (char*)pic_in->planes[i]; + char* srcP2 = (char*)pic_in->planes[i] + pic_in->stride[i]; + char* p1 = (char*)picField1.planes[i]; + char* p2 = (char*)picField2.planes[i]; + + int stride = picField1.stride[i]; + + for (int y = 0; y < (height >> x265_cli_csps[pic_in->colorSpace].height[i]); y++) + { + memcpy(p1, srcP1, stride); + memcpy(p2, srcP2, stride); + srcP1 += 2 * stride; + srcP2 += 2 * stride; + p1 += stride; + p2 += stride; + } + } + } + } + + if (bDolbyVisionRPU) + { + if (m_param->bField && m_param->interlaceMode) + { + if (m_cliopt.rpuParser(&picField1) > 0) + goto fail; + if (m_cliopt.rpuParser(&picField2) > 0) + goto fail; + } + else + { + if (m_cliopt.rpuParser(pic_in) > 0) + goto fail; + } + } + } + + for (int inputNum = 0; inputNum < inputPicNum; inputNum++) + { + x265_picture *picInput = NULL; + if (inputPicNum == 2) + picInput = pic_in ? (inputNum ? 
&picField2 : &picField1) : NULL; + else + picInput = pic_in; + + int numEncoded = api->encoder_encode(m_encoder, &p_nal, &nal, picInput, pic_recon); + + int idx = (inFrameCount - 1) % m_parent->m_queueSize; + m_parent->m_picIdxReadCnt[m_id][idx].incr(); + m_parent->m_picReadCnt[m_id].incr(); + if (m_cliopt.loadLevel && picInput) + { + m_parent->m_analysisReadCnt[m_cliopt.refId].incr(); + m_parent->m_analysisRead[m_cliopt.refId][m_lastIdx].incr(); + } + + if (numEncoded < 0) + { + b_ctrl_c = 1; + m_ret = 4; + break; + } + + if (reconPlay && numEncoded) + reconPlay->writePicture(*pic_recon); + + outFrameCount += numEncoded; + + if (isAbrSave && numEncoded) + { + copyInfo(analysisInfo); + } + + if (numEncoded && pic_recon && m_cliopt.recon) + m_cliopt.recon->writePicture(pic_out); + if (nal) + { + m_cliopt.totalbytes += m_cliopt.output->writeFrame(p_nal, nal, pic_out); + if (pts_queue) + { + pts_queue->push(-pic_out.pts); + if (pts_queue->size() > 2) + pts_queue->pop(); + } + } + m_cliopt.printStatus(outFrameCount); + } + } + + /* Flush the encoder */ + while (!b_ctrl_c) + { + int numEncoded = api->encoder_encode(m_encoder, &p_nal, &nal, NULL, pic_recon); + if (numEncoded < 0) + { + m_ret = 4; + break; + } + + if (reconPlay && numEncoded) + reconPlay->writePicture(*pic_recon); + + outFrameCount += numEncoded; + if (isAbrSave && numEncoded) + { + copyInfo(analysisInfo); + } + + if (numEncoded && pic_recon && m_cliopt.recon) + m_cliopt.recon->writePicture(pic_out); + if (nal) + { + m_cliopt.totalbytes += m_cliopt.output->writeFrame(p_nal, nal, pic_out); + if (pts_queue) + { + pts_queue->push(-pic_out.pts); + if (pts_queue->size() > 2) + pts_queue->pop(); + } + } + + m_cliopt.printStatus(outFrameCount); + + if (!numEncoded) + break; + } + + if (bDolbyVisionRPU) + { + if (fgetc(m_cliopt.dolbyVisionRpu) != EOF) + x265_log(NULL, X265_LOG_WARNING, "Dolby Vision RPU count is greater than frame count in %s\n", + profileName); + x265_log(NULL, X265_LOG_INFO, "VES muxing with 
Dolby Vision RPU file successful in %s\n", + profileName); + } + + /* clear progress report */ + if (m_cliopt.bProgress) + fprintf(stderr, "%*s\r", 80, " "); + + fail: + + delete reconPlay; + + api->encoder_get_stats(m_encoder, &stats, sizeof(stats)); + if (m_param->csvfn && !b_ctrl_c) +#if ENABLE_LIBVMAF + api->vmaf_encoder_log(m_encoder, m_cliopt.argCount, m_cliopt.argString, m_cliopt.param, vmafdata); +#else + api->encoder_log(m_encoder, m_cliopt.argCnt, m_cliopt.argString); +#endif + api->encoder_close(m_encoder); + + int64_t second_largest_pts = 0; + int64_t largest_pts = 0; + if (pts_queue && pts_queue->size() >= 2) + { + second_largest_pts = -pts_queue->top(); + pts_queue->pop(); + largest_pts = -pts_queue->top(); + pts_queue->pop(); + delete pts_queue; + pts_queue = NULL; + } + m_cliopt.output->closeFile(largest_pts, second_largest_pts); + + if (b_ctrl_c) + general_log(m_param, NULL, X265_LOG_INFO, "aborted at input frame %d, output frame %d in %s\n", + m_cliopt.seek + inFrameCount, stats.encodedPictureCount, profileName); + + api->param_free(m_param); + + X265_FREE(errorBuf); + X265_FREE(rpuPayload); + + m_threadActive = false; + m_parent->m_numActiveEncodes.decr(); + } + } + + void PassEncoder::destroy() + { + stop(); + if (m_reader) + { + m_reader->stop(); + delete m_reader; + } + else + { + m_scaler->stop(); + m_scaler->destroy(); + delete m_scaler; + } + } + + Scaler::Scaler(int threadId, int threadNum, int id, VideoDesc *src, VideoDesc *dst, PassEncoder *parentEnc) + { + m_parentEnc = parentEnc; + m_id = id; + m_srcFormat = src; + m_dstFormat = dst; + m_threadActive = false; + m_scaleFrameSize = 0; + m_filterManager = NULL; + m_threadId = threadId; + m_threadTotal = threadNum; + + int csp = dst->m_csp; + uint32_t pixelbytes = dst->m_inputDepth > 8 ? 
2 : 1; + for (int i = 0; i < x265_cli_csps[csp].planes; i++) + { + int w = dst->m_width >> x265_cli_csps[csp].width[i]; + int h = dst->m_height >> x265_cli_csps[csp].height[i]; + m_scalePlanes[i] = w * h * pixelbytes; + m_scaleFrameSize += m_scalePlanes[i]; + } + + if (src->m_height != dst->m_height || src->m_width != dst->m_width) + { + m_filterManager = new ScalerFilterManager; + m_filterManager->init(4, m_srcFormat, m_dstFormat); + } + } + + bool Scaler::scalePic(x265_picture * destination, x265_picture * source) + { + if (!destination || !source) + return false; + x265_param* param = m_parentEnc->m_param; + int pixelBytes = m_dstFormat->m_inputDepth > 8 ? 2 : 1; + if (m_srcFormat->m_height != m_dstFormat->m_height || m_srcFormat->m_width != m_dstFormat->m_width) + { + void **srcPlane = NULL, **dstPlane = NULL; + int srcStride[3], dstStride[3]; + destination->bitDepth = source->bitDepth; + destination->colorSpace = source->colorSpace; + destination->pts = source->pts; + destination->dts = source->dts; + destination->reorderedPts = source->reorderedPts; + destination->poc = source->poc; + destination->userSEI = source->userSEI; + srcPlane = source->planes; + dstPlane = destination->planes; + srcStride[0] = source->stride[0]; + destination->stride[0] = m_dstFormat->m_width * pixelBytes; + dstStride[0] = destination->stride[0]; + if (param->internalCsp != X265_CSP_I400) + { + srcStride[1] = source->stride[1]; + srcStride[2] = source->stride[2]; + destination->stride[1] = destination->stride[0] >> x265_cli_csps[param->internalCsp].width[1]; + destination->stride[2] = destination->stride[0] >> x265_cli_csps[param->internalCsp].width[2]; + dstStride[1] = destination->stride[1]; + dstStride[2] = destination->stride[2]; + } + if (m_scaleFrameSize) + { + m_filterManager->scale_pic(srcPlane, dstPlane, srcStride, dstStride); + return true; + } + else + x265_log(param, X265_LOG_INFO, "Empty frame received\n"); + } + return false; + } + + void Scaler::threadMain() + { + 
THREAD_NAME("Scaler", m_id); + + /* unscaled picture is stored in the last index */ + uint32_t srcId = m_id - 1; + int QDepth = m_parentEnc->m_parent->m_queueSize; + while (!m_parentEnc->m_inputOver) + { + + uint32_t scaledWritten = m_parentEnc->m_parent->m_picWriteCnt[m_id].get(); + + if (m_parentEnc->m_cliopt.framesToBeEncoded && scaledWritten >= m_parentEnc->m_cliopt.framesToBeEncoded) + break; + + if (m_threadTotal > 1 && (m_threadId != scaledWritten % m_threadTotal)) + { + continue; + } + uint32_t written = m_parentEnc->m_parent->m_picWriteCnt[srcId].get(); + + /*If all the input pictures are scaled by the current scale worker thread wait for input pictures*/ + while (m_threadActive && (scaledWritten == written)) { + written = m_parentEnc->m_parent->m_picWriteCnt[srcId].waitForChange(written); + } + + if (m_threadActive && scaledWritten < written) + { + + int scaledWriteIdx = scaledWritten % QDepth; + int overWritePicBuffer = scaledWritten / QDepth; + int read = m_parentEnc->m_parent->m_picIdxReadCnt[m_id][scaledWriteIdx].get(); + + while (overWritePicBuffer && read < overWritePicBuffer) + { + read = m_parentEnc->m_parent->m_picIdxReadCnt[m_id][scaledWriteIdx].waitForChange(read); + } + + if (!m_parentEnc->m_parent->m_inputPicBuffer[m_id][scaledWriteIdx]) + { + int framesize = 0; + int planesize[3]; + int csp = m_dstFormat->m_csp; + int stride[3]; + stride[0] = m_dstFormat->m_width; + stride[1] = stride[0] >> x265_cli_csps[csp].width[1]; + stride[2] = stride[0] >> x265_cli_csps[csp].width[2]; + for (int i = 0; i < x265_cli_csps[csp].planes; i++) + { + uint32_t h = m_dstFormat->m_height >> x265_cli_csps[csp].height[i]; + planesize[i] = h * stride[i]; + framesize += planesize[i]; + } + + m_parentEnc->m_parent->m_inputPicBuffer[m_id][scaledWriteIdx] = x265_picture_alloc(); + x265_picture_init(m_parentEnc->m_param, m_parentEnc->m_parent->m_inputPicBuffer[m_id][scaledWriteIdx]); + + ((x265_picture*)m_parentEnc->m_parent->m_inputPicBuffer[m_id][scaledWritten % 
QDepth])->framesize = framesize; + for (int32_t j = 0; j < x265_cli_csps[csp].planes; j++) + { + m_parentEnc->m_parent->m_inputPicBuffer[m_id][scaledWritten % QDepth]->planes[j] = X265_MALLOC(char, planesize[j]); + } + } + + x265_picture *srcPic = m_parentEnc->m_parent->m_inputPicBuffer[srcId][scaledWritten % QDepth]; + x265_picture* destPic = m_parentEnc->m_parent->m_inputPicBuffer[m_id][scaledWriteIdx]; + + // Enqueue this picture up with the current encoder so that it will asynchronously encode + if (!scalePic(destPic, srcPic)) + x265_log(NULL, X265_LOG_ERROR, "Unable to copy scaled input picture to input queue \n"); + else + m_parentEnc->m_parent->m_picWriteCnt[m_id].incr(); + m_scaledWriteCnt.incr(); + m_parentEnc->m_parent->m_picIdxReadCnt[srcId][scaledWriteIdx].incr(); + } + if (m_threadTotal > 1) + { + written = m_parentEnc->m_parent->m_picWriteCnt[srcId].get(); + int totalWrite = written / m_threadTotal; + if (written % m_threadTotal > m_threadId) + totalWrite++; + if (totalWrite == m_scaledWriteCnt.get()) + { + m_parentEnc->m_parent->m_picWriteCnt[srcId].poke(); + m_parentEnc->m_parent->m_picWriteCnt[m_id].poke(); + break; + } + } + else + { + /* Once end of video is reached and all frames are scaled, release wait on picwritecount */ + scaledWritten = m_parentEnc->m_parent->m_picWriteCnt[m_id].get(); + written = m_parentEnc->m_parent->m_picWriteCnt[srcId].get(); + if (written == scaledWritten) + { + m_parentEnc->m_parent->m_picWriteCnt[srcId].poke(); + m_parentEnc->m_parent->m_picWriteCnt[m_id].poke(); + break; + } + } + + } + m_threadActive = false; + destroy(); + } + + Reader::Reader(int id, PassEncoder *parentEnc) + { + m_parentEnc = parentEnc; + m_id = id; + m_input = parentEnc->m_input; + } + + void Reader::threadMain() + { + THREAD_NAME("Reader", m_id); + + int QDepth = m_parentEnc->m_parent->m_queueSize; + x265_picture* src = x265_picture_alloc(); + x265_picture_init(m_parentEnc->m_param, src); + + while (m_threadActive) + { + uint32_t written = 
m_parentEnc->m_parent->m_picWriteCnt[m_id].get(); + uint32_t writeIdx = written % QDepth; + uint32_t read = m_parentEnc->m_parent->m_picIdxReadCnt[m_id][writeIdx].get(); + uint32_t overWritePicBuffer = written / QDepth; + + if (m_parentEnc->m_cliopt.framesToBeEncoded && written >= m_parentEnc->m_cliopt.framesToBeEncoded) + break; + + while (overWritePicBuffer && read < overWritePicBuffer) + { + read = m_parentEnc->m_parent->m_picIdxReadCnt[m_id][writeIdx].waitForChange(read); + } + + x265_picture* dest = m_parentEnc->m_parent->m_inputPicBuffer[m_id][writeIdx]; + if (m_input->readPicture(*src)) + { + dest->poc = src->poc; + dest->pts = src->pts; + dest->userSEI = src->userSEI; + dest->bitDepth = src->bitDepth; + dest->framesize = src->framesize; + dest->height = src->height; + dest->width = src->width; + dest->colorSpace = src->colorSpace; + dest->userSEI = src->userSEI; + dest->rpu.payload = src->rpu.payload; + dest->picStruct = src->picStruct; + dest->stride[0] = src->stride[0]; + dest->stride[1] = src->stride[1]; + dest->stride[2] = src->stride[2]; + + if (!dest->planes[0]) + dest->planes[0] = X265_MALLOC(char, dest->framesize); + + memcpy(dest->planes[0], src->planes[0], src->framesize * sizeof(char)); + dest->planes[1] = (char*)dest->planes[0] + src->stride[0] * src->height; + dest->planes[2] = (char*)dest->planes[1] + src->stride[1] * (src->height >> x265_cli_csps[src->colorSpace].height[1]); + m_parentEnc->m_parent->m_picWriteCnt[m_id].incr(); + } + else + { + m_threadActive = false; + m_parentEnc->m_inputOver = true; + m_parentEnc->m_parent->m_picWriteCnt[m_id].poke(); + } + } + x265_picture_free(src); + } +}
View file
x265_3.4.tar.gz/source/abrEncApp.h
Added
/*****************************************************************************
* Copyright (C) 2013-2020 MulticoreWare, Inc
*
* Authors: Pooja Venkatesan <pooja@multicorewareinc.com>
*          Aruna Matheswaran <aruna@multicorewareinc.com>
*
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
*
* This program is also available under a commercial proprietary license.
* For more information, contact us at license @ x265.com.
*****************************************************************************/

#ifndef ABR_ENCODE_H
#define ABR_ENCODE_H

#include "x265.h"
#include "scaler.h"
#include "threading.h"
#include "x265cli.h"

namespace X265_NS {
    // private namespace

    class PassEncoder;
    class Scaler;
    class Reader;

    /* Owner of the state shared by all encodes of one run: the per-encode input picture
     * queues, the per-encode analysis queues, and the counters the producer and consumer
     * threads use to synchronise on them. */
    class AbrEncoder
    {
    public:
        uint8_t           m_numEncodes;       // number of encodes; also the first dimension of the arrays below
        PassEncoder        **m_passEnc;       // [numEncodes]
        uint32_t           m_queueSize;       // depth of every picture / analysis queue
        ThreadSafeInteger  m_numActiveEncodes; // decremented by each PassEncoder thread when it finishes

        x265_picture       ***m_inputPicBuffer;   //[numEncodes][queueSize]
        x265_analysis_data **m_analysisBuffer; //[numEncodes][queueSize]
        int                **m_readFlag;      // NOTE(review): not referenced by the code visible here; presumably [numEncodes][queueSize] - confirm

        ThreadSafeInteger  *m_picWriteCnt;      //[numEncodes] pictures written to each input queue
        ThreadSafeInteger  *m_picReadCnt;       //[numEncodes] pictures consumed from each input queue
        ThreadSafeInteger  **m_picIdxReadCnt;   //[numEncodes][queueSize] times each queue slot has been consumed
        ThreadSafeInteger  *m_analysisWriteCnt; //[numEncodes] analysis entries written by each encode
        ThreadSafeInteger  *m_analysisReadCnt;  //[numEncodes] analysis entries read from each encode
        ThreadSafeInteger  **m_analysisWrite;   //[numEncodes][queueSize]
        ThreadSafeInteger  **m_analysisRead;    //[numEncodes][queueSize]

        AbrEncoder(CLIOptions cliopt[], uint8_t numEncodes, int& ret);
        bool allocBuffers();
        void destroy();

    };

    /* One encode of the run, executed on its own thread. It is fed by either a Reader
     * (pictures read from the input file) or a Scaler (pictures scaled from another
     * queue); destroy() tears down whichever of the two is set. */
    class PassEncoder : public Thread
    {
    public:

        uint32_t m_id;              // index of this encode in the AbrEncoder arrays
        x265_param *m_param;
        AbrEncoder *m_parent;
        x265_encoder *m_encoder;
        Reader *m_reader;
        Scaler *m_scaler;
        bool m_inputOver;           // set by the Reader when the input file is exhausted

        int m_threadActive;         // cleared at the end of threadMain()
        int m_lastIdx;              // analysis queue slot selected by the last readPicture() call
        uint32_t m_outputNalsCount;

        x265_picture **m_inputPicBuffer;
        x265_analysis_data **m_analysisBuffer;
        x265_nal **m_outputNals;
        x265_picture **m_outputRecon;

        CLIOptions m_cliopt;
        InputFile* m_input;
        const char* m_reconPlayCmd;
        FILE*    m_qpfile;
        FILE*    m_zoneFile;
        FILE*    m_dolbyVisionRpu;/* File containing Dolby Vision BL RPU metadata */

        int m_ret;                  // set to 3 on header failure and 4 on encode failure by threadMain()

        PassEncoder(uint32_t id, CLIOptions cliopt, AbrEncoder *parent);
        int init(int &result);
        void setReuseLevel();

        void startThreads();
        void copyInfo(x265_analysis_data *src);

        /* Fill the given picture from this encode's input queue; false when nothing is left to read. */
        bool readPicture(x265_picture*);
        void destroy();

    private:
        void threadMain();
    };

    /* Worker thread that scales pictures from the queue at index (m_id - 1) into the
     * queue at index m_id. Several workers may share one output queue; frames are then
     * dealt out round-robin by (frame count % m_threadTotal == m_threadId). */
    class Scaler : public Thread
    {
    public:
        PassEncoder *m_parentEnc;
        int m_id;                   // index of the destination queue
        int m_scalePlanes[3];       // per-plane byte size of a scaled picture, computed in the constructor
        int m_scaleFrameSize;       // sum of m_scalePlanes
        uint32_t m_threadId;        // index of this worker among its siblings
        uint32_t m_threadTotal;     // number of sibling workers
        ThreadSafeInteger m_scaledWriteCnt; // pictures processed by this worker
        VideoDesc* m_srcFormat;
        VideoDesc* m_dstFormat;
        int m_threadActive;
        ScalerFilterManager* m_filterManager; // NULL when source and destination sizes are equal

        Scaler(int threadId, int threadNum, int id, VideoDesc *src, VideoDesc * dst, PassEncoder *parentEnc);
        bool scalePic(x265_picture *destination, x265_picture *source);
        void threadMain();
        /* Release the filter manager; safe to call more than once. */
        void destroy()
        {
            if (m_filterManager)
            {
                delete m_filterManager;
                m_filterManager = NULL;
            }
        }
    };

    /* Worker thread that reads pictures from an InputFile into the input queue at index m_id. */
    class Reader : public Thread
    {
    public:
        PassEncoder *m_parentEnc;
        int m_id;
        InputFile* m_input;
        int m_threadActive;

        Reader(int id, PassEncoder *parentEnc);
        void threadMain();
    };
}

#endif // ifndef ABR_ENCODE_H
#pragma once
View file
x265_3.3.tar.gz/source/common/CMakeLists.txt -> x265_3.4.tar.gz/source/common/CMakeLists.txt
Changed
@@ -14,7 +14,7 @@ endif(EXTRA_LIB) if(ENABLE_ASSEMBLY) - set_source_files_properties(threading.cpp primitives.cpp PROPERTIES COMPILE_FLAGS -DENABLE_ASSEMBLY=1) + set_source_files_properties(threading.cpp primitives.cpp pixel.cpp PROPERTIES COMPILE_FLAGS -DENABLE_ASSEMBLY=1) list(APPEND VFLAGS "-DENABLE_ASSEMBLY=1") endif(ENABLE_ASSEMBLY) @@ -84,16 +84,33 @@ endif(ENABLE_ASSEMBLY AND X86) if(ENABLE_ASSEMBLY AND (ARM OR CROSS_COMPILE_ARM)) - set(C_SRCS asm-primitives.cpp pixel.h mc.h ipfilter8.h blockcopy8.h dct8.h loopfilter.h) + if(ARM64) + if(GCC AND (CMAKE_CXX_FLAGS_RELEASE MATCHES "-O3")) + message(STATUS "Detected CXX compiler using -O3 optimization level") + add_definitions(-DAUTO_VECTORIZE=1) + endif() + set(C_SRCS asm-primitives.cpp pixel.h ipfilter8.h) - # add ARM assembly/intrinsic files here - set(A_SRCS asm.S cpu-a.S mc-a.S sad-a.S pixel-util.S ssd-a.S blockcopy8.S ipfilter8.S dct-a.S) - set(VEC_PRIMITIVES) + # add ARM assembly/intrinsic files here + set(A_SRCS asm.S mc-a.S sad-a.S pixel-util.S ipfilter8.S) + set(VEC_PRIMITIVES) - set(ARM_ASMS "${A_SRCS}" CACHE INTERNAL "ARM Assembly Sources") - foreach(SRC ${C_SRCS}) - set(ASM_PRIMITIVES ${ASM_PRIMITIVES} arm/${SRC}) - endforeach() + set(ARM_ASMS "${A_SRCS}" CACHE INTERNAL "ARM Assembly Sources") + foreach(SRC ${C_SRCS}) + set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC}) + endforeach() + else() + set(C_SRCS asm-primitives.cpp pixel.h mc.h ipfilter8.h blockcopy8.h dct8.h loopfilter.h) + + # add ARM assembly/intrinsic files here + set(A_SRCS asm.S cpu-a.S mc-a.S sad-a.S pixel-util.S ssd-a.S blockcopy8.S ipfilter8.S dct-a.S) + set(VEC_PRIMITIVES) + + set(ARM_ASMS "${A_SRCS}" CACHE INTERNAL "ARM Assembly Sources") + foreach(SRC ${C_SRCS}) + set(ASM_PRIMITIVES ${ASM_PRIMITIVES} arm/${SRC}) + endforeach() + endif() source_group(Assembly FILES ${ASM_PRIMITIVES}) endif(ENABLE_ASSEMBLY AND (ARM OR CROSS_COMPILE_ARM)) @@ -151,4 +168,5 @@ predict.cpp predict.h scalinglist.cpp scalinglist.h quant.cpp quant.h 
contexts.h - deblock.cpp deblock.h) + deblock.cpp deblock.h + scaler.cpp scaler.h)
View file
x265_3.4.tar.gz/source/common/aarch64/asm-primitives.cpp
Added
@@ -0,0 +1,219 @@ +/***************************************************************************** + * Copyright (C) 2020 MulticoreWare, Inc + * + * Authors: Hongbin Liu <liuhongbin1@huawei.com> + * Yimeng Su <yimeng.su@huawei.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at license @ x265.com. 
+ *****************************************************************************/ + +#include "common.h" +#include "primitives.h" +#include "x265.h" +#include "cpu.h" + + +#if defined(__GNUC__) +#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) +#endif + +#define GCC_4_9_0 40900 +#define GCC_5_1_0 50100 + +extern "C" { +#include "pixel.h" +#include "pixel-util.h" +#include "ipfilter8.h" +} + +namespace X265_NS { +// private x265 namespace + + +template<int size> +void interp_8tap_hv_pp_cpu(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int idxX, int idxY) +{ + ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_LUMA - 1)]); + const int halfFilterSize = NTAPS_LUMA >> 1; + const int immedStride = MAX_CU_SIZE; + + primitives.pu[size].luma_hps(src, srcStride, immed, immedStride, idxX, 1); + primitives.pu[size].luma_vsp(immed + (halfFilterSize - 1) * immedStride, immedStride, dst, dstStride, idxY); +} + + +/* Temporary workaround because luma_vsp assembly primitive has not been completed + * but interp_8tap_hv_pp_cpu uses mixed C primitive and assembly primitive. + * Otherwise, segment fault occurs. 
*/ +void setupAliasCPrimitives(EncoderPrimitives &cp, EncoderPrimitives &asmp, int cpuMask) +{ + if (cpuMask & X265_CPU_NEON) + { + asmp.pu[LUMA_8x4].luma_vsp = cp.pu[LUMA_8x4].luma_vsp; + asmp.pu[LUMA_8x8].luma_vsp = cp.pu[LUMA_8x8].luma_vsp; + asmp.pu[LUMA_8x16].luma_vsp = cp.pu[LUMA_8x16].luma_vsp; + asmp.pu[LUMA_8x32].luma_vsp = cp.pu[LUMA_8x32].luma_vsp; + asmp.pu[LUMA_12x16].luma_vsp = cp.pu[LUMA_12x16].luma_vsp; +#if !AUTO_VECTORIZE || GCC_VERSION < GCC_5_1_0 /* gcc_version < gcc-5.1.0 */ + asmp.pu[LUMA_16x4].luma_vsp = cp.pu[LUMA_16x4].luma_vsp; + asmp.pu[LUMA_16x8].luma_vsp = cp.pu[LUMA_16x8].luma_vsp; + asmp.pu[LUMA_16x12].luma_vsp = cp.pu[LUMA_16x12].luma_vsp; + asmp.pu[LUMA_16x16].luma_vsp = cp.pu[LUMA_16x16].luma_vsp; + asmp.pu[LUMA_16x32].luma_vsp = cp.pu[LUMA_16x32].luma_vsp; + asmp.pu[LUMA_16x64].luma_vsp = cp.pu[LUMA_16x64].luma_vsp; + asmp.pu[LUMA_32x16].luma_vsp = cp.pu[LUMA_32x16].luma_vsp; + asmp.pu[LUMA_32x24].luma_vsp = cp.pu[LUMA_32x24].luma_vsp; + asmp.pu[LUMA_32x32].luma_vsp = cp.pu[LUMA_32x32].luma_vsp; + asmp.pu[LUMA_32x64].luma_vsp = cp.pu[LUMA_32x64].luma_vsp; + asmp.pu[LUMA_48x64].luma_vsp = cp.pu[LUMA_48x64].luma_vsp; + asmp.pu[LUMA_64x16].luma_vsp = cp.pu[LUMA_64x16].luma_vsp; + asmp.pu[LUMA_64x32].luma_vsp = cp.pu[LUMA_64x32].luma_vsp; + asmp.pu[LUMA_64x48].luma_vsp = cp.pu[LUMA_64x48].luma_vsp; + asmp.pu[LUMA_64x64].luma_vsp = cp.pu[LUMA_64x64].luma_vsp; +#if !AUTO_VECTORIZE || GCC_VERSION < GCC_4_9_0 /* gcc_version < gcc-4.9.0 */ + asmp.pu[LUMA_4x4].luma_vsp = cp.pu[LUMA_4x4].luma_vsp; + asmp.pu[LUMA_4x8].luma_vsp = cp.pu[LUMA_4x8].luma_vsp; + asmp.pu[LUMA_4x16].luma_vsp = cp.pu[LUMA_4x16].luma_vsp; + asmp.pu[LUMA_24x32].luma_vsp = cp.pu[LUMA_24x32].luma_vsp; + asmp.pu[LUMA_32x8].luma_vsp = cp.pu[LUMA_32x8].luma_vsp; +#endif +#endif + } +} + + +void setupAssemblyPrimitives(EncoderPrimitives &p, int cpuMask) +{ + if (cpuMask & X265_CPU_NEON) + { + p.pu[LUMA_4x4].satd = PFX(pixel_satd_4x4_neon); + p.pu[LUMA_4x8].satd = 
PFX(pixel_satd_4x8_neon); + p.pu[LUMA_4x16].satd = PFX(pixel_satd_4x16_neon); + p.pu[LUMA_8x4].satd = PFX(pixel_satd_8x4_neon); + p.pu[LUMA_8x8].satd = PFX(pixel_satd_8x8_neon); + p.pu[LUMA_12x16].satd = PFX(pixel_satd_12x16_neon); + + p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].satd = PFX(pixel_satd_4x4_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].satd = PFX(pixel_satd_4x8_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_4x16].satd = PFX(pixel_satd_4x16_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].satd = PFX(pixel_satd_8x4_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].satd = PFX(pixel_satd_8x8_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_12x16].satd = PFX(pixel_satd_12x16_neon); + + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x4].satd = PFX(pixel_satd_4x4_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].satd = PFX(pixel_satd_4x8_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].satd = PFX(pixel_satd_4x16_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].satd = PFX(pixel_satd_4x32_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].satd = PFX(pixel_satd_8x4_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].satd = PFX(pixel_satd_8x8_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].satd = PFX(pixel_satd_12x32_neon); + + p.pu[LUMA_4x4].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_4x4_neon); + p.pu[LUMA_4x8].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_4x8_neon); + p.pu[LUMA_4x16].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_4x16_neon); + p.pu[LUMA_8x4].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_8x4_neon); + p.pu[LUMA_8x8].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_8x8_neon); + p.pu[LUMA_8x16].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_8x16_neon); + p.pu[LUMA_8x32].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_8x32_neon); + + p.pu[LUMA_4x4].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_4x4_neon); + p.pu[LUMA_4x8].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_4x8_neon); + p.pu[LUMA_4x16].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_4x16_neon); + 
p.pu[LUMA_8x4].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_8x4_neon); + p.pu[LUMA_8x8].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_8x8_neon); + p.pu[LUMA_8x16].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_8x16_neon); + p.pu[LUMA_8x32].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_8x32_neon); + + p.pu[LUMA_8x4].sad_x3 = PFX(sad_x3_8x4_neon); + p.pu[LUMA_8x8].sad_x3 = PFX(sad_x3_8x8_neon); + p.pu[LUMA_8x16].sad_x3 = PFX(sad_x3_8x16_neon); + p.pu[LUMA_8x32].sad_x3 = PFX(sad_x3_8x32_neon); + + p.pu[LUMA_8x4].sad_x4 = PFX(sad_x4_8x4_neon); + p.pu[LUMA_8x8].sad_x4 = PFX(sad_x4_8x8_neon); + p.pu[LUMA_8x16].sad_x4 = PFX(sad_x4_8x16_neon); + p.pu[LUMA_8x32].sad_x4 = PFX(sad_x4_8x32_neon); + + // quant + p.quant = PFX(quant_neon); + // luma_hps + p.pu[LUMA_4x4].luma_hps = PFX(interp_8tap_horiz_ps_4x4_neon); + p.pu[LUMA_4x8].luma_hps = PFX(interp_8tap_horiz_ps_4x8_neon); + p.pu[LUMA_4x16].luma_hps = PFX(interp_8tap_horiz_ps_4x16_neon); + p.pu[LUMA_8x4].luma_hps = PFX(interp_8tap_horiz_ps_8x4_neon); + p.pu[LUMA_8x8].luma_hps = PFX(interp_8tap_horiz_ps_8x8_neon); + p.pu[LUMA_8x16].luma_hps = PFX(interp_8tap_horiz_ps_8x16_neon); + p.pu[LUMA_8x32].luma_hps = PFX(interp_8tap_horiz_ps_8x32_neon); + p.pu[LUMA_12x16].luma_hps = PFX(interp_8tap_horiz_ps_12x16_neon); + p.pu[LUMA_24x32].luma_hps = PFX(interp_8tap_horiz_ps_24x32_neon); +#if !AUTO_VECTORIZE || GCC_VERSION < GCC_5_1_0 /* gcc_version < gcc-5.1.0 */ + p.pu[LUMA_16x4].luma_hps = PFX(interp_8tap_horiz_ps_16x4_neon); + p.pu[LUMA_16x8].luma_hps = PFX(interp_8tap_horiz_ps_16x8_neon); + p.pu[LUMA_16x12].luma_hps = PFX(interp_8tap_horiz_ps_16x12_neon); + p.pu[LUMA_16x16].luma_hps = PFX(interp_8tap_horiz_ps_16x16_neon); + p.pu[LUMA_16x32].luma_hps = PFX(interp_8tap_horiz_ps_16x32_neon); + p.pu[LUMA_16x64].luma_hps = PFX(interp_8tap_horiz_ps_16x64_neon); + p.pu[LUMA_32x8].luma_hps = PFX(interp_8tap_horiz_ps_32x8_neon); + p.pu[LUMA_32x16].luma_hps = PFX(interp_8tap_horiz_ps_32x16_neon); + p.pu[LUMA_32x24].luma_hps = PFX(interp_8tap_horiz_ps_32x24_neon); 
+ p.pu[LUMA_32x32].luma_hps = PFX(interp_8tap_horiz_ps_32x32_neon); + p.pu[LUMA_32x64].luma_hps = PFX(interp_8tap_horiz_ps_32x64_neon); + p.pu[LUMA_48x64].luma_hps = PFX(interp_8tap_horiz_ps_48x64_neon); + p.pu[LUMA_64x16].luma_hps = PFX(interp_8tap_horiz_ps_64x16_neon); + p.pu[LUMA_64x32].luma_hps = PFX(interp_8tap_horiz_ps_64x32_neon); + p.pu[LUMA_64x48].luma_hps = PFX(interp_8tap_horiz_ps_64x48_neon); + p.pu[LUMA_64x64].luma_hps = PFX(interp_8tap_horiz_ps_64x64_neon); +#endif + + p.pu[LUMA_8x4].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_8x4>; + p.pu[LUMA_8x8].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_8x8>; + p.pu[LUMA_8x16].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_8x16>; + p.pu[LUMA_8x32].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_8x32>; + p.pu[LUMA_12x16].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_12x16>; +#if !AUTO_VECTORIZE || GCC_VERSION < GCC_5_1_0 /* gcc_version < gcc-5.1.0 */ + p.pu[LUMA_16x4].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_16x4>; + p.pu[LUMA_16x8].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_16x8>; + p.pu[LUMA_16x12].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_16x12>; + p.pu[LUMA_16x16].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_16x16>; + p.pu[LUMA_16x32].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_16x32>; + p.pu[LUMA_16x64].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_16x64>; + p.pu[LUMA_32x16].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_32x16>; + p.pu[LUMA_32x24].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_32x24>; + p.pu[LUMA_32x32].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_32x32>; + p.pu[LUMA_32x64].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_32x64>; + p.pu[LUMA_48x64].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_48x64>; + p.pu[LUMA_64x16].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x16>; + p.pu[LUMA_64x32].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x32>; + p.pu[LUMA_64x48].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x48>; + p.pu[LUMA_64x64].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x64>; +#if !AUTO_VECTORIZE || GCC_VERSION < GCC_4_9_0 /* gcc_version < gcc-4.9.0 */ + p.pu[LUMA_4x4].luma_hvpp = 
interp_8tap_hv_pp_cpu<LUMA_4x4>; + p.pu[LUMA_4x8].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_4x8>; + p.pu[LUMA_4x16].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_4x16>; + p.pu[LUMA_24x32].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_24x32>; + p.pu[LUMA_32x8].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_32x8>; +#endif +#endif + +#if !HIGH_BIT_DEPTH + p.cu[BLOCK_4x4].psy_cost_pp = PFX(psyCost_4x4_neon); +#endif // !HIGH_BIT_DEPTH + + } +} +} // namespace X265_NS
View file
x265_3.4.tar.gz/source/common/aarch64/asm.S
Added
/*****************************************************************************
 * Copyright (C) 2020 MulticoreWare, Inc
 *
 * Authors: Hongbin Liu <liuhongbin1@huawei.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at license @ x265.com.
 *****************************************************************************/

.arch armv8-a

/* Exported symbols get a leading underscore when the build defines PREFIX. */
#ifdef PREFIX
#define EXTERN_ASM _
#else
#define EXTERN_ASM
#endif

/* Lines that start with ELF or FUNC are optional directives.  When the
 * feature is available the marker expands to nothing and the directive is
 * assembled; otherwise the marker must turn the whole line into a comment.
 * The marker therefore has to stay in the first column of the line.
 *
 * The comment marker is a hash sign: AArch64 assemblers treat a hash in the
 * first column as a whole-line comment.  The at sign used by 32-bit ARM
 * sources is not a comment character here and would make every guarded
 * directive a syntax error on non-ELF targets. */
#ifdef __ELF__
#define ELF
#else
#define ELF #
#endif

#define HAVE_AS_FUNC 1

#if HAVE_AS_FUNC
#define FUNC
#else
#define FUNC #
#endif

/* function name, export=1
 *
 * Opens a routine: aligns to 4 bytes (.align 2), emits the entry label and
 * defines a one-shot endfunc macro that closes the routine and then removes
 * itself, so the next function can define a fresh one.
 *
 * With export == 1 the label is EXTERN_ASM followed by the name and is made
 * global; on ELF it is also marked hidden and typed as a function.  With any
 * other value the plain name is used and no .global is emitted. */
.macro function name, export=1
    .macro endfunc
ELF     .size \name, . - \name
FUNC    .endfunc
        .purgem endfunc
    .endm
        .align 2
.if \export == 1
        .global EXTERN_ASM\name
ELF     .hidden EXTERN_ASM\name
ELF     .type EXTERN_ASM\name, %function
FUNC    .func EXTERN_ASM\name
EXTERN_ASM\name:
.else
ELF     .hidden \name
ELF     .type \name, %function
FUNC    .func \name
\name:
.endif
.endm


/* Fixed row pitches shared by the assembly sources (presumably the encode
 * and decode block buffers; confirm against the C definitions). */
#define FENC_STRIDE 64
#define FDEC_STRIDE 32
View file
x265_3.4.tar.gz/source/common/aarch64/ipfilter8.S
Added
/*****************************************************************************
 * Copyright (C) 2020 MulticoreWare, Inc
 *
 * Authors: Yimeng Su <yimeng.su@huawei.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at license @ x265.com.
 *****************************************************************************/

#include "asm.S"

.section .rodata

.align 4

.text

// ---------------------------------------------------------------------------
// 8-tap filter kernels.
//
// Inputs: eight 8-byte windows of the source row, as produced by vextin8.
// Naming the windows t0..t7 in source order, the registers are
//     t0 = v0, t1 = v4, t2 = v1, t3 = v5, t4 = v2, t5 = v6, t6 = v3, t7 = v7
// Outputs: 32-bit sums for 8 pixels, lanes 0-3 in v17 and lanes 4-7 in v18.
// Scratch: v16 and v19-v24; kernels 1-3 also overwrite v1 and v2.
// ---------------------------------------------------------------------------

// Kernel 0: result = 64 * t3.
.macro qpel_filter_0_32b
    movi v24.8h, #64
    uxtl v19.8h, v5.8b
    smull v17.4s, v19.4h, v24.4h
    smull2 v18.4s, v19.8h, v24.8h
.endm

// Kernel 1: result = -t0 + 4*t1 - 10*t2 + 58*t3 + 17*t4 - 5*t5 + t6.
.macro qpel_filter_1_32b
    // 58 * t3
    movi v16.8h, #58
    uxtl v19.8h, v5.8b
    smull v17.4s, v19.4h, v16.4h
    smull2 v18.4s, v19.8h, v16.8h

    // 10 * t2
    movi v24.8h, #10
    uxtl v21.8h, v1.8b
    smull v19.4s, v21.4h, v24.4h
    smull2 v20.4s, v21.8h, v24.8h

    // 17 * t4
    movi v16.8h, #17
    uxtl v23.8h, v2.8b
    smull v21.4s, v23.4h, v16.4h
    smull2 v22.4s, v23.8h, v16.8h

    // 5 * t5 (low half in v23, high half in v16)
    movi v24.8h, #5
    uxtl v1.8h, v6.8b
    smull v23.4s, v1.4h, v24.4h
    smull2 v16.4s, v1.8h, v24.8h

    sub v17.4s, v17.4s, v19.4s
    sub v18.4s, v18.4s, v20.4s

    // 4 * t1 via shift left by 2
    uxtl v1.8h, v4.8b
    sshll v19.4s, v1.4h, #2
    sshll2 v20.4s, v1.8h, #2

    add v17.4s, v17.4s, v21.4s
    add v18.4s, v18.4s, v22.4s

    // t6 - t0
    uxtl v1.8h, v0.8b
    uxtl v2.8h, v3.8b
    ssubl v21.4s, v2.4h, v1.4h
    ssubl2 v22.4s, v2.8h, v1.8h

    add v17.4s, v17.4s, v19.4s
    add v18.4s, v18.4s, v20.4s
    sub v21.4s, v21.4s, v23.4s
    sub v22.4s, v22.4s, v16.4s
    add v17.4s, v17.4s, v21.4s
    add v18.4s, v18.4s, v22.4s
.endm

// Kernel 2 (symmetric):
// result = 40*(t3 + t4) - 11*(t2 + t5) + 4*(t1 + t6) - (t0 + t7).
.macro qpel_filter_2_32b
    movi v16.4s, #11
    // t3 + t4
    uxtl v19.8h, v5.8b
    uxtl v20.8h, v2.8b
    saddl v17.4s, v19.4h, v20.4h
    saddl2 v18.4s, v19.8h, v20.8h

    // t2 + t5
    uxtl v21.8h, v1.8b
    uxtl v22.8h, v6.8b
    saddl v19.4s, v21.4h, v22.4h
    saddl2 v20.4s, v21.8h, v22.8h

    mul v19.4s, v19.4s, v16.4s
    mul v20.4s, v20.4s, v16.4s

    movi v16.4s, #40
    mul v17.4s, v17.4s, v16.4s
    mul v18.4s, v18.4s, v16.4s

    // t1 + t6 (low half in v23, high half in v16)
    uxtl v21.8h, v4.8b
    uxtl v22.8h, v3.8b
    saddl v23.4s, v21.4h, v22.4h
    saddl2 v16.4s, v21.8h, v22.8h

    // t0 + t7
    uxtl v1.8h, v0.8b
    uxtl v2.8h, v7.8b
    saddl v21.4s, v1.4h, v2.4h
    saddl2 v22.4s, v1.8h, v2.8h

    shl v23.4s, v23.4s, #2
    shl v16.4s, v16.4s, #2

    add v19.4s, v19.4s, v21.4s
    add v20.4s, v20.4s, v22.4s
    add v17.4s, v17.4s, v23.4s
    add v18.4s, v18.4s, v16.4s
    sub v17.4s, v17.4s, v19.4s
    sub v18.4s, v18.4s, v20.4s
.endm

// Kernel 3: result = t1 - 5*t2 + 17*t3 + 58*t4 - 10*t5 + 4*t6 - t7.
.macro qpel_filter_3_32b
    movi v16.8h, #17
    movi v24.8h, #5

    // 17 * t3
    uxtl v19.8h, v5.8b
    smull v17.4s, v19.4h, v16.4h
    smull2 v18.4s, v19.8h, v16.8h

    // 5 * t2
    uxtl v21.8h, v1.8b
    smull v19.4s, v21.4h, v24.4h
    smull2 v20.4s, v21.8h, v24.8h

    // 58 * t4
    movi v16.8h, #58
    uxtl v23.8h, v2.8b
    smull v21.4s, v23.4h, v16.4h
    smull2 v22.4s, v23.8h, v16.8h

    // 10 * t5 (low half in v23, high half in v16)
    movi v24.8h, #10
    uxtl v1.8h, v6.8b
    smull v23.4s, v1.4h, v24.4h
    smull2 v16.4s, v1.8h, v24.8h

    sub v17.4s, v17.4s, v19.4s
    sub v18.4s, v18.4s, v20.4s

    // 4 * t6 via shift left by 2
    uxtl v1.8h, v3.8b
    sshll v19.4s, v1.4h, #2
    sshll2 v20.4s, v1.8h, #2

    add v17.4s, v17.4s, v21.4s
    add v18.4s, v18.4s, v22.4s

    // t1 - t7
    uxtl v1.8h, v4.8b
    uxtl v2.8h, v7.8b
    ssubl v21.4s, v1.4h, v2.4h
    ssubl2 v22.4s, v1.8h, v2.8h

    add v17.4s, v17.4s, v19.4s
    add v18.4s, v18.4s, v20.4s
    sub v21.4s, v21.4s, v23.4s
    sub v22.4s, v22.4s, v16.4s
    add v17.4s, v17.4s, v21.4s
    add v18.4s, v18.4s, v22.4s
.endm




// Load 16 bytes from x11 (x11 advances by 16) and build the eight tap
// windows used by the kernels above.  Window k holds bytes k+1 .. k+8 of the
// loaded block: v0 = offset 1, v4 = 2, v1 = 3, v5 = 4, v2 = 5, v6 = 6,
// v3 = 7 and v7 = offset 8 (the upper half of the load).  v3 is overwritten
// last because it is the source of every ext.
.macro vextin8
    ld1 {v3.16b}, [x11], #16
    mov v7.d[0], v3.d[1]
    ext v0.8b, v3.8b, v7.8b, #1
    ext v4.8b, v3.8b, v7.8b, #2
    ext v1.8b, v3.8b, v7.8b, #3
    ext v5.8b, v3.8b, v7.8b, #4
    ext v2.8b, v3.8b, v7.8b, #5
    ext v6.8b, v3.8b, v7.8b, #6
    ext v3.8b, v3.8b, v7.8b, #7
.endm



// void interp_horiz_ps_c(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt)
//
// Body of one filter variant for a block of width a and height b, using the
// kernel macro named by filterhps.
//
// Registers on entry, as used by this code: x0 = source row pointer,
// x1 = source stride in bytes, x2 = destination pointer, x3 = destination
// stride in elements, w5 = row-extension flag, w10 = number of rows.
// These line up with the prototype above under the standard AArch64 argument
// order (NOTE(review): confirm against the callers).
//
// Setup: w12 = 8192 is subtracted from every sum before narrowing to 16 bits
// (presumably the internal sample offset - confirm); w6 = row counter;
// x3 becomes (stride - width) * 2, the byte gap added to x2 after each row.
//
// Width 4 and width 12 are handed to dedicated macros; every other width is
// processed in groups of 8 pixels.  The rowext0 and rowext1 loops contain the
// same instructions; the flag in w5 only selects which copy runs.
.macro HPS_FILTER a b filterhps
    mov w12, #8192
    mov w6, w10
    sub x3, x3, #\a
    lsl x3, x3, #1
    mov w9, #\a
    cmp w9, #4
    b.eq 14f
    cmp w9, #12
    b.eq 15f
    b 7f
14:
    HPS_FILTER_4 \a \b \filterhps
    b 10f
15:
    HPS_FILTER_12 \a \b \filterhps
    b 10f
7:
    cmp w5, #0
    b.eq 8f
    cmp w5, #1
    b.eq 9f
    // any other flag value falls through to the rowext0 copy
8:
loop1_hps_\filterhps\()_\a\()x\b\()_rowext0:
    // per row: w7 = width / 8 groups, x11 = row pointer - 4
    mov w7, #\a
    lsr w7, w7, #3
    mov x11, x0
    sub x11, x11, #4
loop2_hps_\filterhps\()_\a\()x\b\()_rowext0:
    vextin8
    \filterhps
    dup v16.4s, w12
    sub v17.4s, v17.4s, v16.4s
    sub v18.4s, v18.4s, v16.4s
    xtn v0.4h, v17.4s
    xtn2 v0.8h, v18.4s
    st1 {v0.8h}, [x2], #16
    subs w7, w7, #1
    // vextin8 advanced x11 by 16; step back 8 for a net advance of 8 pixels
    sub x11, x11, #8
    b.ne loop2_hps_\filterhps\()_\a\()x\b\()_rowext0
    subs w6, w6, #1
    add x0, x0, x1
    add x2, x2, x3
    b.ne loop1_hps_\filterhps\()_\a\()x\b\()_rowext0
    b 10f
9:
loop3_hps_\filterhps\()_\a\()x\b\()_rowext1:
    mov w7, #\a
    lsr w7, w7, #3
    mov x11, x0
    sub x11, x11, #4
loop4_hps_\filterhps\()_\a\()x\b\()_rowext1:
    vextin8
    \filterhps
    dup v16.4s, w12
    sub v17.4s, v17.4s, v16.4s
    sub v18.4s, v18.4s, v16.4s
    xtn v0.4h, v17.4s
    xtn2 v0.8h, v18.4s
    st1 {v0.8h}, [x2], #16
    subs w7, w7, #1
    sub x11, x11, #8
    b.ne loop4_hps_\filterhps\()_\a\()x\b\()_rowext1
    subs w6, w6, #1
    add x0, x0, x1
    add x2, x2, x3
    b.ne loop3_hps_\filterhps\()_\a\()x\b\()_rowext1
10:
.endm

// Width-4 variant: one kernel evaluation per row, of which only the low four
// results (v17) are narrowed and stored (8 bytes).  Expects w12, w6 and x3 as
// prepared by HPS_FILTER.  Both flag branches run the same instructions.
.macro HPS_FILTER_4 w h filterhps
    cmp w5, #0
    b.eq 11f
    cmp w5, #1
    b.eq 12f
11:
loop4_hps_\filterhps\()_\w\()x\h\()_rowext0:
    mov x11, x0
    sub x11, x11, #4
    vextin8
    \filterhps
    dup v16.4s, w12
    sub v17.4s, v17.4s, v16.4s
    xtn v0.4h, v17.4s
    st1 {v0.4h}, [x2], #8
    sub x11, x11, #8
    subs w6, w6, #1
    add x0, x0, x1
    add x2, x2, x3
    b.ne loop4_hps_\filterhps\()_\w\()x\h\()_rowext0
    b 13f
12:
loop5_hps_\filterhps\()_\w\()x\h\()_rowext1:
    mov x11, x0
    sub x11, x11, #4
    vextin8
    \filterhps
    dup v16.4s, w12
    sub v17.4s, v17.4s, v16.4s
    xtn v0.4h, v17.4s
    st1 {v0.4h}, [x2], #8
    sub x11, x11, #8
    subs w6, w6, #1
    add x0, x0, x1
    add x2, x2, x3
    b.ne loop5_hps_\filterhps\()_\w\()x\h\()_rowext1
13:
.endm

// Width-12 variant: per row, one full group of 8 results (16 bytes stored)
// followed by a second kernel evaluation 8 pixels further on, of which only
// the low four results are stored (8 bytes).  Expects w12, w6 and x3 as
// prepared by HPS_FILTER.  Both flag branches run the same instructions.
.macro HPS_FILTER_12 w h filterhps
    cmp w5, #0
    b.eq 14f
    cmp w5, #1
    b.eq 15f
14:
loop12_hps_\filterhps\()_\w\()x\h\()_rowext0:
    mov x11, x0
    sub x11, x11, #4
    vextin8
    \filterhps
    dup v16.4s, w12
    sub v17.4s, v17.4s, v16.4s
    sub v18.4s, v18.4s, v16.4s
    xtn v0.4h, v17.4s
    xtn2 v0.8h, v18.4s
    st1 {v0.8h}, [x2], #16
    sub x11, x11, #8

    vextin8
    \filterhps
    dup v16.4s, w12
    sub v17.4s, v17.4s, v16.4s
    xtn v0.4h, v17.4s
    st1 {v0.4h}, [x2], #8
    add x2, x2, x3
    subs w6, w6, #1
    add x0, x0, x1
    b.ne loop12_hps_\filterhps\()_\w\()x\h\()_rowext0
    b 16f
15:
loop12_hps_\filterhps\()_\w\()x\h\()_rowext1:
    mov x11, x0
    sub x11, x11, #4
    vextin8
    \filterhps
    dup v16.4s, w12
    sub v17.4s, v17.4s, v16.4s
    sub v18.4s, v18.4s, v16.4s
    xtn v0.4h, v17.4s
    xtn2 v0.8h, v18.4s
    st1 {v0.8h}, [x2], #16
    sub x11, x11, #8

    vextin8
    \filterhps
    dup v16.4s, w12
    sub v17.4s, v17.4s, v16.4s
    xtn v0.4h, v17.4s
    st1 {v0.4h}, [x2], #8
    add x2, x2, x3
    subs w6, w6, #1
    add x0, x0, x1
    b.ne loop12_hps_\filterhps\()_\w\()x\h\()_rowext1
16:
.endm

// void interp_horiz_ps_c(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt)
//
// Emits the exported routine for one block size.
//  - w10 starts as the block height.  When w5 (row-extension flag) is
//    non-zero, x0 is moved back by three source rows (minus 4 strides, plus
//    1 stride) and 7 extra rows are processed.
//  - w4 selects kernel 0, 1, 2 or 3; any other value falls through to
//    kernel 0.
.macro LUMA_HPS w h
function x265_interp_8tap_horiz_ps_\w\()x\h\()_neon
    mov w10, #\h
    cmp w5, #0
    b.eq 6f
    sub x0, x0, x1, lsl #2

    add x0, x0, x1
    add w10, w10, #7
6:
    cmp w4, #0
    b.eq 0f
    cmp w4, #1
    b.eq 1f
    cmp w4, #2
    b.eq 2f
    cmp w4, #3
    b.eq 3f
0:
    HPS_FILTER \w \h qpel_filter_0_32b
    b 5f
1:
    HPS_FILTER \w \h qpel_filter_1_32b
    b 5f
2:
    HPS_FILTER \w \h qpel_filter_2_32b
    b 5f
3:
    HPS_FILTER \w \h qpel_filter_3_32b
    b 5f
5:
    ret
endfunc
.endm

// One routine per luma partition size (width height).
LUMA_HPS 4 4
LUMA_HPS 4 8
LUMA_HPS 4 16
LUMA_HPS 8 4
LUMA_HPS 8 8
LUMA_HPS 8 16
LUMA_HPS 8 32
LUMA_HPS 12 16
LUMA_HPS 16 4
LUMA_HPS 16 8
LUMA_HPS 16 12
LUMA_HPS 16 16
LUMA_HPS 16 32
LUMA_HPS 16 64
LUMA_HPS 24 32
LUMA_HPS 32 8
LUMA_HPS 32 16
LUMA_HPS 32 24
LUMA_HPS 32 32
LUMA_HPS 32 64
LUMA_HPS 48 64
LUMA_HPS 64 16
LUMA_HPS 64 32
LUMA_HPS 64 48
LUMA_HPS 64 64
View file
x265_3.4.tar.gz/source/common/aarch64/ipfilter8.h
Added
@@ -0,0 +1,55 @@ +/***************************************************************************** + * Copyright (C) 2020 MulticoreWare, Inc + * + * Authors: Yimeng Su <yimeng.su@huawei.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at license @ x265.com. 
+ *****************************************************************************/ + +#ifndef X265_IPFILTER8_AARCH64_H +#define X265_IPFILTER8_AARCH64_H + + +void x265_interp_8tap_horiz_ps_4x4_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_4x8_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_4x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_8x4_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_8x8_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_8x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_8x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_12x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_16x4_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_16x8_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_16x12_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_16x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_16x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, 
int isRowExt); +void x265_interp_8tap_horiz_ps_16x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_24x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_32x8_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_32x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_32x24_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_32x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_32x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_48x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_64x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_64x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_64x48_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_64x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); + + +#endif // ifndef X265_IPFILTER8_AARCH64_H
View file
x265_3.4.tar.gz/source/common/aarch64/mc-a.S
Added
/*****************************************************************************
 * Copyright (C) 2020 MulticoreWare, Inc
 *
 * Authors: Hongbin Liu <liuhongbin1@huawei.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at license @ x265.com.
 *****************************************************************************/

#include "asm.S"

.section .rodata

.align 4

.text

// Rounding average of two pixel blocks, fully unrolled per block height.
// Registers as used below: x0 = destination and x1 = its stride,
// x2 / x3 = first source and stride, x4 / x5 = second source and stride.
// Each pointer is advanced by its stride after every row.

// One 4-pixel row: load 32 bits from each source, urhadd, store 32 bits.
.macro pixel_avg_pp_row_4
    ld1 {v0.s}[0], [x2], x3
    ld1 {v1.s}[0], [x4], x5
    urhadd v2.8b, v0.8b, v1.8b
    st1 {v2.s}[0], [x0], x1
.endm

// One 8-pixel row: load 64 bits from each source, urhadd, store 64 bits.
.macro pixel_avg_pp_row_8
    ld1 {v0.8b}, [x2], x3
    ld1 {v1.8b}, [x4], x5
    urhadd v2.8b, v0.8b, v1.8b
    st1 {v2.8b}, [x0], x1
.endm

// Emits x265_pixel_avg_pp_4x<rows>_neon.
.macro pixel_avg_pp_4xN_neon rows
function x265_pixel_avg_pp_4x\rows\()_neon
.rept \rows
    pixel_avg_pp_row_4
.endr
    ret
endfunc
.endm

// Emits x265_pixel_avg_pp_8x<rows>_neon.
.macro pixel_avg_pp_8xN_neon rows
function x265_pixel_avg_pp_8x\rows\()_neon
.rept \rows
    pixel_avg_pp_row_8
.endr
    ret
endfunc
.endm

pixel_avg_pp_4xN_neon 4
pixel_avg_pp_4xN_neon 8
pixel_avg_pp_4xN_neon 16

pixel_avg_pp_8xN_neon 4
pixel_avg_pp_8xN_neon 8
pixel_avg_pp_8xN_neon 16
pixel_avg_pp_8xN_neon 32
View file
x265_3.4.tar.gz/source/common/aarch64/pixel-util.S
Added
@@ -0,0 +1,419 @@
+/*****************************************************************************
+ * Copyright (C) 2020 MulticoreWare, Inc
+ *
+ * Authors: Yimeng Su <yimeng.su@huawei.com>
+ * Hongbin Liu <liuhongbin1@huawei.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at license @ x265.com.
+ *****************************************************************************/
+
+#include "asm.S"
+
+.section .rodata
+
+.align 4
+
+.text
+
+.macro x265_satd_4x8_8x4_end_neon
+    add v0.8h, v4.8h, v6.8h        // tail shared by the SATD code: expects partial sums/differences in v4..v7, leaves the total in s0
+    add v1.8h, v5.8h, v7.8h
+    sub v2.8h, v4.8h, v6.8h
+    sub v3.8h, v5.8h, v7.8h
+
+    trn1 v16.8h, v0.8h, v1.8h      // interleave even 16-bit lanes so the next add/sub pairs neighbouring columns
+    trn2 v17.8h, v0.8h, v1.8h      // ... and the odd lanes
+    add v4.8h, v16.8h, v17.8h
+    trn1 v18.8h, v2.8h, v3.8h
+    trn2 v19.8h, v2.8h, v3.8h
+    sub v5.8h, v16.8h, v17.8h
+    add v6.8h, v18.8h, v19.8h
+    sub v7.8h, v18.8h, v19.8h
+    trn1 v0.4s, v4.4s, v6.4s       // same interleave at 32-bit granularity
+    trn2 v2.4s, v4.4s, v6.4s
+    abs v0.8h, v0.8h
+    trn1 v1.4s, v5.4s, v7.4s
+    trn2 v3.4s, v5.4s, v7.4s
+    abs v2.8h, v2.8h
+    abs v1.8h, v1.8h
+    abs v3.8h, v3.8h
+    umax v0.8h, v0.8h, v2.8h       // keep the larger magnitude of each pair instead of running one more add/sub stage
+    umax v1.8h, v1.8h, v3.8h
+    add v0.8h, v0.8h, v1.8h
+    uaddlv s0, v0.8h               // fold the eight 16-bit lanes into one 32-bit sum in s0
+.endm
+
+.macro pixel_satd_4x8_neon
+    ld1r {v1.2s}, [x2], x3         // SATD of one 4x8 block: x0/x1 and x2/x3 are the two pointer/stride pairs; every load post-increments
+    ld1r {v0.2s}, [x0], x1         // rows 0-3 are broadcast into both 32-bit lanes ...
+    ld1r {v3.2s}, [x2], x3
+    ld1r {v2.2s}, [x0], x1
+    ld1r {v5.2s}, [x2], x3
+    ld1r {v4.2s}, [x0], x1
+    ld1r {v7.2s}, [x2], x3
+    ld1r {v6.2s}, [x0], x1
+
+    ld1 {v1.s}[1], [x2], x3        // ... then rows 4-7 overwrite lane 1, so each register holds rows r and r+4
+    ld1 {v0.s}[1], [x0], x1
+    usubl v0.8h, v0.8b, v1.8b      // widened difference between the x0 rows and the x2 rows
+    ld1 {v3.s}[1], [x2], x3
+    ld1 {v2.s}[1], [x0], x1
+    usubl v1.8h, v2.8b, v3.8b
+    ld1 {v5.s}[1], [x2], x3
+    ld1 {v4.s}[1], [x0], x1
+    usubl v2.8h, v4.8b, v5.8b
+    ld1 {v7.s}[1], [x2], x3
+    add v4.8h, v0.8h, v1.8h        // first add/sub stage, interleaved with the remaining loads
+    sub v5.8h, v0.8h, v1.8h
+    ld1 {v6.s}[1], [x0], x1
+    usubl v3.8h, v6.8b, v7.8b
+    add v6.8h, v2.8h, v3.8h
+    sub v7.8h, v2.8h, v3.8h
+    x265_satd_4x8_8x4_end_neon
+.endm
+
+// template<int w, int h>
+// int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
+function x265_pixel_satd_4x8_neon
+    pixel_satd_4x8_neon
+    mov w0, v0.s[0]                // the macro leaves its 32-bit result in s0
+    ret
+endfunc
+
+// template<int w, int h>
+// int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
+function x265_pixel_satd_4x16_neon
+    eor w4, w4, w4                 // w4 = running total
+    pixel_satd_4x8_neon
+    mov w5, v0.s[0]
+    add w4, w4, w5
+    pixel_satd_4x8_neon
+    mov w5, v0.s[0]                // second call continues 8 rows further down because x0/x2 were post-incremented
+    add w0, w5, w4
+    ret
+endfunc
+
+// template<int w, int h>
+// int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
+function x265_pixel_satd_4x32_neon
+    eor w4, w4, w4                 // w4 = running total over four stacked 4x8 blocks
+.rept 4
+    pixel_satd_4x8_neon
+    mov w5, v0.s[0]
+    add w4, w4, w5
+.endr
+    mov w0, w4
+    ret
+endfunc
+
+// template<int w, int h>
+// int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
+function x265_pixel_satd_12x16_neon
+    mov x4, x0                     // keep both base pointers; the macro advances x0 and x2
+    mov x5, x2
+    eor w7, w7, w7                 // w7 = running total
+    pixel_satd_4x8_neon
+    mov w6, v0.s[0]                // column 0, rows 0-7
+    add w7, w7, w6
+    pixel_satd_4x8_neon
+    mov w6, v0.s[0]                // column 0, rows 8-15
+    add w7, w7, w6
+
+    add x0, x4, #4                 // restart at the top, 4 bytes to the right
+    add x2, x5, #4
+    pixel_satd_4x8_neon
+    mov w6, v0.s[0]
+    add w7, w7, w6
+    pixel_satd_4x8_neon
+    mov w6, v0.s[0]
+    add w7, w7, w6
+
+    add x0, x4, #8                 // third 4-byte-wide column
+    add x2, x5, #8
+    pixel_satd_4x8_neon
+    mov w6, v0.s[0]
+    add w7, w7, w6
+    pixel_satd_4x8_neon
+    mov w6, v0.s[0]
+    add w0, w7, w6
+    ret
+endfunc
+
+// template<int w, int h>
+// int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
+function x265_pixel_satd_12x32_neon
+    mov x4, x0                     // same three-column walk as 12x16, four 4x8 blocks per column
+    mov x5, x2
+    eor w7, w7, w7                 // w7 = running total
+.rept 4
+    pixel_satd_4x8_neon
+    mov w6, v0.s[0]
+    add w7, w7, w6
+.endr
+
+    add x0, x4, #4
+    add x2, x5, #4
+.rept 4
+    pixel_satd_4x8_neon
+    mov w6, v0.s[0]
+    add w7, w7, w6
+.endr
+
+    add x0, x4, #8
+    add x2, x5, #8
+.rept 4
+    pixel_satd_4x8_neon
+    mov w6, v0.s[0]
+    add w7, w7, w6
+.endr
+
+    mov w0, w7
+    ret
+endfunc
+
+// template<int w, int h>
+// int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
+function x265_pixel_satd_8x8_neon
+    eor w4, w4, w4                 // w4 = running total; the block is processed as two 4x8 columns
+    mov x6, x0
+    mov x7, x2
+    pixel_satd_4x8_neon
+    mov w5, v0.s[0]
+    add w4, w4, w5
+    add x0, x6, #4                 // right-hand column
+    add x2, x7, #4
+    pixel_satd_4x8_neon
+    mov w5, v0.s[0]
+    add w0, w4, w5
+    ret
+endfunc
+
+// int psyCost_pp(const pixel* source, intptr_t sstride, const pixel* recon, intptr_t rstride)
+function x265_psyCost_4x4_neon
+    ld1r {v4.2s}, [x0], x1         // source rows 0 and 1 (x0/x1), broadcast into both lanes
+    ld1r {v5.2s}, [x0], x1
+    ld1 {v4.s}[1], [x0], x1        // rows 2 and 3 replace lane 1: v4 = rows 0|2, v5 = rows 1|3
+    ld1 {v5.s}[1], [x0], x1
+
+    ld1r {v6.2s}, [x2], x3         // same layout for the recon block (x2/x3) in v6/v7
+    ld1r {v7.2s}, [x2], x3
+    ld1 {v6.s}[1], [x2], x3
+    ld1 {v7.s}[1], [x2], x3
+
+    uaddl v2.8h, v4.8b, v5.8b      // widened row sums/differences; source in v2/v3, recon in v18/v19
+    usubl v3.8h, v4.8b, v5.8b
+    uaddl v18.8h, v6.8b, v7.8b
+    usubl v19.8h, v6.8b, v7.8b
+
+    mov v20.d[0], v2.d[1]          // bring the upper half down so the two row pairs can be combined
+    add v0.4h, v2.4h, v20.4h
+    sub v1.4h, v2.4h, v20.4h
+    mov v21.d[0], v3.d[1]
+    add v22.4h, v3.4h, v21.4h
+    sub v23.4h, v3.4h, v21.4h
+
+    mov v24.d[0], v18.d[1]         // same step for recon
+    add v16.4h, v18.4h, v24.4h
+    sub v17.4h, v18.4h, v24.4h
+    mov v25.d[0], v19.d[1]
+    add v26.4h, v19.4h, v25.4h
+    sub v27.4h, v19.4h, v25.4h
+
+    mov v0.d[1], v22.d[0]          // repack into full 8h vectors before the lane interleave
+    mov v1.d[1], v23.d[0]
+    trn1 v22.8h, v0.8h, v1.8h
+    trn2 v23.8h, v0.8h, v1.8h
+    mov v16.d[1], v26.d[0]
+    mov v17.d[1], v27.d[0]
+    trn1 v26.8h, v16.8h, v17.8h
+    trn2 v27.8h, v16.8h, v17.8h
+
+    add v2.8h, v22.8h, v23.8h
+    sub v3.8h, v22.8h, v23.8h
+    add v18.8h, v26.8h, v27.8h
+    sub v19.8h, v26.8h, v27.8h
+
+    uaddl v20.8h, v4.8b, v5.8b     // plain pixel sums, reduced by the uaddlv pair below (source -> s20, recon -> s21)
+    uaddl v21.8h, v6.8b, v7.8b
+
+    trn1 v0.4s, v2.4s, v3.4s
+    trn2 v1.4s, v2.4s, v3.4s
+    trn1 v16.4s, v18.4s, v19.4s
+    trn2 v17.4s, v18.4s, v19.4s
+    abs v0.8h, v0.8h
+    abs v16.8h, v16.8h
+    abs v1.8h, v1.8h
+    abs v17.8h, v17.8h
+
+    uaddlv s20, v20.8h
+    uaddlv s21, v21.8h
+    mov v20.s[1], v21.s[0]         // v20.2s = { source pixel sum, recon pixel sum }
+
+    smax v0.8h, v0.8h, v1.8h       // larger magnitude of each pair, as in the SATD tail
+    smax v16.8h, v16.8h, v17.8h
+
+    trn1 v4.2d, v0.2d, v16.2d      // low 64 bits collect the source terms, high 64 bits the recon terms
+    trn2 v5.2d, v0.2d, v16.2d
+    add v0.8h, v4.8h, v5.8h
+    mov v4.d[0], v0.d[1]           // copy the recon half out before uaddlv overwrites v0
+    uaddlv s0, v0.4h
+    uaddlv s4, v4.4h
+
+    ushr v20.2s, v20.2s, #2        // pixel sum >> 2 for both blocks
+    mov v0.s[1], v4.s[0]
+    sub v0.2s, v0.2s, v20.2s       // per block: transformed-magnitude sum minus (pixel sum >> 2)
+    mov w0, v0.s[0]
+    mov w1, v0.s[1]
+    subs w0, w0, w1
+    cneg w0, w0, mi                // return the absolute difference between the source and recon values
+
+    ret
+endfunc
+
+// uint32_t quant_c(const int16_t* coef, const int32_t* quantCoeff, int32_t* deltaU, int16_t* qCoef, int qBits, int add, int numCoeff)
+function x265_quant_neon
+    mov w9, #1                     // per the prototype above: x0 coef, x1 quantCoeff, x2 deltaU, x3 qCoef, w4 qBits, w5 add, w6 numCoeff
+    lsl w9, w9, w4
+    dup v0.2s, w9                  // v0 = 1 << qBits, the mls multiplier
+    neg w9, w4
+    dup v1.4s, w9                  // v1 = -qBits: sshl by a negative count is an arithmetic right shift
+    add w9, w9, #8
+    dup v2.4s, w9                  // v2 = 8 - qBits, the shift applied to the deltaU remainder
+    dup v3.4s, w5                  // v3 = add, the rounding offset
+
+    lsr w6, w6, #2                 // four coefficients per iteration
+    eor v4.16b, v4.16b, v4.16b     // v4 accumulates -1 for every zero level
+    eor w10, w10, w10              // w10 counts every coefficient processed
+    eor v17.16b, v17.16b, v17.16b  // v17 = 0, the cmeq comparand
+
+.loop_quant:
+
+    ld1 {v18.4h}, [x0], #8         // 4 x int16 coef
+    ld1 {v7.4s}, [x1], #16         // 4 x int32 quantCoeff
+    sxtl v6.4s, v18.4h
+
+    cmlt v5.4s, v6.4s, #0          // v5 = all-ones mask where coef is negative
+
+    abs v6.4s, v6.4s
+
+
+    mul v6.4s, v6.4s, v7.4s        // v6 = |coef| * quantCoeff
+
+    add v7.4s, v6.4s, v3.4s
+    sshl v7.4s, v7.4s, v1.4s       // v7 = level = (v6 + add) >> qBits
+
+    mls v6.4s, v7.4s, v0.s[0]      // v6 -= level << qBits
+    sshl v16.4s, v6.4s, v2.4s      // shifted by (8 - qBits) before being stored
+    st1 {v16.4s}, [x2], #16        // deltaU
+
+    // numsig
+    cmeq v16.4s, v7.4s, v17.4s     // -1 in each lane whose level is zero
+    add v4.4s, v4.4s, v16.4s
+    add w10, w10, #4
+
+    // level *= sign
+    eor v16.16b, v7.16b, v5.16b    // (level ^ mask) - mask negates exactly the lanes where coef was negative
+    sub v16.4s, v16.4s, v5.4s
+    sqxtn v5.4h, v16.4s            // saturating narrow to int16
+    st1 {v5.4h}, [x3], #8          // qCoef
+
+    subs w6, w6, #1
+    b.ne .loop_quant               // NOTE(review): the counter is tested after the decrement, so numCoeff < 4 (w6 == 0 on entry) would wrap - presumably callers always pass a nonzero multiple of 4; confirm
+
+    addv s4, v4.4s
+    mov w9, v4.s[0]                // w9 = -(number of zero levels)
+    add w0, w10, w9                // return value = coefficients processed - zero levels = nonzero count
+    ret
+endfunc
+
+.macro satd_4x4_neon
+    ld1 {v1.s}[0], [x2], x3        // SATD of one 4x4 block: x0/x1 and x2/x3 are the two pointer/stride pairs; result ends up in v0.d[0]
+    ld1 {v0.s}[0], [x0], x1
+    ld1 {v3.s}[0], [x2], x3
+    ld1 {v2.s}[0], [x0], x1
+
+    ld1 {v1.s}[1], [x2], x3        // rows 2 and 3 go into lane 1: v0/v1 = rows 0|2, v2/v3 = rows 1|3
+    ld1 {v0.s}[1], [x0], x1
+    ld1 {v3.s}[1], [x2], x3
+    ld1 {v2.s}[1], [x0], x1
+
+    usubl v4.8h, v0.8b, v1.8b      // widened differences
+    usubl v5.8h, v2.8b, v3.8b
+
+    add v6.8h, v4.8h, v5.8h
+    sub v7.8h, v4.8h, v5.8h
+
+    mov v4.d[0], v6.d[1]           // bring the upper half down; only the low four lanes are consumed from here on
+    add v0.8h, v6.8h, v4.8h
+    sub v2.8h, v6.8h, v4.8h
+
+    mov v5.d[0], v7.d[1]
+    add v1.8h, v7.8h, v5.8h
+    sub v3.8h, v7.8h, v5.8h
+
+    trn1 v4.4h, v0.4h, v1.4h
+    trn2 v5.4h, v0.4h, v1.4h
+
+    trn1 v6.4h, v2.4h, v3.4h
+    trn2 v7.4h, v2.4h, v3.4h
+
+    add v0.4h, v4.4h, v5.4h
+    sub v1.4h, v4.4h, v5.4h
+
+    add v2.4h, v6.4h, v7.4h
+    sub v3.4h, v6.4h, v7.4h
+
+    trn1 v4.2s, v0.2s, v1.2s
+    trn2 v5.2s, v0.2s, v1.2s
+
+    trn1 v6.2s, v2.2s, v3.2s
+    trn2 v7.2s, v2.2s, v3.2s
+
+    abs v4.4h, v4.4h
+    abs v5.4h, v5.4h
+    abs v6.4h, v6.4h
+    abs v7.4h, v7.4h
+
+    smax v1.4h, v4.4h, v5.4h       // larger magnitude of each pair
+    smax v2.4h, v6.4h, v7.4h
+
+    add v0.4h, v1.4h, v2.4h
+    uaddlp v0.2s, v0.4h            // pairwise widening adds: 4 x 16-bit -> 2 x 32-bit -> 1 x 64-bit
+    uaddlp v0.1d, v0.2s
+.endm
+
+// int satd_4x4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
+function x265_pixel_satd_4x4_neon
+    satd_4x4_neon
+    umov x0, v0.d[0]
+    ret
+endfunc
+
+// int satd_8x4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
+function x265_pixel_satd_8x4_neon
+    mov x4, x0                     // keep the base pointers; the macro advances x0 and x2
+    mov x5, x2
+    satd_4x4_neon
+    add x0, x4, #4                 // right-hand 4x4 block
+    add x2, x5, #4
+    umov x6, v0.d[0]               // stash the left block result before the macro reuses v0
+    satd_4x4_neon
+    umov x0, v0.d[0]
+    add x0, x0, x6
+    ret
+endfunc
View file
x265_3.4.tar.gz/source/common/aarch64/pixel-util.h
Added
@@ -0,0 +1,40 @@
+/*****************************************************************************
+ * Copyright (C) 2020 MulticoreWare, Inc
+ *
+ * Authors: Yimeng Su <yimeng.su@huawei.com>
+ * Hongbin Liu <liuhongbin1@huawei.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at license @ x265.com.
+ *****************************************************************************/
+
+#ifndef X265_PIXEL_UTIL_AARCH64_H
+#define X265_PIXEL_UTIL_AARCH64_H
+
+int x265_pixel_satd_4x4_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2); // SATD entry points implemented in pixel-util.S; each takes two pixel pointers with their own strides
+int x265_pixel_satd_4x8_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
+int x265_pixel_satd_4x16_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
+int x265_pixel_satd_4x32_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
+int x265_pixel_satd_8x4_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
+int x265_pixel_satd_8x8_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
+int x265_pixel_satd_12x16_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
+int x265_pixel_satd_12x32_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
+
+uint32_t x265_quant_neon(const int16_t* coef, const int32_t* quantCoeff, int32_t* deltaU, int16_t* qCoef, int qBits, int add, int numCoeff); // returns the number of nonzero quantized levels; the assembly consumes numCoeff in groups of four
+int PFX(psyCost_4x4_neon)(const pixel* source, intptr_t sstride, const pixel* recon, intptr_t rstride); // NOTE(review): declared through PFX() while the assembly symbol is the literal x265_psyCost_4x4_neon - confirm PFX() expands to x265_ in every build that links this
+
+#endif // ifndef X265_PIXEL_UTIL_AARCH64_H
View file
x265_3.4.tar.gz/source/common/aarch64/pixel.h
Added
@@ -0,0 +1,105 @@
+/*****************************************************************************
+ * Copyright (C) 2020 MulticoreWare, Inc
+ *
+ * Authors: Hongbin Liu <liuhongbin1@huawei.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at license @ x265.com.
+ *****************************************************************************/
+
+#ifndef X265_I386_PIXEL_AARCH64_H // NOTE(review): the guard name carries an I386 tag although this header lives under common/aarch64 - looks copied; harmless as long as it stays unique
+#define X265_I386_PIXEL_AARCH64_H
+
+void x265_pixel_avg_pp_4x4_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); // pixel_avg_pp_WxH: writes the rounded average of src0 and src1 to dst; the trailing int is not read by the mc-a.S implementations
+void x265_pixel_avg_pp_4x8_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_4x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_8x4_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_8x8_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_8x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_8x32_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_12x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); // NOTE(review): mc-a.S in this change only instantiates the 4xN and 8xN sizes - confirm the wider averages are defined elsewhere before wiring them up
+void x265_pixel_avg_pp_16x4_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_16x8_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_16x12_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_16x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_16x32_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_16x64_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_24x32_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_32x8_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_32x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_32x24_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_32x32_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_32x64_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_48x64_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_64x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_64x32_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_64x48_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_64x64_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+
+void x265_sad_x3_4x4_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); // sad_x3_WxH: one fenc block against three references that share frefstride; NOTE(review): sad-a.S in this change only instantiates the 8x4/8x8/8x16/8x32 sizes - confirm the rest
+void x265_sad_x3_4x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
+void x265_sad_x3_4x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
+void x265_sad_x3_8x4_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); // the 8xN implementations store three 32-bit sums at res[0..2]
+void x265_sad_x3_8x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
+void x265_sad_x3_8x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
+void x265_sad_x3_8x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
+void x265_sad_x3_12x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
+void x265_sad_x3_16x4_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
+void x265_sad_x3_16x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
+void x265_sad_x3_16x12_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
+void x265_sad_x3_16x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
+void x265_sad_x3_16x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
+void x265_sad_x3_16x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
+void x265_sad_x3_24x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
+void x265_sad_x3_32x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
+void x265_sad_x3_32x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
+void x265_sad_x3_32x24_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
+void x265_sad_x3_32x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
+void x265_sad_x3_32x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
+void x265_sad_x3_48x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
+void x265_sad_x3_64x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
+void x265_sad_x3_64x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
+void x265_sad_x3_64x48_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
+void x265_sad_x3_64x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
+
+void x265_sad_x4_4x4_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); // sad_x4_WxH: same as sad_x3 with a fourth reference, so frefstride and res each move one argument slot later
+void x265_sad_x4_4x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
+void x265_sad_x4_4x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
+void x265_sad_x4_8x4_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); // the 8xN implementations store four 32-bit sums at res[0..3]
+void x265_sad_x4_8x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
+void x265_sad_x4_8x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
+void x265_sad_x4_8x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
+void x265_sad_x4_12x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
+void x265_sad_x4_16x4_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
+void x265_sad_x4_16x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
+void x265_sad_x4_16x12_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
+void x265_sad_x4_16x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
+void x265_sad_x4_16x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
+void x265_sad_x4_16x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
+void x265_sad_x4_24x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
+void x265_sad_x4_32x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
+void x265_sad_x4_32x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
+void x265_sad_x4_32x24_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
+void x265_sad_x4_32x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
+void x265_sad_x4_32x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
+void x265_sad_x4_48x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
+void x265_sad_x4_64x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
+void x265_sad_x4_64x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
+void x265_sad_x4_64x48_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
+void x265_sad_x4_64x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
+
+#endif // ifndef X265_I386_PIXEL_AARCH64_H
View file
x265_3.4.tar.gz/source/common/aarch64/sad-a.S
Added
@@ -0,0 +1,105 @@
+/*****************************************************************************
+ * Copyright (C) 2020 MulticoreWare, Inc
+ *
+ * Authors: Hongbin Liu <liuhongbin1@huawei.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at license @ x265.com.
+ *****************************************************************************/
+
+#include "asm.S"
+
+.section .rodata
+
+.align 4
+
+.text
+
+.macro SAD_X_START_8 x
+    ld1 {v0.8b}, [x0], x9          // first row: 8 fenc bytes from x0, then x0 += x9 (x9 is loaded with FENC_STRIDE by SAD_X_8xN)
+.if \x == 3
+    ld1 {v1.8b}, [x1], x4          // three-reference form: references in x1..x3, shared stride in x4
+    ld1 {v2.8b}, [x2], x4
+    ld1 {v3.8b}, [x3], x4
+.elseif \x == 4
+    ld1 {v1.8b}, [x1], x5          // four-reference form: references in x1..x4, shared stride in x5
+    ld1 {v2.8b}, [x2], x5
+    ld1 {v3.8b}, [x3], x5
+    ld1 {v4.8b}, [x4], x5
+.endif
+    uabdl v16.8h, v0.8b, v1.8b     // start each accumulator with |fenc - ref| widened to 16 bits
+    uabdl v17.8h, v0.8b, v2.8b
+    uabdl v18.8h, v0.8b, v3.8b
+.if \x == 4
+    uabdl v19.8h, v0.8b, v4.8b
+.endif
+.endm
+
+.macro SAD_X_8 x
+    ld1 {v0.8b}, [x0], x9          // every later row: same loads as SAD_X_START_8
+.if \x == 3
+    ld1 {v1.8b}, [x1], x4
+    ld1 {v2.8b}, [x2], x4
+    ld1 {v3.8b}, [x3], x4
+.elseif \x == 4
+    ld1 {v1.8b}, [x1], x5
+    ld1 {v2.8b}, [x2], x5
+    ld1 {v3.8b}, [x3], x5
+    ld1 {v4.8b}, [x4], x5
+.endif
+    uabal v16.8h, v0.8b, v1.8b     // ... but the absolute differences are added to the running accumulators
+    uabal v17.8h, v0.8b, v2.8b
+    uabal v18.8h, v0.8b, v3.8b
+.if \x == 4
+    uabal v19.8h, v0.8b, v4.8b
+.endif
+.endm
+
+.macro SAD_X_8xN x, h
+function x265_sad_x\x\()_8x\h\()_neon
+    mov x9, #FENC_STRIDE           // FENC_STRIDE is not defined in this file - presumably it comes from asm.S; confirm
+    SAD_X_START_8 \x
+.rept \h - 1
+    SAD_X_8 \x
+.endr
+    uaddlv s0, v16.8h              // fold the eight 16-bit lanes of each accumulator into a 32-bit sum
+    uaddlv s1, v17.8h
+    uaddlv s2, v18.8h
+.if \x == 4
+    uaddlv s3, v19.8h
+.endif
+
+.if \x == 3
+    stp s0, s1, [x5]               // three-reference form: res is the 6th argument (x5); writes res[0..2]
+    str s2, [x5, #8]
+.elseif \x == 4
+    stp s0, s1, [x6]               // four-reference form: res is the 7th argument (x6); writes res[0..3]
+    stp s2, s3, [x6, #8]
+.endif
+    ret
+endfunc
+.endm
+
+SAD_X_8xN 3 4
+SAD_X_8xN 3 8
+SAD_X_8xN 3 16
+SAD_X_8xN 3 32
+
+SAD_X_8xN 4 4
+SAD_X_8xN 4 8
+SAD_X_8xN 4 16
+SAD_X_8xN 4 32
View file
x265_3.3.tar.gz/source/common/arm/asm-primitives.cpp -> x265_3.4.tar.gz/source/common/arm/asm-primitives.cpp
Changed
@@ -5,6 +5,7 @@ * Praveen Kumar Tiwari <praveen@multicorewareinc.com> * Min Chen <chenm003@163.com> <min.chen@multicorewareinc.com> * Dnyaneshwar Gorade <dnyaneshwar@multicorewareinc.com> + * Hongbin Liu<liuhongbin1@huawei.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -48,77 +49,77 @@ p.ssim_4x4x2_core = PFX(ssim_4x4x2_core_neon); // addAvg - p.pu[LUMA_4x4].addAvg = PFX(addAvg_4x4_neon); - p.pu[LUMA_4x8].addAvg = PFX(addAvg_4x8_neon); - p.pu[LUMA_4x16].addAvg = PFX(addAvg_4x16_neon); - p.pu[LUMA_8x4].addAvg = PFX(addAvg_8x4_neon); - p.pu[LUMA_8x8].addAvg = PFX(addAvg_8x8_neon); - p.pu[LUMA_8x16].addAvg = PFX(addAvg_8x16_neon); - p.pu[LUMA_8x32].addAvg = PFX(addAvg_8x32_neon); - p.pu[LUMA_12x16].addAvg = PFX(addAvg_12x16_neon); - p.pu[LUMA_16x4].addAvg = PFX(addAvg_16x4_neon); - p.pu[LUMA_16x8].addAvg = PFX(addAvg_16x8_neon); - p.pu[LUMA_16x12].addAvg = PFX(addAvg_16x12_neon); - p.pu[LUMA_16x16].addAvg = PFX(addAvg_16x16_neon); - p.pu[LUMA_16x32].addAvg = PFX(addAvg_16x32_neon); - p.pu[LUMA_16x64].addAvg = PFX(addAvg_16x64_neon); - p.pu[LUMA_24x32].addAvg = PFX(addAvg_24x32_neon); - p.pu[LUMA_32x8].addAvg = PFX(addAvg_32x8_neon); - p.pu[LUMA_32x16].addAvg = PFX(addAvg_32x16_neon); - p.pu[LUMA_32x24].addAvg = PFX(addAvg_32x24_neon); - p.pu[LUMA_32x32].addAvg = PFX(addAvg_32x32_neon); - p.pu[LUMA_32x64].addAvg = PFX(addAvg_32x64_neon); - p.pu[LUMA_48x64].addAvg = PFX(addAvg_48x64_neon); - p.pu[LUMA_64x16].addAvg = PFX(addAvg_64x16_neon); - p.pu[LUMA_64x32].addAvg = PFX(addAvg_64x32_neon); - p.pu[LUMA_64x48].addAvg = PFX(addAvg_64x48_neon); - p.pu[LUMA_64x64].addAvg = PFX(addAvg_64x64_neon); + p.pu[LUMA_4x4].addAvg[NONALIGNED] = PFX(addAvg_4x4_neon); + p.pu[LUMA_4x8].addAvg[NONALIGNED] = PFX(addAvg_4x8_neon); + p.pu[LUMA_4x16].addAvg[NONALIGNED] = PFX(addAvg_4x16_neon); + p.pu[LUMA_8x4].addAvg[NONALIGNED] = PFX(addAvg_8x4_neon); + p.pu[LUMA_8x8].addAvg[NONALIGNED] 
= PFX(addAvg_8x8_neon); + p.pu[LUMA_8x16].addAvg[NONALIGNED] = PFX(addAvg_8x16_neon); + p.pu[LUMA_8x32].addAvg[NONALIGNED] = PFX(addAvg_8x32_neon); + p.pu[LUMA_12x16].addAvg[NONALIGNED] = PFX(addAvg_12x16_neon); + p.pu[LUMA_16x4].addAvg[NONALIGNED] = PFX(addAvg_16x4_neon); + p.pu[LUMA_16x8].addAvg[NONALIGNED] = PFX(addAvg_16x8_neon); + p.pu[LUMA_16x12].addAvg[NONALIGNED] = PFX(addAvg_16x12_neon); + p.pu[LUMA_16x16].addAvg[NONALIGNED] = PFX(addAvg_16x16_neon); + p.pu[LUMA_16x32].addAvg[NONALIGNED] = PFX(addAvg_16x32_neon); + p.pu[LUMA_16x64].addAvg[NONALIGNED] = PFX(addAvg_16x64_neon); + p.pu[LUMA_24x32].addAvg[NONALIGNED] = PFX(addAvg_24x32_neon); + p.pu[LUMA_32x8].addAvg[NONALIGNED] = PFX(addAvg_32x8_neon); + p.pu[LUMA_32x16].addAvg[NONALIGNED] = PFX(addAvg_32x16_neon); + p.pu[LUMA_32x24].addAvg[NONALIGNED] = PFX(addAvg_32x24_neon); + p.pu[LUMA_32x32].addAvg[NONALIGNED] = PFX(addAvg_32x32_neon); + p.pu[LUMA_32x64].addAvg[NONALIGNED] = PFX(addAvg_32x64_neon); + p.pu[LUMA_48x64].addAvg[NONALIGNED] = PFX(addAvg_48x64_neon); + p.pu[LUMA_64x16].addAvg[NONALIGNED] = PFX(addAvg_64x16_neon); + p.pu[LUMA_64x32].addAvg[NONALIGNED] = PFX(addAvg_64x32_neon); + p.pu[LUMA_64x48].addAvg[NONALIGNED] = PFX(addAvg_64x48_neon); + p.pu[LUMA_64x64].addAvg[NONALIGNED] = PFX(addAvg_64x64_neon); // chroma addAvg - p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].addAvg = PFX(addAvg_4x2_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].addAvg = PFX(addAvg_4x4_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].addAvg = PFX(addAvg_4x8_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_4x16].addAvg = PFX(addAvg_4x16_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_6x8].addAvg = PFX(addAvg_6x8_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_8x2].addAvg = PFX(addAvg_8x2_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].addAvg = PFX(addAvg_8x4_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_8x6].addAvg = PFX(addAvg_8x6_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].addAvg = PFX(addAvg_8x8_neon); - 
p.chroma[X265_CSP_I420].pu[CHROMA_420_8x16].addAvg = PFX(addAvg_8x16_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_8x32].addAvg = PFX(addAvg_8x32_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_12x16].addAvg = PFX(addAvg_12x16_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_16x4].addAvg = PFX(addAvg_16x4_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_16x8].addAvg = PFX(addAvg_16x8_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_16x12].addAvg = PFX(addAvg_16x12_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].addAvg = PFX(addAvg_16x16_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_16x32].addAvg = PFX(addAvg_16x32_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_24x32].addAvg = PFX(addAvg_24x32_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_32x8].addAvg = PFX(addAvg_32x8_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_32x16].addAvg = PFX(addAvg_32x16_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].addAvg = PFX(addAvg_32x24_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].addAvg = PFX(addAvg_32x32_neon); - - p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].addAvg = PFX(addAvg_4x8_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].addAvg = PFX(addAvg_4x16_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].addAvg = PFX(addAvg_4x32_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_6x16].addAvg = PFX(addAvg_6x16_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].addAvg = PFX(addAvg_8x4_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].addAvg = PFX(addAvg_8x8_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].addAvg = PFX(addAvg_8x12_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_8x16].addAvg = PFX(addAvg_8x16_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_8x32].addAvg = PFX(addAvg_8x32_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].addAvg = PFX(addAvg_8x64_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].addAvg = PFX(addAvg_12x32_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].addAvg = PFX(addAvg_16x8_neon); - 
p.chroma[X265_CSP_I422].pu[CHROMA_422_16x16].addAvg = PFX(addAvg_16x16_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].addAvg = PFX(addAvg_16x24_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_16x32].addAvg = PFX(addAvg_16x32_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].addAvg = PFX(addAvg_16x64_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].addAvg = PFX(addAvg_24x64_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].addAvg = PFX(addAvg_32x16_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].addAvg = PFX(addAvg_32x32_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].addAvg = PFX(addAvg_32x48_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].addAvg = PFX(addAvg_32x64_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].addAvg[NONALIGNED] = PFX(addAvg_4x2_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].addAvg[NONALIGNED] = PFX(addAvg_4x4_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].addAvg[NONALIGNED] = PFX(addAvg_4x8_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_4x16].addAvg[NONALIGNED] = PFX(addAvg_4x16_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_6x8].addAvg[NONALIGNED] = PFX(addAvg_6x8_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_8x2].addAvg[NONALIGNED] = PFX(addAvg_8x2_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].addAvg[NONALIGNED] = PFX(addAvg_8x4_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_8x6].addAvg[NONALIGNED] = PFX(addAvg_8x6_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].addAvg[NONALIGNED] = PFX(addAvg_8x8_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_8x16].addAvg[NONALIGNED] = PFX(addAvg_8x16_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_8x32].addAvg[NONALIGNED] = PFX(addAvg_8x32_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_12x16].addAvg[NONALIGNED] = PFX(addAvg_12x16_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_16x4].addAvg[NONALIGNED] = PFX(addAvg_16x4_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_16x8].addAvg[NONALIGNED] = PFX(addAvg_16x8_neon); + 
p.chroma[X265_CSP_I420].pu[CHROMA_420_16x12].addAvg[NONALIGNED] = PFX(addAvg_16x12_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].addAvg[NONALIGNED] = PFX(addAvg_16x16_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_16x32].addAvg[NONALIGNED] = PFX(addAvg_16x32_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_24x32].addAvg[NONALIGNED] = PFX(addAvg_24x32_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_32x8].addAvg[NONALIGNED] = PFX(addAvg_32x8_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_32x16].addAvg[NONALIGNED] = PFX(addAvg_32x16_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].addAvg[NONALIGNED] = PFX(addAvg_32x24_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].addAvg[NONALIGNED] = PFX(addAvg_32x32_neon); + + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].addAvg[NONALIGNED] = PFX(addAvg_4x8_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].addAvg[NONALIGNED] = PFX(addAvg_4x16_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].addAvg[NONALIGNED] = PFX(addAvg_4x32_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_6x16].addAvg[NONALIGNED] = PFX(addAvg_6x16_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].addAvg[NONALIGNED] = PFX(addAvg_8x4_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].addAvg[NONALIGNED] = PFX(addAvg_8x8_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].addAvg[NONALIGNED] = PFX(addAvg_8x12_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_8x16].addAvg[NONALIGNED] = PFX(addAvg_8x16_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_8x32].addAvg[NONALIGNED] = PFX(addAvg_8x32_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].addAvg[NONALIGNED] = PFX(addAvg_8x64_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].addAvg[NONALIGNED] = PFX(addAvg_12x32_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].addAvg[NONALIGNED] = PFX(addAvg_16x8_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_16x16].addAvg[NONALIGNED] = PFX(addAvg_16x16_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].addAvg[NONALIGNED] = PFX(addAvg_16x24_neon); + 
p.chroma[X265_CSP_I422].pu[CHROMA_422_16x32].addAvg[NONALIGNED] = PFX(addAvg_16x32_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].addAvg[NONALIGNED] = PFX(addAvg_16x64_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].addAvg[NONALIGNED] = PFX(addAvg_24x64_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].addAvg[NONALIGNED] = PFX(addAvg_32x16_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].addAvg[NONALIGNED] = PFX(addAvg_32x32_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].addAvg[NONALIGNED] = PFX(addAvg_32x48_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].addAvg[NONALIGNED] = PFX(addAvg_32x64_neon); // quant p.quant = PFX(quant_neon); @@ -402,7 +403,7 @@ p.scale2D_64to32 = PFX(scale2D_64to32_neon); // scale1D_128to64 - p.scale1D_128to64 = PFX(scale1D_128to64_neon); + p.scale1D_128to64[NONALIGNED] = PFX(scale1D_128to64_neon); // copy_count p.cu[BLOCK_4x4].copy_cnt = PFX(copy_cnt_4_neon); @@ -411,37 +412,37 @@ p.cu[BLOCK_32x32].copy_cnt = PFX(copy_cnt_32_neon); // filterPixelToShort - p.pu[LUMA_4x4].convert_p2s = PFX(filterPixelToShort_4x4_neon); - p.pu[LUMA_4x8].convert_p2s = PFX(filterPixelToShort_4x8_neon); - p.pu[LUMA_4x16].convert_p2s = PFX(filterPixelToShort_4x16_neon); - p.pu[LUMA_8x4].convert_p2s = PFX(filterPixelToShort_8x4_neon); - p.pu[LUMA_8x8].convert_p2s = PFX(filterPixelToShort_8x8_neon); - p.pu[LUMA_8x16].convert_p2s = PFX(filterPixelToShort_8x16_neon); - p.pu[LUMA_8x32].convert_p2s = PFX(filterPixelToShort_8x32_neon); - p.pu[LUMA_12x16].convert_p2s = PFX(filterPixelToShort_12x16_neon); - p.pu[LUMA_16x4].convert_p2s = PFX(filterPixelToShort_16x4_neon); - p.pu[LUMA_16x8].convert_p2s = PFX(filterPixelToShort_16x8_neon); - p.pu[LUMA_16x12].convert_p2s = PFX(filterPixelToShort_16x12_neon); - p.pu[LUMA_16x16].convert_p2s = PFX(filterPixelToShort_16x16_neon); - p.pu[LUMA_16x32].convert_p2s = PFX(filterPixelToShort_16x32_neon); - p.pu[LUMA_16x64].convert_p2s = PFX(filterPixelToShort_16x64_neon); - 
p.pu[LUMA_24x32].convert_p2s = PFX(filterPixelToShort_24x32_neon); - p.pu[LUMA_32x8].convert_p2s = PFX(filterPixelToShort_32x8_neon); - p.pu[LUMA_32x16].convert_p2s = PFX(filterPixelToShort_32x16_neon); - p.pu[LUMA_32x24].convert_p2s = PFX(filterPixelToShort_32x24_neon); - p.pu[LUMA_32x32].convert_p2s = PFX(filterPixelToShort_32x32_neon); - p.pu[LUMA_32x64].convert_p2s = PFX(filterPixelToShort_32x64_neon); - p.pu[LUMA_48x64].convert_p2s = PFX(filterPixelToShort_48x64_neon); - p.pu[LUMA_64x16].convert_p2s = PFX(filterPixelToShort_64x16_neon); - p.pu[LUMA_64x32].convert_p2s = PFX(filterPixelToShort_64x32_neon); - p.pu[LUMA_64x48].convert_p2s = PFX(filterPixelToShort_64x48_neon); - p.pu[LUMA_64x64].convert_p2s = PFX(filterPixelToShort_64x64_neon); + p.pu[LUMA_4x4].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_4x4_neon); + p.pu[LUMA_4x8].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_4x8_neon); + p.pu[LUMA_4x16].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_4x16_neon); + p.pu[LUMA_8x4].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_8x4_neon); + p.pu[LUMA_8x8].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_8x8_neon); + p.pu[LUMA_8x16].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_8x16_neon); + p.pu[LUMA_8x32].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_8x32_neon); + p.pu[LUMA_12x16].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_12x16_neon); + p.pu[LUMA_16x4].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_16x4_neon); + p.pu[LUMA_16x8].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_16x8_neon); + p.pu[LUMA_16x12].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_16x12_neon); + p.pu[LUMA_16x16].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_16x16_neon); + p.pu[LUMA_16x32].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_16x32_neon); + p.pu[LUMA_16x64].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_16x64_neon); + p.pu[LUMA_24x32].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_24x32_neon); + p.pu[LUMA_32x8].convert_p2s[NONALIGNED] = 
PFX(filterPixelToShort_32x8_neon); + p.pu[LUMA_32x16].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_32x16_neon); + p.pu[LUMA_32x24].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_32x24_neon); + p.pu[LUMA_32x32].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_32x32_neon); + p.pu[LUMA_32x64].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_32x64_neon); + p.pu[LUMA_48x64].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_48x64_neon); + p.pu[LUMA_64x16].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_64x16_neon); + p.pu[LUMA_64x32].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_64x32_neon); + p.pu[LUMA_64x48].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_64x48_neon); + p.pu[LUMA_64x64].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_64x64_neon); // Block_fill - p.cu[BLOCK_4x4].blockfill_s = PFX(blockfill_s_4x4_neon); - p.cu[BLOCK_8x8].blockfill_s = PFX(blockfill_s_8x8_neon); - p.cu[BLOCK_16x16].blockfill_s = PFX(blockfill_s_16x16_neon); - p.cu[BLOCK_32x32].blockfill_s = PFX(blockfill_s_32x32_neon); + p.cu[BLOCK_4x4].blockfill_s[NONALIGNED] = PFX(blockfill_s_4x4_neon); + p.cu[BLOCK_8x8].blockfill_s[NONALIGNED] = PFX(blockfill_s_8x8_neon); + p.cu[BLOCK_16x16].blockfill_s[NONALIGNED] = PFX(blockfill_s_16x16_neon); + p.cu[BLOCK_32x32].blockfill_s[NONALIGNED] = PFX(blockfill_s_32x32_neon); // Blockcopy_ss p.cu[BLOCK_4x4].copy_ss = PFX(blockcopy_ss_4x4_neon); @@ -495,21 +496,21 @@ p.chroma[X265_CSP_I422].cu[BLOCK_422_32x64].copy_sp = PFX(blockcopy_sp_32x64_neon); // pixel_add_ps - p.cu[BLOCK_4x4].add_ps = PFX(pixel_add_ps_4x4_neon); - p.cu[BLOCK_8x8].add_ps = PFX(pixel_add_ps_8x8_neon); - p.cu[BLOCK_16x16].add_ps = PFX(pixel_add_ps_16x16_neon); - p.cu[BLOCK_32x32].add_ps = PFX(pixel_add_ps_32x32_neon); - p.cu[BLOCK_64x64].add_ps = PFX(pixel_add_ps_64x64_neon); + p.cu[BLOCK_4x4].add_ps[NONALIGNED] = PFX(pixel_add_ps_4x4_neon); + p.cu[BLOCK_8x8].add_ps[NONALIGNED] = PFX(pixel_add_ps_8x8_neon); + p.cu[BLOCK_16x16].add_ps[NONALIGNED] = PFX(pixel_add_ps_16x16_neon); + 
p.cu[BLOCK_32x32].add_ps[NONALIGNED] = PFX(pixel_add_ps_32x32_neon); + p.cu[BLOCK_64x64].add_ps[NONALIGNED] = PFX(pixel_add_ps_64x64_neon); // chroma add_ps - p.chroma[X265_CSP_I420].cu[BLOCK_420_4x4].add_ps = PFX(pixel_add_ps_4x4_neon); - p.chroma[X265_CSP_I420].cu[BLOCK_420_8x8].add_ps = PFX(pixel_add_ps_8x8_neon); - p.chroma[X265_CSP_I420].cu[BLOCK_420_16x16].add_ps = PFX(pixel_add_ps_16x16_neon); - p.chroma[X265_CSP_I420].cu[BLOCK_420_32x32].add_ps = PFX(pixel_add_ps_32x32_neon); - p.chroma[X265_CSP_I422].cu[BLOCK_422_4x8].add_ps = PFX(pixel_add_ps_4x8_neon); - p.chroma[X265_CSP_I422].cu[BLOCK_422_8x16].add_ps = PFX(pixel_add_ps_8x16_neon); - p.chroma[X265_CSP_I422].cu[BLOCK_422_16x32].add_ps = PFX(pixel_add_ps_16x32_neon); - p.chroma[X265_CSP_I422].cu[BLOCK_422_32x64].add_ps = PFX(pixel_add_ps_32x64_neon); + p.chroma[X265_CSP_I420].cu[BLOCK_420_4x4].add_ps[NONALIGNED] = PFX(pixel_add_ps_4x4_neon); + p.chroma[X265_CSP_I420].cu[BLOCK_420_8x8].add_ps[NONALIGNED] = PFX(pixel_add_ps_8x8_neon); + p.chroma[X265_CSP_I420].cu[BLOCK_420_16x16].add_ps[NONALIGNED] = PFX(pixel_add_ps_16x16_neon); + p.chroma[X265_CSP_I420].cu[BLOCK_420_32x32].add_ps[NONALIGNED] = PFX(pixel_add_ps_32x32_neon); + p.chroma[X265_CSP_I422].cu[BLOCK_422_4x8].add_ps[NONALIGNED] = PFX(pixel_add_ps_4x8_neon); + p.chroma[X265_CSP_I422].cu[BLOCK_422_8x16].add_ps[NONALIGNED] = PFX(pixel_add_ps_8x16_neon); + p.chroma[X265_CSP_I422].cu[BLOCK_422_16x32].add_ps[NONALIGNED] = PFX(pixel_add_ps_16x32_neon); + p.chroma[X265_CSP_I422].cu[BLOCK_422_32x64].add_ps[NONALIGNED] = PFX(pixel_add_ps_32x64_neon); // cpy2Dto1D_shr p.cu[BLOCK_4x4].cpy2Dto1D_shr = PFX(cpy2Dto1D_shr_4x4_neon); @@ -518,10 +519,10 @@ p.cu[BLOCK_32x32].cpy2Dto1D_shr = PFX(cpy2Dto1D_shr_32x32_neon); // ssd_s - p.cu[BLOCK_4x4].ssd_s = PFX(pixel_ssd_s_4x4_neon); - p.cu[BLOCK_8x8].ssd_s = PFX(pixel_ssd_s_8x8_neon); - p.cu[BLOCK_16x16].ssd_s = PFX(pixel_ssd_s_16x16_neon); - p.cu[BLOCK_32x32].ssd_s = PFX(pixel_ssd_s_32x32_neon); + 
p.cu[BLOCK_4x4].ssd_s[NONALIGNED] = PFX(pixel_ssd_s_4x4_neon); + p.cu[BLOCK_8x8].ssd_s[NONALIGNED] = PFX(pixel_ssd_s_8x8_neon); + p.cu[BLOCK_16x16].ssd_s[NONALIGNED] = PFX(pixel_ssd_s_16x16_neon); + p.cu[BLOCK_32x32].ssd_s[NONALIGNED] = PFX(pixel_ssd_s_32x32_neon); // sse_ss p.cu[BLOCK_4x4].sse_ss = PFX(pixel_sse_ss_4x4_neon); @@ -548,10 +549,10 @@ p.chroma[X265_CSP_I422].cu[BLOCK_422_32x64].sub_ps = PFX(pixel_sub_ps_32x64_neon); // calc_Residual - p.cu[BLOCK_4x4].calcresidual = PFX(getResidual4_neon); - p.cu[BLOCK_8x8].calcresidual = PFX(getResidual8_neon); - p.cu[BLOCK_16x16].calcresidual = PFX(getResidual16_neon); - p.cu[BLOCK_32x32].calcresidual = PFX(getResidual32_neon); + p.cu[BLOCK_4x4].calcresidual[NONALIGNED] = PFX(getResidual4_neon); + p.cu[BLOCK_8x8].calcresidual[NONALIGNED] = PFX(getResidual8_neon); + p.cu[BLOCK_16x16].calcresidual[NONALIGNED] = PFX(getResidual16_neon); + p.cu[BLOCK_32x32].calcresidual[NONALIGNED] = PFX(getResidual32_neon); // sse_pp p.cu[BLOCK_4x4].sse_pp = PFX(pixel_sse_pp_4x4_neon); @@ -722,31 +723,31 @@ p.pu[LUMA_64x64].sad_x4 = PFX(sad_x4_64x64_neon); // pixel_avg_pp - p.pu[LUMA_4x4].pixelavg_pp = PFX(pixel_avg_pp_4x4_neon); - p.pu[LUMA_4x8].pixelavg_pp = PFX(pixel_avg_pp_4x8_neon); - p.pu[LUMA_4x16].pixelavg_pp = PFX(pixel_avg_pp_4x16_neon); - p.pu[LUMA_8x4].pixelavg_pp = PFX(pixel_avg_pp_8x4_neon); - p.pu[LUMA_8x8].pixelavg_pp = PFX(pixel_avg_pp_8x8_neon); - p.pu[LUMA_8x16].pixelavg_pp = PFX(pixel_avg_pp_8x16_neon); - p.pu[LUMA_8x32].pixelavg_pp = PFX(pixel_avg_pp_8x32_neon); - p.pu[LUMA_12x16].pixelavg_pp = PFX(pixel_avg_pp_12x16_neon); - p.pu[LUMA_16x4].pixelavg_pp = PFX(pixel_avg_pp_16x4_neon); - p.pu[LUMA_16x8].pixelavg_pp = PFX(pixel_avg_pp_16x8_neon); - p.pu[LUMA_16x12].pixelavg_pp = PFX(pixel_avg_pp_16x12_neon); - p.pu[LUMA_16x16].pixelavg_pp = PFX(pixel_avg_pp_16x16_neon); - p.pu[LUMA_16x32].pixelavg_pp = PFX(pixel_avg_pp_16x32_neon); - p.pu[LUMA_16x64].pixelavg_pp = PFX(pixel_avg_pp_16x64_neon); - 
p.pu[LUMA_24x32].pixelavg_pp = PFX(pixel_avg_pp_24x32_neon); - p.pu[LUMA_32x8].pixelavg_pp = PFX(pixel_avg_pp_32x8_neon); - p.pu[LUMA_32x16].pixelavg_pp = PFX(pixel_avg_pp_32x16_neon); - p.pu[LUMA_32x24].pixelavg_pp = PFX(pixel_avg_pp_32x24_neon); - p.pu[LUMA_32x32].pixelavg_pp = PFX(pixel_avg_pp_32x32_neon); - p.pu[LUMA_32x64].pixelavg_pp = PFX(pixel_avg_pp_32x64_neon); - p.pu[LUMA_48x64].pixelavg_pp = PFX(pixel_avg_pp_48x64_neon); - p.pu[LUMA_64x16].pixelavg_pp = PFX(pixel_avg_pp_64x16_neon); - p.pu[LUMA_64x32].pixelavg_pp = PFX(pixel_avg_pp_64x32_neon); - p.pu[LUMA_64x48].pixelavg_pp = PFX(pixel_avg_pp_64x48_neon); - p.pu[LUMA_64x64].pixelavg_pp = PFX(pixel_avg_pp_64x64_neon); + p.pu[LUMA_4x4].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_4x4_neon); + p.pu[LUMA_4x8].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_4x8_neon); + p.pu[LUMA_4x16].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_4x16_neon); + p.pu[LUMA_8x4].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_8x4_neon); + p.pu[LUMA_8x8].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_8x8_neon); + p.pu[LUMA_8x16].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_8x16_neon); + p.pu[LUMA_8x32].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_8x32_neon); + p.pu[LUMA_12x16].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_12x16_neon); + p.pu[LUMA_16x4].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_16x4_neon); + p.pu[LUMA_16x8].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_16x8_neon); + p.pu[LUMA_16x12].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_16x12_neon); + p.pu[LUMA_16x16].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_16x16_neon); + p.pu[LUMA_16x32].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_16x32_neon); + p.pu[LUMA_16x64].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_16x64_neon); + p.pu[LUMA_24x32].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_24x32_neon); + p.pu[LUMA_32x8].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_32x8_neon); + p.pu[LUMA_32x16].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_32x16_neon); + p.pu[LUMA_32x24].pixelavg_pp[NONALIGNED] = 
PFX(pixel_avg_pp_32x24_neon); + p.pu[LUMA_32x32].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_32x32_neon); + p.pu[LUMA_32x64].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_32x64_neon); + p.pu[LUMA_48x64].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_48x64_neon); + p.pu[LUMA_64x16].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_64x16_neon); + p.pu[LUMA_64x32].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_64x32_neon); + p.pu[LUMA_64x48].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_64x48_neon); + p.pu[LUMA_64x64].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_64x64_neon); // planecopy p.planecopy_cp = PFX(pixel_planecopy_cp_neon);
View file
x265_3.3.tar.gz/source/common/common.h -> x265_3.4.tar.gz/source/common/common.h
Changed
@@ -129,6 +129,7 @@ typedef uint64_t sum2_t; typedef uint64_t pixel4; typedef int64_t ssum2_t; +#define SHIFT_TO_BITPLANE 9 #define HISTOGRAM_BINS 1024 #else typedef uint8_t pixel; @@ -136,6 +137,7 @@ typedef uint32_t sum2_t; typedef uint32_t pixel4; typedef int32_t ssum2_t; // Signed sum +#define SHIFT_TO_BITPLANE 7 #define HISTOGRAM_BINS 256 #endif // if HIGH_BIT_DEPTH @@ -270,6 +272,9 @@ #define MAX_TR_SIZE (1 << MAX_LOG2_TR_SIZE) #define MAX_TS_SIZE (1 << MAX_LOG2_TS_SIZE) +#define RDCOST_BASED_RSKIP 1 +#define EDGE_BASED_RSKIP 2 + #define COEF_REMAIN_BIN_REDUCTION 3 // indicates the level at which the VLC // transitions from Golomb-Rice to TU+EG(k)
View file
x265_3.3.tar.gz/source/common/cpu.cpp -> x265_3.4.tar.gz/source/common/cpu.cpp
Changed
@@ -5,6 +5,8 @@ * Laurent Aimar <fenrir@via.ecp.fr> * Fiona Glaser <fiona@x264.com> * Steve Borho <steve@borho.org> + * Hongbin Liu <liuhongbin1@huawei.com> + * Yimeng Su <yimeng.su@huawei.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -367,6 +369,8 @@ flags |= PFX(cpu_fast_neon_mrc_test)() ? X265_CPU_FAST_NEON_MRC : 0; #endif // TODO: write dual issue test? currently it's A8 (dual issue) vs. A9 (fast mrc) +#elif X265_ARCH_ARM64 + flags |= X265_CPU_NEON; #endif // if HAVE_ARMV6 return flags; }
View file
x265_3.3.tar.gz/source/common/frame.cpp -> x265_3.4.tar.gz/source/common/frame.cpp
Changed
@@ -61,6 +61,8 @@ m_edgePic = NULL; m_gaussianPic = NULL; m_thetaPic = NULL; + m_edgeBitPlane = NULL; + m_edgeBitPic = NULL; } bool Frame::create(x265_param *param, float* quantOffsets) @@ -115,6 +117,19 @@ m_thetaPic = X265_MALLOC(pixel, m_stride * (maxHeight + (m_lumaMarginY * 2))); } + if (param->recursionSkipMode == EDGE_BASED_RSKIP) + { + uint32_t numCuInWidth = (param->sourceWidth + param->maxCUSize - 1) / param->maxCUSize; + uint32_t numCuInHeight = (param->sourceHeight + param->maxCUSize - 1) / param->maxCUSize; + uint32_t lumaMarginX = param->maxCUSize + 32; + uint32_t lumaMarginY = param->maxCUSize + 16; + uint32_t stride = (numCuInWidth * param->maxCUSize) + (lumaMarginX << 1); + uint32_t maxHeight = numCuInHeight * param->maxCUSize; + uint32_t bitPlaneSize = stride * (maxHeight + (lumaMarginY * 2)); + CHECKED_MALLOC_ZERO(m_edgeBitPlane, pixel, bitPlaneSize); + m_edgeBitPic = m_edgeBitPlane + lumaMarginY * stride + lumaMarginX; + } + if (m_fencPic->create(param, !!m_param->bCopyPicToFrame) && m_lowres.create(param, m_fencPic, param->rc.qgSize)) { X265_CHECK((m_reconColCount == NULL), "m_reconColCount was initialized"); @@ -267,4 +282,10 @@ X265_FREE(m_gaussianPic); X265_FREE(m_thetaPic); } + + if (m_param->recursionSkipMode == EDGE_BASED_RSKIP) + { + X265_FREE_ZERO(m_edgeBitPlane); + m_edgeBitPic = NULL; + } }
View file
x265_3.3.tar.gz/source/common/frame.h -> x265_3.4.tar.gz/source/common/frame.h
Changed
@@ -99,7 +99,7 @@ float* m_quantOffsets; // points to quantOffsets in x265_picture x265_sei m_userSEI; uint32_t m_picStruct; // picture structure SEI message - x265_dolby_vision_rpu m_rpu; + x265_dolby_vision_rpu m_rpu; /* Frame Parallelism - notification between FrameEncoders of available motion reference rows */ ThreadSafeInteger* m_reconRowFlag; // flag of CTU rows completely reconstructed and extended for motion reference @@ -137,6 +137,10 @@ pixel* m_gaussianPic; pixel* m_thetaPic; + /* edge bit plane for rskip mode 2 (EDGE_BASED_RSKIP) */ + pixel* m_edgeBitPlane; + pixel* m_edgeBitPic; + Frame(); bool create(x265_param *param, float* quantOffsets);
View file
x265_3.3.tar.gz/source/common/param.cpp -> x265_3.4.tar.gz/source/common/param.cpp
Changed
@@ -198,7 +198,8 @@ param->bEnableWeightedPred = 1; param->bEnableWeightedBiPred = 0; param->bEnableEarlySkip = 1; - param->bEnableRecursionSkip = 1; + param->recursionSkipMode = 1; + param->edgeVarThreshold = 0.05f; param->bEnableAMP = 0; param->bEnableRectInter = 0; param->rdLevel = 3; @@ -285,6 +286,7 @@ param->rc.bEnableConstVbv = 0; param->bResetZoneConfig = 1; param->reconfigWindowSize = 0; + param->decoderVbvMaxRate = 0; /* Video Usability Information (VUI) */ param->vui.aspectRatioIdc = 0; @@ -546,7 +548,7 @@ param->maxNumMergeCand = 5; param->searchMethod = X265_STAR_SEARCH; param->bEnableTransformSkip = 1; - param->bEnableRecursionSkip = 0; + param->recursionSkipMode = 0; param->maxNumReferences = 5; param->limitReferences = 0; param->lookaheadSlices = 0; // disabled for best quality @@ -598,7 +600,7 @@ param->rc.hevcAq = 0; param->rc.qpStep = 1; param->rc.bEnableGrain = 1; - param->bEnableRecursionSkip = 0; + param->recursionSkipMode = 0; param->psyRd = 4.0; param->psyRdoq = 10.0; param->bEnableSAO = 0; @@ -702,8 +704,9 @@ OPT("ref") p->maxNumReferences = atoi(value); OPT("fast-intra") p->bEnableFastIntra = atobool(value); OPT("early-skip") p->bEnableEarlySkip = atobool(value); - OPT("rskip") p->bEnableRecursionSkip = atobool(value); - OPT("me")p->searchMethod = parseName(value, x265_motion_est_names, bError); + OPT("rskip") p->recursionSkipMode = atoi(value); + OPT("rskip-edge-threshold") p->edgeVarThreshold = atoi(value)/100.0f; + OPT("me") p->searchMethod = parseName(value, x265_motion_est_names, bError); OPT("subme") p->subpelRefine = atoi(value); OPT("merange") p->searchRange = atoi(value); OPT("rect") p->bEnableRectInter = atobool(value); @@ -919,7 +922,7 @@ OPT("max-merge") p->maxNumMergeCand = (uint32_t)atoi(value); OPT("temporal-mvp") p->bEnableTemporalMvp = atobool(value); OPT("early-skip") p->bEnableEarlySkip = atobool(value); - OPT("rskip") p->bEnableRecursionSkip = atobool(value); + OPT("rskip") p->recursionSkipMode = atoi(value); 
OPT("rdpenalty") p->rdPenalty = atoi(value); OPT("tskip") p->bEnableTransformSkip = atobool(value); OPT("no-tskip-fast") p->bEnableTSkipFast = atobool(value); @@ -1221,6 +1224,7 @@ } } OPT("hist-threshold") p->edgeTransitionThreshold = atof(value); + OPT("rskip-edge-threshold") p->edgeVarThreshold = atoi(value)/100.0f; OPT("lookahead-threads") p->lookaheadThreads = atoi(value); OPT("opt-cu-delta-qp") p->bOptCUDeltaQP = atobool(value); OPT("multi-pass-opt-analysis") p->analysisMultiPassRefine = atobool(value); @@ -1596,9 +1600,16 @@ CHECK(param->rdLevel < 1 || param->rdLevel > 6, "RD Level is out of range"); CHECK(param->rdoqLevel < 0 || param->rdoqLevel > 2, - "RDOQ Level is out of range"); + "RDOQ Level is out of range"); CHECK(param->dynamicRd < 0 || param->dynamicRd > x265_ADAPT_RD_STRENGTH, - "Dynamic RD strength must be between 0 and 4"); + "Dynamic RD strength must be between 0 and 4"); + CHECK(param->recursionSkipMode > 2 || param->recursionSkipMode < 0, + "Invalid Recursion skip mode. Valid modes 0,1,2"); + if (param->recursionSkipMode == EDGE_BASED_RSKIP) + { + CHECK(param->edgeVarThreshold < 0.0f || param->edgeVarThreshold > 1.0f, + "Minimum edge density percentage for a CU should be an integer between 0 to 100"); + } CHECK(param->bframes && param->bframes >= param->lookaheadDepth && !param->rc.bStatRead, "Lookahead depth must be greater than the max consecutive bframe count"); CHECK(param->bframes < 0, @@ -1789,6 +1800,7 @@ } CHECK(param->confWinRightOffset < 0, "Conformance Window Right Offset must be 0 or greater"); CHECK(param->confWinBottomOffset < 0, "Conformance Window Bottom Offset must be 0 or greater"); + CHECK(param->decoderVbvMaxRate < 0, "Invalid Decoder Vbv Maxrate. 
Value can not be less than zero"); return check_failed; } @@ -1908,7 +1920,9 @@ TOOLVAL(param->psyRdoq, "psy-rdoq=%.2lf"); TOOLOPT(param->bEnableRdRefine, "rd-refine"); TOOLOPT(param->bEnableEarlySkip, "early-skip"); - TOOLOPT(param->bEnableRecursionSkip, "rskip"); + TOOLVAL(param->recursionSkipMode, "rskip mode=%d"); + if (param->recursionSkipMode == EDGE_BASED_RSKIP) + TOOLVAL(param->edgeVarThreshold, "rskip-edge-threshold=%.2f"); TOOLOPT(param->bEnableSplitRdSkip, "splitrd-skip"); TOOLVAL(param->noiseReductionIntra, "nr-intra=%d"); TOOLVAL(param->noiseReductionInter, "nr-inter=%d"); @@ -2066,7 +2080,10 @@ s += sprintf(s, " rd=%d", p->rdLevel); s += sprintf(s, " selective-sao=%d", p->selectiveSAO); BOOL(p->bEnableEarlySkip, "early-skip"); - BOOL(p->bEnableRecursionSkip, "rskip"); + BOOL(p->recursionSkipMode, "rskip"); + if (p->recursionSkipMode == EDGE_BASED_RSKIP) + s += sprintf(s, " rskip-edge-threshold=%f", p->edgeVarThreshold); + BOOL(p->bEnableFastIntra, "fast-intra"); BOOL(p->bEnableTSkipFast, "tskip-fast"); BOOL(p->bCULossless, "cu-lossless"); @@ -2204,6 +2221,7 @@ if (p->bEnableSceneCutAwareQp) s += sprintf(s, " scenecut-window=%d max-qp-delta=%d", p->scenecutWindow, p->maxQpDelta); s += sprintf(s, "conformance-window-offsets right=%d bottom=%d", p->confWinRightOffset, p->confWinBottomOffset); + s += sprintf(s, " decoder-max-rate=%d", p->decoderVbvMaxRate); #undef BOOL return buf; } @@ -2373,7 +2391,8 @@ dst->bSaoNonDeblocked = src->bSaoNonDeblocked; dst->rdLevel = src->rdLevel; dst->bEnableEarlySkip = src->bEnableEarlySkip; - dst->bEnableRecursionSkip = src->bEnableRecursionSkip; + dst->recursionSkipMode = src->recursionSkipMode; + dst->edgeVarThreshold = src->edgeVarThreshold; dst->bEnableFastIntra = src->bEnableFastIntra; dst->bEnableTSkipFast = src->bEnableTSkipFast; dst->bCULossless = src->bCULossless; @@ -2419,8 +2438,9 @@ dst->rc.zonefileCount = src->rc.zonefileCount; dst->reconfigWindowSize = src->reconfigWindowSize; dst->bResetZoneConfig = 
src->bResetZoneConfig; + dst->decoderVbvMaxRate = src->decoderVbvMaxRate; - if (src->rc.zonefileCount && src->rc.zones) + if (src->rc.zonefileCount && src->rc.zones && src->bResetZoneConfig) { for (int i = 0; i < src->rc.zonefileCount; i++) {
View file
x265_3.3.tar.gz/source/common/pixel.cpp -> x265_3.4.tar.gz/source/common/pixel.cpp
Changed
@@ -5,6 +5,7 @@ * Mandar Gurav <mandar@multicorewareinc.com> * Mahesh Pittala <mahesh@multicorewareinc.com> * Min Chen <min.chen@multicorewareinc.com> + * Hongbin Liu<liuhongbin1@huawei.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -265,6 +266,10 @@ { int satd = 0; +#if ENABLE_ASSEMBLY && X265_ARCH_ARM64 + pixelcmp_t satd_4x4 = x265_pixel_satd_4x4_neon; +#endif + for (int row = 0; row < h; row += 4) for (int col = 0; col < w; col += 4) satd += satd_4x4(pix1 + row * stride_pix1 + col, stride_pix1, @@ -279,6 +284,10 @@ { int satd = 0; +#if ENABLE_ASSEMBLY && X265_ARCH_ARM64 + pixelcmp_t satd_8x4 = x265_pixel_satd_8x4_neon; +#endif + for (int row = 0; row < h; row += 4) for (int col = 0; col < w; col += 8) satd += satd_8x4(pix1 + row * stride_pix1 + col, stride_pix1, @@ -876,6 +885,18 @@ } } +static void planecopy_pp_shr_c(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift) +{ + for (int r = 0; r < height; r++) + { + for (int c = 0; c < width; c++) + dst[c] = (pixel)((src[c] >> shift)); + + dst += dstStride; + src += srcStride; + } +} + static void planecopy_sp_shl_c(const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask) { for (int r = 0; r < height; r++) @@ -1316,6 +1337,7 @@ p.planecopy_cp = planecopy_cp_c; p.planecopy_sp = planecopy_sp_c; p.planecopy_sp_shl = planecopy_sp_shl_c; + p.planecopy_pp_shr = planecopy_pp_shr_c; #if HIGH_BIT_DEPTH p.planeClipAndMax = planeClipAndMax_c; #endif
View file
x265_3.3.tar.gz/source/common/primitives.h -> x265_3.4.tar.gz/source/common/primitives.h
Changed
@@ -8,6 +8,8 @@ * Rajesh Paulraj <rajesh@multicorewareinc.com> * Praveen Kumar Tiwari <praveen@multicorewareinc.com> * Min Chen <chenm003@163.com> + * Hongbin Liu<liuhongbin1@huawei.com> + * Yimeng Su <yimeng.su@huawei.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -204,6 +206,7 @@ typedef void (*sign_t)(int8_t *dst, const pixel *src1, const pixel *src2, const int endX); typedef void (*planecopy_cp_t) (const uint8_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift); typedef void (*planecopy_sp_t) (const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask); +typedef void (*planecopy_pp_t) (const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift); typedef pixel (*planeClipAndMax_t)(pixel *src, intptr_t stride, int width, int height, uint64_t *outsum, const pixel minPix, const pixel maxPix); typedef void (*cutree_propagate_cost) (int* dst, const uint16_t* propagateIn, const int32_t* intraCosts, const uint16_t* interCosts, const int32_t* invQscales, const double* fpsFactor, int len); @@ -358,6 +361,7 @@ planecopy_cp_t planecopy_cp; planecopy_sp_t planecopy_sp; planecopy_sp_t planecopy_sp_shl; + planecopy_pp_t planecopy_pp_shr; planeClipAndMax_t planeClipAndMax; weightp_sp_t weight_sp; @@ -465,6 +469,9 @@ void setupInstrinsicPrimitives(EncoderPrimitives &p, int cpuMask); void setupAssemblyPrimitives(EncoderPrimitives &p, int cpuMask); void setupAliasPrimitives(EncoderPrimitives &p); +#if X265_ARCH_ARM64 +void setupAliasCPrimitives(EncoderPrimitives &cp, EncoderPrimitives &asmp, int cpuMask); +#endif #if HAVE_ALTIVEC void setupPixelPrimitives_altivec(EncoderPrimitives &p); void setupDCTPrimitives_altivec(EncoderPrimitives &p); @@ -479,4 +486,10 @@ extern const char* PFX(build_info_str); #endif +#if ENABLE_ASSEMBLY && 
X265_ARCH_ARM64 +extern "C" { +#include "aarch64/pixel-util.h" +} +#endif + #endif // ifndef X265_PRIMITIVES_H
View file
x265_3.4.tar.gz/source/common/scaler.cpp
Added
@@ -0,0 +1,1110 @@ +/***************************************************************************** +* Copyright (C) 2013-2020 MulticoreWare, Inc +* +* Authors: Pooja Venkatesan <pooja@multicorewareinc.com> +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software +* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. +* +* This program is also available under a commercial proprietary license. +* For more information, contact us at license @ x265.com. 
+*****************************************************************************/ + +#include "scaler.h" + +#if _MSC_VER +#pragma warning(disable: 4706) // assignment within conditional +#pragma warning(disable: 4244) // '=' : possible loss of data +#endif + +#define SHORT_MIN (-(1 << 15)) +#define SHORT_MAX ((1 << 15) - 1) +#define SHORT_MAX_10 ((1 << 10) - 1) + +namespace X265_NS{ + +ScalerFilterManager::ScalerFilterManager() : + m_bitDepth(0), + m_algorithmFlags(0), + m_srcW(0), + m_srcH(0), + m_dstW(0), + m_dstH(0), + m_crSrcW(0), + m_crSrcH(0), + m_crDstW(0), + m_crDstH(0), + m_crSrcHSubSample(0), + m_crSrcVSubSample(0), + m_crDstHSubSample(0), + m_crDstVSubSample(0) +{ + for (int i = 0; i < m_numSlice; i++) + m_slices[i] = NULL; + for (int i = 0; i < m_numFilter; i++) + m_ScalerFilters[i] = NULL; +} + +inline static void filter_copy_c(int64_t* filter, int64_t* filter2, int size) +{ + for (int i = 0; i < size; i++) + filter2[i] = filter[i]; +} + +#if X265_DEPTH == 8 +static void doScaling_c(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize) +{ + for (int i = 0; i < dstW; i++) + { + int val = 0; + int sourcePos = filterPos[i]; + for (int j = 0; j < filterSize; j++) + val += ((int)src[sourcePos + j]) * filter[filterSize * i + j]; + // the cubic equation does overflow ... 
+ dst[i] = x265_clip3(SHORT_MIN, SHORT_MAX, val >> 7); + } +} +static uint8_t clipUint8(int a) +{ + if (a&(~0xFF)) + return (-a) >> 31; + else + return a; +} + +static void yuv2PlaneX_c(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW) +{ + for (int i = 0; i < dstW; i++) + { + int val = 64 << 12; + for (int j = 0; j < filterSize; j++) + val += src[j][i] * filter[j]; + dest[i] = clipUint8(val >> 19); + } +} +#else +static void yuv2PlaneX_c_h(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW) +{ + for (int i = 0; i < dstW; i++) + { + int val = 1 << 16; + uint16_t* dst16bit = (uint16_t *)dest; + for (int j = 0; j < filterSize; j++) + val += src[j][i] * filter[j]; + uint16_t d = x265_clip3(0, SHORT_MAX_10, val >> 17); + ((uint8_t*)(&dst16bit[i]))[0] = (d); + ((uint8_t*)(&dst16bit[i]))[1] = (d) >> 8; + } +} +static void doScaling_c_h(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize) +{ + const uint16_t *srcLocal = (const uint16_t *)src; + for (int i = 0; i < dstW; i++) + { + int val = 0; + int sourcePos = filterPos[i]; + for (int j = 0; j < filterSize; j++) + val += ((int)srcLocal[sourcePos + j]) * filter[filterSize * i + j]; + // the cubic equation does overflow + dst[i] = x265_clip3(SHORT_MIN, SHORT_MAX, val >> 9); + } +} +#endif + +ScalerFilter::ScalerFilter() : + m_filtLen(0), + m_filtPos(NULL), + m_filt(NULL), + m_sourceSlice(NULL), + m_destSlice(NULL) +{ +} + +ScalerFilter::~ScalerFilter() +{ + if (m_filtPos) { + delete[] m_filtPos; m_filtPos = NULL; + } + if (m_filt) { + delete[] m_filt; m_filt = NULL; + } +} + +void ScalerHLumFilter::process(int sliceVer, int sliceHor) +{ + uint8_t ** src = m_sourceSlice->m_plane[0].lineBuf; + uint8_t ** dst = m_destSlice->m_plane[0].lineBuf; + int sourcePos = sliceVer - m_sourceSlice->m_plane[0].sliceVer; + int destPos = sliceVer - m_destSlice->m_plane[0].sliceVer; + int dstW = m_destSlice->m_width; + 
for (int i = 0; i < sliceHor; ++i) + { + m_hFilterScaler->doScaling((int16_t*)dst[destPos + i], dstW, (const uint8_t *)src[sourcePos + i], m_filt, m_filtPos, m_filtLen); + m_destSlice->m_plane[0].sliceHor += 1; + } +} + +void ScalerHCrFilter::process(int sliceVer, int sliceHor) +{ + uint8_t ** src1 = m_sourceSlice->m_plane[1].lineBuf; + uint8_t ** dst1 = m_destSlice->m_plane[1].lineBuf; + uint8_t ** src2 = m_sourceSlice->m_plane[2].lineBuf; + uint8_t ** dst2 = m_destSlice->m_plane[2].lineBuf; + + int sourcePos1 = sliceVer - m_sourceSlice->m_plane[1].sliceVer; + int destPos1 = sliceVer - m_destSlice->m_plane[1].sliceVer; + int sourcePos2 = sliceVer - m_sourceSlice->m_plane[2].sliceVer; + int destPos2 = sliceVer - m_destSlice->m_plane[2].sliceVer; + + int dstW = m_destSlice->m_width >> m_destSlice->m_hCrSubSample; + + for (int i = 0; i < sliceHor; ++i) + { + m_hFilterScaler->doScaling((int16_t*)dst1[destPos1 + i], dstW, src1[sourcePos1 + i], m_filt, m_filtPos, m_filtLen); + m_hFilterScaler->doScaling((int16_t*)dst2[destPos2 + i], dstW, src2[sourcePos2 + i], m_filt, m_filtPos, m_filtLen); + m_destSlice->m_plane[1].sliceHor += 1; + m_destSlice->m_plane[2].sliceHor += 1; + } +} + +void VFilterScaler8Bit::yuv2PlaneX(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW) +{ + int IdxW = FACTOR_4; + int IdxF = FIL_DEF; + + (dstW % 4 == 0) && (filterSize == 6) && (IdxF = FIL_6) && (IdxW = FACTOR_4); + (dstW % 4 == 0) && (filterSize == 8) && (IdxF = FIL_8) && (IdxW = FACTOR_4); + +#if X265_DEPTH == 8 + yuv2PlaneX_c(filter, filterSize, src, dest, dstW); +#else + yuv2PlaneX_c_h(filter, filterSize, src, dest, dstW); +#endif +} + +void VFilterScaler10Bit::yuv2PlaneX(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW) +{ + int IdxW = FACTOR_4; + int IdxF = FIL_DEF; + + (dstW % 4 == 0) && (filterSize == 6) && (IdxF = FIL_6) && (IdxW = FACTOR_4); + (dstW % 4 == 0) && (filterSize == 8) && (IdxF = FIL_8) && (IdxW = 
FACTOR_4); + +#if X265_DEPTH == 8 + yuv2PlaneX_c(filter, filterSize, src, dest, dstW); +#else + yuv2PlaneX_c_h(filter, filterSize, src, dest, dstW); +#endif +} + +void ScalerVLumFilter::process(int sliceVer, int sliceHor) +{ + (void)sliceHor; + int first = X265_MAX(1 - m_filtLen, m_filtPos[sliceVer]); + int sp = first - m_sourceSlice->m_plane[0].sliceVer; + int dp = sliceVer - m_destSlice->m_plane[0].sliceVer; + uint8_t **src = m_sourceSlice->m_plane[0].lineBuf + sp; + uint8_t **dst = m_destSlice->m_plane[0].lineBuf + dp; + int16_t *filter = m_filt + (sliceVer * m_filtLen); + int dstW = m_destSlice->m_width; + m_vFilterScaler->yuv2PlaneX(filter, m_filtLen, (const int16_t**)src, dst[0], dstW); +} + +void ScalerVCrFilter::process(int sliceVer, int sliceHor) +{ + (void)sliceHor; + + const int crSkipMask = (1 << m_destSlice->m_vCrSubSample) - 1; + if (sliceVer & crSkipMask) + return; + else + { + int dstW = m_destSlice->m_width >> m_destSlice->m_hCrSubSample; + int crSliceVer = sliceVer >> m_destSlice->m_vCrSubSample; + int first = X265_MAX(1 - m_filtLen, m_filtPos[crSliceVer]); + int sp1 = first - m_sourceSlice->m_plane[1].sliceVer; + int sp2 = first - m_sourceSlice->m_plane[2].sliceVer; + int dp1 = crSliceVer - m_destSlice->m_plane[1].sliceVer; + int dp2 = crSliceVer - m_destSlice->m_plane[2].sliceVer; + uint8_t **src1 = m_sourceSlice->m_plane[1].lineBuf + sp1; + uint8_t **src2 = m_sourceSlice->m_plane[2].lineBuf + sp2; + uint8_t **dst1 = m_destSlice->m_plane[1].lineBuf + dp1; + uint8_t **dst2 = m_destSlice->m_plane[2].lineBuf + dp2; + int16_t *filter = m_filt + (crSliceVer * m_filtLen); + + m_vFilterScaler->yuv2PlaneX((int16_t*)filter, m_filtLen, (const int16_t**)src1, dst1[0], dstW); + m_vFilterScaler->yuv2PlaneX((int16_t*)filter, m_filtLen, (const int16_t**)src2, dst2[0], dstW); + } +} + +int ScalerFilter::initCoeff(int flag, int inc, int srcW, int dstW, int filtAlign, int one, int sourcePos, int destPos) +{ + int filterSize; + int filter2Size; + int 
minFilterSize; + int64_t *filter = NULL; + int64_t *filter2 = NULL; + const int64_t fone = 1LL << (54 - x265_min((int)X265_LOG2(srcW / dstW), 8)); + int *outFilterSize = &m_filtLen; + int64_t xDstInSrc; + int sizeFactor = flag; + + // Init filter pos, the +3 is for the MMX(+1) / SSE(+3) scaler which reads over the end + m_filtPos = new int32_t[dstW + 3]; + int32_t **filterPos = &m_filtPos; + + if (inc <= 1 << 16) + filterSize = 1 + sizeFactor; // upscale + else + filterSize = 1 + (sizeFactor * srcW + dstW - 1) / dstW; + + filterSize = x265_min(filterSize, srcW - 2); + filterSize = x265_max(filterSize, 1); + filter = new int64_t[dstW * sizeof(*filter) * filterSize]; + + xDstInSrc = ((destPos*(int64_t)inc) >> 7) - ((sourcePos * 0x10000LL) >> 7); + for (int i = 0; i < dstW; i++) + { + int xx = (xDstInSrc - (filterSize - 2) * (1LL << 16)) / (1 << 17); + (*filterPos)[i] = xx; + for (int j = 0; j < filterSize; j++) + { + int64_t d = (X265_ABS(((int64_t)xx * (1 << 17)) - xDstInSrc)) << 13; + int64_t coeff = 0; + + if (inc > 1 << 16) + d = d * dstW / srcW; + + if (flag == 4) // BiCUBIC + { + int64_t B = (0) * (1 << 24); + int64_t C = (0.6) * (1 << 24); + + if (d >= 1LL << 31) + coeff = 0.0; + else + { + int64_t dd = (d * d) >> 30; + int64_t ddd = (dd * d) >> 30; + + if (d < 1LL << 30) + coeff = (12 * (1 << 24) - 9 * B - 6 * C) * ddd + (-18 * (1 << 24) + 12 * B + 6 * C) * dd + (6 * (1 << 24) - 2 * B) * (1 << 30); + else + coeff = (-B - 6 * C) * ddd + (6 * B + 30 * C) * dd + (-12 * B - 48 * C) * d + (8 * B + 24 * C) * (1 << 30); + } + coeff /= (1LL << 54) / fone; + } + else if (flag == 1) // BILINEAR + { + coeff = (1 << 30) - d; + if (coeff < 0) + coeff = 0; + coeff *= fone >> 30; + } + else + assert(0); + + filter[i * filterSize + j] = coeff; + xx++; + } + xDstInSrc += 2 * inc; + } + + //apply src & dst Filter to filter -> filter2 + X265_CHECK(filterSize > 0, "invalid filterSize value.\n"); + filter2Size = filterSize; + filter2 = new int64_t[dstW * sizeof(*filter2) * 
filter2Size]; + + /* This is hard to read code, but much faster. Speed is crucial here */ + int index = RES_FACTOR_DEF; + int size = dstW * filterSize; + + (size % 4 == 0) && (index = RES_FACTOR_4); + (size % 8 == 0) && (index = RES_FACTOR_8); + (size % 16 == 0) && (index = RES_FACTOR_16); + (size % 32 == 0) && (index = RES_FACTOR_32); + (size % 64 == 0) && (index = RES_FACTOR_64); + + filter_copy_c(filter, filter2, size); + + delete[](filter); + + // try to reduce the filter-size (step1 find size and shift left) + // Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not). + minFilterSize = 0; + for (int i = dstW - 1; i >= 0; i--) + { + int min = filter2Size; + int64_t cutOff = 0.0; + + // get rid of near zero elements on the left by shifting left + for (int j = 0; j < filter2Size; j++) + { + int k; + cutOff += X265_ABS(filter2[i * filter2Size]); + + if (cutOff > SCALER_MAX_REDUCE_CUTOFF * fone) + break; + // preserve monotonicity because the core can't handle the filter otherwise + if (i < dstW - 1 && (*filterPos)[i] >= (*filterPos)[i + 1]) + break; + + // move filter coefficients left + for (k = 1; k < filter2Size; k++) + filter2[i * filter2Size + k - 1] = filter2[i * filter2Size + k]; + filter2[i * filter2Size + k - 1] = 0; + (*filterPos)[i]++; + } + + cutOff = 0; + // count near zeros on the right + for (int j = filter2Size - 1; j > 0; j--) + { + cutOff += X265_ABS(filter2[i * filter2Size + j]); + + if (cutOff > SCALER_MAX_REDUCE_CUTOFF * fone) + break; + min--; + } + + if (min > minFilterSize) + minFilterSize = min; + } + + X265_CHECK(minFilterSize > 0, "invalid minFilterSize value.\n"); + filterSize = (minFilterSize + (filtAlign - 1)) & (~(filtAlign - 1)); + X265_CHECK(filterSize > 0, "invalid filterSize value.\n"); + filter = new int64_t[dstW*filterSize * sizeof(*filter)]; + + *outFilterSize = filterSize; + + // try to reduce the filter-size (step2 reduce it) + for (int i = 0; i < dstW; i++) + { + for (int j = 0; j < filterSize; j++) + { + if 
(j >= filter2Size) + filter[i * filterSize + j] = 0; + else + filter[i * filterSize + j] = filter2[i * filter2Size + j]; + if ((flag & SCALER_BITEXACT) && j >= minFilterSize) + filter[i * filterSize + j] = 0; + } + } + + // fix borders + for (int i = 0; i < dstW; i++) + { + int j; + if ((*filterPos)[i] < 0) + { + // move filter coefficients left to compensate for filterPos + for (j = 1; j < filterSize; j++) + { + int left = x265_max(j + (*filterPos)[i], 0); + filter[i * filterSize + left] += filter[i * filterSize + j]; + filter[i * filterSize + j] = 0; + } + (*filterPos)[i] = 0; + } + + if ((*filterPos)[i] + filterSize > srcW) + { + int shift = (*filterPos)[i] + x265_min(filterSize - srcW, 0); + int64_t acc = 0; + + for (j = filterSize - 1; j >= 0; j--) + { + if ((*filterPos)[i] + j >= srcW) + { + acc += filter[i * filterSize + j]; + filter[i * filterSize + j] = 0; + } + } + for (j = filterSize - 1; j >= 0; j--) + { + if (j < shift) + filter[i * filterSize + j] = 0; + else + filter[i * filterSize + j] = filter[i * filterSize + j - shift]; + } + + (*filterPos)[i] -= shift; + filter[i * filterSize + srcW - 1 - (*filterPos)[i]] += acc; + } + + X265_CHECK((*filterPos)[i] >= 0, "invalid: Value of (*filterPos)[%d] < 0.\n", i); + X265_CHECK((*filterPos)[i] < srcW, "invalid: Value of (*filterPos)[%d] > %d .\n", i, srcW); + if ((*filterPos)[i] + filterSize > srcW) + { + for (j = 0; j < filterSize; j++) + { + X265_CHECK(!filter[i * filterSize + j], "invalid: Value of filter[%d * filterSize + %d] != 0.\n", i, j); + X265_CHECK((*filterPos)[i] + j < srcW, "invalid: (*filterPos)[%d] + %d > %d .\n", i, i, srcW); + } + } + } + + // init filter + m_filt = new int16_t[(dstW + 3)*(*outFilterSize)]; + int16_t **outFilter = &m_filt; + + // normalize & store in outFilter + for (int i = 0; i < dstW; i++) + { + int64_t error = 0; + int64_t sum = 0; + + for (int j = 0; j < filterSize; j++) + sum += filter[i * filterSize + j]; + sum = (sum + one / 2) / one; + if (!sum) + { + x265_log(NULL, 
X265_LOG_WARNING, "Scaler: zero vector in scaling\n"); + sum = 1; + } + for (int j = 0; j < *outFilterSize; j++) + { + int64_t v = filter[i * filterSize + j] + error; + int intV = ROUNDED_DIVISION(v, sum); + (*outFilter)[i * (*outFilterSize) + j] = intV; + error = v - intV * sum; + } + } + + (*filterPos)[dstW + 0] = + (*filterPos)[dstW + 1] = + (*filterPos)[dstW + 2] = (*filterPos)[dstW - 1]; + for (int i = 0; i < *outFilterSize; i++) + { + int k = (dstW - 1) * (*outFilterSize) + i; + (*outFilter)[k + 1 * (*outFilterSize)] = + (*outFilter)[k + 2 * (*outFilterSize)] = + (*outFilter)[k + 3 * (*outFilterSize)] = (*outFilter)[k]; + } + + delete[](filter); + delete[](filter2); + return 0; +} + +int ScalerFilterManager::init(int algorithmFlags, VideoDesc *srcVideoDesc, VideoDesc *dstVideoDesc) +{ + int srcW = m_srcW = srcVideoDesc->m_width; + int srcH = m_srcH = srcVideoDesc->m_height; + int dstW = m_dstW = dstVideoDesc->m_width; + int dstH = m_dstH = dstVideoDesc->m_height; + int lumXInc, crXInc; + int lumYInc, crYInc; + int srcHCrPos; + int dstHCrPos; + int srcVCrPos; + int dstVCrPos; + int dst_stride = SCALER_ALIGN(dstW * sizeof(int16_t) + 66, 16); + m_bitDepth = dstVideoDesc->m_inputDepth; + if (m_bitDepth == 16) + dst_stride <<= 1; + + m_algorithmFlags = algorithmFlags; + lumXInc = (((int64_t)srcW << 16) + (dstW >> 1)) / dstW; + lumYInc = (((int64_t)srcH << 16) + (dstH >> 1)) / dstH; + + srcHCrPos = -513; + dstHCrPos = -513; + srcVCrPos = -513; + dstVCrPos = -513; + + int srcCsp = srcVideoDesc->m_csp; + if (x265_cli_csps[srcCsp].planes > 1) + { + m_crSrcHSubSample = x265_cli_csps[srcCsp].width[1]; + m_crSrcVSubSample = x265_cli_csps[srcCsp].height[1]; + m_crSrcW = srcVideoDesc->m_width >> m_crSrcHSubSample; + m_crSrcH = srcVideoDesc->m_height >> m_crSrcVSubSample; + if (srcCsp == 1)// i420 + srcVCrPos = 128; + } + else + { + m_crSrcW = 0; + m_crSrcH = 0; + m_crSrcHSubSample = 0; + m_crSrcVSubSample = 0; + } + int dstCsp = dstVideoDesc->m_csp; + if 
(x265_cli_csps[dstCsp].planes > 1) + { + m_crDstHSubSample = x265_cli_csps[dstCsp].width[1]; + m_crDstVSubSample = x265_cli_csps[dstCsp].height[1]; + m_crDstW = dstVideoDesc->m_width >> m_crDstHSubSample; + m_crDstH = dstVideoDesc->m_height >> m_crDstVSubSample; + if (dstCsp == 1)// i420 + dstVCrPos = 128; + } + else + { + m_crDstW = 0; + m_crDstH = 0; + m_crDstHSubSample = 0; + m_crDstVSubSample = 0; + } + // Only srcCsp == dstCsp is supported at present + if (srcCsp != dstCsp) + { + x265_log(NULL, X265_LOG_ERROR, "wrong, source csp != destination csp \n"); + return false; + } + + lumXInc = (((int64_t)srcW << 16) + (dstW >> 1)) / dstW; + lumYInc = (((int64_t)srcH << 16) + (dstH >> 1)) / dstH; + crXInc = (((int64_t)m_crSrcW << 16) + (m_crDstW >> 1)) / m_crDstW; + crYInc = (((int64_t)m_crSrcH << 16) + (m_crDstH >> 1)) / m_crDstH; + + const int filterAlign = 1; + + // init horizontal Luma Scaler filter + m_ScalerFilters[0] = new ScalerHLumFilter(m_bitDepth); + m_ScalerFilters[0]->initCoeff(m_algorithmFlags, lumXInc, srcW, dstW, filterAlign, 1 << 14, getLocalPos(0, 0), getLocalPos(0, 0)); + + // init horizontal cr Scaler filter + m_ScalerFilters[1] = new ScalerHCrFilter(m_bitDepth); + m_ScalerFilters[1]->initCoeff(m_algorithmFlags, crXInc, m_crSrcW, m_crDstW, filterAlign, 1 << 14, + getLocalPos(m_crSrcHSubSample, srcHCrPos), getLocalPos(m_crDstHSubSample, dstHCrPos)); + + // init vertical Luma scaler filter + m_ScalerFilters[2] = new ScalerVLumFilter(m_bitDepth); + m_ScalerFilters[2]->initCoeff(m_algorithmFlags, lumYInc, srcH, dstH, filterAlign, 1 << 12, getLocalPos(0, 0), getLocalPos(0, 0)); + + // init vertical cr scaler filter + m_ScalerFilters[3] = new ScalerVCrFilter(m_bitDepth); + m_ScalerFilters[3]->initCoeff(m_algorithmFlags, crYInc, m_crSrcH, m_crDstH, filterAlign, 1 << 12, + getLocalPos(m_crSrcVSubSample, srcVCrPos), getLocalPos(m_crDstVSubSample, dstVCrPos)); + + // init slice, must after filter initialization + initScalerSlice(); + + // set slice + 
m_ScalerFilters[0]->setSlice(m_slices[0], m_slices[1]); + m_ScalerFilters[1]->setSlice(m_slices[0], m_slices[1]); + + m_ScalerFilters[2]->setSlice(m_slices[1], m_slices[2]); + m_ScalerFilters[3]->setSlice(m_slices[1], m_slices[2]); + + return 0; +} + +void HFilterScaler8Bit::doScaling(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize) +{ + int IdxW = FACTOR_4; + int IdxF = FIL_DEF; + + /* This is hard to read code, but much faster. Speed is crucial here */ + (dstW % 8 == 0) && (filterSize == 6) && (IdxF = FIL_6) && (IdxW = FACTOR_8); + (dstW % 8 == 0) && (filterSize == 8) && (IdxF = FIL_8) && (IdxW = FACTOR_8); + (dstW % 8 == 0) && (filterSize == 16) && (IdxF = FIL_16) && (IdxW = FACTOR_8); + (dstW % 8 == 0) && (filterSize == 11) && (IdxF = FIL_11) && (IdxW = FACTOR_8); + (dstW % 8 == 0) && (filterSize == 10) && (IdxF = FIL_10) && (IdxW = FACTOR_8); + (dstW % 8 == 0) && (filterSize == 9) && (IdxF = FIL_9) && (IdxW = FACTOR_8); + (dstW % 8 == 0) && (filterSize == 15) && (IdxF = FIL_15) && (IdxW = FACTOR_8); + (dstW % 8 == 0) && (filterSize == 13) && (IdxF = FIL_13) && (IdxW = FACTOR_8); + + /* Do not check multiple of width 4, if width is already multiple of 8 */ + !(dstW % 8 == 0) && (dstW % 4 == 0) && (filterSize == 6) && (IdxF = FIL_6) && (IdxW = FACTOR_4); + !(dstW % 8 == 0) && (dstW % 4 == 0) && (filterSize == 8) && (IdxF = FIL_8) && (IdxW = FACTOR_4); + !(dstW % 8 == 0) && (dstW % 4 == 0) && (filterSize == 16) && (IdxF = FIL_16) && (IdxW = FACTOR_4); + + (dstW % 4 == 0) && (filterSize == 24) && (IdxF = FIL_24) && (IdxW = FACTOR_4); + (dstW % 4 == 0) && (filterSize == 22) && (IdxF = FIL_22) && (IdxW = FACTOR_4); + (dstW % 4 == 0) && (filterSize == 19) && (IdxF = FIL_19) && (IdxW = FACTOR_4); + (dstW % 4 == 0) && (filterSize == 17) && (IdxF = FIL_17) && (IdxW = FACTOR_4); + +#if X265_DEPTH == 8 + doScaling_c(dst, dstW, src, filter, filterPos, filterSize); +#else + doScaling_c_h(dst, dstW, src, filter, 
filterPos, filterSize); +#endif +} + +void HFilterScaler10Bit::doScaling(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize) +{ + int IdxW = FACTOR_4; + int IdxF = FIL_DEF; + + /* This is hard to read code, but much faster. Speed is crucial here */ + (dstW % 8 == 0) && (filterSize == 6) && (IdxF = FIL_6) && (IdxW = FACTOR_8); + (dstW % 8 == 0) && (filterSize == 8) && (IdxF = FIL_8) && (IdxW = FACTOR_8); + (dstW % 8 == 0) && (filterSize == 16) && (IdxF = FIL_16) && (IdxW = FACTOR_8); + (dstW % 8 == 0) && (filterSize == 11) && (IdxF = FIL_11) && (IdxW = FACTOR_8); + (dstW % 8 == 0) && (filterSize == 10) && (IdxF = FIL_10) && (IdxW = FACTOR_8); + (dstW % 8 == 0) && (filterSize == 9) && (IdxF = FIL_9) && (IdxW = FACTOR_8); + (dstW % 8 == 0) && (filterSize == 15) && (IdxF = FIL_15) && (IdxW = FACTOR_8); + (dstW % 8 == 0) && (filterSize == 13) && (IdxF = FIL_13) && (IdxW = FACTOR_8); + + /* Do not check multiple of width 4, if width is already multiple of 8 */ + !(dstW % 8 == 0) && (dstW % 4 == 0) && (filterSize == 6) && (IdxF = FIL_6) && (IdxW = FACTOR_4); + !(dstW % 8 == 0) && (dstW % 4 == 0) && (filterSize == 8) && (IdxF = FIL_8) && (IdxW = FACTOR_4); + !(dstW % 8 == 0) && (dstW % 4 == 0) && (filterSize == 16) && (IdxF = FIL_16) && (IdxW = FACTOR_4); + + (dstW % 4 == 0) && (filterSize == 24) && (IdxF = FIL_24) && (IdxW = FACTOR_4); + (dstW % 4 == 0) && (filterSize == 22) && (IdxF = FIL_22) && (IdxW = FACTOR_4); + (dstW % 4 == 0) && (filterSize == 19) && (IdxF = FIL_19) && (IdxW = FACTOR_4); + (dstW % 4 == 0) && (filterSize == 17) && (IdxF = FIL_17) && (IdxW = FACTOR_4); + +#if X265_DEPTH == 8 + doScaling_c(dst, dstW, src, filter, filterPos, filterSize); +#else + doScaling_c_h(dst, dstW, src, filter, filterPos, filterSize); +#endif +} + +int ScalerFilterManager::scale_pic(void ** src, void ** dst, int * srcStride, int * dstStride) +{ + uint8_t** src_8bit, **dst_8bit; + src_8bit = (uint8_t**)src; + dst_8bit = 
(uint8_t**)dst; + if (!src_8bit || !dst_8bit) + return -1; + + const int srcsliceHor = m_srcH; + const int dstW = m_dstW; + const int dstH = m_dstH; + int32_t *vLumFilterPos = m_ScalerFilters[2]->m_filtPos; + int32_t *vCrFilterPos = m_ScalerFilters[3]->m_filtPos; + const int vLumFilterSize = m_ScalerFilters[2]->m_filtLen; + const int vCrFilterSize = m_ScalerFilters[3]->m_filtLen; + const int crSrcsliceHor = UH_CEIL_SHIFTR(srcsliceHor, m_crSrcVSubSample); + + // vars which will change and which we need to store back in the context + int lumBufIndex = -1; + int crBufIndex = -1; + int lastInLumBuf = -1; + int lastInCrBuf = -1; + + int hasLumHoles = 1; + int hasCrHoles = 1; + + ScalerSlice *src_slice = m_slices[0]; + ScalerSlice *hout_slice = m_slices[1]; + ScalerSlice *vout_slice = m_slices[2]; + src_slice->initFromSrc((uint8_t**)src, srcStride, m_srcW, 0, srcsliceHor, 0, crSrcsliceHor, 1); + vout_slice->initFromSrc((uint8_t**)dst, dstStride, m_dstW, 0, dstH, 0, UH_CEIL_SHIFTR(dstH, m_crDstVSubSample), 0); + + hout_slice->m_plane[0].sliceVer = 0; + hout_slice->m_plane[1].sliceVer = 0; + hout_slice->m_plane[2].sliceVer = 0; + hout_slice->m_plane[3].sliceVer = 0; + hout_slice->m_plane[0].sliceHor = 0; + hout_slice->m_plane[1].sliceHor = 0; + hout_slice->m_plane[2].sliceHor = 0; + hout_slice->m_plane[3].sliceHor = 0; + hout_slice->m_width = dstW; + + for (int dstY = 0; dstY < dstH; dstY++) + { + const int crDstY = dstY >> m_crDstVSubSample; + const int firstLumSrcY = x265_max(1 - vLumFilterSize, vLumFilterPos[dstY]); + const int firstLumSrcY2 = x265_max(1 - vLumFilterSize, vLumFilterPos[x265_min(dstY | ((1 << m_crDstVSubSample) - 1), dstH - 1)]); + const int firstCrSrcY = x265_max(1 - vCrFilterSize, vCrFilterPos[crDstY]); + + int lastLumSrcY = x265_min(m_srcH, firstLumSrcY + vLumFilterSize) - 1; + int lastLumSrcY2 = x265_min(m_srcH, firstLumSrcY2 + vLumFilterSize) - 1; + int lastCrSrcY = x265_min(m_crSrcH, firstCrSrcY + vCrFilterSize) - 1; + + // handle holes + if 
(firstLumSrcY > lastInLumBuf) + { + hasLumHoles = lastInLumBuf != firstLumSrcY - 1; + if (hasLumHoles) + { + hout_slice->m_plane[0].sliceVer = firstLumSrcY; + hout_slice->m_plane[3].sliceVer = firstLumSrcY; + hout_slice->m_plane[0].sliceHor = + hout_slice->m_plane[3].sliceHor = 0; + } + + lastInLumBuf = firstLumSrcY - 1; + } + if (firstCrSrcY > lastInCrBuf) + { + hasCrHoles = lastInCrBuf != firstCrSrcY - 1; + if (hasCrHoles) + { + hout_slice->m_plane[1].sliceVer = firstCrSrcY; + hout_slice->m_plane[2].sliceVer = firstCrSrcY; + hout_slice->m_plane[1].sliceHor = + hout_slice->m_plane[2].sliceHor = 0; + } + + lastInCrBuf = firstCrSrcY - 1; + } + + // Do we have enough lines in this slice to output the dstY line + int enoughLines = lastLumSrcY2 < 0 + srcsliceHor && lastCrSrcY < UH_CEIL_SHIFTR(0 + srcsliceHor, m_crSrcVSubSample); + if (!enoughLines) + { + lastLumSrcY = 0 + srcsliceHor - 1; + lastCrSrcY = 0 + crSrcsliceHor - 1; + x265_log(NULL, X265_LOG_INFO, "buffering slice: lastLumSrcY %d lastCrSrcY %d\n", lastLumSrcY, lastCrSrcY); + } + + X265_CHECK(((lastLumSrcY - firstLumSrcY + 1) <= hout_slice->m_plane[0].availLines), "invalid value %d", lastLumSrcY - firstLumSrcY + 1); + X265_CHECK((lastCrSrcY - firstCrSrcY + 1) <= hout_slice->m_plane[1].availLines, "invalid value %d", lastCrSrcY - firstCrSrcY + 1); + + int firstPosY, lastPosY, firstCPosY, lastCPosY; + int posY = hout_slice->m_plane[0].sliceVer + hout_slice->m_plane[0].sliceHor; + if (posY <= lastLumSrcY && !hasLumHoles) + { + firstPosY = x265_max(firstLumSrcY, posY); + lastPosY = x265_min(firstLumSrcY + hout_slice->m_plane[0].availLines - 1, 0 + srcsliceHor - 1); + } + else + { + firstPosY = posY; + lastPosY = lastLumSrcY; + } + + int cPosY = hout_slice->m_plane[1].sliceVer + hout_slice->m_plane[1].sliceHor; + if (cPosY <= lastCrSrcY && !hasCrHoles) + { + firstCPosY = x265_max(firstCrSrcY, cPosY); + lastCPosY = x265_min(firstCrSrcY + hout_slice->m_plane[1].availLines - 1, UH_CEIL_SHIFTR(0 + srcsliceHor, 
m_crSrcVSubSample) - 1); + } + else + { + firstCPosY = cPosY; + lastCPosY = lastCrSrcY; + } + + hout_slice->rotate(lastPosY, lastCPosY); + // horizontal luma scale + if (posY < lastLumSrcY + 1) + m_ScalerFilters[0]->process(firstPosY, lastPosY - firstPosY + 1); + + lumBufIndex += lastLumSrcY - lastInLumBuf; + lastInLumBuf = lastLumSrcY; + // horizontal chroma Scale + if (cPosY < lastCrSrcY + 1) + m_ScalerFilters[1]->process(firstCPosY, lastCPosY - firstCPosY + 1); + + crBufIndex += lastCrSrcY - lastInCrBuf; + lastInCrBuf = lastCrSrcY; + + // wrap buf index around to stay inside the ring buffer + if (lumBufIndex >= vLumFilterSize) + lumBufIndex -= vLumFilterSize; + if (crBufIndex >= vCrFilterSize) + crBufIndex -= vCrFilterSize; + if (!enoughLines) + break; // we can't output a dstY line so let's try with the next slice + + // vertical scale(output converter) + for (int i = 2; i < m_numFilter; ++i) + m_ScalerFilters[i]->process(dstY, 1); + } + return 0; +} + +void ScalerFilterManager::getMinBufferSize(int *out_lum_size, int *out_cr_size) +{ + int lumY; + int dstH = m_dstH; + int crDstH = m_crDstH; + int *lumFilterPos = m_ScalerFilters[2]->m_filtPos; + int *crFilterPos = m_ScalerFilters[3]->m_filtPos; + int lumFilterSize = m_ScalerFilters[2]->m_filtLen; + int crFilterSize = m_ScalerFilters[3]->m_filtLen; + int crSubSample = m_crSrcVSubSample; + + *out_lum_size = lumFilterSize; + *out_cr_size = crFilterSize; + + for (lumY = 0; lumY < dstH; lumY++) + { + int crY = (int64_t)lumY * crDstH / dstH; + int nextSlice = x265_max(lumFilterPos[lumY] + lumFilterSize - 1, ((crFilterPos[crY] + crFilterSize - 1) << crSubSample)); + + nextSlice >>= crSubSample; + nextSlice <<= crSubSample; + (*out_lum_size) = x265_max((*out_lum_size), nextSlice - lumFilterPos[lumY]); + (*out_cr_size) = x265_max((*out_cr_size), (nextSlice >> crSubSample) - crFilterPos[crY]); + } +} + +int ScalerFilterManager::initScalerSlice() +{ + int ret = 0; + int dst_stride = SCALER_ALIGN(m_dstW * sizeof(int16_t) + 
66, 16); + if (m_bitDepth == 16) + dst_stride <<= 1; + + int lumBufSize; + int crBufSize; + int vLumFilterSize = m_ScalerFilters[2]->m_filtLen; // Vertical filter size for luma pixels. + int vCrFilterSize = m_ScalerFilters[3]->m_filtLen; // Vertical filter size for chroma pixels. + getMinBufferSize(&lumBufSize, &crBufSize); + lumBufSize = X265_MAX(lumBufSize, vLumFilterSize + MAX_NUM_LINES_AHEAD); + crBufSize = X265_MAX(crBufSize, vCrFilterSize + MAX_NUM_LINES_AHEAD); + + for (int i = 0; i < m_numSlice; i++) + m_slices[i] = new ScalerSlice; + ret = m_slices[0]->create(m_srcH, m_crSrcH, m_crSrcHSubSample, m_crSrcVSubSample, 0); + if (ret < 0) + { + x265_log(NULL, X265_LOG_ERROR, "alloc_slice m_slice[0] failed\n"); + return -1; + } + + // horizontal scaler output + ret = m_slices[1]->create(lumBufSize, crBufSize, m_crDstHSubSample, m_crDstVSubSample, 1); + if (ret < 0) + { + x265_log(NULL, X265_LOG_ERROR, "m_slice[1].create failed\n"); + return -1; + } + ret = m_slices[1]->createLines(dst_stride, m_dstW); + if (ret < 0) + { + x265_log(NULL, X265_LOG_ERROR, "m_slice[1].createLines failed\n"); + return -1; + } + + m_slices[1]->fillOnes(dst_stride >> 1, m_bitDepth == 16); + + // vertical scaler output + ret = m_slices[2]->create(m_dstH, m_crDstH, m_crDstHSubSample, m_crDstVSubSample, 0); + if (ret < 0) + { + x265_log(NULL, X265_LOG_ERROR, "m_slice[2].create failed\n"); + return -1; + } + + return 0; +} + +int ScalerFilterManager::getLocalPos(int crSubSample, int pos) +{ + if (pos == -1 || pos <= -513) + pos = (128 << crSubSample) - 128; + pos += 128; // relative to ideal left edge + return pos >> crSubSample; +} + +ScalerSlice::ScalerSlice() : + m_width(0), + m_hCrSubSample(0), + m_vCrSubSample(0), + m_isRing(0), + m_destroyLines(0) +{ + for (int i = 0; i < m_numSlicePlane; i++) + { + m_plane[i].availLines = 0; + m_plane[i].sliceVer = 0; + m_plane[i].sliceHor = 0; + m_plane[i].lineBuf = NULL; + } +} + +void ScalerSlice::destroy() +{ + if (m_destroyLines) + 
destroyLines(); + for (int i = 0; i < m_numSlicePlane; i++) + { + if (m_plane[i].lineBuf) + X265_FREE(m_plane[i].lineBuf); + } +} + +int ScalerSlice::create(int lumLines, int crLines, int h_sub_sample, int v_sub_sample, int ring) +{ + int i; + int size[4] = { lumLines, crLines, crLines, lumLines }; + + m_hCrSubSample = h_sub_sample; + m_vCrSubSample = v_sub_sample; + m_isRing = ring; + m_destroyLines = 0; + + for (i = 0; i < m_numSlicePlane; ++i) + { + int n = size[i] * (ring == 0 ? 1 : 3); + m_plane[i].lineBuf = X265_MALLOC(uint8_t*, n); + if (!m_plane[i].lineBuf) + return -1; + + m_plane[i].availLines = size[i]; + m_plane[i].sliceVer = 0; + m_plane[i].sliceHor = 0; + } + return 0; +} + +/* +slice lines contains extra bytes for vectorial code thus @size +is the allocated memory size and @width is the number of pixels +*/ +int ScalerSlice::createLines(int size, int width) +{ + int i; + int idx[2] = { 3, 2 }; + + m_destroyLines = 1; + m_width = width; + + for (i = 0; i < 2; ++i) { + int n = m_plane[i].availLines; + int j; + int ii = idx[i]; + assert(n == m_plane[ii].availLines); + for (j = 0; j < n; ++j) + { + // chroma plane line U and V are expected to be contiguous in memory + m_plane[i].lineBuf[j] = (uint8_t*)X265_MALLOC(uint8_t, size * 2 + 32); + if (!m_plane[i].lineBuf[j]) + { + destroyLines(); + return -1; + } + m_plane[ii].lineBuf[j] = m_plane[i].lineBuf[j] + size + 16; + if (m_isRing) + { + m_plane[i].lineBuf[j + n] = m_plane[i].lineBuf[j]; + m_plane[ii].lineBuf[j + n] = m_plane[ii].lineBuf[j]; + } + } + } + + return 0; +} + +void ScalerSlice::destroyLines() +{ + int i; + for (i = 0; i < 2; ++i) + { + int n = m_plane[i].availLines; + int j; + for (j = 0; j < n; ++j) + { + X265_FREE(m_plane[i].lineBuf[j]); + m_plane[i].lineBuf[j] = NULL; + if (m_isRing) + m_plane[i].lineBuf[j + n] = NULL; + } + } + + for (i = 0; i < m_numSlicePlane; ++i) + memset(m_plane[i].lineBuf, 0, sizeof(uint8_t*) * m_plane[i].availLines * (m_isRing ? 
3 : 1)); + m_destroyLines = 0; +} + +void ScalerSlice::fillOnes(int n, int is16bit) +{ + int i; + for (i = 0; i < m_numSlicePlane; ++i) + { + int j; + int size = m_plane[i].availLines; + for (j = 0; j < size; ++j) + { + int k; + int end = is16bit ? n >> 1 : n; + // fill also one extra element + end += 1; + if (is16bit) + for (k = 0; k < end; ++k) + ((int32_t*)(m_plane[i].lineBuf[j]))[k] = 1 << 18; + else + for (k = 0; k < end; ++k) + ((int16_t*)(m_plane[i].lineBuf[j]))[k] = 1 << 14; + } + } +} + +int ScalerSlice::rotate(int lum, int cr) +{ + int i; + if (lum) + { + for (i = 0; i < m_numSlicePlane; i += 3) + { + int n = m_plane[i].availLines; + int l = lum - m_plane[i].sliceVer; + + if (l >= n * 2) + { + m_plane[i].sliceVer += n; + m_plane[i].sliceHor -= n; + } + } + } + if (cr) + { + for (i = 1; i < 3; ++i) + { + int n = m_plane[i].availLines; + int l = cr - m_plane[i].sliceVer; + + if (l >= n * 2) + { + m_plane[i].sliceVer += n; + m_plane[i].sliceHor -= n; + } + } + } + return 0; +} + +int ScalerSlice::initFromSrc(uint8_t *src[4], const int stride[4], int srcW, int lumY, int lumH, int crY, int crH, int relative) +{ + int i = 0; + + const int start[m_numSlicePlane] = { lumY, crY, crY, lumY }; + + const int end[m_numSlicePlane] = { lumY + lumH, crY + crH, crY + crH, lumY + lumH }; + + uint8_t *const src_[m_numSlicePlane] = { src[0] + (relative ? 0 : start[0]) * stride[0], + src[1] + (relative ? 0 : start[1]) * stride[1], + src[2] + (relative ? 0 : start[2]) * stride[2], + src[3] + (relative ? 
0 : start[3]) * stride[3] }; + + m_width = srcW; + + for (i = 0; i < m_numSlicePlane; ++i) + { + int j; + int first = m_plane[i].sliceVer; + int n = m_plane[i].availLines; + int lines = end[i] - start[i]; + int tot_lines = end[i] - first; + + if (start[i] >= first && n >= tot_lines) + { + m_plane[i].sliceHor = x265_max(tot_lines, m_plane[i].sliceHor); + for (j = 0; j < lines; j += 1) + m_plane[i].lineBuf[start[i] - first + j] = src_[i] + j * stride[i]; + } + else + { + m_plane[i].sliceVer = start[i]; + lines = lines > n ? n : lines; + m_plane[i].sliceHor = lines; + for (j = 0; j < lines; j += 1) + m_plane[i].lineBuf[j] = src_[i] + j * stride[i]; + } + } + return 0; +} +}
View file
x265_3.4.tar.gz/source/common/scaler.h
Added
@@ -0,0 +1,254 @@ +/***************************************************************************** + * Copyright (C) 2013-2020 MulticoreWare, Inc + * + * Authors: Pooja Venkatesan <pooja@multicorewareinc.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at license @ x265.com. + *****************************************************************************/ + +#ifndef X265_SCALER_H +#define X265_SCALER_H + +#include "common.h" + +namespace X265_NS { +//x265 private namespace + +class ScalerSlice; +class VideoDesc; + +#define MAX_NUM_LINES_AHEAD 4 +#define SCALER_ALIGN(x, j) (((x)+(j)-1)&~((j)-1)) +#define X265_ABS(j) ((j) >= 0 ? (j) : (-(j))) +#define SCALER_MAX_REDUCE_CUTOFF 0.002 +#define SCALER_BITEXACT 0x80000 +#define ROUNDED_DIVISION(i,j) (((i)>0 ? (i) + ((j)>>1) : (i) - ((j)>>1))/(j)) +#define UH_CEIL_SHIFTR(i,j) (!scale_builtin_constant_p(j) ? 
-((-(i)) >> (j)) \ + : ((i) + (1<<(j)) - 1) >> (j)) + +#if defined(__GNUC__) || defined(__clang__) +# define scale_builtin_constant_p __builtin_constant_p +#else +# define scale_builtin_constant_p(x) 0 +#endif + +enum ResFactor +{ + RES_FACTOR_64, RES_FACTOR_32, RES_FACTOR_16, RES_FACTOR_8, + RES_FACTOR_4, RES_FACTOR_DEF, NUM_RES_FACTOR +}; + +enum ScalerFactor +{ + FACTOR_4, FACTOR_8, NUM_FACTOR +}; + +enum FilterSize +{ + FIL_4, FIL_6, FIL_8, FIL_9, FIL_10, FIL_11, FIL_13, FIL_15, + FIL_16, FIL_17, FIL_19, FIL_22, FIL_24, FIL_DEF, NUM_FIL +}; + +class ScalerFilter { +public: + int m_filtLen; + int32_t* m_filtPos; // Array of horizontal/vertical starting pos for each dst for luma / chroma planes. + int16_t* m_filt; // Array of horizontal/vertical filter coefficients for luma / chroma planes. + ScalerSlice* m_sourceSlice; // Source slice + ScalerSlice* m_destSlice; // Output slice + ScalerFilter(); + virtual ~ScalerFilter(); + virtual void process(int sliceVer, int sliceHor) = 0; + int initCoeff(int flag, int inc, int srcW, int dstW, int filtAlign, int one, int sourcePos, int destPos); + void setSlice(ScalerSlice* source, ScalerSlice* dest) { m_sourceSlice = source; m_destSlice = dest; } +}; + +class VideoDesc { +public: + int m_width; + int m_height; + int m_csp; + int m_inputDepth; + + VideoDesc(int w, int h, int csp, int bitDepth) + { + m_width = w; + m_height = h; + m_csp = csp; + m_inputDepth = bitDepth; + } +}; + +typedef struct ScalerPlane +{ + int availLines; // max number of lines that can be held by this plane + int sliceVer; // index of first line + int sliceHor; // number of lines + uint8_t** lineBuf; // line buffer +} ScalerPlane; + +// Assist horizontal filtering, base class +class HFilterScaler { +public: + int m_bitDepth; +public: + HFilterScaler() :m_bitDepth(0) {}; + virtual ~HFilterScaler() {}; + virtual void doScaling(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize) = 0; +}; + +// 
Assist vertical filtering, base class +class VFilterScaler { +public: + int m_bitDepth; +public: + VFilterScaler() :m_bitDepth(0) {}; + virtual ~VFilterScaler() {}; + virtual void yuv2PlaneX(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW) = 0; +}; + +// Assist horizontal filtering, process 8 bit case +class HFilterScaler8Bit : public HFilterScaler { +public: + HFilterScaler8Bit() { m_bitDepth = 8; } + virtual void doScaling(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize); +}; + +// Assist horizontal filtering, process 10 bit case +class HFilterScaler10Bit : public HFilterScaler { +public: + HFilterScaler10Bit() { m_bitDepth = 10; } + virtual void doScaling(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize); +}; + +// Assist vertical filtering, process 8 bit case +class VFilterScaler8Bit : public VFilterScaler { +public: + VFilterScaler8Bit() { m_bitDepth = 8; } + virtual void yuv2PlaneX(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW); +}; + +// Assist vertical filtering, process 10 bit case +class VFilterScaler10Bit : public VFilterScaler { +public: + VFilterScaler10Bit() { m_bitDepth = 10; } + virtual void yuv2PlaneX(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW); +}; + +// Horizontal filter for luma +class ScalerHLumFilter : public ScalerFilter { +private: + HFilterScaler* m_hFilterScaler; +public: + ScalerHLumFilter(int bitDepth) { bitDepth == 8 ? m_hFilterScaler = new HFilterScaler8Bit : bitDepth == 10 ? 
m_hFilterScaler = new HFilterScaler10Bit : NULL;} + ~ScalerHLumFilter() { if (m_hFilterScaler) X265_FREE(m_hFilterScaler); } + virtual void process(int sliceVer, int sliceHor); +}; + +// Horizontal filter for chroma +class ScalerHCrFilter : public ScalerFilter { +private: + HFilterScaler* m_hFilterScaler; +public: + ScalerHCrFilter(int bitDepth) { bitDepth == 8 ? m_hFilterScaler = new HFilterScaler8Bit : bitDepth == 10 ? m_hFilterScaler = new HFilterScaler10Bit : NULL;} + ~ScalerHCrFilter() { if (m_hFilterScaler) X265_FREE(m_hFilterScaler); } + virtual void process(int sliceVer, int sliceHor); +}; + +// Vertical filter for luma +class ScalerVLumFilter : public ScalerFilter { +private: + VFilterScaler* m_vFilterScaler; +public: + ScalerVLumFilter(int bitDepth) { bitDepth == 8 ? m_vFilterScaler = new VFilterScaler8Bit : bitDepth == 10 ? m_vFilterScaler = new VFilterScaler10Bit : NULL;} + ~ScalerVLumFilter() { if (m_vFilterScaler) X265_FREE(m_vFilterScaler); } + virtual void process(int sliceVer, int sliceHor); +}; + +// Vertical filter for chroma +class ScalerVCrFilter : public ScalerFilter { +private: + VFilterScaler* m_vFilterScaler; +public: + ScalerVCrFilter(int bitDepth) { bitDepth == 8 ? m_vFilterScaler = new VFilterScaler8Bit : bitDepth == 10 ? 
m_vFilterScaler = new VFilterScaler10Bit : NULL;} + ~ScalerVCrFilter() { if (m_vFilterScaler) X265_FREE(m_vFilterScaler); } + virtual void process(int sliceVer, int sliceHor); +}; + +class ScalerSlice +{ +private: + enum ScalerSlicePlaneNum { m_numSlicePlane = 4 }; +public: + int m_width; // Slice line width + int m_hCrSubSample; // horizontal Chroma subsampling factor + int m_vCrSubSample; // vertical chroma subsampling factor + int m_isRing; // flag to identify if this ScalerSlice is a ring buffer + int m_destroyLines; // flag to identify if there are dynamic allocated lines + ScalerPlane m_plane[m_numSlicePlane]; +public: + ScalerSlice(); + ~ScalerSlice() { destroy(); } + int rotate(int lum, int cr); + void fillOnes(int n, int is16bit); + int create(int lumLines, int crLines, int h_sub_sample, int v_sub_sample, int ring); + int createLines(int size, int width); + void destroyLines(); + void destroy(); + int initFromSrc(uint8_t *src[4], const int stride[4], int srcW, int lumY, int lumH, int crY, int crH, int relative); +}; + +class ScalerFilterManager { +private: + enum ScalerFilterNum { m_numSlice = 3, m_numFilter = 4 }; + +private: + int m_bitDepth; + int m_algorithmFlags; // 1, bilinear; 4 bicubic, default is bicubic + int m_srcW; // Width of source luma planes. + int m_srcH; // Height of source luma planes. + int m_dstW; // Width of dest luma planes. + int m_dstH; // Height of dest luma planes. + int m_crSrcW; // Width of source chroma planes. + int m_crSrcH; // Height of source chroma planes. + int m_crDstW; // Width of dest chroma planes. + int m_crDstH; // Height of dest chroma planes. + int m_crSrcHSubSample; // Binary log of horizontal subsampling factor between Y and Cr planes in src image. + int m_crSrcVSubSample; // Binary log of vertical subsampling factor between Y and Cr planes in src image. + int m_crDstHSubSample; // Binary log of horizontal subsampling factor between Y and Cr planes in dest image. 
+ int m_crDstVSubSample; // Binary log of vertical subsampling factor between Y and Cr planes in dest image. + ScalerSlice* m_slices[m_numSlice]; + ScalerFilter* m_ScalerFilters[m_numFilter]; +private: + int getLocalPos(int crSubSample, int pos); + void getMinBufferSize(int *out_lum_size, int *out_cr_size); + int initScalerSlice(); +public: + ScalerFilterManager(); + ~ScalerFilterManager() { + for (int i = 0; i < m_numSlice; i++) + if (m_slices[i]) { m_slices[i]->destroy(); delete m_slices[i]; m_slices[i] = NULL; } + for (int i = 0; i < m_numFilter; i++) + if (m_ScalerFilters[i]) { delete m_ScalerFilters[i]; m_ScalerFilters[i] = NULL; } + } + int init(int algorithmFlags, VideoDesc* srcVideoDesc, VideoDesc* dstVideoDesc); + int scale_pic(void** src, void** dst, int* srcStride, int* dstStride); +}; +} + +#endif //ifndef X265_SCALER_H
View file
x265_3.3.tar.gz/source/common/threading.h -> x265_3.4.tar.gz/source/common/threading.h
Changed
@@ -238,6 +238,14 @@ LeaveCriticalSection(&m_cs); } + void decr() + { + EnterCriticalSection(&m_cs); + m_val--; + WakeAllConditionVariable(&m_cv); + LeaveCriticalSection(&m_cs); + } + protected: CRITICAL_SECTION m_cs; @@ -436,6 +444,14 @@ pthread_mutex_unlock(&m_mutex); } + void decr() + { + pthread_mutex_lock(&m_mutex); + m_val--; + pthread_cond_broadcast(&m_cond); + pthread_mutex_unlock(&m_mutex); + } + protected: pthread_mutex_t m_mutex;
View file
x265_3.3.tar.gz/source/encoder/analysis.cpp -> x265_3.4.tar.gz/source/encoder/analysis.cpp
Changed
@@ -1272,7 +1272,7 @@ md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp); checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom); - skipRecursion = !!m_param->bEnableRecursionSkip && md.bestMode; + skipRecursion = !!m_param->recursionSkipMode && md.bestMode; if (m_param->rdLevel) skipModes = m_param->bEnableEarlySkip && md.bestMode; } @@ -1296,7 +1296,7 @@ md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp); checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom); - skipRecursion = !!m_param->bEnableRecursionSkip && md.bestMode; + skipRecursion = !!m_param->recursionSkipMode && md.bestMode; if (m_param->rdLevel) skipModes = m_param->bEnableEarlySkip && md.bestMode; } @@ -1314,15 +1314,23 @@ skipModes = (m_param->bEnableEarlySkip || m_refineLevel == 2) && md.bestMode && md.bestMode->cu.isSkipped(0); // TODO: sa8d threshold per depth } - if (md.bestMode && m_param->bEnableRecursionSkip && !bCtuInfoCheck && !(m_param->bAnalysisType == AVC_INFO && m_param->analysisLoadReuseLevel == 7 && (m_modeFlag[0] || m_modeFlag[1]))) + if (md.bestMode && m_param->recursionSkipMode && !bCtuInfoCheck && !(m_param->bAnalysisType == AVC_INFO && m_param->analysisLoadReuseLevel == 7 && (m_modeFlag[0] || m_modeFlag[1]))) { skipRecursion = md.bestMode->cu.isSkipped(0); - if (mightSplit && depth >= minDepth && !skipRecursion) + if (mightSplit && !skipRecursion) { - if (depth) - skipRecursion = recursionDepthCheck(parentCTU, cuGeom, *md.bestMode); - if (m_bHD && !skipRecursion && m_param->rdLevel == 2 && md.fencYuv.m_size != MAX_CU_SIZE) + if (depth >= minDepth && m_param->recursionSkipMode == RDCOST_BASED_RSKIP) + { + if (depth) + skipRecursion = recursionDepthCheck(parentCTU, cuGeom, *md.bestMode); + if (m_bHD && !skipRecursion && m_param->rdLevel == 2 && md.fencYuv.m_size != MAX_CU_SIZE) + skipRecursion = complexityCheckCU(*md.bestMode); + } + else if (cuGeom.log2CUSize >= MAX_LOG2_CU_SIZE - 1 && m_param->recursionSkipMode == EDGE_BASED_RSKIP) + { 
skipRecursion = complexityCheckCU(*md.bestMode); + } + } } if (m_param->bAnalysisType == AVC_INFO && md.bestMode && cuGeom.numPartitions <= 16 && m_param->analysisLoadReuseLevel == 7) @@ -1972,7 +1980,7 @@ checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks); checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth); - if (m_param->bEnableRecursionSkip && depth && m_modeDepth[depth - 1].bestMode) + if (m_param->recursionSkipMode && depth && m_modeDepth[depth - 1].bestMode) skipRecursion = md.bestMode && !md.bestMode->cu.getQtRootCbf(0); } if (m_param->analysisLoadReuseLevel > 4 && m_reusePartSize[cuGeom.absPartIdx] == SIZE_2Nx2N) @@ -1996,7 +2004,7 @@ checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks); checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth); - if (m_param->bEnableRecursionSkip && depth && m_modeDepth[depth - 1].bestMode) + if (m_param->recursionSkipMode && depth && m_modeDepth[depth - 1].bestMode) skipRecursion = md.bestMode && !md.bestMode->cu.getQtRootCbf(0); } } @@ -2015,8 +2023,10 @@ checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks); checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth); - if (m_param->bEnableRecursionSkip && depth && m_modeDepth[depth - 1].bestMode) + if (m_param->recursionSkipMode == RDCOST_BASED_RSKIP && depth && m_modeDepth[depth - 1].bestMode) skipRecursion = md.bestMode && !md.bestMode->cu.getQtRootCbf(0); + else if (cuGeom.log2CUSize >= MAX_LOG2_CU_SIZE - 1 && m_param->recursionSkipMode == EDGE_BASED_RSKIP) + skipRecursion = md.bestMode && complexityCheckCU(*md.bestMode); } if (m_param->bAnalysisType == AVC_INFO && md.bestMode && cuGeom.numPartitions <= 16 && m_param->analysisLoadReuseLevel == 7) skipRecursion = true; @@ -3525,27 +3535,47 @@ bool Analysis::complexityCheckCU(const Mode& bestMode) { - uint32_t mean = 0; - uint32_t homo = 0; - uint32_t cuSize = bestMode.fencYuv->m_size; - for (uint32_t y = 0; y < cuSize; y++) { - for (uint32_t x = 0; x < cuSize; x++) { - mean += 
(bestMode.fencYuv->m_buf[0][y * cuSize + x]); + if (m_param->recursionSkipMode == RDCOST_BASED_RSKIP) + { + uint32_t mean = 0; + uint32_t homo = 0; + uint32_t cuSize = bestMode.fencYuv->m_size; + for (uint32_t y = 0; y < cuSize; y++) { + for (uint32_t x = 0; x < cuSize; x++) { + mean += (bestMode.fencYuv->m_buf[0][y * cuSize + x]); + } } - } - mean = mean / (cuSize * cuSize); - for (uint32_t y = 0 ; y < cuSize; y++){ - for (uint32_t x = 0 ; x < cuSize; x++){ - homo += abs(int(bestMode.fencYuv->m_buf[0][y * cuSize + x] - mean)); + mean = mean / (cuSize * cuSize); + for (uint32_t y = 0; y < cuSize; y++) { + for (uint32_t x = 0; x < cuSize; x++) { + homo += abs(int(bestMode.fencYuv->m_buf[0][y * cuSize + x] - mean)); + } } - } - homo = homo / (cuSize * cuSize); + homo = homo / (cuSize * cuSize); - if (homo < (.1 * mean)) - return true; + if (homo < (.1 * mean)) + return true; - return false; -} + return false; + } + else + { + int blockType = bestMode.cu.m_log2CUSize[0] - LOG2_UNIT_SIZE; + int shift = bestMode.cu.m_log2CUSize[0] * LOG2_UNIT_SIZE; + intptr_t stride = m_frame->m_fencPic->m_stride; + intptr_t blockOffsetLuma = bestMode.cu.m_cuPelX + bestMode.cu.m_cuPelY * stride; + uint64_t sum_ss = primitives.cu[blockType].var(m_frame->m_edgeBitPic + blockOffsetLuma, stride); + uint32_t sum = (uint32_t)sum_ss; + uint32_t ss = (uint32_t)(sum_ss >> 32); + uint32_t pixelCount = 1 << shift; + double cuEdgeVariance = (ss - ((double)sum * sum / pixelCount)) / pixelCount; + + if (cuEdgeVariance > (double)m_param->edgeVarThreshold) + return false; + else + return true; + } + } uint32_t Analysis::calculateCUVariance(const CUData& ctu, const CUGeom& cuGeom) { @@ -3570,7 +3600,6 @@ cnt++; } } - return cuVariance / cnt; }
View file
x265_3.3.tar.gz/source/encoder/analysis.h -> x265_3.4.tar.gz/source/encoder/analysis.h
Changed
@@ -52,7 +52,7 @@ splitRefs = 0; mvCost[0] = 0; // L0 mvCost[1] = 0; // L1 - sa8dCost = 0; + sa8dCost = 0; } }; @@ -120,7 +120,6 @@ Mode& compressCTU(CUData& ctu, Frame& frame, const CUGeom& cuGeom, const Entropy& initialContext); int32_t loadTUDepth(CUGeom cuGeom, CUData parentCTU); - protected: /* Analysis data for save/load mode, writes/reads data based on absPartIdx */ x265_analysis_inter_data* m_reuseInterDataCTU;
View file
x265_3.3.tar.gz/source/encoder/api.cpp -> x265_3.4.tar.gz/source/encoder/api.cpp
Changed
@@ -1016,12 +1016,12 @@ void x265_zone_free(x265_param *param) { - if (param && param->rc.zonefileCount) { + if (param && param->rc.zones && (param->rc.zoneCount || param->rc.zonefileCount)) + { for (int i = 0; i < param->rc.zonefileCount; i++) x265_free(param->rc.zones[i].zoneParam); - } - if (param && (param->rc.zoneCount || param->rc.zonefileCount)) x265_free(param->rc.zones); + } } static const x265_api libapi = @@ -1294,6 +1294,8 @@ fprintf(csvfp, "RateFactor, "); if (param->rc.vbvBufferSize) fprintf(csvfp, "BufferFill, BufferFillFinal, "); + if (param->rc.vbvBufferSize && param->csvLogLevel >= 2) + fprintf(csvfp, "UnclippedBufferFillFinal, "); if (param->bEnablePsnr) fprintf(csvfp, "Y PSNR, U PSNR, V PSNR, YUV PSNR, "); if (param->bEnableSsim) @@ -1405,6 +1407,8 @@ fprintf(param->csvfpt, "%.3lf,", frameStats->rateFactor); if (param->rc.vbvBufferSize) fprintf(param->csvfpt, "%.3lf, %.3lf,", frameStats->bufferFill, frameStats->bufferFillFinal); + if (param->rc.vbvBufferSize && param->csvLogLevel >= 2) + fprintf(param->csvfpt, "%.3lf,", frameStats->unclippedBufferFillFinal); if (param->bEnablePsnr) fprintf(param->csvfpt, "%.3lf, %.3lf, %.3lf, %.3lf,", frameStats->psnrY, frameStats->psnrU, frameStats->psnrV, frameStats->psnr); if (param->bEnableSsim)
View file
x265_3.3.tar.gz/source/encoder/encoder.cpp -> x265_3.4.tar.gz/source/encoder/encoder.cpp
Changed
@@ -218,10 +218,7 @@ if (m_param->bHistBasedSceneCut) { - for (int i = 0; i < x265_cli_csps[m_param->internalCsp].planes; i++) - { - m_planeSizes[i] = (m_param->sourceWidth >> x265_cli_csps[p->internalCsp].width[i]) * (m_param->sourceHeight >> x265_cli_csps[m_param->internalCsp].height[i]); - } + m_planeSizes[0] = (m_param->sourceWidth >> x265_cli_csps[p->internalCsp].width[0]) * (m_param->sourceHeight >> x265_cli_csps[m_param->internalCsp].height[0]); uint32_t pixelbytes = m_param->internalBitDepth > 8 ? 2 : 1; m_edgePic = X265_MALLOC(pixel, m_planeSizes[0] * pixelbytes); m_edgeHistThreshold = m_param->edgeTransitionThreshold; @@ -1443,9 +1440,9 @@ int32_t planeCount = x265_cli_csps[m_param->internalCsp].planes; memset(m_edgePic, 0, bufSize); - if (!computeEdge(m_edgePic, src, NULL, pic->width, pic->height, pic->width, false)) + if (!computeEdge(m_edgePic, src, NULL, pic->width, pic->height, pic->width, false, 1)) { - x265_log(m_param, X265_LOG_ERROR, "Failed edge computation!"); + x265_log(m_param, X265_LOG_ERROR, "Failed to compute edge!"); return false; } @@ -1605,6 +1602,14 @@ if (m_param->bHistBasedSceneCut && pic_in) { x265_picture *pic = (x265_picture *) pic_in; + + if (pic->poc == 0) + { + /* for entire encode compute the chroma plane sizes only once */ + for (int i = 1; i < x265_cli_csps[m_param->internalCsp].planes; i++) + m_planeSizes[i] = (pic->width >> x265_cli_csps[m_param->internalCsp].width[i]) * (pic->height >> x265_cli_csps[m_param->internalCsp].height[i]); + } + if (computeHistograms(pic)) { double maxUVSad = 0.0, edgeSad = 0.0; @@ -1752,6 +1757,12 @@ } } } + if (m_param->recursionSkipMode == EDGE_BASED_RSKIP && m_param->bHistBasedSceneCut) + { + pixel* src = m_edgePic; + primitives.planecopy_pp_shr(src, inFrame->m_fencPic->m_picWidth, inFrame->m_edgeBitPic, inFrame->m_fencPic->m_stride, + inFrame->m_fencPic->m_picWidth, inFrame->m_fencPic->m_picHeight, 0); + } } else { @@ -2414,7 +2425,7 @@ encParam->maxNumReferences = param->maxNumReferences; 
// never uses more refs than specified in stream headers encParam->bEnableFastIntra = param->bEnableFastIntra; encParam->bEnableEarlySkip = param->bEnableEarlySkip; - encParam->bEnableRecursionSkip = param->bEnableRecursionSkip; + encParam->recursionSkipMode = param->recursionSkipMode; encParam->searchMethod = param->searchMethod; /* Scratch buffer prevents me_range from being increased for esa/tesa */ if (param->searchRange < encParam->searchRange) @@ -3006,6 +3017,8 @@ frameStats->ipCostRatio = curFrame->m_lowres.ipCostRatio; frameStats->bufferFill = m_rateControl->m_bufferFillActual; frameStats->bufferFillFinal = m_rateControl->m_bufferFillFinal; + if (m_param->csvLogLevel >= 2) + frameStats->unclippedBufferFillFinal = m_rateControl->m_unclippedBufferFillFinal; frameStats->frameLatency = inPoc - poc; if (m_param->rc.rateControlMode == X265_RC_CRF) frameStats->rateFactor = curEncData.m_rateFactor; @@ -3400,7 +3413,7 @@ p->maxNumReferences = zone->maxNumReferences; p->bEnableFastIntra = zone->bEnableFastIntra; p->bEnableEarlySkip = zone->bEnableEarlySkip; - p->bEnableRecursionSkip = zone->bEnableRecursionSkip; + p->recursionSkipMode = zone->recursionSkipMode; p->searchMethod = zone->searchMethod; p->searchRange = zone->searchRange; p->subpelRefine = zone->subpelRefine; @@ -3681,20 +3694,6 @@ if (p->analysisLoad && !p->analysisLoadReuseLevel) p->analysisLoadReuseLevel = 5; - if ((p->bAnalysisType == DEFAULT) && p->rc.cuTree) - { - if (p->analysisSaveReuseLevel && p->analysisSaveReuseLevel < 10) - { - x265_log(p, X265_LOG_WARNING, "cu-tree works only with analysis-save-reuse-level 10, Disabling cu-tree\n"); - p->rc.cuTree = 0; - } - if (p->analysisLoadReuseLevel && p->analysisLoadReuseLevel < 10) - { - x265_log(p, X265_LOG_WARNING, "cu-tree works only with analysis-load-reuse-level 10, Disabling cu-tree\n"); - p->rc.cuTree = 0; - } - } - if ((p->analysisLoad || p->analysisSave) && (p->bDistributeModeAnalysis || p->bDistributeMotionEstimation)) { x265_log(p, 
X265_LOG_WARNING, "Analysis load/save options incompatible with pmode/pme, Disabling pmode/pme\n"); @@ -3867,29 +3866,30 @@ } else { - if (fread(&m_conformanceWindow.rightOffset, sizeof(int), 1, m_analysisFileIn) != 1) + int rightOffset, bottomOffset; + if (fread(&rightOffset, sizeof(int), 1, m_analysisFileIn) != 1) { x265_log(NULL, X265_LOG_ERROR, "Error reading analysis data. Conformance window right offset missing\n"); m_aborted = true; } - else if (m_conformanceWindow.rightOffset && p->analysisLoadReuseLevel > 1) + else if (rightOffset && p->analysisLoadReuseLevel > 1) { int scaleFactor = p->scaleFactor < 2 ? 1 : p->scaleFactor; - padsize = m_conformanceWindow.rightOffset * scaleFactor; + padsize = rightOffset * scaleFactor; p->sourceWidth += padsize; m_conformanceWindow.bEnabled = true; m_conformanceWindow.rightOffset = padsize; } - if (fread(&m_conformanceWindow.bottomOffset, sizeof(int), 1, m_analysisFileIn) != 1) + if (fread(&bottomOffset, sizeof(int), 1, m_analysisFileIn) != 1) { x265_log(NULL, X265_LOG_ERROR, "Error reading analysis data. Conformance window bottom offset missing\n"); m_aborted = true; } - else if (m_conformanceWindow.bottomOffset && p->analysisLoadReuseLevel > 1) + else if (bottomOffset && p->analysisLoadReuseLevel > 1) { int scaleFactor = p->scaleFactor < 2 ? 1 : p->scaleFactor; - padsize = m_conformanceWindow.bottomOffset * scaleFactor; + padsize = bottomOffset * scaleFactor; p->sourceHeight += padsize; m_conformanceWindow.bEnabled = true; m_conformanceWindow.bottomOffset = padsize; @@ -4196,7 +4196,7 @@ x265_log(p, X265_LOG_WARNING, "Radl requires fixed gop-length (keyint == min-keyint). Disabling radl.\n"); } - if ((p->chunkStart || p->chunkEnd) && p->bOpenGOP) + if ((p->chunkStart || p->chunkEnd) && p->bOpenGOP && m_param->bResetZoneConfig) { p->chunkStart = p->chunkEnd = 0; x265_log(p, X265_LOG_WARNING, "Chunking requires closed gop structure. 
Disabling chunking.\n"); @@ -4229,12 +4229,6 @@ x265_log(p, X265_LOG_WARNING, "Turning on repeat - headers for zone encoding\n"); } - if (!m_param->bResetZoneConfig && (p->keyframeMax != p->keyframeMin)) - x265_log(p, X265_LOG_WARNING, "External zone reconfiguration requires a fixed GOP size to enable appropriate signaling of HRD info\n"); - - if (!m_param->bResetZoneConfig && (p->reconfigWindowSize != (uint64_t)p->keyframeMax)) - x265_log(p, X265_LOG_WARNING, "Zone size must be multiple of GOP size to enable appropriate signaling of HRD info\n"); - if (m_param->bEnableHME) { if (m_param->sourceHeight < 540) @@ -4311,18 +4305,27 @@ } } + uint32_t numCUsLoad, numCUsInHeightLoad; + /* Now arrived at the right frame, read the record */ analysis->poc = poc; analysis->frameRecordSize = frameRecordSize; X265_FREAD(&analysis->sliceType, sizeof(int), 1, m_analysisFileIn, &(picData->sliceType)); X265_FREAD(&analysis->bScenecut, sizeof(int), 1, m_analysisFileIn, &(picData->bScenecut)); X265_FREAD(&analysis->satdCost, sizeof(int64_t), 1, m_analysisFileIn, &(picData->satdCost)); - X265_FREAD(&analysis->numCUsInFrame, sizeof(int), 1, m_analysisFileIn, &(picData->numCUsInFrame)); + X265_FREAD(&numCUsLoad, sizeof(int), 1, m_analysisFileIn, &(picData->numCUsInFrame)); X265_FREAD(&analysis->numPartitions, sizeof(int), 1, m_analysisFileIn, &(picData->numPartitions)); + /* Update analysis info to save current settings */ + uint32_t widthInCU = (m_param->sourceWidth + m_param->maxCUSize - 1) >> m_param->maxLog2CUSize; + uint32_t heightInCU = (m_param->sourceHeight + m_param->maxCUSize - 1) >> m_param->maxLog2CUSize; + uint32_t numCUsInFrame = widthInCU * heightInCU; + analysis->numCUsInFrame = numCUsInFrame; + analysis->numCuInHeight = heightInCU; + if (m_param->bDisableLookahead) { - X265_FREAD(&analysis->numCuInHeight, sizeof(uint32_t), 1, m_analysisFileIn, &(picData->numCuInHeight)); + X265_FREAD(&numCUsInHeightLoad, sizeof(uint32_t), 1, m_analysisFileIn, 
&(picData->numCuInHeight)); X265_FREAD(&analysis->lookahead, sizeof(x265_lookahead_data), 1, m_analysisFileIn, &(picData->lookahead)); } int scaledNumPartition = analysis->numPartitions; @@ -4335,16 +4338,16 @@ if (m_param->ctuDistortionRefine == CTU_DISTORTION_INTERNAL) { - X265_FREAD((analysis->distortionData)->ctuDistortion, sizeof(sse_t), analysis->numCUsInFrame, m_analysisFileIn, picDistortion); + X265_FREAD((analysis->distortionData)->ctuDistortion, sizeof(sse_t), numCUsLoad, m_analysisFileIn, picDistortion); computeDistortionOffset(analysis); } if (m_param->bDisableLookahead && m_rateControl->m_isVbv) { size_t vbvCount = m_param->lookaheadDepth + m_param->bframes + 2; - X265_FREAD(analysis->lookahead.intraVbvCost, sizeof(uint32_t), analysis->numCUsInFrame, m_analysisFileIn, picData->lookahead.intraVbvCost); - X265_FREAD(analysis->lookahead.vbvCost, sizeof(uint32_t), analysis->numCUsInFrame, m_analysisFileIn, picData->lookahead.vbvCost); - X265_FREAD(analysis->lookahead.satdForVbv, sizeof(uint32_t), analysis->numCuInHeight, m_analysisFileIn, picData->lookahead.satdForVbv); - X265_FREAD(analysis->lookahead.intraSatdForVbv, sizeof(uint32_t), analysis->numCuInHeight, m_analysisFileIn, picData->lookahead.intraSatdForVbv); + X265_FREAD(analysis->lookahead.intraVbvCost, sizeof(uint32_t), numCUsLoad, m_analysisFileIn, picData->lookahead.intraVbvCost); + X265_FREAD(analysis->lookahead.vbvCost, sizeof(uint32_t), numCUsLoad, m_analysisFileIn, picData->lookahead.vbvCost); + X265_FREAD(analysis->lookahead.satdForVbv, sizeof(uint32_t), numCUsInHeightLoad, m_analysisFileIn, picData->lookahead.satdForVbv); + X265_FREAD(analysis->lookahead.intraSatdForVbv, sizeof(uint32_t), numCUsInHeightLoad, m_analysisFileIn, picData->lookahead.intraSatdForVbv); X265_FREAD(analysis->lookahead.plannedSatd, sizeof(int64_t), vbvCount, m_analysisFileIn, picData->lookahead.plannedSatd); if (m_param->scaleFactor) @@ -4352,12 +4355,12 @@ for (uint64_t index = 0; index < vbvCount; index++) 
analysis->lookahead.plannedSatd[index] *= factor; - for (uint32_t i = 0; i < analysis->numCuInHeight; i++) + for (uint32_t i = 0; i < numCUsInHeightLoad; i++) { analysis->lookahead.satdForVbv[i] *= factor; analysis->lookahead.intraSatdForVbv[i] *= factor; } - for (uint32_t i = 0; i < analysis->numCUsInFrame; i++) + for (uint32_t i = 0; i < numCUsLoad; i++) { analysis->lookahead.vbvCost[i] *= factor; analysis->lookahead.intraVbvCost[i] *= factor; @@ -4407,13 +4410,13 @@ if (!m_param->scaleFactor) { - X265_FREAD((analysis->intraData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFileIn, intraPic->modes); + X265_FREAD((analysis->intraData)->modes, sizeof(uint8_t), numCUsLoad * analysis->numPartitions, m_analysisFileIn, intraPic->modes); } else { - uint8_t *tempLumaBuf = X265_MALLOC(uint8_t, analysis->numCUsInFrame * scaledNumPartition); - X265_FREAD(tempLumaBuf, sizeof(uint8_t), analysis->numCUsInFrame * scaledNumPartition, m_analysisFileIn, intraPic->modes); - for (uint32_t ctu32Idx = 0, cnt = 0; ctu32Idx < analysis->numCUsInFrame * scaledNumPartition; ctu32Idx++, cnt += factor) + uint8_t *tempLumaBuf = X265_MALLOC(uint8_t, numCUsLoad * scaledNumPartition); + X265_FREAD(tempLumaBuf, sizeof(uint8_t), numCUsLoad * scaledNumPartition, m_analysisFileIn, intraPic->modes); + for (uint32_t ctu32Idx = 0, cnt = 0; ctu32Idx < numCUsLoad * scaledNumPartition; ctu32Idx++, cnt += factor) memset(&(analysis->intraData)->modes[cnt], tempLumaBuf[ctu32Idx], factor); X265_FREE(tempLumaBuf); } @@ -4447,7 +4450,7 @@ } if (m_param->bAnalysisType == HEVC_INFO) { - depthBytes = analysis->numCUsInFrame * analysis->numPartitions; + depthBytes = numCUsLoad * analysis->numPartitions; memcpy(((x265_analysis_inter_data *)analysis->interData)->depth, interPic->depth, depthBytes); } else @@ -4551,25 +4554,26 @@ { if (!m_param->scaleFactor) { - X265_FREAD((analysis->intraData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, 
m_analysisFileIn, intraPic->modes); + X265_FREAD((analysis->intraData)->modes, sizeof(uint8_t), numCUsLoad * analysis->numPartitions, m_analysisFileIn, intraPic->modes); } else { - uint8_t *tempLumaBuf = X265_MALLOC(uint8_t, analysis->numCUsInFrame * scaledNumPartition); - X265_FREAD(tempLumaBuf, sizeof(uint8_t), analysis->numCUsInFrame * scaledNumPartition, m_analysisFileIn, intraPic->modes); - for (uint32_t ctu32Idx = 0, cnt = 0; ctu32Idx < analysis->numCUsInFrame * scaledNumPartition; ctu32Idx++, cnt += factor) + uint8_t *tempLumaBuf = X265_MALLOC(uint8_t, numCUsLoad * scaledNumPartition); + X265_FREAD(tempLumaBuf, sizeof(uint8_t), numCUsLoad * scaledNumPartition, m_analysisFileIn, intraPic->modes); + for (uint32_t ctu32Idx = 0, cnt = 0; ctu32Idx < numCUsLoad * scaledNumPartition; ctu32Idx++, cnt += factor) memset(&(analysis->intraData)->modes[cnt], tempLumaBuf[ctu32Idx], factor); X265_FREE(tempLumaBuf); } } } else - X265_FREAD((analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir, m_analysisFileIn, interPic->ref); + X265_FREAD((analysis->interData)->ref, sizeof(int32_t), numCUsLoad * X265_MAX_PRED_MODE_PER_CTU * numDir, m_analysisFileIn, interPic->ref); consumedBytes += frameRecordSize; if (numDir == 1) totalConsumedBytes = consumedBytes; } + #undef X265_FREAD } @@ -5032,13 +5036,14 @@ X265_PARAM_VALIDATE(saveParam->lookaheadDepth, sizeof(int), 1, &m_param->lookaheadDepth, rc - lookahead); X265_PARAM_VALIDATE(saveParam->chunkStart, sizeof(int), 1, &m_param->chunkStart, chunk-start); X265_PARAM_VALIDATE(saveParam->chunkEnd, sizeof(int), 1, &m_param->chunkEnd, chunk-end); - X265_PARAM_VALIDATE(saveParam->cuTree,sizeof(int),1,&m_param->rc.cuTree, cutree - offset); X265_PARAM_VALIDATE(saveParam->ctuDistortionRefine, sizeof(int), 1, &m_param->ctuDistortionRefine, ctu - distortion); + X265_PARAM_VALIDATE(saveParam->frameDuplication, sizeof(int), 1, &m_param->bEnableFrameDuplication, frame - dup); int sourceHeight, 
sourceWidth; if (writeFlag) { X265_PARAM_VALIDATE(saveParam->analysisReuseLevel, sizeof(int), 1, &m_param->analysisSaveReuseLevel, analysis - save - reuse - level); + X265_PARAM_VALIDATE(saveParam->cuTree, sizeof(int), 1, &m_param->rc.cuTree, cutree-offset); sourceHeight = m_param->sourceHeight - m_conformanceWindow.bottomOffset; sourceWidth = m_param->sourceWidth - m_conformanceWindow.rightOffset; X265_PARAM_VALIDATE(saveParam->sourceWidth, sizeof(int), 1, &sourceWidth, res-width); @@ -5073,6 +5078,15 @@ return -1; } + int bcutree; + X265_FREAD(&bcutree, sizeof(int), 1, m_analysisFileIn, &(saveParam->cuTree)); + if (loadLevel == 10 && m_param->rc.cuTree && (!bcutree || saveLevel < 2)) + { + x265_log(NULL, X265_LOG_ERROR, "Error reading cu-tree info. Disabling cutree offsets. \n"); + m_param->rc.cuTree = 0; + return -1; + } + bool error = false; int curSourceHeight = m_param->sourceHeight - m_conformanceWindow.bottomOffset; int curSourceWidth = m_param->sourceWidth - m_conformanceWindow.rightOffset; @@ -5701,7 +5715,7 @@ TOOLCMP(oldParam->maxNumReferences, newParam->maxNumReferences, "ref=%d to %d\n"); TOOLCMP(oldParam->bEnableFastIntra, newParam->bEnableFastIntra, "fast-intra=%d to %d\n"); TOOLCMP(oldParam->bEnableEarlySkip, newParam->bEnableEarlySkip, "early-skip=%d to %d\n"); - TOOLCMP(oldParam->bEnableRecursionSkip, newParam->bEnableRecursionSkip, "rskip=%d to %d\n"); + TOOLCMP(oldParam->recursionSkipMode, newParam->recursionSkipMode, "rskip=%d to %d\n"); TOOLCMP(oldParam->searchMethod, newParam->searchMethod, "me=%d to %d\n"); TOOLCMP(oldParam->searchRange, newParam->searchRange, "merange=%d to %d\n"); TOOLCMP(oldParam->subpelRefine, newParam->subpelRefine, "subme= %d to %d\n");
View file
x265_3.3.tar.gz/source/encoder/frameencoder.cpp -> x265_3.4.tar.gz/source/encoder/frameencoder.cpp
Changed
@@ -130,7 +130,7 @@ { rowSum += sliceGroupSizeAccu; m_sliceBaseRow[++sidx] = i; - } + } } X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!"); m_sliceBaseRow[0] = 0; @@ -448,6 +448,18 @@ m_ssimCnt = 0; memset(&(m_frame->m_encData->m_frameStats), 0, sizeof(m_frame->m_encData->m_frameStats)); + if (!m_param->bHistBasedSceneCut && m_param->rc.aqMode != X265_AQ_EDGE && m_param->recursionSkipMode == EDGE_BASED_RSKIP) + { + int height = m_frame->m_fencPic->m_picHeight; + int width = m_frame->m_fencPic->m_picWidth; + intptr_t stride = m_frame->m_fencPic->m_stride; + + if (!computeEdge(m_frame->m_edgeBitPic, m_frame->m_fencPic->m_picOrg[0], NULL, stride, height, width, false, 1)) + { + x265_log(m_param, X265_LOG_ERROR, " Failed to compute edge !"); + } + } + /* Emit access unit delimiter unless this is the first frame and the user is * not repeating headers (since AUD is supposed to be the first NAL in the access * unit) */
View file
x265_3.3.tar.gz/source/encoder/ratecontrol.cpp -> x265_3.4.tar.gz/source/encoder/ratecontrol.cpp
Changed
@@ -269,7 +269,7 @@ x265_log(m_param, X265_LOG_WARNING, "NAL HRD parameters require VBV parameters, ignored\n"); m_param->bEmitHRDSEI = 0; } - m_isCbr = m_param->rc.rateControlMode == X265_RC_ABR && m_isVbv && !m_2pass && m_param->rc.vbvMaxBitrate <= m_param->rc.bitrate; + m_isCbr = m_param->rc.rateControlMode == X265_RC_ABR && m_isVbv && m_param->rc.vbvMaxBitrate <= m_param->rc.bitrate; if (m_param->rc.bStrictCbr && !m_isCbr) { x265_log(m_param, X265_LOG_WARNING, "strict CBR set without CBR mode, ignored\n"); @@ -335,7 +335,7 @@ int vbvBufferSize = m_param->rc.vbvBufferSize * 1000; int vbvMaxBitrate = m_param->rc.vbvMaxBitrate * 1000; - if (m_param->bEmitHRDSEI) + if (m_param->bEmitHRDSEI && !m_param->decoderVbvMaxRate) { const HRDInfo* hrd = &sps.vuiParameters.hrdParameters; vbvBufferSize = hrd->cpbSizeValue << (hrd->cpbSizeScale + CPB_SHIFT); @@ -509,6 +509,7 @@ CMP_OPT_FIRST_PASS(" keyint", m_param->keyframeMax); CMP_OPT_FIRST_PASS("scenecut", m_param->scenecutThreshold); CMP_OPT_FIRST_PASS("intra-refresh", m_param->bIntraRefresh); + CMP_OPT_FIRST_PASS("frame-dup", m_param->bEnableFrameDuplication); if (m_param->bMultiPassOptRPS) { CMP_OPT_FIRST_PASS("multi-pass-opt-rps", m_param->bMultiPassOptRPS); @@ -546,7 +547,7 @@ x265_log(m_param, X265_LOG_WARNING, "2nd pass has fewer frames than 1st pass (%d vs %d)\n", m_param->totalFrames, m_numEntries); } - if (m_param->totalFrames > m_numEntries) + if (m_param->totalFrames > m_numEntries && !m_param->bEnableFrameDuplication) { x265_log(m_param, X265_LOG_ERROR, "2nd pass has more frames than 1st pass (%d vs %d)\n", m_param->totalFrames, m_numEntries); @@ -781,6 +782,10 @@ // Init HRD HRDInfo* hrd = &sps.vuiParameters.hrdParameters; hrd->cbrFlag = m_isCbr; + if (m_param->reconfigWindowSize) { + hrd->cbrFlag = 0; + vbvMaxBitrate = m_param->decoderVbvMaxRate * 1000; + } // normalize HRD size and rate to the value / scale notation hrd->bitRateScale = x265_clip3(0, 15, calcScale(vbvMaxBitrate) - BR_SHIFT); @@ -829,7 +834,7 
@@ /* weighted average of cplx of future frames */ for (int j = 1; j < cplxBlur * 2 && j < m_numEntries - i; j++) { - int index = m_encOrder[i + j]; + int index = i+j; RateControlEntry *rcj = &m_rce2Pass[index]; weight *= 1 - pow(rcj->iCuCount / m_ncu, 2); if (weight < 0.0001) @@ -842,7 +847,7 @@ weight = 1.0; for (int j = 0; j <= cplxBlur * 2 && j <= i; j++) { - int index = m_encOrder[i - j]; + int index = i-j; RateControlEntry *rcj = &m_rce2Pass[index]; gaussianWeight = weight * exp(-j * j / 200.0); weightSum += gaussianWeight; @@ -851,7 +856,7 @@ if (weight < .0001) break; } - m_rce2Pass[m_encOrder[i]].blurredComplexity = cplxSum / weightSum; + m_rce2Pass[i].blurredComplexity= cplxSum / weightSum; } CHECKED_MALLOC(qScale, double, m_numEntries); if (filterSize > 1) @@ -870,7 +875,7 @@ expectedBits = 1; for (int i = 0; i < m_numEntries; i++) { - RateControlEntry* rce = &m_rce2Pass[m_encOrder[i]]; + RateControlEntry* rce = &m_rce2Pass[i]; double q = getQScale(rce, 1.0); expectedBits += qScale2bits(rce, q); m_lastQScaleFor[rce->sliceType] = q; @@ -893,15 +898,15 @@ /* find qscale */ for (int i = 0; i < m_numEntries; i++) { - RateControlEntry *rce = &m_rce2Pass[m_encOrder[i]]; + RateControlEntry *rce = &m_rce2Pass[i]; qScale[i] = getQScale(rce, rateFactor); m_lastQScaleFor[rce->sliceType] = qScale[i]; } /* fixed I/B qscale relative to P */ - for (int i = m_numEntries - 1; i >= 0; i--) + for (int i = 0; i < m_numEntries; i++) { - qScale[i] = getDiffLimitedQScale(&m_rce2Pass[m_encOrder[i]], qScale[i]); + qScale[i] = getDiffLimitedQScale(&m_rce2Pass[i], qScale[i]); X265_CHECK(qScale[i] >= 0, "qScale became negative\n"); } @@ -912,7 +917,6 @@ for (int i = 0; i < m_numEntries; i++) { double q = 0.0, sum = 0.0; - for (int j = 0; j < filterSize; j++) { int idx = i + j - filterSize / 2; @@ -920,7 +924,7 @@ double coeff = qBlur == 0 ? 
1.0 : exp(-d * d / (qBlur * qBlur)); if (idx < 0 || idx >= m_numEntries) continue; - if (m_rce2Pass[m_encOrder[i]].sliceType != m_rce2Pass[m_encOrder[idx]].sliceType) + if (m_rce2Pass[i].sliceType != m_rce2Pass[idx].sliceType) continue; q += qScale[idx] * coeff; sum += coeff; @@ -932,7 +936,7 @@ /* find expected bits */ for (int i = 0; i < m_numEntries; i++) { - RateControlEntry *rce = &m_rce2Pass[m_encOrder[i]]; + RateControlEntry *rce = &m_rce2Pass[i]; rce->newQScale = clipQscale(NULL, rce, blurredQscale[i]); // check if needed X265_CHECK(rce->newQScale >= 0, "new Qscale is negative\n"); expectedBits += qScale2bits(rce, rce->newQScale); @@ -1279,6 +1283,7 @@ m_param->rc.vbvMaxBitrate = m_param->rc.zones[i].zoneParam->rc.vbvMaxBitrate; memcpy(m_relativeComplexity, m_param->rc.zones[i].relativeComplexity, sizeof(double) * m_param->reconfigWindowSize); reconfigureRC(); + m_isCbr = 1; /* Always vbvmaxrate == bitrate here*/ m_top->zoneReadCount[i].incr(); } } @@ -1951,7 +1956,7 @@ /* Adjust quant based on the difference between * achieved and expected bitrate so far */ double curTime = (double)rce->encodeOrder / m_numEntries; - double w = x265_clip3(0.0, 1.0, curTime * 100); + double w = x265_clip3(0.0, 1.0, curTime); q *= pow((double)m_totalBits / m_expectedBitsSum, w); } if (m_framesDone == 0 && m_param->rc.rateControlMode == X265_RC_ABR && m_isGrainEnabled) @@ -2742,7 +2747,9 @@ x265_log(m_param, X265_LOG_WARNING, "poc:%d, VBV underflow (%.0f bits)\n", rce->poc, m_bufferFillFinal); m_bufferFillFinal = X265_MAX(m_bufferFillFinal, 0); - m_bufferFillFinal += m_bufferRate; + m_bufferFillFinal += rce->bufferRate; + if (m_param->csvLogLevel >= 2) + m_unclippedBufferFillFinal = m_bufferFillFinal; if (m_param->rc.bStrictCbr) { @@ -2752,14 +2759,14 @@ filler += FILLER_OVERHEAD * 8; } m_bufferFillFinal -= filler; - bufferBits = X265_MIN(bits + filler + m_bufferExcess, m_bufferRate); + bufferBits = X265_MIN(bits + filler + m_bufferExcess, rce->bufferRate); m_bufferExcess = 
X265_MAX(m_bufferExcess - bufferBits + bits + filler, 0); m_bufferFillActual += bufferBits - bits - filler; } else { m_bufferFillFinal = X265_MIN(m_bufferFillFinal, m_bufferSize); - bufferBits = X265_MIN(bits + m_bufferExcess, m_bufferRate); + bufferBits = X265_MIN(bits + m_bufferExcess, rce->bufferRate); m_bufferExcess = X265_MAX(m_bufferExcess - bufferBits + bits, 0); m_bufferFillActual += bufferBits - bits; m_bufferFillActual = X265_MIN(m_bufferFillActual, m_bufferSize);
View file
x265_3.3.tar.gz/source/encoder/ratecontrol.h -> x265_3.4.tar.gz/source/encoder/ratecontrol.h
Changed
@@ -157,6 +157,7 @@ double m_rateFactorConstant; double m_bufferSize; double m_bufferFillFinal; /* real buffer as of the last finished frame */ + double m_unclippedBufferFillFinal; /* real unclipped buffer as of the last finished frame used to log in CSV*/ double m_bufferFill; /* planned buffer, if all in-progress frames hit their bit budget */ double m_bufferRate; /* # of bits added to buffer_fill after each frame */ double m_vbvMaxRate; /* in kbps */
View file
x265_3.3.tar.gz/source/encoder/slicetype.cpp -> x265_3.4.tar.gz/source/encoder/slicetype.cpp
Changed
@@ -87,7 +87,7 @@ namespace X265_NS { -bool computeEdge(pixel *edgePic, pixel *refPic, pixel *edgeTheta, intptr_t stride, int height, int width, bool bcalcTheta) +bool computeEdge(pixel* edgePic, pixel* refPic, pixel* edgeTheta, intptr_t stride, int height, int width, bool bcalcTheta, pixel whitePixel) { intptr_t rowOne = 0, rowTwo = 0, rowThree = 0, colOne = 0, colTwo = 0, colThree = 0; intptr_t middle = 0, topLeft = 0, topRight = 0, bottomLeft = 0, bottomRight = 0; @@ -141,7 +141,7 @@ theta = 180 + theta; edgeTheta[middle] = (pixel)theta; } - edgePic[middle] = (pixel)(gradientMagnitude >= edgeThreshold ? edgeThreshold : blackPixel); + edgePic[middle] = (pixel)(gradientMagnitude >= EDGE_THRESHOLD ? whitePixel : blackPixel); } } return true; @@ -519,6 +519,13 @@ if (param->rc.aqMode == X265_AQ_EDGE) edgeFilter(curFrame, param); + if (param->rc.aqMode == X265_AQ_EDGE && !param->bHistBasedSceneCut && param->recursionSkipMode == EDGE_BASED_RSKIP) + { + pixel* src = curFrame->m_edgePic + curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride + curFrame->m_fencPic->m_lumaMarginX; + primitives.planecopy_pp_shr(src, curFrame->m_fencPic->m_stride, curFrame->m_edgeBitPic, + curFrame->m_fencPic->m_stride, curFrame->m_fencPic->m_picWidth, curFrame->m_fencPic->m_picHeight, SHIFT_TO_BITPLANE); + } + if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE || param->rc.aqMode == X265_AQ_AUTO_VARIANCE_BIASED || param->rc.aqMode == X265_AQ_EDGE) { double bit_depth_correction = 1.f / (1 << (2 * (X265_DEPTH - 8)));
View file
x265_3.3.tar.gz/source/encoder/slicetype.h -> x265_3.4.tar.gz/source/encoder/slicetype.h
Changed
@@ -44,9 +44,9 @@ #define EDGE_INCLINATION 45 #if HIGH_BIT_DEPTH -#define edgeThreshold 1023.0 +#define EDGE_THRESHOLD 1023.0 #else -#define edgeThreshold 255.0 +#define EDGE_THRESHOLD 255.0 #endif #define PI 3.14159265 @@ -101,7 +101,7 @@ protected: uint32_t acEnergyCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, int csp, uint32_t qgSize); - uint32_t edgeDensityCu(Frame*curFrame, uint32_t &avgAngle, uint32_t blockX, uint32_t blockY, uint32_t qgSize); + uint32_t edgeDensityCu(Frame* curFrame, uint32_t &avgAngle, uint32_t blockX, uint32_t blockY, uint32_t qgSize); uint32_t lumaSumCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, uint32_t qgSize); uint32_t weightCostLuma(Lowres& fenc, Lowres& ref, WeightParam& wp); bool allocWeightedRef(Lowres& fenc); @@ -265,7 +265,6 @@ CostEstimateGroup& operator=(const CostEstimateGroup&); }; -bool computeEdge(pixel *edgePic, pixel *refPic, pixel *edgeTheta, intptr_t stride, int height, int width, bool bcalcTheta); - +bool computeEdge(pixel* edgePic, pixel* refPic, pixel* edgeTheta, intptr_t stride, int height, int width, bool bcalcTheta, pixel whitePixel = EDGE_THRESHOLD); } #endif // ifndef X265_SLICETYPE_H
View file
x265_3.3.tar.gz/source/test/CMakeLists.txt -> x265_3.4.tar.gz/source/test/CMakeLists.txt
Changed
@@ -23,13 +23,15 @@ # add ARM assembly files if(ARM OR CROSS_COMPILE_ARM) - enable_language(ASM) - set(NASM_SRC checkasm-arm.S) - add_custom_command( - OUTPUT checkasm-arm.obj - COMMAND ${CMAKE_CXX_COMPILER} - ARGS ${NASM_FLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/checkasm-arm.S -o checkasm-arm.obj - DEPENDS checkasm-arm.S) + if(NOT ARM64) + enable_language(ASM) + set(NASM_SRC checkasm-arm.S) + add_custom_command( + OUTPUT checkasm-arm.obj + COMMAND ${CMAKE_CXX_COMPILER} + ARGS ${NASM_FLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/checkasm-arm.S -o checkasm-arm.obj + DEPENDS checkasm-arm.S) + endif() endif(ARM OR CROSS_COMPILE_ARM) # add PowerPC assembly files
View file
x265_3.3.tar.gz/source/test/regression-tests.txt -> x265_3.4.tar.gz/source/test/regression-tests.txt
Changed
@@ -75,7 +75,7 @@ News-4k.y4m,--preset superfast --lookahead-slices 6 --aq-mode 0 News-4k.y4m,--preset superfast --slices 4 --aq-mode 0 News-4k.y4m,--preset medium --tune ssim --no-sao --qg-size 16 -News-4k.y4m,--preset veryslow --no-rskip +News-4k.y4m,--preset veryslow --rskip 0 News-4k.y4m,--preset veryslow --pme --crf 40 OldTownCross_1920x1080_50_10bit_422.yuv,--preset superfast --weightp OldTownCross_1920x1080_50_10bit_422.yuv,--preset medium --no-weightp @@ -162,7 +162,11 @@ sintel_trailer_2k_1920x1080_24.yuv, --preset medium --hist-scenecut --hist-threshold 0.02 --frame-dup --dup-threshold 60 --hrd --bitrate 10000 --vbv-bufsize 15000 --vbv-maxrate 12000 sintel_trailer_2k_1920x1080_24.yuv, --preset medium --hist-scenecut --hist-threshold 0.02 sintel_trailer_2k_1920x1080_24.yuv, --preset ultrafast --hist-scenecut --hist-threshold 0.02 - +crowd_run_1920x1080_50.yuv, --preset faster --ctu 32 --rskip 2 --rskip-edge-threshold 5 +crowd_run_1920x1080_50.yuv, --preset fast --ctu 64 --rskip 2 --rskip-edge-threshold 5 --aq-mode 4 +crowd_run_1920x1080_50.yuv, --preset slow --ctu 32 --rskip 2 --rskip-edge-threshold 5 --hist-scenecut --hist-threshold 0.1 +crowd_run_1920x1080_50.yuv, --preset slower --ctu 16 --rskip 2 --rskip-edge-threshold 5 --hist-scenecut --hist-threshold 0.1 --aq-mode 4 + # Main12 intraCost overflow bug test 720p50_parkrun_ter.y4m,--preset medium
View file
x265_3.3.tar.gz/source/test/save-load-tests.txt -> x265_3.4.tar.gz/source/test/save-load-tests.txt
Changed
@@ -18,3 +18,4 @@ RaceHorses_416x240_30.y4m, --preset slow --no-cutree --ctu 16 --analysis-save x265_analysis.dat --analysis-save-reuse-level 10 --scale-factor 2 --crf 22 --vbv-maxrate 1000 --vbv-bufsize 1000::RaceHorses_832x480_30.y4m, --preset slow --no-cutree --ctu 32 --analysis-load x265_analysis.dat --analysis-save x265_analysis_2.dat --analysis-load-reuse-level 10 --analysis-save-reuse-level 10 --scale-factor 2 --crf 16 --vbv-maxrate 4000 --vbv-bufsize 4000 --refine-intra 0 --refine-inter 1::RaceHorses_1664x960_30.y4m, --preset slow --no-cutree --ctu 64 --analysis-load x265_analysis_2.dat --analysis-load-reuse-level 10 --scale-factor 2 --crf 12 --vbv-maxrate 7000 --vbv-bufsize 7000 --refine-intra 2 --refine-inter 2 crowd_run_540p50.y4m, --preset veryslow --no-cutree --analysis-save x265_analysis_540.dat --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 5000 --vbv-bufsize 15000 --vbv-maxrate 9000::crowd_run_1080p50.y4m, --preset veryslow --no-cutree --analysis-save x265_analysis_1080.dat --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 10000 --vbv-bufsize 30000 --vbv-maxrate 17500::crowd_run_1080p50.y4m, --preset veryslow --no-cutree --analysis-save x265_analysis_1080.dat --analysis-load x265_analysis_540.dat --refine-intra 4 --dynamic-refine --analysis-load-reuse-level 10 --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 10000 --vbv-bufsize 30000 --vbv-maxrate 17500::crowd_run_2160p50.y4m, --preset veryslow --no-cutree --analysis-save x265_analysis_2160.dat --analysis-load x265_analysis_1080.dat --refine-intra 3 --dynamic-refine --analysis-load-reuse-level 10 --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 24000 --vbv-bufsize 84000 --vbv-maxrate 49000::crowd_run_2160p50.y4m, --preset veryslow --no-cutree --analysis-load x265_analysis_2160.dat --refine-intra 2 --dynamic-refine --analysis-load-reuse-level 10 --scale-factor 1 --bitrate 24000 --vbv-bufsize 84000 --vbv-maxrate 49000 crowd_run_540p50.y4m, --preset medium 
--no-cutree --analysis-save x265_analysis_540.dat --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 5000 --vbv-bufsize 15000 --vbv-maxrate 9000::crowd_run_1080p50.y4m, --preset medium --no-cutree --analysis-save x265_analysis_1080.dat --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 10000 --vbv-bufsize 30000 --vbv-maxrate 17500::crowd_run_1080p50.y4m, --preset medium --no-cutree --analysis-save x265_analysis_1080.dat --analysis-load x265_analysis_540.dat --refine-intra 4 --dynamic-refine --analysis-load-reuse-level 10 --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 10000 --vbv-bufsize 30000 --vbv-maxrate 17500::crowd_run_2160p50.y4m, --preset medium --no-cutree --analysis-save x265_analysis_2160.dat --analysis-load x265_analysis_1080.dat --refine-intra 3 --dynamic-refine --analysis-load-reuse-level 10 --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 24000 --vbv-bufsize 84000 --vbv-maxrate 49000::crowd_run_2160p50.y4m, --preset medium --no-cutree --analysis-load x265_analysis_2160.dat --refine-intra 2 --dynamic-refine --analysis-load-reuse-level 10 --scale-factor 1 --bitrate 24000 --vbv-bufsize 84000 --vbv-maxrate 49000 +News-4k.y4m, --preset medium --analysis-save x265_analysis_fdup.dat --frame-dup --hrd --bitrate 10000 --vbv-bufsize 15000 --vbv-maxrate 12000::News-4k.y4m, --analysis-load x265_analysis_fdup.dat --frame-dup --hrd --bitrate 10000 --vbv-bufsize 15000 --vbv-maxrate 12000
View file
x265_3.3.tar.gz/source/test/testbench.cpp -> x265_3.4.tar.gz/source/test/testbench.cpp
Changed
@@ -5,6 +5,7 @@ * Mandar Gurav <mandar@multicorewareinc.com> * Mahesh Pittala <mahesh@multicorewareinc.com> * Min Chen <chenm003@163.com> + * Yimeng Su <yimeng.su@huawei.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -208,6 +209,14 @@ EncoderPrimitives asmprim; memset(&asmprim, 0, sizeof(asmprim)); setupAssemblyPrimitives(asmprim, test_arch[i].flag); + +#if X265_ARCH_ARM64 + /* Temporary workaround because luma_vsp assembly primitive has not been completed + * but interp_8tap_hv_pp_cpu uses mixed C primitive and assembly primitive. + * Otherwise, segment fault occurs. */ + setupAliasCPrimitives(cprim, asmprim, test_arch[i].flag); +#endif + setupAliasPrimitives(asmprim); memcpy(&primitives, &asmprim, sizeof(EncoderPrimitives)); for (size_t h = 0; h < sizeof(harness) / sizeof(TestHarness*); h++) @@ -232,6 +241,13 @@ #endif setupAssemblyPrimitives(optprim, cpuid); +#if X265_ARCH_ARM64 + /* Temporary workaround because luma_vsp assembly primitive has not been completed + * but interp_8tap_hv_pp_cpu uses mixed C primitive and assembly primitive. + * Otherwise, segment fault occurs. */ + setupAliasCPrimitives(cprim, optprim, cpuid); +#endif + /* Note that we do not setup aliases for performance tests, that would be * redundant. The testbench only verifies they are correctly aliased */
View file
x265_3.3.tar.gz/source/test/testharness.h -> x265_3.4.tar.gz/source/test/testharness.h
Changed
@@ -3,6 +3,7 @@ * * Authors: Steve Borho <steve@borho.org> * Min Chen <chenm003@163.com> + * Yimeng Su <yimeng.su@huawei.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -81,12 +82,16 @@ #if X265_ARCH_X86 asm volatile("rdtsc" : "=a" (a) ::"edx"); #elif X265_ARCH_ARM +#if X265_ARCH_ARM64 + asm volatile("mrs %0, cntvct_el0" : "=r"(a)); +#else // TOD-DO: verify following inline asm to get cpu Timestamp Counter for ARM arch // asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(a)); // TO-DO: replace clock() function with appropriate ARM cpu instructions a = clock(); #endif +#endif return a; } #endif // ifdef _MSC_VER
View file
x265_3.3.tar.gz/source/x265.cpp -> x265_3.4.tar.gz/source/x265.cpp
Changed
@@ -27,11 +27,7 @@ #include "x265.h" #include "x265cli.h" - -#include "input/input.h" -#include "output/output.h" -#include "output/reconplay.h" -#include "svt.h" +#include "abrEncApp.h" #if HAVE_VLD /* Visual Leak Detector */ @@ -47,191 +43,59 @@ #include <fstream> #include <queue> -#define CONSOLE_TITLE_SIZE 200 -#ifdef _WIN32 -#include <windows.h> -#define SetThreadExecutionState(es) -static char orgConsoleTitle[CONSOLE_TITLE_SIZE] = ""; -#else -#define GetConsoleTitle(t, n) -#define SetConsoleTitle(t) -#define SetThreadExecutionState(es) -#endif - using namespace X265_NS; -/* Ctrl-C handler */ -static volatile sig_atomic_t b_ctrl_c /* = 0 */; -static void sigint_handler(int) -{ - b_ctrl_c = 1; -} -#define START_CODE 0x00000001 -#define START_CODE_BYTES 4 - -struct CLIOptions -{ - InputFile* input; - ReconFile* recon; - OutputFile* output; - FILE* qpfile; - FILE* zoneFile; - FILE* dolbyVisionRpu; /* File containing Dolby Vision BL RPU metadata */ - const char* reconPlayCmd; - const x265_api* api; - x265_param* param; - x265_vmaf_data* vmafData; - bool bProgress; - bool bForceY4m; - bool bDither; - uint32_t seek; // number of frames to skip from the beginning - uint32_t framesToBeEncoded; // number of frames to encode - uint64_t totalbytes; - int64_t startTime; - int64_t prevUpdateTime; - - /* in microseconds */ - static const int UPDATE_INTERVAL = 250000; - - CLIOptions() - { - input = NULL; - recon = NULL; - output = NULL; - qpfile = NULL; - zoneFile = NULL; - dolbyVisionRpu = NULL; - reconPlayCmd = NULL; - api = NULL; - param = NULL; - vmafData = NULL; - framesToBeEncoded = seek = 0; - totalbytes = 0; - bProgress = true; - bForceY4m = false; - startTime = x265_mdate(); - prevUpdateTime = 0; - bDither = false; - } +#define X265_HEAD_ENTRIES 3 - void destroy(); - void printStatus(uint32_t frameNum); - bool parse(int argc, char **argv); - bool parseZoneParam(int argc, char **argv, x265_param* globalParam, int zonefileCount); - bool parseQPFile(x265_picture 
&pic_org); - bool parseZoneFile(); -}; - -void CLIOptions::destroy() -{ - if (input) - input->release(); - input = NULL; - if (recon) - recon->release(); - recon = NULL; - if (qpfile) - fclose(qpfile); - qpfile = NULL; - if (zoneFile) - fclose(zoneFile); - zoneFile = NULL; - if (dolbyVisionRpu) - fclose(dolbyVisionRpu); - dolbyVisionRpu = NULL; - if (output) - output->release(); - output = NULL; -} - -void CLIOptions::printStatus(uint32_t frameNum) -{ - char buf[200]; - int64_t time = x265_mdate(); - - if (!bProgress || !frameNum || (prevUpdateTime && time - prevUpdateTime < UPDATE_INTERVAL)) - return; - - int64_t elapsed = time - startTime; - double fps = elapsed > 0 ? frameNum * 1000000. / elapsed : 0; - float bitrate = 0.008f * totalbytes * (param->fpsNum / param->fpsDenom) / ((float)frameNum); - if (framesToBeEncoded) - { - int eta = (int)(elapsed * (framesToBeEncoded - frameNum) / ((int64_t)frameNum * 1000000)); - sprintf(buf, "x265 [%.1f%%] %d/%d frames, %.2f fps, %.2f kb/s, eta %d:%02d:%02d", - 100. * frameNum / (param->chunkEnd ? param->chunkEnd : param->totalFrames), frameNum, (param->chunkEnd ? param->chunkEnd : param->totalFrames), fps, bitrate, - eta / 3600, (eta / 60) % 60, eta % 60); - } - else - sprintf(buf, "x265 %d frames: %.2f fps, %.2f kb/s", frameNum, fps, bitrate); - - fprintf(stderr, "%s \r", buf + 5); - SetConsoleTitle(buf); - fflush(stderr); // needed in windows - prevUpdateTime = time; -} +#ifdef _WIN32 +#define strdup _strdup +#endif -bool CLIOptions::parseZoneParam(int argc, char **argv, x265_param* globalParam, int zonefileCount) +#ifdef _WIN32 +/* Copy of x264 code, which allows for Unicode characters in the command line. + * Retrieve command line arguments as UTF-8. */ +static int get_argv_utf8(int *argc_ptr, char ***argv_ptr) { - bool bError = false; - int bShowHelp = false; - int outputBitDepth = 0; - const char *profile = NULL; - - /* Presets are applied before all other options. 
*/ - for (optind = 0;;) - { - int c = getopt_long(argc, argv, short_options, long_options, NULL); - if (c == -1) - break; - else if (c == 'D') - outputBitDepth = atoi(optarg); - else if (c == 'P') - profile = optarg; - else if (c == '?') - bShowHelp = true; - } - - if (!outputBitDepth && profile) - { - /* try to derive the output bit depth from the requested profile */ - if (strstr(profile, "10")) - outputBitDepth = 10; - else if (strstr(profile, "12")) - outputBitDepth = 12; - else - outputBitDepth = 8; - } - - api = x265_api_get(outputBitDepth); - if (!api) + int ret = 0; + wchar_t **argv_utf16 = CommandLineToArgvW(GetCommandLineW(), argc_ptr); + if (argv_utf16) { - x265_log(NULL, X265_LOG_WARNING, "falling back to default bit-depth\n"); - api = x265_api_get(0); - } + int argc = *argc_ptr; + int offset = (argc + 1) * sizeof(char*); + int size = offset; - if (bShowHelp) - { - printVersion(globalParam, api); - showHelp(globalParam); - } + for (int i = 0; i < argc; i++) + size += WideCharToMultiByte(CP_UTF8, 0, argv_utf16[i], -1, NULL, 0, NULL, NULL); - globalParam->rc.zones[zonefileCount].zoneParam = api->param_alloc(); - if (!globalParam->rc.zones[zonefileCount].zoneParam) - { - x265_log(NULL, X265_LOG_ERROR, "param alloc failed\n"); - return true; + char **argv = *argv_ptr = (char**)malloc(size); + if (argv) + { + for (int i = 0; i < argc; i++) + { + argv[i] = (char*)argv + offset; + offset += WideCharToMultiByte(CP_UTF8, 0, argv_utf16[i], -1, argv[i], size - offset, NULL, NULL); + } + argv[argc] = NULL; + ret = 1; + } + LocalFree(argv_utf16); } + return ret; +} +#endif - memcpy(globalParam->rc.zones[zonefileCount].zoneParam, globalParam, sizeof(x265_param)); +/* Checks for abr-ladder config file in the command line. + * Returns true if abr-config file is present. 
Returns + * false otherwise */ +static bool checkAbrLadder(int argc, char **argv, FILE **abrConfig) +{ for (optind = 0;;) { int long_options_index = -1; int c = getopt_long(argc, argv, short_options, long_options, &long_options_index); if (c == -1) break; - if (long_options_index < 0 && c > 0) { for (size_t i = 0; i < sizeof(long_options) / sizeof(long_options[0]); i++) @@ -248,593 +112,138 @@ /* getopt_long might have already printed an error message */ if (c != 63) x265_log(NULL, X265_LOG_WARNING, "internal error: short option '%c' has no long option\n", c); - return true; + return false; } } if (long_options_index < 0) { x265_log(NULL, X265_LOG_WARNING, "short option '%c' unrecognized\n", c); - return true; + return false; } - - bError |= !!api->zone_param_parse(globalParam->rc.zones[zonefileCount].zoneParam, long_options[long_options_index].name, optarg); - - if (bError) + if (!strcmp(long_options[long_options_index].name, "abr-ladder")) { - const char *name = long_options_index > 0 ? long_options[long_options_index].name : argv[optind - 2]; - x265_log(NULL, X265_LOG_ERROR, "invalid argument: %s = %s\n", name, optarg); + *abrConfig = x265_fopen(optarg, "rb"); + if (!abrConfig) + x265_log_file(NULL, X265_LOG_ERROR, "%s abr-ladder config file not found or error in opening zone file\n", optarg); return true; } } - - if (optind < argc) - { - x265_log(param, X265_LOG_WARNING, "extra unused command arguments given <%s>\n", argv[optind]); - return true; - } return false; } -bool CLIOptions::parse(int argc, char **argv) +static uint8_t getNumAbrEncodes(FILE* abrConfig) { - bool bError = false; - int bShowHelp = false; - int inputBitDepth = 8; - int outputBitDepth = 0; - int reconFileBitDepth = 0; - const char *inputfn = NULL; - const char *reconfn = NULL; - const char *outputfn = NULL; - const char *preset = NULL; - const char *tune = NULL; - const char *profile = NULL; - int svtEnabled = 0; - - if (argc <= 1) - { - x265_log(NULL, X265_LOG_ERROR, "No input file. 
Run x265 --help for a list of options.\n"); - return true; - } - - /* Presets are applied before all other options. */ - for (optind = 0;; ) - { - int optionsIndex = -1; - int c = getopt_long(argc, argv, short_options, long_options, &optionsIndex); - if (c == -1) - break; - else if (c == 'p') - preset = optarg; - else if (c == 't') - tune = optarg; - else if (c == 'D') - outputBitDepth = atoi(optarg); - else if (c == 'P') - profile = optarg; - else if (c == '?') - bShowHelp = true; - else if (!c && !strcmp(long_options[optionsIndex].name, "svt")) - svtEnabled = 1; - } + char line[1024]; + uint8_t numEncodes = 0; - if (!outputBitDepth && profile) + while (fgets(line, sizeof(line), abrConfig)) { - /* try to derive the output bit depth from the requested profile */ - if (strstr(profile, "10")) - outputBitDepth = 10; - else if (strstr(profile, "12")) - outputBitDepth = 12; - else - outputBitDepth = 8; - } - - api = x265_api_get(outputBitDepth); - if (!api) - { - x265_log(NULL, X265_LOG_WARNING, "falling back to default bit-depth\n"); - api = x265_api_get(0); - } - - param = api->param_alloc(); - if (!param) - { - x265_log(NULL, X265_LOG_ERROR, "param alloc failed\n"); - return true; - } -#if ENABLE_LIBVMAF - vmafData = (x265_vmaf_data*)x265_malloc(sizeof(x265_vmaf_data)); - if(!vmafData) - { - x265_log(NULL, X265_LOG_ERROR, "vmaf data alloc failed\n"); - return true; - } -#endif - - if (api->param_default_preset(param, preset, tune) < 0) - { - x265_log(NULL, X265_LOG_ERROR, "preset or tune unrecognized\n"); - return true; - } - - if (bShowHelp) - { - printVersion(param, api); - showHelp(param); + if (strcmp(line, "\n") == 0) + continue; + else if (!(*line == '#')) + numEncodes++; } + rewind(abrConfig); + return numEncodes; +} - //Set enable SVT-HEVC encoder first if found in the command line - if (svtEnabled) api->param_parse(param, "svt", NULL); +static bool parseAbrConfig(FILE* abrConfig, CLIOptions cliopt[], uint8_t numEncodes) +{ + char line[1024]; + char* argLine; 
- for (optind = 0;; ) + for (uint32_t i = 0; i < numEncodes; i++) { - int long_options_index = -1; - int c = getopt_long(argc, argv, short_options, long_options, &long_options_index); - if (c == -1) - break; - - switch (c) + fgets(line, sizeof(line), abrConfig); + if (*line == '#' || (strcmp(line, "\r\n") == 0)) + continue; + int index = (int)strcspn(line, "\r\n"); + line[index] = '\0'; + argLine = line; + char* start = strchr(argLine, ' '); + while (isspace((unsigned char)*start)) start++; + int argc = 0; + char **argv = (char**)malloc(256 * sizeof(char *)); + // Adding a dummy string to avoid file parsing error + argv[argc++] = (char *)"x265"; + + /* Parse CLI header to identify the ID of the load encode and the reuse level */ + char *header = strtok(argLine, "[]"); + uint32_t idCount = 0; + char *id = strtok(header, ":"); + char *head[X265_HEAD_ENTRIES]; + cliopt[i].encId = i; + cliopt[i].isAbrLadderConfig = true; + + while (id && (idCount <= X265_HEAD_ENTRIES)) { - case 'h': - printVersion(param, api); - showHelp(param); - break; - - case 'V': - printVersion(param, api); - x265_report_simd(param); - exit(0); - - default: - if (long_options_index < 0 && c > 0) - { - for (size_t i = 0; i < sizeof(long_options) / sizeof(long_options[0]); i++) - { - if (long_options[i].val == c) - { - long_options_index = (int)i; - break; - } - } - - if (long_options_index < 0) - { - /* getopt_long might have already printed an error message */ - if (c != 63) - x265_log(NULL, X265_LOG_WARNING, "internal error: short option '%c' has no long option\n", c); - return true; - } - } - if (long_options_index < 0) - { - x265_log(NULL, X265_LOG_WARNING, "short option '%c' unrecognized\n", c); - return true; - } -#define OPT(longname) \ - else if (!strcmp(long_options[long_options_index].name, longname)) -#define OPT2(name1, name2) \ - else if (!strcmp(long_options[long_options_index].name, name1) || \ - !strcmp(long_options[long_options_index].name, name2)) - - if (0) ; - OPT2("frame-skip", 
"seek") this->seek = (uint32_t)x265_atoi(optarg, bError); - OPT("frames") this->framesToBeEncoded = (uint32_t)x265_atoi(optarg, bError); - OPT("no-progress") this->bProgress = false; - OPT("output") outputfn = optarg; - OPT("input") inputfn = optarg; - OPT("recon") reconfn = optarg; - OPT("input-depth") inputBitDepth = (uint32_t)x265_atoi(optarg, bError); - OPT("dither") this->bDither = true; - OPT("recon-depth") reconFileBitDepth = (uint32_t)x265_atoi(optarg, bError); - OPT("y4m") this->bForceY4m = true; - OPT("profile") /* handled above */; - OPT("preset") /* handled above */; - OPT("tune") /* handled above */; - OPT("output-depth") /* handled above */; - OPT("recon-y4m-exec") reconPlayCmd = optarg; - OPT("svt") /* handled above */; - OPT("qpfile") - { - this->qpfile = x265_fopen(optarg, "rb"); - if (!this->qpfile) - x265_log_file(param, X265_LOG_ERROR, "%s qpfile not found or error in opening qp file\n", optarg); - } - OPT("dolby-vision-rpu") - { - this->dolbyVisionRpu = x265_fopen(optarg, "rb"); - if (!this->dolbyVisionRpu) - { - x265_log_file(param, X265_LOG_ERROR, "Dolby Vision RPU metadata file %s not found or error in opening file\n", optarg); - return true; - } - } - OPT("zonefile") - { - this->zoneFile = x265_fopen(optarg, "rb"); - if (!this->zoneFile) - x265_log_file(param, X265_LOG_ERROR, "%s zone file not found or error in opening zone file\n", optarg); - } - OPT("fullhelp") - { - param->logLevel = X265_LOG_FULL; - printVersion(param, api); - showHelp(param); - break; - } - else - bError |= !!api->param_parse(param, long_options[long_options_index].name, optarg); - if (bError) - { - const char *name = long_options_index > 0 ? 
long_options[long_options_index].name : argv[optind - 2]; - x265_log(NULL, X265_LOG_ERROR, "invalid argument: %s = %s\n", name, optarg); - return true; - } -#undef OPT + head[idCount] = id; + id = strtok(NULL, ":"); + idCount++; } - } - - if (optind < argc && !inputfn) - inputfn = argv[optind++]; - if (optind < argc && !outputfn) - outputfn = argv[optind++]; - if (optind < argc) - { - x265_log(param, X265_LOG_WARNING, "extra unused command arguments given <%s>\n", argv[optind]); - return true; - } - - if (argc <= 1) - { - api->param_default(param); - printVersion(param, api); - showHelp(param); - } - - if (!inputfn || !outputfn) - { - x265_log(param, X265_LOG_ERROR, "input or output file not specified, try --help for help\n"); - return true; - } - - if (param->internalBitDepth != api->bit_depth) - { - x265_log(param, X265_LOG_ERROR, "Only bit depths of %d are supported in this build\n", api->bit_depth); - return true; - } - -#ifdef SVT_HEVC - if (svtEnabled) - { - EB_H265_ENC_CONFIGURATION* svtParam = (EB_H265_ENC_CONFIGURATION*)param->svtHevcParam; - param->sourceWidth = svtParam->sourceWidth; - param->sourceHeight = svtParam->sourceHeight; - param->fpsNum = svtParam->frameRateNumerator; - param->fpsDenom = svtParam->frameRateDenominator; - svtParam->encoderBitDepth = inputBitDepth; - } -#endif - - InputFileInfo info; - info.filename = inputfn; - info.depth = inputBitDepth; - info.csp = param->internalCsp; - info.width = param->sourceWidth; - info.height = param->sourceHeight; - info.fpsNum = param->fpsNum; - info.fpsDenom = param->fpsDenom; - info.sarWidth = param->vui.sarWidth; - info.sarHeight = param->vui.sarHeight; - info.skipFrames = seek; - info.frameCount = 0; - getParamAspectRatio(param, info.sarWidth, info.sarHeight); - - - this->input = InputFile::open(info, this->bForceY4m); - if (!this->input || this->input->isFail()) - { - x265_log_file(param, X265_LOG_ERROR, "unable to open input file <%s>\n", inputfn); - return true; - } - - if (info.depth < 8 || 
info.depth > 16) - { - x265_log(param, X265_LOG_ERROR, "Input bit depth (%d) must be between 8 and 16\n", inputBitDepth); - return true; - } - - /* Unconditionally accept height/width/csp/bitDepth from file info */ - param->sourceWidth = info.width; - param->sourceHeight = info.height; - param->internalCsp = info.csp; - param->sourceBitDepth = info.depth; - - /* Accept fps and sar from file info if not specified by user */ - if (param->fpsDenom == 0 || param->fpsNum == 0) - { - param->fpsDenom = info.fpsDenom; - param->fpsNum = info.fpsNum; - } - if (!param->vui.aspectRatioIdc && info.sarWidth && info.sarHeight) - setParamAspectRatio(param, info.sarWidth, info.sarHeight); - if (this->framesToBeEncoded == 0 && info.frameCount > (int)seek) - this->framesToBeEncoded = info.frameCount - seek; - param->totalFrames = this->framesToBeEncoded; - -#ifdef SVT_HEVC - if (svtEnabled) - { - EB_H265_ENC_CONFIGURATION* svtParam = (EB_H265_ENC_CONFIGURATION*)param->svtHevcParam; - svtParam->sourceWidth = param->sourceWidth; - svtParam->sourceHeight = param->sourceHeight; - svtParam->frameRateNumerator = param->fpsNum; - svtParam->frameRateDenominator = param->fpsDenom; - svtParam->framesToBeEncoded = param->totalFrames; - svtParam->encoderColorFormat = (EB_COLOR_FORMAT)param->internalCsp; - } -#endif - - /* Force CFR until we have support for VFR */ - info.timebaseNum = param->fpsDenom; - info.timebaseDenom = param->fpsNum; - - if (param->bField && param->interlaceMode) - { // Field FPS - param->fpsNum *= 2; - // Field height - param->sourceHeight = param->sourceHeight >> 1; - // Number of fields to encode - param->totalFrames *= 2; - } - - if (api->param_apply_profile(param, profile)) - return true; - - if (param->logLevel >= X265_LOG_INFO) - { - char buf[128]; - int p = sprintf(buf, "%dx%d fps %d/%d %sp%d", param->sourceWidth, param->sourceHeight, - param->fpsNum, param->fpsDenom, x265_source_csp_names[param->internalCsp], info.depth); - - int width, height; - 
getParamAspectRatio(param, width, height); - if (width && height) - p += sprintf(buf + p, " sar %d:%d", width, height); - - if (framesToBeEncoded <= 0 || info.frameCount <= 0) - strcpy(buf + p, " unknown frame count"); - else - sprintf(buf + p, " frames %u - %d of %d", this->seek, this->seek + this->framesToBeEncoded - 1, info.frameCount); - - general_log(param, input->getName(), X265_LOG_INFO, "%s\n", buf); - } - - this->input->startReader(); - - if (reconfn) - { - if (reconFileBitDepth == 0) - reconFileBitDepth = param->internalBitDepth; - this->recon = ReconFile::open(reconfn, param->sourceWidth, param->sourceHeight, reconFileBitDepth, - param->fpsNum, param->fpsDenom, param->internalCsp); - if (this->recon->isFail()) + if (idCount != X265_HEAD_ENTRIES) { - x265_log(param, X265_LOG_WARNING, "unable to write reconstructed outputs file\n"); - this->recon->release(); - this->recon = 0; + x265_log(NULL, X265_LOG_ERROR, "Incorrect number of arguments in ABR CLI header at line %d\n", i); + return false; } else - general_log(param, this->recon->getName(), X265_LOG_INFO, - "reconstructed images %dx%d fps %d/%d %s\n", - param->sourceWidth, param->sourceHeight, param->fpsNum, param->fpsDenom, - x265_source_csp_names[param->internalCsp]); - } -#if ENABLE_LIBVMAF - if (!reconfn) - { - x265_log(param, X265_LOG_ERROR, "recon file must be specified to get VMAF score, try --help for help\n"); - return true; - } - const char *str = strrchr(info.filename, '.'); - - if (!strcmp(str, ".y4m")) - { - x265_log(param, X265_LOG_ERROR, "VMAF supports YUV file format only.\n"); - return true; - } - if(param->internalCsp == X265_CSP_I420 || param->internalCsp == X265_CSP_I422 || param->internalCsp == X265_CSP_I444) - { - vmafData->reference_file = x265_fopen(inputfn, "rb"); - vmafData->distorted_file = x265_fopen(reconfn, "rb"); - } - else - { - x265_log(param, X265_LOG_ERROR, "VMAF will support only yuv420p, yu422p, yu444p, yuv420p10le, yuv422p10le, yuv444p10le formats.\n"); - return 
true; - } -#endif - this->output = OutputFile::open(outputfn, info); - if (this->output->isFail()) - { - x265_log_file(param, X265_LOG_ERROR, "failed to open output file <%s> for writing\n", outputfn); - return true; - } - general_log_file(param, this->output->getName(), X265_LOG_INFO, "output file: %s\n", outputfn); - return false; -} - -bool CLIOptions::parseQPFile(x265_picture &pic_org) -{ - int32_t num = -1, qp, ret; - char type; - uint32_t filePos; - pic_org.forceqp = 0; - pic_org.sliceType = X265_TYPE_AUTO; - while (num < pic_org.poc) - { - filePos = ftell(qpfile); - qp = -1; - ret = fscanf(qpfile, "%d %c%*[ \t]%d\n", &num, &type, &qp); - - if (num > pic_org.poc || ret == EOF) { - fseek(qpfile, filePos, SEEK_SET); - break; + cliopt[i].encName = strdup(head[0]); + cliopt[i].loadLevel = atoi(head[1]); + cliopt[i].reuseName = strdup(head[2]); } - if (num < pic_org.poc && ret >= 2) - continue; - if (ret == 3 && qp >= 0) - pic_org.forceqp = qp + 1; - if (type == 'I') pic_org.sliceType = X265_TYPE_IDR; - else if (type == 'i') pic_org.sliceType = X265_TYPE_I; - else if (type == 'K') pic_org.sliceType = param->bOpenGOP ? 
X265_TYPE_I : X265_TYPE_IDR; - else if (type == 'P') pic_org.sliceType = X265_TYPE_P; - else if (type == 'B') pic_org.sliceType = X265_TYPE_BREF; - else if (type == 'b') pic_org.sliceType = X265_TYPE_B; - else ret = 0; - if (ret < 2 || qp < -1 || qp > 51) - return 0; - } - return 1; -} -bool CLIOptions::parseZoneFile() -{ - char line[256]; - char* argLine; - param->rc.zonefileCount = 0; - - while (fgets(line, sizeof(line), zoneFile)) - { - if (!((*line == '#') || (strcmp(line, "\r\n") == 0))) - param->rc.zonefileCount++; - } - - rewind(zoneFile); - param->rc.zones = X265_MALLOC(x265_zone, param->rc.zonefileCount); - for (int i = 0; i < param->rc.zonefileCount; i++) - { - while (fgets(line, sizeof(line), zoneFile)) + char* token = strtok(start, " "); + while (token) { - if (*line == '#' || (strcmp(line, "\r\n") == 0)) - continue; - param->rc.zones[i].zoneParam = X265_MALLOC(x265_param, 1); - int index = (int)strcspn(line, "\r\n"); - line[index] = '\0'; - argLine = line; - while (isspace((unsigned char)*argLine)) argLine++; - char* start = strchr(argLine, ' '); - start++; - param->rc.zones[i].startFrame = atoi(argLine); - int argCount = 0; - char **args = (char**)malloc(256 * sizeof(char *)); - // Adding a dummy string to avoid file parsing error - args[argCount++] = (char *)"x265"; - char* token = strtok(start, " "); - while (token) - { - args[argCount++] = token; - token = strtok(NULL, " "); - } - args[argCount] = NULL; - CLIOptions cliopt; - if (cliopt.parseZoneParam(argCount, args,param, i)) - { - cliopt.destroy(); - if (cliopt.api) - cliopt.api->param_free(cliopt.param); - exit(1); - } - break; + argv[argc++] = strdup(token); + token = strtok(NULL, " "); } - } - return 1; -} - -#ifdef _WIN32 -/* Copy of x264 code, which allows for Unicode characters in the command line. - * Retrieve command line arguments as UTF-8. 
*/ -static int get_argv_utf8(int *argc_ptr, char ***argv_ptr) -{ - int ret = 0; - wchar_t **argv_utf16 = CommandLineToArgvW(GetCommandLineW(), argc_ptr); - if (argv_utf16) - { - int argc = *argc_ptr; - int offset = (argc + 1) * sizeof(char*); - int size = offset; - - for (int i = 0; i < argc; i++) - size += WideCharToMultiByte(CP_UTF8, 0, argv_utf16[i], -1, NULL, 0, NULL, NULL); - - char **argv = *argv_ptr = (char**)malloc(size); - if (argv) + argv[argc] = NULL; + if (cliopt[i].parse(argc++, argv)) { - for (int i = 0; i < argc; i++) - { - argv[i] = (char*)argv + offset; - offset += WideCharToMultiByte(CP_UTF8, 0, argv_utf16[i], -1, argv[i], size - offset, NULL, NULL); - } - argv[argc] = NULL; - ret = 1; + cliopt[i].destroy(); + if (cliopt[i].api) + cliopt[i].api->param_free(cliopt[i].param); + exit(1); } - LocalFree(argv_utf16); } - return ret; + return true; } -#endif -/* Parse the RPU file and extract the RPU corresponding to the current picture - * and fill the rpu field of the input picture */ -static int rpuParser(x265_picture * pic, FILE * ptr) +static bool setRefContext(CLIOptions cliopt[], uint32_t numEncodes) { - uint8_t byteVal; - uint32_t code = 0; - int bytesRead = 0; - pic->rpu.payloadSize = 0; + bool hasRef = false; + bool isRefFound = false; - if (!pic->pts) + /* Identify reference encode IDs and set save/load reuse levels */ + for (uint32_t curEnc = 0; curEnc < numEncodes; curEnc++) { - while (bytesRead++ < 4 && fread(&byteVal, sizeof(uint8_t), 1, ptr)) - code = (code << 8) | byteVal; - - if (code != START_CODE) + isRefFound = false; + hasRef = !strcmp(cliopt[curEnc].reuseName, "nil") ? 
false : true; + if (hasRef) { - x265_log(NULL, X265_LOG_ERROR, "Invalid Dolby Vision RPU startcode in POC %d\n", pic->pts); - return 1; - } - } - - bytesRead = 0; - while (fread(&byteVal, sizeof(uint8_t), 1, ptr)) - { - code = (code << 8) | byteVal; - if (bytesRead++ < 3) - continue; - if (bytesRead >= 1024) - { - x265_log(NULL, X265_LOG_ERROR, "Invalid Dolby Vision RPU size in POC %d\n", pic->pts); - return 1; + for (uint32_t refEnc = 0; refEnc < numEncodes; refEnc++) + { + if (!strcmp(cliopt[curEnc].reuseName, cliopt[refEnc].encName)) + { + cliopt[curEnc].refId = refEnc; + cliopt[refEnc].numRefs++; + cliopt[refEnc].saveLevel = X265_MAX(cliopt[refEnc].saveLevel, cliopt[curEnc].loadLevel); + isRefFound = true; + break; + } + } + if (!isRefFound) + { + x265_log(NULL, X265_LOG_ERROR, "Reference encode (%s) not found for %s\n", cliopt[curEnc].reuseName, + cliopt[curEnc].encName); + return false; + } } - - if (code != START_CODE) - pic->rpu.payload[pic->rpu.payloadSize++] = (code >> (3 * 8)) & 0xFF; - else - return 0; } - - int ShiftBytes = START_CODE_BYTES - (bytesRead - pic->rpu.payloadSize); - int bytesLeft = bytesRead - pic->rpu.payloadSize; - code = (code << ShiftBytes * 8); - for (int i = 0; i < bytesLeft; i++) - { - pic->rpu.payload[pic->rpu.payloadSize++] = (code >> (3 * 8)) & 0xFF; - code = (code << 8); - } - if (!pic->rpu.payloadSize) - x265_log(NULL, X265_LOG_WARNING, "Dolby Vision RPU not found for POC %d\n", pic->pts); - return 0; + return true; } - - /* CLI return codes: * * 0 - encode successful @@ -859,354 +268,57 @@ get_argv_utf8(&argc, &argv); #endif - ReconPlay* reconPlay = NULL; - CLIOptions cliopt; + uint8_t numEncodes = 1; + FILE *abrConfig = NULL; + bool isAbrLadder = checkAbrLadder(argc, argv, &abrConfig); - if (cliopt.parse(argc, argv)) - { - cliopt.destroy(); - if (cliopt.api) - cliopt.api->param_free(cliopt.param); - exit(1); - } + if (isAbrLadder) + numEncodes = getNumAbrEncodes(abrConfig); - x265_param* param = cliopt.param; - const 
x265_api* api = cliopt.api; -#if ENABLE_LIBVMAF - x265_vmaf_data* vmafdata = cliopt.vmafData; -#endif - /* This allows muxers to modify bitstream format */ - cliopt.output->setParam(param); + CLIOptions* cliopt = new CLIOptions[numEncodes]; - if (cliopt.reconPlayCmd) - reconPlay = new ReconPlay(cliopt.reconPlayCmd, *param); - - if (cliopt.zoneFile) + if (isAbrLadder) { - if (!cliopt.parseZoneFile()) - { - x265_log(NULL, X265_LOG_ERROR, "Unable to parse zonefile\n"); - fclose(cliopt.zoneFile); - cliopt.zoneFile = NULL; - } + if (!parseAbrConfig(abrConfig, cliopt, numEncodes)) + exit(1); + if (!setRefContext(cliopt, numEncodes)) + exit(1); } - - /* note: we could try to acquire a different libx265 API here based on - * the profile found during option parsing, but it must be done before - * opening an encoder */ - - x265_encoder *encoder = api->encoder_open(param); - if (!encoder) + else if (cliopt[0].parse(argc, argv)) { - x265_log(param, X265_LOG_ERROR, "failed to open encoder\n"); - cliopt.destroy(); - api->param_free(param); - api->cleanup(); - exit(2); + cliopt[0].destroy(); + if (cliopt[0].api) + cliopt[0].api->param_free(cliopt[0].param); + exit(1); } - /* get the encoder parameters post-initialization */ - api->encoder_parameters(encoder, param); - - /* Control-C handler */ - if (signal(SIGINT, sigint_handler) == SIG_ERR) - x265_log(param, X265_LOG_ERROR, "Unable to register CTRL+C handler: %s\n", strerror(errno)); - - x265_picture pic_orig, pic_out; - x265_picture *pic_in = &pic_orig; - /* Allocate recon picture if analysis save/load is enabled */ - std::priority_queue<int64_t>* pts_queue = cliopt.output->needPTS() ? new std::priority_queue<int64_t>() : NULL; - x265_picture *pic_recon = (cliopt.recon || param->analysisSave || param->analysisLoad || pts_queue || reconPlay || param->csvLogLevel) ? 
&pic_out : NULL; - uint32_t inFrameCount = 0; - uint32_t outFrameCount = 0; - x265_nal *p_nal; - x265_stats stats; - uint32_t nal; - int16_t *errorBuf = NULL; - bool bDolbyVisionRPU = false; - uint8_t *rpuPayload = NULL; int ret = 0; - int inputPicNum = 1; - x265_picture picField1, picField2; - - if (!param->bRepeatHeaders && !param->bEnableSvtHevc) - { - if (api->encoder_headers(encoder, &p_nal, &nal) < 0) - { - x265_log(param, X265_LOG_ERROR, "Failure generating stream headers\n"); - ret = 3; - goto fail; - } - else - cliopt.totalbytes += cliopt.output->writeHeaders(p_nal, nal); - } - - if (param->bField && param->interlaceMode) - { - api->picture_init(param, &picField1); - api->picture_init(param, &picField2); - // return back the original height of input - param->sourceHeight *= 2; - api->picture_init(param, pic_in); - } - else - api->picture_init(param, pic_in); - - if (param->dolbyProfile && cliopt.dolbyVisionRpu) - { - rpuPayload = X265_MALLOC(uint8_t, 1024); - pic_in->rpu.payload = rpuPayload; - if (pic_in->rpu.payload) - bDolbyVisionRPU = true; - } - - if (cliopt.bDither) - { - errorBuf = X265_MALLOC(int16_t, param->sourceWidth + 1); - if (errorBuf) - memset(errorBuf, 0, (param->sourceWidth + 1) * sizeof(int16_t)); - else - cliopt.bDither = false; - } - - // main encoder loop - while (pic_in && !b_ctrl_c) - { - pic_orig.poc = (param->bField && param->interlaceMode) ? 
inFrameCount * 2 : inFrameCount; - if (cliopt.qpfile) - { - if (!cliopt.parseQPFile(pic_orig)) - { - x265_log(NULL, X265_LOG_ERROR, "can't parse qpfile for frame %d\n", pic_in->poc); - fclose(cliopt.qpfile); - cliopt.qpfile = NULL; - } - } - - if (cliopt.framesToBeEncoded && inFrameCount >= cliopt.framesToBeEncoded) - pic_in = NULL; - else if (cliopt.input->readPicture(pic_orig)) - inFrameCount++; - else - pic_in = NULL; - - if (pic_in) - { - if (pic_in->bitDepth > param->internalBitDepth && cliopt.bDither) - { - x265_dither_image(pic_in, cliopt.input->getWidth(), cliopt.input->getHeight(), errorBuf, param->internalBitDepth); - pic_in->bitDepth = param->internalBitDepth; - } - /* Overwrite PTS */ - pic_in->pts = pic_in->poc; - - // convert to field - if (param->bField && param->interlaceMode) - { - int height = pic_in->height >> 1; - - int static bCreated = 0; - if (bCreated == 0) - { - bCreated = 1; - inputPicNum = 2; - picField1.fieldNum = 1; - picField2.fieldNum = 2; - - picField1.bitDepth = picField2.bitDepth = pic_in->bitDepth; - picField1.colorSpace = picField2.colorSpace = pic_in->colorSpace; - picField1.height = picField2.height = pic_in->height >> 1; - picField1.framesize = picField2.framesize = pic_in->framesize >> 1; - - size_t fieldFrameSize = (size_t)pic_in->framesize >> 1; - char* field1Buf = X265_MALLOC(char, fieldFrameSize); - char* field2Buf = X265_MALLOC(char, fieldFrameSize); - - int stride = picField1.stride[0] = picField2.stride[0] = pic_in->stride[0]; - uint64_t framesize = stride * (height >> x265_cli_csps[pic_in->colorSpace].height[0]); - picField1.planes[0] = field1Buf; - picField2.planes[0] = field2Buf; - for (int i = 1; i < x265_cli_csps[pic_in->colorSpace].planes; i++) - { - picField1.planes[i] = field1Buf + framesize; - picField2.planes[i] = field2Buf + framesize; - - stride = picField1.stride[i] = picField2.stride[i] = pic_in->stride[i]; - framesize += (stride * (height >> x265_cli_csps[pic_in->colorSpace].height[i])); - } - 
assert(framesize == picField1.framesize); - } - - picField1.pts = picField1.poc = pic_in->poc; - picField2.pts = picField2.poc = pic_in->poc + 1; - - picField1.userSEI = picField2.userSEI = pic_in->userSEI; - - //if (pic_in->userData) - //{ - // // Have to handle userData here - //} - - if (pic_in->framesize) - { - for (int i = 0; i < x265_cli_csps[pic_in->colorSpace].planes; i++) - { - char* srcP1 = (char*)pic_in->planes[i]; - char* srcP2 = (char*)pic_in->planes[i] + pic_in->stride[i]; - char* p1 = (char*)picField1.planes[i]; - char* p2 = (char*)picField2.planes[i]; - int stride = picField1.stride[i]; - - for (int y = 0; y < (height >> x265_cli_csps[pic_in->colorSpace].height[i]); y++) - { - memcpy(p1, srcP1, stride); - memcpy(p2, srcP2, stride); - srcP1 += 2*stride; - srcP2 += 2*stride; - p1 += stride; - p2 += stride; - } - } - } - } - - if (bDolbyVisionRPU) - { - if (param->bField && param->interlaceMode) - { - if (rpuParser(&picField1, cliopt.dolbyVisionRpu) > 0) - goto fail; - if (rpuParser(&picField2, cliopt.dolbyVisionRpu) > 0) - goto fail; - } - else - { - if (rpuParser(pic_in, cliopt.dolbyVisionRpu) > 0) - goto fail; - } - } - } - - for (int inputNum = 0; inputNum < inputPicNum; inputNum++) - { - x265_picture *picInput = NULL; - if (inputPicNum == 2) - picInput = pic_in ? (inputNum ? 
&picField2 : &picField1) : NULL; - else - picInput = pic_in; - - int numEncoded = api->encoder_encode( encoder, &p_nal, &nal, picInput, pic_recon ); - if( numEncoded < 0 ) - { - b_ctrl_c = 1; - ret = 4; - break; - } - - if (reconPlay && numEncoded) - reconPlay->writePicture(*pic_recon); - - outFrameCount += numEncoded; - - if (numEncoded && pic_recon && cliopt.recon) - cliopt.recon->writePicture(pic_out); - if (nal) - { - cliopt.totalbytes += cliopt.output->writeFrame(p_nal, nal, pic_out); - if (pts_queue) - { - pts_queue->push(-pic_out.pts); - if (pts_queue->size() > 2) - pts_queue->pop(); - } - } - cliopt.printStatus( outFrameCount ); - } - } - - /* Flush the encoder */ - while (!b_ctrl_c) + AbrEncoder* abrEnc = new AbrEncoder(cliopt, numEncodes, ret); + int threadsActive = abrEnc->m_numActiveEncodes.get(); + while (threadsActive) { - int numEncoded = api->encoder_encode(encoder, &p_nal, &nal, NULL, pic_recon); - if (numEncoded < 0) - { - ret = 4; - break; - } - - if (reconPlay && numEncoded) - reconPlay->writePicture(*pic_recon); - - outFrameCount += numEncoded; - if (numEncoded && pic_recon && cliopt.recon) - cliopt.recon->writePicture(pic_out); - if (nal) + threadsActive = abrEnc->m_numActiveEncodes.waitForChange(threadsActive); + for (uint8_t idx = 0; idx < numEncodes; idx++) { - cliopt.totalbytes += cliopt.output->writeFrame(p_nal, nal, pic_out); - if (pts_queue) + if (abrEnc->m_passEnc[idx]->m_ret) { - pts_queue->push(-pic_out.pts); - if (pts_queue->size() > 2) - pts_queue->pop(); - } + if (isAbrLadder) + x265_log(NULL, X265_LOG_INFO, "Error generating ABR-ladder \n"); + ret = abrEnc->m_passEnc[idx]->m_ret; + threadsActive = 0; + break; + } } - - cliopt.printStatus(outFrameCount); - - if (!numEncoded) - break; - } - - if (bDolbyVisionRPU) - { - if(fgetc(cliopt.dolbyVisionRpu) != EOF) - x265_log(NULL, X265_LOG_WARNING, "Dolby Vision RPU count is greater than frame count\n"); - x265_log(NULL, X265_LOG_INFO, "VES muxing with Dolby Vision RPU file 
successful\n"); } - /* clear progress report */ - if (cliopt.bProgress) - fprintf(stderr, "%*s\r", 80, " "); - -fail: - - delete reconPlay; - - api->encoder_get_stats(encoder, &stats, sizeof(stats)); - if (param->csvfn && !b_ctrl_c) -#if ENABLE_LIBVMAF - api->vmaf_encoder_log(encoder, argc, argv, param, vmafdata); -#else - api->encoder_log(encoder, argc, argv); -#endif - api->encoder_close(encoder); - - int64_t second_largest_pts = 0; - int64_t largest_pts = 0; - if (pts_queue && pts_queue->size() >= 2) - { - second_largest_pts = -pts_queue->top(); - pts_queue->pop(); - largest_pts = -pts_queue->top(); - pts_queue->pop(); - delete pts_queue; - pts_queue = NULL; - } - cliopt.output->closeFile(largest_pts, second_largest_pts); - - if (b_ctrl_c) - general_log(param, NULL, X265_LOG_INFO, "aborted at input frame %d, output frame %d\n", - cliopt.seek + inFrameCount, stats.encodedPictureCount); - - api->cleanup(); /* Free library singletons */ - - cliopt.destroy(); + abrEnc->destroy(); + delete abrEnc; - api->param_free(param); + for (uint8_t idx = 0; idx < numEncodes; idx++) + cliopt[idx].destroy(); - X265_FREE(errorBuf); - X265_FREE(rpuPayload); + delete[] cliopt; SetConsoleTitle(orgConsoleTitle); SetThreadExecutionState(ES_CONTINUOUS);
View file
x265_3.3.tar.gz/source/x265.h -> x265_3.4.tar.gz/source/x265.h
Changed
@@ -134,6 +134,7 @@ int ctuDistortionRefine; int rightOffset; int bottomOffset; + int frameDuplication; }x265_analysis_validate; /* Stores intra analysis data for a single frame. This struct needs better packing */ @@ -304,6 +305,7 @@ double totalFrameTime; double vmafFrameScore; double bufferFillFinal; + double unclippedBufferFillFinal; } x265_frame_stats; typedef struct x265_ctu_info_t @@ -1255,9 +1257,9 @@ * skip blocks. Default is disabled */ int bEnableEarlySkip; - /* Enable early CU size decisions to avoid recursing to higher depths. + /* Enable early CU size decisions to avoid recursing to higher depths. * Default is enabled */ - int bEnableRecursionSkip; + int recursionSkipMode; /* Use a faster search method to find the best intra mode. Default is 0 */ int bEnableFastIntra; @@ -1857,7 +1859,7 @@ double edgeTransitionThreshold; /* Enables histogram based scenecut detection algorithm to detect scenecuts. Default disabled */ - int bHistBasedSceneCut; + int bHistBasedSceneCut; /* Enable HME search ranges for L0, L1 and L2 respectively. */ int hmeRange[3]; @@ -1874,7 +1876,7 @@ * analysis information stored in analysis-save. Higher the refine level higher * the information stored. Default is 5 */ int analysisSaveReuseLevel; - + /* A value between 1 and 10 (both inclusive) determines the level of * analysis information reused in analysis-load. Higher the refine level higher * the information reused. Default is 5 */ @@ -1901,6 +1903,12 @@ * info is available from the corresponding analysis-save. */ int confWinBottomOffset; + + /* Edge variance threshold for quad tree establishment. */ + float edgeVarThreshold; + + /* Maxrate that could be signaled to the decoder. Default 0. API only. */ + int decoderVbvMaxRate; } x265_param; /* x265_param_alloc:
View file
x265_3.4.tar.gz/source/x265cli.cpp
Added
@@ -0,0 +1,1062 @@ +/***************************************************************************** + * Copyright (C) 2013-2020 MulticoreWare, Inc + * + * Authors: Steve Borho <steve@borho.org> + * Min Chen <chenm003@163.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at license @ x265.com. + *****************************************************************************/ +#if _MSC_VER +#pragma warning(disable: 4127) // conditional expression is constant, yes I know +#endif + +#include "x265cli.h" +#include "svt.h" + +#define START_CODE 0x00000001 +#define START_CODE_BYTES 4 + +#ifdef __cplusplus +namespace X265_NS { +#endif + + static void printVersion(x265_param *param, const x265_api* api) + { + x265_log(param, X265_LOG_INFO, "HEVC encoder version %s\n", api->version_str); + x265_log(param, X265_LOG_INFO, "build info %s\n", api->build_info_str); + } + + static void showHelp(x265_param *param) + { + int level = param->logLevel; + +#define OPT(value) (value ? 
"enabled" : "disabled") +#define H0 printf +#define H1 if (level >= X265_LOG_DEBUG) printf + + H0("\nSyntax: x265 [options] infile [-o] outfile\n"); + H0(" infile can be YUV or Y4M\n"); + H0(" outfile is raw HEVC bitstream\n"); + H0("\nExecutable Options:\n"); + H0("-h/--help Show this help text and exit\n"); + H0(" --fullhelp Show all options and exit\n"); + H0("-V/--version Show version info and exit\n"); + H0("\nOutput Options:\n"); + H0("-o/--output <filename> Bitstream output file name\n"); + H0("-D/--output-depth 8|10|12 Output bit depth (also internal bit depth). Default %d\n", param->internalBitDepth); + H0(" --log-level <string> Logging level: none error warning info debug full. Default %s\n", X265_NS::logLevelNames[param->logLevel + 1]); + H0(" --no-progress Disable CLI progress reports\n"); + H0(" --csv <filename> Comma separated log file, if csv-log-level > 0 frame level statistics, else one line per run\n"); + H0(" --csv-log-level <integer> Level of csv logging, if csv-log-level > 0 frame level statistics, else one line per run: 0-2\n"); + H0("\nInput Options:\n"); + H0(" --input <filename> Raw YUV or Y4M input file name. `-` for stdin\n"); + H1(" --y4m Force parsing of input stream as YUV4MPEG2 regardless of file extension\n"); + H0(" --fps <float|rational> Source frame rate (float or num/denom), auto-detected if Y4M\n"); + H0(" --input-res WxH Source picture size [w x h], auto-detected if Y4M\n"); + H1(" --input-depth <integer> Bit-depth of input file. Default 8\n"); + H1(" --input-csp <string> Chroma subsampling, auto-detected if Y4M\n"); + H1(" 0 - i400 (4:0:0 monochrome)\n"); + H1(" 1 - i420 (4:2:0 default)\n"); + H1(" 2 - i422 (4:2:2)\n"); + H1(" 3 - i444 (4:4:4)\n"); +#if ENABLE_HDR10_PLUS + H0(" --dhdr10-info <filename> JSON file containing the Creative Intent Metadata to be encoded as Dynamic Tone Mapping\n"); + H0(" --[no-]dhdr10-opt Insert tone mapping SEI only for IDR frames and when the tone mapping information changes. 
Default disabled\n"); +#endif + H0(" --dolby-vision-profile <float|integer> Specifies Dolby Vision profile ID. Currently only profile 5, profile 8.1 and profile 8.2 enabled. Specified as '5' or '50'. Default 0 (disabled).\n"); + H0(" --dolby-vision-rpu <filename> File containing Dolby Vision RPU metadata.\n" + " If given, x265's Dolby Vision metadata parser will fill the RPU field of input pictures with the metadata read from the file. Default NULL(disabled).\n"); + H0(" --nalu-file <filename> Text file containing SEI messages in the following format : <POC><space><PREFIX><space><NAL UNIT TYPE>/<SEI TYPE><space><SEI Payload>\n"); + H0("-f/--frames <integer> Maximum number of frames to encode. Default all\n"); + H0(" --seek <integer> First frame to encode\n"); + H1(" --[no-]interlace <bff|tff> Indicate input pictures are interlace fields in temporal order. Default progressive\n"); + H0(" --[no-]field Enable or disable field coding. Default %s\n", OPT(param->bField)); + H1(" --dither Enable dither if downscaling to 8 bit pixels. Default disabled\n"); + H0(" --[no-]copy-pic Copy buffers of input picture in frame. Default %s\n", OPT(param->bCopyPicToFrame)); + H0("\nQuality reporting metrics:\n"); + H0(" --[no-]ssim Enable reporting SSIM metric scores. Default %s\n", OPT(param->bEnableSsim)); + H0(" --[no-]psnr Enable reporting PSNR metric scores. Default %s\n", OPT(param->bEnablePsnr)); + H0("\nProfile, Level, Tier:\n"); + H0("-P/--profile <string> Enforce an encode profile: main, main10, mainstillpicture\n"); + H0(" --level-idc <integer|float> Force a minimum required decoder level (as '5.0' or '50')\n"); + H0(" --[no-]high-tier If a decoder level is specified, this modifier selects High tier of that level\n"); + H0(" --uhd-bd Enable UHD Bluray compatibility support\n"); + H0(" --[no-]allow-non-conformance Allow the encoder to generate profile NONE bitstreams. 
Default %s\n", OPT(param->bAllowNonConformance)); + H0("\nThreading, performance:\n"); + H0(" --pools <integer,...> Comma separated thread count per thread pool (pool per NUMA node)\n"); + H0(" '-' implies no threads on node, '+' implies one thread per core on node\n"); + H0("-F/--frame-threads <integer> Number of concurrently encoded frames. 0: auto-determined by core count\n"); + H0(" --[no-]wpp Enable Wavefront Parallel Processing. Default %s\n", OPT(param->bEnableWavefront)); + H0(" --[no-]slices <integer> Enable Multiple Slices feature. Default %d\n", param->maxSlices); + H0(" --[no-]pmode Parallel mode analysis. Default %s\n", OPT(param->bDistributeModeAnalysis)); + H0(" --[no-]pme Parallel motion estimation. Default %s\n", OPT(param->bDistributeMotionEstimation)); + H0(" --[no-]asm <bool|int|string> Override CPU detection. Default: auto\n"); + H0("\nPresets:\n"); + H0("-p/--preset <string> Trade off performance for compression efficiency. Default medium\n"); + H0(" ultrafast, superfast, veryfast, faster, fast, medium, slow, slower, veryslow, or placebo\n"); + H0("-t/--tune <string> Tune the settings for a particular type of source or situation:\n"); + H0(" psnr, ssim, grain, zerolatency, fastdecode\n"); + H0("\nQuad-Tree size and depth:\n"); + H0("-s/--ctu <64|32|16> Maximum CU size (WxH). Default %d\n", param->maxCUSize); + H0(" --min-cu-size <64|32|16|8> Minimum CU size (WxH). Default %d\n", param->minCUSize); + H0(" --max-tu-size <32|16|8|4> Maximum TU size (WxH). Default %d\n", param->maxTUSize); + H0(" --tu-intra-depth <integer> Max TU recursive depth for intra CUs. Default %d\n", param->tuQTMaxIntraDepth); + H0(" --tu-inter-depth <integer> Max TU recursive depth for inter CUs. Default %d\n", param->tuQTMaxInterDepth); + H0(" --limit-tu <0..4> Enable early exit from TU recursion for inter coded blocks. Default %d\n", param->limitTU); + H0("\nAnalysis:\n"); + H0(" --rd <1..6> Level of RDO in mode decision 1:least....6:full RDO. 
Default %d\n", param->rdLevel); + H0(" --[no-]psy-rd <0..5.0> Strength of psycho-visual rate distortion optimization, 0 to disable. Default %.1f\n", param->psyRd); + H0(" --[no-]rdoq-level <0|1|2> Level of RDO in quantization 0:none, 1:levels, 2:levels & coding groups. Default %d\n", param->rdoqLevel); + H0(" --[no-]psy-rdoq <0..50.0> Strength of psycho-visual optimization in RDO quantization, 0 to disable. Default %.1f\n", param->psyRdoq); + H0(" --dynamic-rd <0..4.0> Strength of dynamic RD, 0 to disable. Default %.2f\n", param->dynamicRd); + H0(" --[no-]ssim-rd Enable ssim rate distortion optimization, 0 to disable. Default %s\n", OPT(param->bSsimRd)); + H0(" --[no-]rd-refine Enable QP based RD refinement for rd levels 5 and 6. Default %s\n", OPT(param->bEnableRdRefine)); + H0(" --[no-]early-skip Enable early SKIP detection. Default %s\n", OPT(param->bEnableEarlySkip)); + H0(" --rskip <mode> Set mode for early exit from recursion. Mode 1: exit using rdcost & CU homogenity. Mode 2: exit using CU edge density.\n" + " Mode 0: disabled. Default %d\n", param->recursionSkipMode); + H1(" --rskip-edge-threshold Threshold in terms of percentage (integer of range [0,100]) for minimum edge density in CUs used to prun the recursion depth. Applicable only for rskip mode 2. Value is preset dependent. Default: %.f\n", param->edgeVarThreshold*100.0f); + H1(" --[no-]tskip-fast Enable fast intra transform skipping. Default %s\n", OPT(param->bEnableTSkipFast)); + H1(" --[no-]splitrd-skip Enable skipping split RD analysis when sum of split CU rdCost larger than one split CU rdCost for Intra CU. Default %s\n", OPT(param->bEnableSplitRdSkip)); + H1(" --nr-intra <integer> An integer value in range of 0 to 2000, which denotes strength of noise reduction in intra CUs. Default 0\n"); + H1(" --nr-inter <integer> An integer value in range of 0 to 2000, which denotes strength of noise reduction in inter CUs. 
Default 0\n"); + H0(" --ctu-info <integer> Enable receiving ctu information asynchronously and determine reaction to the CTU information (0, 1, 2, 4, 6) Default 0\n" + " - 1: force the partitions if CTU information is present\n" + " - 2: functionality of (1) and reduce qp if CTU information has changed\n" + " - 4: functionality of (1) and force Inter modes when CTU Information has changed, merge/skip otherwise\n" + " Enable this option only when planning to invoke the API function x265_encoder_ctu_info to copy ctu-info asynchronously\n"); + H0("\nCoding tools:\n"); + H0("-w/--[no-]weightp Enable weighted prediction in P slices. Default %s\n", OPT(param->bEnableWeightedPred)); + H0(" --[no-]weightb Enable weighted prediction in B slices. Default %s\n", OPT(param->bEnableWeightedBiPred)); + H0(" --[no-]cu-lossless Consider lossless mode in CU RDO decisions. Default %s\n", OPT(param->bCULossless)); + H0(" --[no-]signhide Hide sign bit of one coeff per TU (rdo). Default %s\n", OPT(param->bEnableSignHiding)); + H1(" --[no-]tskip Enable intra 4x4 transform skipping. Default %s\n", OPT(param->bEnableTransformSkip)); + H0("\nTemporal / motion search options:\n"); + H0(" --max-merge <1..5> Maximum number of merge candidates. Default %d\n", param->maxNumMergeCand); + H0(" --ref <integer> max number of L0 references to be allowed (1 .. 16) Default %d\n", param->maxNumReferences); + H0(" --limit-refs <0|1|2|3> Limit references per depth (1) or CU (2) or both (3). Default %d\n", param->limitReferences); + H0(" --me <string> Motion search method dia hex umh star full. Default %d\n", param->searchMethod); + H0("-m/--subme <integer> Amount of subpel refinement to perform (0:least .. 7:most). Default %d \n", param->subpelRefine); + H0(" --merange <integer> Motion search range. Default %d\n", param->searchRange); + H0(" --[no-]rect Enable rectangular motion partitions Nx2N and 2NxN. 
Default %s\n", OPT(param->bEnableRectInter)); + H0(" --[no-]amp Enable asymmetric motion partitions, requires --rect. Default %s\n", OPT(param->bEnableAMP)); + H0(" --[no-]limit-modes Limit rectangular and asymmetric motion predictions. Default %d\n", param->limitModes); + H1(" --[no-]temporal-mvp Enable temporal MV predictors. Default %s\n", OPT(param->bEnableTemporalMvp)); + H1(" --[no-]hme Enable Hierarchical Motion Estimation. Default %s\n", OPT(param->bEnableHME)); + H1(" --hme-search <string> Motion search-method for HME L0,L1 and L2. Default(L0,L1,L2) is %d,%d,%d\n", param->hmeSearchMethod[0], param->hmeSearchMethod[1], param->hmeSearchMethod[2]); + H1(" --hme-range <int>,<int>,<int> Motion search-range for HME L0,L1 and L2. Default(L0,L1,L2) is %d,%d,%d\n", param->hmeRange[0], param->hmeRange[1], param->hmeRange[2]); + H0("\nSpatial / intra options:\n"); + H0(" --[no-]strong-intra-smoothing Enable strong intra smoothing for 32x32 blocks. Default %s\n", OPT(param->bEnableStrongIntraSmoothing)); + H0(" --[no-]constrained-intra Constrained intra prediction (use only intra coded reference pixels) Default %s\n", OPT(param->bEnableConstrainedIntra)); + H0(" --[no-]b-intra Enable intra in B frames in veryslow presets. Default %s\n", OPT(param->bIntraInBFrames)); + H0(" --[no-]fast-intra Enable faster search method for angular intra predictions. Default %s\n", OPT(param->bEnableFastIntra)); + H0(" --rdpenalty <0..2> penalty for 32x32 intra TU in non-I slices. 0:disabled 1:RD-penalty 2:maximum. Default %d\n", param->rdPenalty); + H0("\nSlice decision options:\n"); + H0(" --[no-]open-gop Enable open-GOP, allows I slices to be non-IDR. Default %s\n", OPT(param->bOpenGOP)); + H0("-I/--keyint <integer> Max IDR period in frames. -1 for infinite-gop. Default %d\n", param->keyframeMax); + H0("-i/--min-keyint <integer> Scenecuts closer together than this are coded as I, not IDR. 
Default: auto\n"); + H0(" --gop-lookahead <integer> Extends gop boundary if a scenecut is found within this from keyint boundary. Default 0\n"); + H0(" --no-scenecut Disable adaptive I-frame decision\n"); + H0(" --scenecut <integer> How aggressively to insert extra I-frames. Default %d\n", param->scenecutThreshold); + H1(" --scenecut-bias <0..100.0> Bias for scenecut detection. Default %.2f\n", param->scenecutBias); + H0(" --hist-scenecut Enables histogram based scene-cut detection using histogram based algorithm.\n"); + H0(" --no-hist-scenecut Disables histogram based scene-cut detection using histogram based algorithm.\n"); + H1(" --hist-threshold <0.0..2.0> Luma Edge histogram's Normalized SAD threshold for histogram based scenecut detection Default %.2f\n", param->edgeTransitionThreshold); + H0(" --[no-]fades Enable detection and handling of fade-in regions. Default %s\n", OPT(param->bEnableFades)); + H1(" --[no-]scenecut-aware-qp Enable increasing QP for frames inside the scenecut window after scenecut. Default %s\n", OPT(param->bEnableSceneCutAwareQp)); + H1(" --scenecut-window <0..1000> QP incremental duration(in milliseconds) when scenecut-aware-qp is enabled. Default %d\n", param->scenecutWindow); + H1(" --max-qp-delta <0..10> QP offset to increment with base QP for inter-frames. Default %d\n", param->maxQpDelta); + H0(" --radl <integer> Number of RADL pictures allowed in front of IDR. Default %d\n", param->radl); + H0(" --intra-refresh Use Periodic Intra Refresh instead of IDR frames\n"); + H0(" --rc-lookahead <integer> Number of frames for frame-type lookahead (determines encoder latency) Default %d\n", param->lookaheadDepth); + H1(" --lookahead-slices <0..16> Number of slices to use per lookahead cost estimate. Default %d\n", param->lookaheadSlices); + H0(" --lookahead-threads <integer> Number of threads to be dedicated to perform lookahead only. Default %d\n", param->lookaheadThreads); + H0("-b/--bframes <0..16> Maximum number of consecutive b-frames. 
Default %d\n", param->bframes); + H1(" --bframe-bias <integer> Bias towards B frame decisions. Default %d\n", param->bFrameBias); + H0(" --b-adapt <0..2> 0 - none, 1 - fast, 2 - full (trellis) adaptive B frame scheduling. Default %d\n", param->bFrameAdaptive); + H0(" --[no-]b-pyramid Use B-frames as references. Default %s\n", OPT(param->bBPyramid)); + H1(" --qpfile <string> Force frametypes and QPs for some or all frames\n"); + H1(" Format of each line: framenumber frametype QP\n"); + H1(" QP is optional (none lets x265 choose). Frametypes: I,i,K,P,B,b.\n"); + H1(" QPs are restricted by qpmin/qpmax.\n"); + H1(" --force-flush <integer> Force the encoder to flush frames. Default %d\n", param->forceFlush); + H1(" 0 - flush the encoder only when all the input pictures are over.\n"); + H1(" 1 - flush all the frames even when the input is not over. Slicetype decision may change with this option.\n"); + H1(" 2 - flush the slicetype decided frames only.\n"); + H0(" --[no-]-hrd-concat Set HRD concatenation flag for the first keyframe in the buffering period SEI. Default %s\n", OPT(param->bEnableHRDConcatFlag)); + H0("\nRate control, Adaptive Quantization:\n"); + H0(" --bitrate <integer> Target bitrate (kbps) for ABR (implied). Default %d\n", param->rc.bitrate); + H1("-q/--qp <integer> QP for P slices in CQP mode (implied). --ipratio and --pbration determine other slice QPs\n"); + H0(" --crf <float> Quality-based VBR (0-51). Default %.1f\n", param->rc.rfConstant); + H1(" --[no-]lossless Enable lossless: bypass transform, quant and loop filters globally. Default %s\n", OPT(param->bLossless)); + H1(" --crf-max <float> With CRF+VBV, limit RF to this value. Default %f\n", param->rc.rfConstantMax); + H1(" May cause VBV underflows!\n"); + H1(" --crf-min <float> With CRF+VBV, limit RF to this value. Default %f\n", param->rc.rfConstantMin); + H1(" this specifies a minimum rate factor value for encode!\n"); + H0(" --vbv-maxrate <integer> Max local bitrate (kbit/s). 
Default %d\n", param->rc.vbvMaxBitrate); + H0(" --vbv-bufsize <integer> Set size of the VBV buffer (kbit). Default %d\n", param->rc.vbvBufferSize); + H0(" --vbv-init <float> Initial VBV buffer occupancy (fraction of bufsize or in kbits). Default %.2f\n", param->rc.vbvBufferInit); + H0(" --vbv-end <float> Final VBV buffer emptiness (fraction of bufsize or in kbits). Default 0 (disabled)\n"); + H0(" --vbv-end-fr-adj <float> Frame from which qp has to be adjusted to achieve final decode buffer emptiness. Default 0\n"); + H0(" --chunk-start <integer> First frame of the chunk. Default 0 (disabled)\n"); + H0(" --chunk-end <integer> Last frame of the chunk. Default 0 (disabled)\n"); + H0(" --pass Multi pass rate control.\n" + " - 1 : First pass, creates stats file\n" + " - 2 : Last pass, does not overwrite stats file\n" + " - 3 : Nth pass, overwrites stats file\n"); + H0(" --[no-]multi-pass-opt-analysis Refine analysis in 2 pass based on analysis information from pass 1\n"); + H0(" --[no-]multi-pass-opt-distortion Use distortion of CTU from pass 1 to refine qp in 2 pass\n"); + H0(" --stats Filename for stats file in multipass pass rate control. Default x265_2pass.log\n"); + H0(" --[no-]analyze-src-pics Motion estimation uses source frame planes. Default disable\n"); + H0(" --[no-]slow-firstpass Enable a slow first pass in a multipass rate control mode. Default %s\n", OPT(param->rc.bEnableSlowFirstPass)); + H0(" --[no-]strict-cbr Enable stricter conditions and tolerance for bitrate deviations in CBR mode. Default %s\n", OPT(param->rc.bStrictCbr)); + H0(" --analysis-save <filename> Dump analysis info into the specified file. Default Disabled\n"); + H0(" --analysis-load <filename> Load analysis buffers from the file specified. Default Disabled\n"); + H0(" --analysis-reuse-file <filename> Specify file name used for either dumping or reading analysis data. 
Deault x265_analysis.dat\n"); + H0(" --analysis-reuse-level <1..10> Level of analysis reuse indicates amount of info stored/reused in save/load mode, 1:least..10:most. Now deprecated. Default %d\n", param->analysisReuseLevel); + H0(" --analysis-save-reuse-level <1..10> Indicates the amount of analysis info stored in save mode, 1:least..10:most. Default %d\n", param->analysisSaveReuseLevel); + H0(" --analysis-load-reuse-level <1..10> Indicates the amount of analysis info reused in load mode, 1:least..10:most. Default %d\n", param->analysisLoadReuseLevel); + H0(" --refine-analysis-type <string> Reuse anlaysis information received through API call. Supported options are avc and hevc. Default disabled - %d\n", param->bAnalysisType); + H0(" --scale-factor <int> Specify factor by which input video is scaled down for analysis save mode. Default %d\n", param->scaleFactor); + H0(" --refine-intra <0..4> Enable intra refinement for encode that uses analysis-load.\n" + " - 0 : Forces both mode and depth from the save encode.\n" + " - 1 : Functionality of (0) + evaluate all intra modes at min-cu-size's depth when current depth is one smaller than min-cu-size's depth.\n" + " - 2 : Functionality of (1) + irrespective of size evaluate all angular modes when the save encode decides the best mode as angular.\n" + " - 3 : Functionality of (1) + irrespective of size evaluate all intra modes.\n" + " - 4 : Re-evaluate all intra blocks, does not reuse data from save encode.\n" + " Default:%d\n", param->intraRefine); + H0(" --refine-inter <0..3> Enable inter refinement for encode that uses analysis-load.\n" + " - 0 : Forces both mode and depth from the save encode.\n" + " - 1 : Functionality of (0) + evaluate all inter modes at min-cu-size's depth when current depth is one smaller than\n" + " min-cu-size's depth. 
When save encode decides the current block as skip(for all sizes) evaluate skip/merge.\n" + " - 2 : Functionality of (1) + irrespective of size restrict the modes evaluated when specific modes are decided as the best mode by the save encode.\n" + " - 3 : Functionality of (1) + irrespective of size evaluate all inter modes.\n" + " Default:%d\n", param->interRefine); + H0(" --[no-]dynamic-refine Dynamically changes refine-inter level for each CU. Default %s\n", OPT(param->bDynamicRefine)); + H0(" --refine-mv <1..3> Enable mv refinement for load mode. Default %d\n", param->mvRefine); + H0(" --refine-ctu-distortion Store/normalize ctu distortion in analysis-save/load.\n" + " - 0 : Disabled.\n" + " - 1 : Store/Load ctu distortion to/from the file specified in analysis-save/load.\n" + " Default 0 - Disabled\n"); + H0(" --aq-mode <integer> Mode for Adaptive Quantization - 0:none 1:uniform AQ 2:auto variance 3:auto variance with bias to dark scenes 4:auto variance with edge information. Default %d\n", param->rc.aqMode); + H0(" --[no-]hevc-aq Mode for HEVC Adaptive Quantization. Default %s\n", OPT(param->rc.hevcAq)); + H0(" --aq-strength <float> Reduces blocking and blurring in flat and textured areas (0 to 3.0). Default %.2f\n", param->rc.aqStrength); + H0(" --qp-adaptation-range <float> Delta QP range by QP adaptation based on a psycho-visual model (1.0 to 6.0). Default %.2f\n", param->rc.qpAdaptationRange); + H0(" --[no-]aq-motion Block level QP adaptation based on the relative motion between the block and the frame. Default %s\n", OPT(param->bAQMotion)); + H0(" --qg-size <int> Specifies the size of the quantization group (64, 32, 16, 8). Default %d\n", param->rc.qgSize); + H0(" --[no-]cutree Enable cutree for Adaptive Quantization. Default %s\n", OPT(param->rc.cuTree)); + H0(" --[no-]rc-grain Enable ratecontrol mode to handle grains specifically. turned on with tune grain. Default %s\n", OPT(param->rc.bEnableGrain)); + H1(" --ipratio <float> QP factor between I and P. 
Default %.2f\n", param->rc.ipFactor); + H1(" --pbratio <float> QP factor between P and B. Default %.2f\n", param->rc.pbFactor); + H1(" --qcomp <float> Weight given to predicted complexity. Default %.2f\n", param->rc.qCompress); + H1(" --qpstep <integer> The maximum single adjustment in QP allowed to rate control. Default %d\n", param->rc.qpStep); + H1(" --qpmin <integer> sets a hard lower limit on QP allowed to ratecontrol. Default %d\n", param->rc.qpMin); + H1(" --qpmax <integer> sets a hard upper limit on QP allowed to ratecontrol. Default %d\n", param->rc.qpMax); + H0(" --[no-]const-vbv Enable consistent vbv. turned on with tune grain. Default %s\n", OPT(param->rc.bEnableConstVbv)); + H1(" --cbqpoffs <integer> Chroma Cb QP Offset [-12..12]. Default %d\n", param->cbQpOffset); + H1(" --crqpoffs <integer> Chroma Cr QP Offset [-12..12]. Default %d\n", param->crQpOffset); + H1(" --scaling-list <string> Specify a file containing HM style quant scaling lists or 'default' or 'off'. Default: off\n"); + H1(" --zones <zone0>/<zone1>/... Tweak the bitrate of regions of the video\n"); + H1(" Each zone is of the form\n"); + H1(" <start frame>,<end frame>,<option>\n"); + H1(" where <option> is either\n"); + H1(" q=<integer> (force QP)\n"); + H1(" or b=<float> (bitrate multiplier)\n"); + H0(" --zonefile <filename> Zone file containing the zone boundaries and the parameters to be reconfigured.\n"); + H1(" --lambda-file <string> Specify a file containing replacement values for the lambda tables\n"); + H1(" MAX_MAX_QP+1 floats for lambda table, then again for lambda2 table\n"); + H1(" Blank lines and lines starting with hash(#) are ignored\n"); + H1(" Comma is considered to be white-space\n"); + H0(" --max-ausize-factor <float> This value controls the maximum AU size defined in specification.\n"); + H0(" It represents the percentage of maximum AU size used. 
Default %.1f\n", param->maxAUSizeFactor); + H0("\nLoop filters (deblock and SAO):\n"); + H0(" --[no-]deblock Enable Deblocking Loop Filter, optionally specify tC:Beta offsets Default %s\n", OPT(param->bEnableLoopFilter)); + H0(" --[no-]sao Enable Sample Adaptive Offset. Default %s\n", OPT(param->bEnableSAO)); + H1(" --[no-]sao-non-deblock Use non-deblocked pixels, else right/bottom boundary areas skipped. Default %s\n", OPT(param->bSaoNonDeblocked)); + H0(" --[no-]limit-sao Limit Sample Adaptive Offset types. Default %s\n", OPT(param->bLimitSAO)); + H0(" --selective-sao <int> Enable slice-level SAO filter. Default %d\n", param->selectiveSAO); + H0("\nVUI options:\n"); + H0(" --sar <width:height|int> Sample Aspect Ratio, the ratio of width to height of an individual pixel.\n"); + H0(" Choose from 0=undef, 1=1:1(\"square\"), 2=12:11, 3=10:11, 4=16:11,\n"); + H0(" 5=40:33, 6=24:11, 7=20:11, 8=32:11, 9=80:33, 10=18:11, 11=15:11,\n"); + H0(" 12=64:33, 13=160:99, 14=4:3, 15=3:2, 16=2:1 or custom ratio of <int:int>. Default %d\n", param->vui.aspectRatioIdc); + H1(" --display-window <string> Describe overscan cropping region as 'left,top,right,bottom' in pixels\n"); + H1(" --overscan <string> Specify whether it is appropriate for decoder to show cropped region: undef, show or crop. Default undef\n"); + H0(" --videoformat <string> Specify video format from undef, component, pal, ntsc, secam, mac. Default undef\n"); + H0(" --range <string> Specify black level and range of luma and chroma signals as full or limited Default limited\n"); + H0(" --colorprim <string> Specify color primaries from bt709, unknown, reserved, bt470m, bt470bg, smpte170m,\n"); + H0(" smpte240m, film, bt2020, smpte428, smpte431, smpte432. 
Default undef\n"); + H0(" --transfer <string> Specify transfer characteristics from bt709, unknown, reserved, bt470m, bt470bg, smpte170m,\n"); + H0(" smpte240m, linear, log100, log316, iec61966-2-4, bt1361e, iec61966-2-1,\n"); + H0(" bt2020-10, bt2020-12, smpte2084, smpte428, arib-std-b67. Default undef\n"); + H1(" --colormatrix <string> Specify color matrix setting from undef, bt709, fcc, bt470bg, smpte170m,\n"); + H1(" smpte240m, GBR, YCgCo, bt2020nc, bt2020c, smpte2085, chroma-derived-nc, chroma-derived-c, ictcp. Default undef\n"); + H1(" --chromaloc <integer> Specify chroma sample location (0 to 5). Default of %d\n", param->vui.chromaSampleLocTypeTopField); + H0(" --master-display <string> SMPTE ST 2086 master display color volume info SEI (HDR)\n"); + H0(" format: G(x,y)B(x,y)R(x,y)WP(x,y)L(max,min)\n"); + H0(" --max-cll <string> Specify content light level info SEI as \"cll,fall\" (HDR).\n"); + H0(" --[no-]cll Emit content light level info SEI. Default %s\n", OPT(param->bEmitCLL)); + H0(" --[no-]hdr10 Control dumping of HDR10 SEI packet. If max-cll or master-display has non-zero values, this is enabled. Default %s\n", OPT(param->bEmitHDR10SEI)); + H0(" --[no-]hdr-opt Add luma and chroma offsets for HDR/WCG content. Default %s. Now deprecated.\n", OPT(param->bHDROpt)); + H0(" --[no-]hdr10-opt Block-level QP optimization for HDR10 content. Default %s.\n", OPT(param->bHDR10Opt)); + H0(" --min-luma <integer> Minimum luma plane value of input source picture\n"); + H0(" --max-luma <integer> Maximum luma plane value of input source picture\n"); + H0("\nBitstream options:\n"); + H0(" --[no-]repeat-headers Emit SPS and PPS headers at each keyframe. Default %s\n", OPT(param->bRepeatHeaders)); + H0(" --[no-]info Emit SEI identifying encoder and parameters. Default %s\n", OPT(param->bEmitInfoSEI)); + H0(" --[no-]hrd Enable HRD parameters signaling. 
Default %s\n", OPT(param->bEmitHRDSEI)); + H0(" --[no-]idr-recovery-sei Emit recovery point infor SEI at each IDR frame \n"); + H0(" --[no-]temporal-layers Enable a temporal sublayer for unreferenced B frames. Default %s\n", OPT(param->bEnableTemporalSubLayers)); + H0(" --[no-]aud Emit access unit delimiters at the start of each access unit. Default %s\n", OPT(param->bEnableAccessUnitDelimiters)); + H1(" --hash <integer> Decoded Picture Hash SEI 0: disabled, 1: MD5, 2: CRC, 3: Checksum. Default %d\n", param->decodedPictureHashSEI); + H0(" --atc-sei <integer> Emit the alternative transfer characteristics SEI message where the integer is the preferred transfer characteristics. Default disabled\n"); + H0(" --pic-struct <integer> Set the picture structure and emits it in the picture timing SEI message. Values in the range 0..12. See D.3.3 of the HEVC spec. for a detailed explanation.\n"); + H0(" --log2-max-poc-lsb <integer> Maximum of the picture order count\n"); + H0(" --[no-]vui-timing-info Emit VUI timing information in the bistream. Default %s\n", OPT(param->bEmitVUITimingInfo)); + H0(" --[no-]vui-hrd-info Emit VUI HRD information in the bistream. Default %s\n", OPT(param->bEmitVUIHRDInfo)); + H0(" --[no-]opt-qp-pps Dynamically optimize QP in PPS (instead of default 26) based on QPs in previous GOP. Default %s\n", OPT(param->bOptQpPPS)); + H0(" --[no-]opt-ref-list-length-pps Dynamically set L0 and L1 ref list length in PPS (instead of default 0) based on values in last GOP. Default %s\n", OPT(param->bOptRefListLengthPPS)); + H0(" --[no-]multi-pass-opt-rps Enable storing commonly used RPS in SPS in multi pass mode. Default %s\n", OPT(param->bMultiPassOptRPS)); + H0(" --[no-]opt-cu-delta-qp Optimize to signal consistent CU level delta QPs in frame. 
Default %s\n", OPT(param->bOptCUDeltaQP)); + H1("\nReconstructed video options (debugging):\n"); + H1("-r/--recon <filename> Reconstructed raw image YUV or Y4M output file name\n"); + H1(" --recon-depth <integer> Bit-depth of reconstructed raw image file. Defaults to input bit depth, or 8 if Y4M\n"); + H1(" --recon-y4m-exec <string> pipe reconstructed frames to Y4M viewer, ex:\"ffplay -i pipe:0 -autoexit\"\n"); + H0(" --lowpass-dct Use low-pass subband dct approximation. Default %s\n", OPT(param->bLowPassDct)); + H0(" --[no-]frame-dup Enable Frame duplication. Default %s\n", OPT(param->bEnableFrameDuplication)); + H0(" --dup-threshold <integer> PSNR threshold for Frame duplication. Default %d\n", param->dupThreshold); +#ifdef SVT_HEVC + H0(" --[no]svt Enable SVT HEVC encoder %s\n", OPT(param->bEnableSvtHevc)); + H0(" --[no-]svt-hme Enable Hierarchial motion estimation(HME) in SVT HEVC encoder \n"); + H0(" --svt-search-width Motion estimation search area width for SVT HEVC encoder \n"); + H0(" --svt-search-height Motion estimation search area height for SVT HEVC encoder \n"); + H0(" --[no-]svt-compressed-ten-bit-format Enable 8+2 encoding mode for 10bit input in SVT HEVC encoder \n"); + H0(" --[no-]svt-speed-control Enable speed control functionality to achieve real time encoding speed for SVT HEVC encoder \n"); + H0(" --svt-preset-tuner Enable additional faster presets of SVT; This only has to be used on top of x265's ultrafast preset. Accepts values in the range of 0-2 \n"); + H0(" --svt-hierarchical-level Hierarchical layer for SVT-HEVC encoder; Accepts inputs in the range 0-3 \n"); + H0(" --svt-base-layer-switch-mode Select whether B/P slice should be used in base layer for SVT-HEVC encoder. 
0-Use B-frames; 1-Use P frames in the base layer \n"); + H0(" --svt-pred-struct Select pred structure for SVT HEVC encoder; Accepts inputs in the range 0-2 \n"); + H0(" --[no-]svt-fps-in-vps Enable VPS timing info for SVT HEVC encoder \n"); +#endif + H0(" ABR-ladder settings\n"); + H0(" --abr-ladder <file> File containing config settings required for the generation of ABR-ladder\n"); + H1("\nExecutable return codes:\n"); + H1(" 0 - encode successful\n"); + H1(" 1 - unable to parse command line\n"); + H1(" 2 - unable to open encoder\n"); + H1(" 3 - unable to generate stream headers\n"); + H1(" 4 - encoder abort\n"); +#undef OPT +#undef H0 +#undef H1 + if (level < X265_LOG_DEBUG) + printf("\nUse --fullhelp for a full listing (or --log-level full --help)\n"); + printf("\n\nComplete documentation may be found at http://x265.readthedocs.org/en/default/cli.html\n"); + exit(1); + } + + void CLIOptions::destroy() + { + if (isAbrLadderConfig) + { + for (int idx = 1; idx < argCnt; idx++) + free(argString[idx]); + free(argString); + } + + if (input) + input->release(); + input = NULL; + if (recon) + recon->release(); + recon = NULL; + if (qpfile) + fclose(qpfile); + qpfile = NULL; + if (zoneFile) + fclose(zoneFile); + zoneFile = NULL; + if (dolbyVisionRpu) + fclose(dolbyVisionRpu); + dolbyVisionRpu = NULL; + if (output) + output->release(); + output = NULL; + } + + void CLIOptions::printStatus(uint32_t frameNum) + { + char buf[200]; + int64_t time = x265_mdate(); + + if (!bProgress || !frameNum || (prevUpdateTime && time - prevUpdateTime < UPDATE_INTERVAL)) + return; + + int64_t elapsed = time - startTime; + double fps = elapsed > 0 ? frameNum * 1000000. / elapsed : 0; + float bitrate = 0.008f * totalbytes * (param->fpsNum / param->fpsDenom) / ((float)frameNum); + if (framesToBeEncoded) + { + int eta = (int)(elapsed * (framesToBeEncoded - frameNum) / ((int64_t)frameNum * 1000000)); + sprintf(buf, "x265 [%.1f%%] %d/%d frames, %.2f fps, %.2f kb/s, eta %d:%02d:%02d", + 100. 
* frameNum / (param->chunkEnd ? param->chunkEnd : param->totalFrames), frameNum, (param->chunkEnd ? param->chunkEnd : param->totalFrames), fps, bitrate, + eta / 3600, (eta / 60) % 60, eta % 60); + } + else + sprintf(buf, "x265 %d frames: %.2f fps, %.2f kb/s", frameNum, fps, bitrate); + + fprintf(stderr, "%s \r", buf + 5); + SetConsoleTitle(buf); + fflush(stderr); // needed in windows + prevUpdateTime = time; + } + + bool CLIOptions::parseZoneParam(int argc, char **argv, x265_param* globalParam, int zonefileCount) + { + bool bError = false; + int bShowHelp = false; + int outputBitDepth = 0; + const char *profile = NULL; + + /* Presets are applied before all other options. */ + for (optind = 0;;) + { + int c = getopt_long(argc, argv, short_options, long_options, NULL); + if (c == -1) + break; + else if (c == 'D') + outputBitDepth = atoi(optarg); + else if (c == 'P') + profile = optarg; + else if (c == '?') + bShowHelp = true; + } + + if (!outputBitDepth && profile) + { + /* try to derive the output bit depth from the requested profile */ + if (strstr(profile, "10")) + outputBitDepth = 10; + else if (strstr(profile, "12")) + outputBitDepth = 12; + else + outputBitDepth = 8; + } + + api = x265_api_get(outputBitDepth); + if (!api) + { + x265_log(NULL, X265_LOG_WARNING, "falling back to default bit-depth\n"); + api = x265_api_get(0); + } + + if (bShowHelp) + { + printVersion(globalParam, api); + showHelp(globalParam); + } + + globalParam->rc.zones[zonefileCount].zoneParam = api->param_alloc(); + if (!globalParam->rc.zones[zonefileCount].zoneParam) + { + x265_log(NULL, X265_LOG_ERROR, "param alloc failed\n"); + return true; + } + + memcpy(globalParam->rc.zones[zonefileCount].zoneParam, globalParam, sizeof(x265_param)); + + for (optind = 0;;) + { + int long_options_index = -1; + int c = getopt_long(argc, argv, short_options, long_options, &long_options_index); + if (c == -1) + break; + + if (long_options_index < 0 && c > 0) + { + for (size_t i = 0; i < sizeof(long_options) 
/ sizeof(long_options[0]); i++) + { + if (long_options[i].val == c) + { + long_options_index = (int)i; + break; + } + } + + if (long_options_index < 0) + { + /* getopt_long might have already printed an error message */ + if (c != 63) + x265_log(NULL, X265_LOG_WARNING, "internal error: short option '%c' has no long option\n", c); + return true; + } + } + if (long_options_index < 0) + { + x265_log(NULL, X265_LOG_WARNING, "short option '%c' unrecognized\n", c); + return true; + } + + bError |= !!api->zone_param_parse(globalParam->rc.zones[zonefileCount].zoneParam, long_options[long_options_index].name, optarg); + + if (bError) + { + const char *name = long_options_index > 0 ? long_options[long_options_index].name : argv[optind - 2]; + x265_log(NULL, X265_LOG_ERROR, "invalid argument: %s = %s\n", name, optarg); + return true; + } + } + + if (optind < argc) + { + x265_log(param, X265_LOG_WARNING, "extra unused command arguments given <%s>\n", argv[optind]); + return true; + } + return false; + } + + bool CLIOptions::parse(int argc, char **argv) + { + bool bError = false; + int bShowHelp = false; + int inputBitDepth = 8; + int outputBitDepth = 0; + int reconFileBitDepth = 0; + const char *inputfn = NULL; + const char *reconfn = NULL; + const char *outputfn = NULL; + const char *preset = NULL; + const char *tune = NULL; + const char *profile = NULL; + int svtEnabled = 0; + argCnt = argc; + argString = argv; + + if (argc <= 1) + { + x265_log(NULL, X265_LOG_ERROR, "No input file. Run x265 --help for a list of options.\n"); + return true; + } + + /* Presets are applied before all other options. 
*/ + for (optind = 0;;) + { + int optionsIndex = -1; + int c = getopt_long(argc, argv, short_options, long_options, &optionsIndex); + if (c == -1) + break; + else if (c == 'p') + preset = optarg; + else if (c == 't') + tune = optarg; + else if (c == 'D') + outputBitDepth = atoi(optarg); + else if (c == 'P') + profile = optarg; + else if (c == '?') + bShowHelp = true; + else if (!c && !strcmp(long_options[optionsIndex].name, "svt")) + svtEnabled = 1; + } + + if (!outputBitDepth && profile) + { + /* try to derive the output bit depth from the requested profile */ + if (strstr(profile, "10")) + outputBitDepth = 10; + else if (strstr(profile, "12")) + outputBitDepth = 12; + else + outputBitDepth = 8; + } + + api = x265_api_get(outputBitDepth); + if (!api) + { + x265_log(NULL, X265_LOG_WARNING, "falling back to default bit-depth\n"); + api = x265_api_get(0); + } + + param = api->param_alloc(); + if (!param) + { + x265_log(NULL, X265_LOG_ERROR, "param alloc failed\n"); + return true; + } +#if ENABLE_LIBVMAF + vmafData = (x265_vmaf_data*)x265_malloc(sizeof(x265_vmaf_data)); + if (!vmafData) + { + x265_log(NULL, X265_LOG_ERROR, "vmaf data alloc failed\n"); + return true; + } +#endif + + if (api->param_default_preset(param, preset, tune) < 0) + { + x265_log(NULL, X265_LOG_ERROR, "preset or tune unrecognized\n"); + return true; + } + + if (bShowHelp) + { + printVersion(param, api); + showHelp(param); + } + + //Set enable SVT-HEVC encoder first if found in the command line + if (svtEnabled) api->param_parse(param, "svt", NULL); + + for (optind = 0;;) + { + int long_options_index = -1; + int c = getopt_long(argc, argv, short_options, long_options, &long_options_index); + if (c == -1) + break; + + switch (c) + { + case 'h': + printVersion(param, api); + showHelp(param); + break; + + case 'V': + printVersion(param, api); + x265_report_simd(param); + exit(0); + + default: + if (long_options_index < 0 && c > 0) + { + for (size_t i = 0; i < sizeof(long_options) / 
sizeof(long_options[0]); i++) + { + if (long_options[i].val == c) + { + long_options_index = (int)i; + break; + } + } + + if (long_options_index < 0) + { + /* getopt_long might have already printed an error message */ + if (c != 63) + x265_log(NULL, X265_LOG_WARNING, "internal error: short option '%c' has no long option\n", c); + return true; + } + } + if (long_options_index < 0) + { + x265_log(NULL, X265_LOG_WARNING, "short option '%c' unrecognized\n", c); + return true; + } +#define OPT(longname) \ + else if (!strcmp(long_options[long_options_index].name, longname)) +#define OPT2(name1, name2) \ + else if (!strcmp(long_options[long_options_index].name, name1) || \ + !strcmp(long_options[long_options_index].name, name2)) + + if (0); + OPT2("frame-skip", "seek") this->seek = (uint32_t)x265_atoi(optarg, bError); + OPT("frames") this->framesToBeEncoded = (uint32_t)x265_atoi(optarg, bError); + OPT("no-progress") this->bProgress = false; + OPT("output") outputfn = optarg; + OPT("input") inputfn = optarg; + OPT("recon") reconfn = optarg; + OPT("input-depth") inputBitDepth = (uint32_t)x265_atoi(optarg, bError); + OPT("dither") this->bDither = true; + OPT("recon-depth") reconFileBitDepth = (uint32_t)x265_atoi(optarg, bError); + OPT("y4m") this->bForceY4m = true; + OPT("profile") /* handled above */; + OPT("preset") /* handled above */; + OPT("tune") /* handled above */; + OPT("output-depth") /* handled above */; + OPT("recon-y4m-exec") reconPlayCmd = optarg; + OPT("svt") /* handled above */; + OPT("qpfile") + { + this->qpfile = x265_fopen(optarg, "rb"); + if (!this->qpfile) + x265_log_file(param, X265_LOG_ERROR, "%s qpfile not found or error in opening qp file\n", optarg); + } + OPT("dolby-vision-rpu") + { + this->dolbyVisionRpu = x265_fopen(optarg, "rb"); + if (!this->dolbyVisionRpu) + { + x265_log_file(param, X265_LOG_ERROR, "Dolby Vision RPU metadata file %s not found or error in opening file\n", optarg); + return true; + } + } + OPT("zonefile") + { + this->zoneFile = 
x265_fopen(optarg, "rb"); + if (!this->zoneFile) + x265_log_file(param, X265_LOG_ERROR, "%s zone file not found or error in opening zone file\n", optarg); + } + OPT("fullhelp") + { + param->logLevel = X265_LOG_FULL; + printVersion(param, api); + showHelp(param); + break; + } + else + bError |= !!api->param_parse(param, long_options[long_options_index].name, optarg); + if (bError) + { + const char *name = long_options_index > 0 ? long_options[long_options_index].name : argv[optind - 2]; + x265_log(NULL, X265_LOG_ERROR, "invalid argument: %s = %s\n", name, optarg); + return true; + } +#undef OPT + } + } + + if (optind < argc && !inputfn) + inputfn = argv[optind++]; + if (optind < argc && !outputfn) + outputfn = argv[optind++]; + if (optind < argc) + { + x265_log(param, X265_LOG_WARNING, "extra unused command arguments given <%s>\n", argv[optind]); + return true; + } + + if (argc <= 1) + { + api->param_default(param); + printVersion(param, api); + showHelp(param); + } + + if (!inputfn || !outputfn) + { + x265_log(param, X265_LOG_ERROR, "input or output file not specified, try --help for help\n"); + return true; + } + + if (param->internalBitDepth != api->bit_depth) + { + x265_log(param, X265_LOG_ERROR, "Only bit depths of %d are supported in this build\n", api->bit_depth); + return true; + } + +#ifdef SVT_HEVC + if (svtEnabled) + { + EB_H265_ENC_CONFIGURATION* svtParam = (EB_H265_ENC_CONFIGURATION*)param->svtHevcParam; + param->sourceWidth = svtParam->sourceWidth; + param->sourceHeight = svtParam->sourceHeight; + param->fpsNum = svtParam->frameRateNumerator; + param->fpsDenom = svtParam->frameRateDenominator; + svtParam->encoderBitDepth = inputBitDepth; + } +#endif + + InputFileInfo info; + info.filename = inputfn; + info.depth = inputBitDepth; + info.csp = param->internalCsp; + info.width = param->sourceWidth; + info.height = param->sourceHeight; + info.fpsNum = param->fpsNum; + info.fpsDenom = param->fpsDenom; + info.sarWidth = param->vui.sarWidth; + info.sarHeight 
= param->vui.sarHeight; + info.skipFrames = seek; + info.frameCount = 0; + getParamAspectRatio(param, info.sarWidth, info.sarHeight); + + + this->input = InputFile::open(info, this->bForceY4m); + if (!this->input || this->input->isFail()) + { + x265_log_file(param, X265_LOG_ERROR, "unable to open input file <%s>\n", inputfn); + return true; + } + + if (info.depth < 8 || info.depth > 16) + { + x265_log(param, X265_LOG_ERROR, "Input bit depth (%d) must be between 8 and 16\n", inputBitDepth); + return true; + } + + /* Unconditionally accept height/width/csp/bitDepth from file info */ + param->sourceWidth = info.width; + param->sourceHeight = info.height; + param->internalCsp = info.csp; + param->sourceBitDepth = info.depth; + + /* Accept fps and sar from file info if not specified by user */ + if (param->fpsDenom == 0 || param->fpsNum == 0) + { + param->fpsDenom = info.fpsDenom; + param->fpsNum = info.fpsNum; + } + if (!param->vui.aspectRatioIdc && info.sarWidth && info.sarHeight) + setParamAspectRatio(param, info.sarWidth, info.sarHeight); + if (this->framesToBeEncoded == 0 && info.frameCount > (int)seek) + this->framesToBeEncoded = info.frameCount - seek; + param->totalFrames = this->framesToBeEncoded; + +#ifdef SVT_HEVC + if (svtEnabled) + { + EB_H265_ENC_CONFIGURATION* svtParam = (EB_H265_ENC_CONFIGURATION*)param->svtHevcParam; + svtParam->sourceWidth = param->sourceWidth; + svtParam->sourceHeight = param->sourceHeight; + svtParam->frameRateNumerator = param->fpsNum; + svtParam->frameRateDenominator = param->fpsDenom; + svtParam->framesToBeEncoded = param->totalFrames; + svtParam->encoderColorFormat = (EB_COLOR_FORMAT)param->internalCsp; + } +#endif + + /* Force CFR until we have support for VFR */ + info.timebaseNum = param->fpsDenom; + info.timebaseDenom = param->fpsNum; + + if (param->bField && param->interlaceMode) + { // Field FPS + param->fpsNum *= 2; + // Field height + param->sourceHeight = param->sourceHeight >> 1; + // Number of fields to encode + 
param->totalFrames *= 2; + } + + if (api->param_apply_profile(param, profile)) + return true; + + if (param->logLevel >= X265_LOG_INFO) + { + char buf[128]; + int p = sprintf(buf, "%dx%d fps %d/%d %sp%d", param->sourceWidth, param->sourceHeight, + param->fpsNum, param->fpsDenom, x265_source_csp_names[param->internalCsp], info.depth); + + int width, height; + getParamAspectRatio(param, width, height); + if (width && height) + p += sprintf(buf + p, " sar %d:%d", width, height); + + if (framesToBeEncoded <= 0 || info.frameCount <= 0) + strcpy(buf + p, " unknown frame count"); + else + sprintf(buf + p, " frames %u - %d of %d", this->seek, this->seek + this->framesToBeEncoded - 1, info.frameCount); + + general_log(param, input->getName(), X265_LOG_INFO, "%s\n", buf); + } + + this->input->startReader(); + + if (reconfn) + { + if (reconFileBitDepth == 0) + reconFileBitDepth = param->internalBitDepth; + this->recon = ReconFile::open(reconfn, param->sourceWidth, param->sourceHeight, reconFileBitDepth, + param->fpsNum, param->fpsDenom, param->internalCsp); + if (this->recon->isFail()) + { + x265_log(param, X265_LOG_WARNING, "unable to write reconstructed outputs file\n"); + this->recon->release(); + this->recon = 0; + } + else + general_log(param, this->recon->getName(), X265_LOG_INFO, + "reconstructed images %dx%d fps %d/%d %s\n", + param->sourceWidth, param->sourceHeight, param->fpsNum, param->fpsDenom, + x265_source_csp_names[param->internalCsp]); + } +#if ENABLE_LIBVMAF + if (!reconfn) + { + x265_log(param, X265_LOG_ERROR, "recon file must be specified to get VMAF score, try --help for help\n"); + return true; + } + const char *str = strrchr(info.filename, '.'); + + if (!strcmp(str, ".y4m")) + { + x265_log(param, X265_LOG_ERROR, "VMAF supports YUV file format only.\n"); + return true; + } + if (param->internalCsp == X265_CSP_I420 || param->internalCsp == X265_CSP_I422 || param->internalCsp == X265_CSP_I444) + { + vmafData->reference_file = x265_fopen(inputfn, "rb"); + 
vmafData->distorted_file = x265_fopen(reconfn, "rb"); + } + else + { + x265_log(param, X265_LOG_ERROR, "VMAF will support only yuv420p, yu422p, yu444p, yuv420p10le, yuv422p10le, yuv444p10le formats.\n"); + return true; + } +#endif + this->output = OutputFile::open(outputfn, info); + if (this->output->isFail()) + { + x265_log_file(param, X265_LOG_ERROR, "failed to open output file <%s> for writing\n", outputfn); + return true; + } + general_log_file(param, this->output->getName(), X265_LOG_INFO, "output file: %s\n", outputfn); + return false; + } + + bool CLIOptions::parseQPFile(x265_picture &pic_org) + { + int32_t num = -1, qp, ret; + char type; + uint32_t filePos; + pic_org.forceqp = 0; + pic_org.sliceType = X265_TYPE_AUTO; + while (num < pic_org.poc) + { + filePos = ftell(qpfile); + qp = -1; + ret = fscanf(qpfile, "%d %c%*[ \t]%d\n", &num, &type, &qp); + + if (num > pic_org.poc || ret == EOF) + { + fseek(qpfile, filePos, SEEK_SET); + break; + } + if (num < pic_org.poc && ret >= 2) + continue; + if (ret == 3 && qp >= 0) + pic_org.forceqp = qp + 1; + if (type == 'I') pic_org.sliceType = X265_TYPE_IDR; + else if (type == 'i') pic_org.sliceType = X265_TYPE_I; + else if (type == 'K') pic_org.sliceType = param->bOpenGOP ? 
X265_TYPE_I : X265_TYPE_IDR; + else if (type == 'P') pic_org.sliceType = X265_TYPE_P; + else if (type == 'B') pic_org.sliceType = X265_TYPE_BREF; + else if (type == 'b') pic_org.sliceType = X265_TYPE_B; + else ret = 0; + if (ret < 2 || qp < -1 || qp > 51) + return 0; + } + return 1; + } + + bool CLIOptions::parseZoneFile() + { + char line[256]; + char* argLine; + param->rc.zonefileCount = 0; + + while (fgets(line, sizeof(line), zoneFile)) + { + if (!((*line == '#') || (strcmp(line, "\r\n") == 0))) + param->rc.zonefileCount++; + } + + rewind(zoneFile); + param->rc.zones = X265_MALLOC(x265_zone, param->rc.zonefileCount); + for (int i = 0; i < param->rc.zonefileCount; i++) + { + while (fgets(line, sizeof(line), zoneFile)) + { + if (*line == '#' || (strcmp(line, "\r\n") == 0)) + continue; + param->rc.zones[i].zoneParam = X265_MALLOC(x265_param, 1); + int index = (int)strcspn(line, "\r\n"); + line[index] = '\0'; + argLine = line; + while (isspace((unsigned char)*argLine)) argLine++; + char* start = strchr(argLine, ' '); + start++; + param->rc.zones[i].startFrame = atoi(argLine); + int argCount = 0; + char **args = (char**)malloc(256 * sizeof(char *)); + // Adding a dummy string to avoid file parsing error + args[argCount++] = (char *)"x265"; + char* token = strtok(start, " "); + while (token) + { + args[argCount++] = token; + token = strtok(NULL, " "); + } + args[argCount] = NULL; + CLIOptions cliopt; + if (cliopt.parseZoneParam(argCount, args, param, i)) + { + cliopt.destroy(); + if (cliopt.api) + cliopt.api->param_free(cliopt.param); + exit(1); + } + break; + } + } + return 1; + } + + /* Parse the RPU file and extract the RPU corresponding to the current picture + * and fill the rpu field of the input picture */ + int CLIOptions::rpuParser(x265_picture * pic) + { + uint8_t byteVal; + uint32_t code = 0; + int bytesRead = 0; + pic->rpu.payloadSize = 0; + + if (!pic->pts) + { + while (bytesRead++ < 4 && fread(&byteVal, sizeof(uint8_t), 1, dolbyVisionRpu)) + code = (code 
<< 8) | byteVal; + + if (code != START_CODE) + { + x265_log(NULL, X265_LOG_ERROR, "Invalid Dolby Vision RPU startcode in POC %d\n", pic->pts); + return 1; + } + } + + bytesRead = 0; + while (fread(&byteVal, sizeof(uint8_t), 1, dolbyVisionRpu)) + { + code = (code << 8) | byteVal; + if (bytesRead++ < 3) + continue; + if (bytesRead >= 1024) + { + x265_log(NULL, X265_LOG_ERROR, "Invalid Dolby Vision RPU size in POC %d\n", pic->pts); + return 1; + } + + if (code != START_CODE) + pic->rpu.payload[pic->rpu.payloadSize++] = (code >> (3 * 8)) & 0xFF; + else + return 0; + } + + int ShiftBytes = START_CODE_BYTES - (bytesRead - pic->rpu.payloadSize); + int bytesLeft = bytesRead - pic->rpu.payloadSize; + code = (code << ShiftBytes * 8); + for (int i = 0; i < bytesLeft; i++) + { + pic->rpu.payload[pic->rpu.payloadSize++] = (code >> (3 * 8)) & 0xFF; + code = (code << 8); + } + if (!pic->rpu.payloadSize) + x265_log(NULL, X265_LOG_WARNING, "Dolby Vision RPU not found for POC %d\n", pic->pts); + return 0; + } + +#ifdef __cplusplus +} +#endif \ No newline at end of file
View file
x265_3.3.tar.gz/source/x265cli.h -> x265_3.4.tar.gz/source/x265cli.h
Changed
@@ -27,9 +27,23 @@ #include "common.h" #include "param.h" +#include "input/input.h" +#include "output/output.h" +#include "output/reconplay.h" #include <getopt.h> +#define CONSOLE_TITLE_SIZE 200 +#ifdef _WIN32 +#include <windows.h> +#define SetThreadExecutionState(es) +static char orgConsoleTitle[CONSOLE_TITLE_SIZE] = ""; +#else +#define GetConsoleTitle(t, n) +#define SetConsoleTitle(t) +#define SetThreadExecutionState(es) +#endif + #ifdef __cplusplus namespace X265_NS { #endif @@ -105,8 +119,8 @@ { "amp", no_argument, NULL, 0 }, { "no-early-skip", no_argument, NULL, 0 }, { "early-skip", no_argument, NULL, 0 }, - { "no-rskip", no_argument, NULL, 0 }, - { "rskip", no_argument, NULL, 0 }, + { "rskip", required_argument, NULL, 0 }, + { "rskip-edge-threshold", required_argument, NULL, 0 }, { "no-fast-cbf", no_argument, NULL, 0 }, { "fast-cbf", no_argument, NULL, 0 }, { "no-tskip", no_argument, NULL, 0 }, @@ -358,6 +372,7 @@ { "cll", no_argument, NULL, 0 }, { "no-cll", no_argument, NULL, 0 }, { "hme-range", required_argument, NULL, 0 }, + { "abr-ladder", required_argument, NULL, 0 }, { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, @@ -365,336 +380,82 @@ { 0, 0, 0, 0 } }; -static void printVersion(x265_param *param, const x265_api* api) -{ - x265_log(param, X265_LOG_INFO, "HEVC encoder version %s\n", api->version_str); - x265_log(param, X265_LOG_INFO, "build info %s\n", api->build_info_str); -} + struct CLIOptions + { + InputFile* input; + ReconFile* recon; + OutputFile* output; + FILE* qpfile; + FILE* zoneFile; + FILE* dolbyVisionRpu; /* File containing Dolby Vision BL RPU metadata */ + const char* reconPlayCmd; + const x265_api* api; + x265_param* param; + x265_vmaf_data* vmafData; + bool bProgress; + bool bForceY4m; + bool bDither; + uint32_t seek; // number of frames to skip from the beginning + uint32_t framesToBeEncoded; // number of frames to encode + uint64_t totalbytes; + int64_t startTime; + int64_t prevUpdateTime; -static void showHelp(x265_param *param) -{ - 
int level = param->logLevel; + int argCnt; + char** argString; -#define OPT(value) (value ? "enabled" : "disabled") -#define H0 printf -#define H1 if (level >= X265_LOG_DEBUG) printf + /* ABR ladder settings */ + bool isAbrLadderConfig; + bool enableScaler; + char* encName; + char* reuseName; + uint32_t encId; + int refId; + uint32_t loadLevel; + uint32_t saveLevel; + uint32_t numRefs; - H0("\nSyntax: x265 [options] infile [-o] outfile\n"); - H0(" infile can be YUV or Y4M\n"); - H0(" outfile is raw HEVC bitstream\n"); - H0("\nExecutable Options:\n"); - H0("-h/--help Show this help text and exit\n"); - H0(" --fullhelp Show all options and exit\n"); - H0("-V/--version Show version info and exit\n"); - H0("\nOutput Options:\n"); - H0("-o/--output <filename> Bitstream output file name\n"); - H0("-D/--output-depth 8|10|12 Output bit depth (also internal bit depth). Default %d\n", param->internalBitDepth); - H0(" --log-level <string> Logging level: none error warning info debug full. Default %s\n", X265_NS::logLevelNames[param->logLevel + 1]); - H0(" --no-progress Disable CLI progress reports\n"); - H0(" --csv <filename> Comma separated log file, if csv-log-level > 0 frame level statistics, else one line per run\n"); - H0(" --csv-log-level <integer> Level of csv logging, if csv-log-level > 0 frame level statistics, else one line per run: 0-2\n"); - H0("\nInput Options:\n"); - H0(" --input <filename> Raw YUV or Y4M input file name. `-` for stdin\n"); - H1(" --y4m Force parsing of input stream as YUV4MPEG2 regardless of file extension\n"); - H0(" --fps <float|rational> Source frame rate (float or num/denom), auto-detected if Y4M\n"); - H0(" --input-res WxH Source picture size [w x h], auto-detected if Y4M\n"); - H1(" --input-depth <integer> Bit-depth of input file. 
Default 8\n"); - H1(" --input-csp <string> Chroma subsampling, auto-detected if Y4M\n"); - H1(" 0 - i400 (4:0:0 monochrome)\n"); - H1(" 1 - i420 (4:2:0 default)\n"); - H1(" 2 - i422 (4:2:2)\n"); - H1(" 3 - i444 (4:4:4)\n"); -#if ENABLE_HDR10_PLUS - H0(" --dhdr10-info <filename> JSON file containing the Creative Intent Metadata to be encoded as Dynamic Tone Mapping\n"); - H0(" --[no-]dhdr10-opt Insert tone mapping SEI only for IDR frames and when the tone mapping information changes. Default disabled\n"); -#endif - H0(" --dolby-vision-profile <float|integer> Specifies Dolby Vision profile ID. Currently only profile 5, profile 8.1 and profile 8.2 enabled. Specified as '5' or '50'. Default 0 (disabled).\n"); - H0(" --dolby-vision-rpu <filename> File containing Dolby Vision RPU metadata.\n" - " If given, x265's Dolby Vision metadata parser will fill the RPU field of input pictures with the metadata read from the file. Default NULL(disabled).\n"); - H0(" --nalu-file <filename> Text file containing SEI messages in the following format : <POC><space><PREFIX><space><NAL UNIT TYPE>/<SEI TYPE><space><SEI Payload>\n"); - H0("-f/--frames <integer> Maximum number of frames to encode. Default all\n"); - H0(" --seek <integer> First frame to encode\n"); - H1(" --[no-]interlace <bff|tff> Indicate input pictures are interlace fields in temporal order. Default progressive\n"); - H0(" --[no-]field Enable or disable field coding. Default %s\n", OPT( param->bField)); - H1(" --dither Enable dither if downscaling to 8 bit pixels. Default disabled\n"); - H0(" --[no-]copy-pic Copy buffers of input picture in frame. Default %s\n", OPT(param->bCopyPicToFrame)); - H0("\nQuality reporting metrics:\n"); - H0(" --[no-]ssim Enable reporting SSIM metric scores. Default %s\n", OPT(param->bEnableSsim)); - H0(" --[no-]psnr Enable reporting PSNR metric scores. 
Default %s\n", OPT(param->bEnablePsnr)); - H0("\nProfile, Level, Tier:\n"); - H0("-P/--profile <string> Enforce an encode profile: main, main10, mainstillpicture\n"); - H0(" --level-idc <integer|float> Force a minimum required decoder level (as '5.0' or '50')\n"); - H0(" --[no-]high-tier If a decoder level is specified, this modifier selects High tier of that level\n"); - H0(" --uhd-bd Enable UHD Bluray compatibility support\n"); - H0(" --[no-]allow-non-conformance Allow the encoder to generate profile NONE bitstreams. Default %s\n", OPT(param->bAllowNonConformance)); - H0("\nThreading, performance:\n"); - H0(" --pools <integer,...> Comma separated thread count per thread pool (pool per NUMA node)\n"); - H0(" '-' implies no threads on node, '+' implies one thread per core on node\n"); - H0("-F/--frame-threads <integer> Number of concurrently encoded frames. 0: auto-determined by core count\n"); - H0(" --[no-]wpp Enable Wavefront Parallel Processing. Default %s\n", OPT(param->bEnableWavefront)); - H0(" --[no-]slices <integer> Enable Multiple Slices feature. Default %d\n", param->maxSlices); - H0(" --[no-]pmode Parallel mode analysis. Default %s\n", OPT(param->bDistributeModeAnalysis)); - H0(" --[no-]pme Parallel motion estimation. Default %s\n", OPT(param->bDistributeMotionEstimation)); - H0(" --[no-]asm <bool|int|string> Override CPU detection. Default: auto\n"); - H0("\nPresets:\n"); - H0("-p/--preset <string> Trade off performance for compression efficiency. Default medium\n"); - H0(" ultrafast, superfast, veryfast, faster, fast, medium, slow, slower, veryslow, or placebo\n"); - H0("-t/--tune <string> Tune the settings for a particular type of source or situation:\n"); - H0(" psnr, ssim, grain, zerolatency, fastdecode\n"); - H0("\nQuad-Tree size and depth:\n"); - H0("-s/--ctu <64|32|16> Maximum CU size (WxH). Default %d\n", param->maxCUSize); - H0(" --min-cu-size <64|32|16|8> Minimum CU size (WxH). 
Default %d\n", param->minCUSize); - H0(" --max-tu-size <32|16|8|4> Maximum TU size (WxH). Default %d\n", param->maxTUSize); - H0(" --tu-intra-depth <integer> Max TU recursive depth for intra CUs. Default %d\n", param->tuQTMaxIntraDepth); - H0(" --tu-inter-depth <integer> Max TU recursive depth for inter CUs. Default %d\n", param->tuQTMaxInterDepth); - H0(" --limit-tu <0..4> Enable early exit from TU recursion for inter coded blocks. Default %d\n", param->limitTU); - H0("\nAnalysis:\n"); - H0(" --rd <1..6> Level of RDO in mode decision 1:least....6:full RDO. Default %d\n", param->rdLevel); - H0(" --[no-]psy-rd <0..5.0> Strength of psycho-visual rate distortion optimization, 0 to disable. Default %.1f\n", param->psyRd); - H0(" --[no-]rdoq-level <0|1|2> Level of RDO in quantization 0:none, 1:levels, 2:levels & coding groups. Default %d\n", param->rdoqLevel); - H0(" --[no-]psy-rdoq <0..50.0> Strength of psycho-visual optimization in RDO quantization, 0 to disable. Default %.1f\n", param->psyRdoq); - H0(" --dynamic-rd <0..4.0> Strength of dynamic RD, 0 to disable. Default %.2f\n", param->dynamicRd); - H0(" --[no-]ssim-rd Enable ssim rate distortion optimization, 0 to disable. Default %s\n", OPT(param->bSsimRd)); - H0(" --[no-]rd-refine Enable QP based RD refinement for rd levels 5 and 6. Default %s\n", OPT(param->bEnableRdRefine)); - H0(" --[no-]early-skip Enable early SKIP detection. Default %s\n", OPT(param->bEnableEarlySkip)); - H0(" --[no-]rskip Enable early exit from recursion. Default %s\n", OPT(param->bEnableRecursionSkip)); - H1(" --[no-]tskip-fast Enable fast intra transform skipping. Default %s\n", OPT(param->bEnableTSkipFast)); - H1(" --[no-]splitrd-skip Enable skipping split RD analysis when sum of split CU rdCost larger than one split CU rdCost for Intra CU. Default %s\n", OPT(param->bEnableSplitRdSkip)); - H1(" --nr-intra <integer> An integer value in range of 0 to 2000, which denotes strength of noise reduction in intra CUs. 
Default 0\n"); - H1(" --nr-inter <integer> An integer value in range of 0 to 2000, which denotes strength of noise reduction in inter CUs. Default 0\n"); - H0(" --ctu-info <integer> Enable receiving ctu information asynchronously and determine reaction to the CTU information (0, 1, 2, 4, 6) Default 0\n" - " - 1: force the partitions if CTU information is present\n" - " - 2: functionality of (1) and reduce qp if CTU information has changed\n" - " - 4: functionality of (1) and force Inter modes when CTU Information has changed, merge/skip otherwise\n" - " Enable this option only when planning to invoke the API function x265_encoder_ctu_info to copy ctu-info asynchronously\n"); - H0("\nCoding tools:\n"); - H0("-w/--[no-]weightp Enable weighted prediction in P slices. Default %s\n", OPT(param->bEnableWeightedPred)); - H0(" --[no-]weightb Enable weighted prediction in B slices. Default %s\n", OPT(param->bEnableWeightedBiPred)); - H0(" --[no-]cu-lossless Consider lossless mode in CU RDO decisions. Default %s\n", OPT(param->bCULossless)); - H0(" --[no-]signhide Hide sign bit of one coeff per TU (rdo). Default %s\n", OPT(param->bEnableSignHiding)); - H1(" --[no-]tskip Enable intra 4x4 transform skipping. Default %s\n", OPT(param->bEnableTransformSkip)); - H0("\nTemporal / motion search options:\n"); - H0(" --max-merge <1..5> Maximum number of merge candidates. Default %d\n", param->maxNumMergeCand); - H0(" --ref <integer> max number of L0 references to be allowed (1 .. 16) Default %d\n", param->maxNumReferences); - H0(" --limit-refs <0|1|2|3> Limit references per depth (1) or CU (2) or both (3). Default %d\n", param->limitReferences); - H0(" --me <string> Motion search method dia hex umh star full. Default %d\n", param->searchMethod); - H0("-m/--subme <integer> Amount of subpel refinement to perform (0:least .. 7:most). Default %d \n", param->subpelRefine); - H0(" --merange <integer> Motion search range. 
Default %d\n", param->searchRange); - H0(" --[no-]rect Enable rectangular motion partitions Nx2N and 2NxN. Default %s\n", OPT(param->bEnableRectInter)); - H0(" --[no-]amp Enable asymmetric motion partitions, requires --rect. Default %s\n", OPT(param->bEnableAMP)); - H0(" --[no-]limit-modes Limit rectangular and asymmetric motion predictions. Default %d\n", param->limitModes); - H1(" --[no-]temporal-mvp Enable temporal MV predictors. Default %s\n", OPT(param->bEnableTemporalMvp)); - H1(" --[no-]hme Enable Hierarchical Motion Estimation. Default %s\n", OPT(param->bEnableHME)); - H1(" --hme-search <string> Motion search-method for HME L0,L1 and L2. Default(L0,L1,L2) is %d,%d,%d\n", param->hmeSearchMethod[0], param->hmeSearchMethod[1], param->hmeSearchMethod[2]); - H1(" --hme-range <int>,<int>,<int> Motion search-range for HME L0,L1 and L2. Default(L0,L1,L2) is %d,%d,%d\n", param->hmeRange[0], param->hmeRange[1], param->hmeRange[2]); - H0("\nSpatial / intra options:\n"); - H0(" --[no-]strong-intra-smoothing Enable strong intra smoothing for 32x32 blocks. Default %s\n", OPT(param->bEnableStrongIntraSmoothing)); - H0(" --[no-]constrained-intra Constrained intra prediction (use only intra coded reference pixels) Default %s\n", OPT(param->bEnableConstrainedIntra)); - H0(" --[no-]b-intra Enable intra in B frames in veryslow presets. Default %s\n", OPT(param->bIntraInBFrames)); - H0(" --[no-]fast-intra Enable faster search method for angular intra predictions. Default %s\n", OPT(param->bEnableFastIntra)); - H0(" --rdpenalty <0..2> penalty for 32x32 intra TU in non-I slices. 0:disabled 1:RD-penalty 2:maximum. Default %d\n", param->rdPenalty); - H0("\nSlice decision options:\n"); - H0(" --[no-]open-gop Enable open-GOP, allows I slices to be non-IDR. Default %s\n", OPT(param->bOpenGOP)); - H0("-I/--keyint <integer> Max IDR period in frames. -1 for infinite-gop. 
Default %d\n", param->keyframeMax); - H0("-i/--min-keyint <integer> Scenecuts closer together than this are coded as I, not IDR. Default: auto\n"); - H0(" --gop-lookahead <integer> Extends gop boundary if a scenecut is found within this from keyint boundary. Default 0\n"); - H0(" --no-scenecut Disable adaptive I-frame decision\n"); - H0(" --scenecut <integer> How aggressively to insert extra I-frames. Default %d\n", param->scenecutThreshold); - H1(" --scenecut-bias <0..100.0> Bias for scenecut detection. Default %.2f\n", param->scenecutBias); - H0(" --hist-scenecut Enables histogram based scene-cut detection using histogram based algorithm.\n"); - H0(" --no-hist-scenecut Disables histogram based scene-cut detection using histogram based algorithm.\n"); - H1(" --hist-threshold <0.0..2.0> Luma Edge histogram's Normalized SAD threshold for histogram based scenecut detection Default %.2f\n", param->edgeTransitionThreshold); - H0(" --[no-]fades Enable detection and handling of fade-in regions. Default %s\n", OPT(param->bEnableFades)); - H1(" --[no-]scenecut-aware-qp Enable increasing QP for frames inside the scenecut window after scenecut. Default %s\n", OPT(param->bEnableSceneCutAwareQp)); - H1(" --scenecut-window <0..1000> QP incremental duration(in milliseconds) when scenecut-aware-qp is enabled. Default %d\n", param->scenecutWindow); - H1(" --max-qp-delta <0..10> QP offset to increment with base QP for inter-frames. Default %d\n", param->maxQpDelta); - H0(" --radl <integer> Number of RADL pictures allowed in front of IDR. Default %d\n", param->radl); - H0(" --intra-refresh Use Periodic Intra Refresh instead of IDR frames\n"); - H0(" --rc-lookahead <integer> Number of frames for frame-type lookahead (determines encoder latency) Default %d\n", param->lookaheadDepth); - H1(" --lookahead-slices <0..16> Number of slices to use per lookahead cost estimate. 
Default %d\n", param->lookaheadSlices); - H0(" --lookahead-threads <integer> Number of threads to be dedicated to perform lookahead only. Default %d\n", param->lookaheadThreads); - H0("-b/--bframes <0..16> Maximum number of consecutive b-frames. Default %d\n", param->bframes); - H1(" --bframe-bias <integer> Bias towards B frame decisions. Default %d\n", param->bFrameBias); - H0(" --b-adapt <0..2> 0 - none, 1 - fast, 2 - full (trellis) adaptive B frame scheduling. Default %d\n", param->bFrameAdaptive); - H0(" --[no-]b-pyramid Use B-frames as references. Default %s\n", OPT(param->bBPyramid)); - H1(" --qpfile <string> Force frametypes and QPs for some or all frames\n"); - H1(" Format of each line: framenumber frametype QP\n"); - H1(" QP is optional (none lets x265 choose). Frametypes: I,i,K,P,B,b.\n"); - H1(" QPs are restricted by qpmin/qpmax.\n"); - H1(" --force-flush <integer> Force the encoder to flush frames. Default %d\n", param->forceFlush); - H1(" 0 - flush the encoder only when all the input pictures are over.\n"); - H1(" 1 - flush all the frames even when the input is not over. Slicetype decision may change with this option.\n"); - H1(" 2 - flush the slicetype decided frames only.\n"); - H0(" --[no-]-hrd-concat Set HRD concatenation flag for the first keyframe in the buffering period SEI. Default %s\n", OPT(param->bEnableHRDConcatFlag)); - H0("\nRate control, Adaptive Quantization:\n"); - H0(" --bitrate <integer> Target bitrate (kbps) for ABR (implied). Default %d\n", param->rc.bitrate); - H1("-q/--qp <integer> QP for P slices in CQP mode (implied). --ipratio and --pbration determine other slice QPs\n"); - H0(" --crf <float> Quality-based VBR (0-51). Default %.1f\n", param->rc.rfConstant); - H1(" --[no-]lossless Enable lossless: bypass transform, quant and loop filters globally. Default %s\n", OPT(param->bLossless)); - H1(" --crf-max <float> With CRF+VBV, limit RF to this value. 
Default %f\n", param->rc.rfConstantMax); - H1(" May cause VBV underflows!\n"); - H1(" --crf-min <float> With CRF+VBV, limit RF to this value. Default %f\n", param->rc.rfConstantMin); - H1(" this specifies a minimum rate factor value for encode!\n"); - H0(" --vbv-maxrate <integer> Max local bitrate (kbit/s). Default %d\n", param->rc.vbvMaxBitrate); - H0(" --vbv-bufsize <integer> Set size of the VBV buffer (kbit). Default %d\n", param->rc.vbvBufferSize); - H0(" --vbv-init <float> Initial VBV buffer occupancy (fraction of bufsize or in kbits). Default %.2f\n", param->rc.vbvBufferInit); - H0(" --vbv-end <float> Final VBV buffer emptiness (fraction of bufsize or in kbits). Default 0 (disabled)\n"); - H0(" --vbv-end-fr-adj <float> Frame from which qp has to be adjusted to achieve final decode buffer emptiness. Default 0\n"); - H0(" --chunk-start <integer> First frame of the chunk. Default 0 (disabled)\n"); - H0(" --chunk-end <integer> Last frame of the chunk. Default 0 (disabled)\n"); - H0(" --pass Multi pass rate control.\n" - " - 1 : First pass, creates stats file\n" - " - 2 : Last pass, does not overwrite stats file\n" - " - 3 : Nth pass, overwrites stats file\n"); - H0(" --[no-]multi-pass-opt-analysis Refine analysis in 2 pass based on analysis information from pass 1\n"); - H0(" --[no-]multi-pass-opt-distortion Use distortion of CTU from pass 1 to refine qp in 2 pass\n"); - H0(" --stats Filename for stats file in multipass pass rate control. Default x265_2pass.log\n"); - H0(" --[no-]analyze-src-pics Motion estimation uses source frame planes. Default disable\n"); - H0(" --[no-]slow-firstpass Enable a slow first pass in a multipass rate control mode. Default %s\n", OPT(param->rc.bEnableSlowFirstPass)); - H0(" --[no-]strict-cbr Enable stricter conditions and tolerance for bitrate deviations in CBR mode. Default %s\n", OPT(param->rc.bStrictCbr)); - H0(" --analysis-save <filename> Dump analysis info into the specified file. 
Default Disabled\n"); - H0(" --analysis-load <filename> Load analysis buffers from the file specified. Default Disabled\n"); - H0(" --analysis-reuse-file <filename> Specify file name used for either dumping or reading analysis data. Deault x265_analysis.dat\n"); - H0(" --analysis-reuse-level <1..10> Level of analysis reuse indicates amount of info stored/reused in save/load mode, 1:least..10:most. Now deprecated. Default %d\n", param->analysisReuseLevel); - H0(" --analysis-save-reuse-level <1..10> Indicates the amount of analysis info stored in save mode, 1:least..10:most. Default %d\n", param->analysisSaveReuseLevel); - H0(" --analysis-load-reuse-level <1..10> Indicates the amount of analysis info reused in load mode, 1:least..10:most. Default %d\n", param->analysisLoadReuseLevel); - H0(" --refine-analysis-type <string> Reuse anlaysis information received through API call. Supported options are avc and hevc. Default disabled - %d\n", param->bAnalysisType); - H0(" --scale-factor <int> Specify factor by which input video is scaled down for analysis save mode. 
Default %d\n", param->scaleFactor); - H0(" --refine-intra <0..4> Enable intra refinement for encode that uses analysis-load.\n" - " - 0 : Forces both mode and depth from the save encode.\n" - " - 1 : Functionality of (0) + evaluate all intra modes at min-cu-size's depth when current depth is one smaller than min-cu-size's depth.\n" - " - 2 : Functionality of (1) + irrespective of size evaluate all angular modes when the save encode decides the best mode as angular.\n" - " - 3 : Functionality of (1) + irrespective of size evaluate all intra modes.\n" - " - 4 : Re-evaluate all intra blocks, does not reuse data from save encode.\n" - " Default:%d\n", param->intraRefine); - H0(" --refine-inter <0..3> Enable inter refinement for encode that uses analysis-load.\n" - " - 0 : Forces both mode and depth from the save encode.\n" - " - 1 : Functionality of (0) + evaluate all inter modes at min-cu-size's depth when current depth is one smaller than\n" - " min-cu-size's depth. When save encode decides the current block as skip(for all sizes) evaluate skip/merge.\n" - " - 2 : Functionality of (1) + irrespective of size restrict the modes evaluated when specific modes are decided as the best mode by the save encode.\n" - " - 3 : Functionality of (1) + irrespective of size evaluate all inter modes.\n" - " Default:%d\n", param->interRefine); - H0(" --[no-]dynamic-refine Dynamically changes refine-inter level for each CU. Default %s\n", OPT(param->bDynamicRefine)); - H0(" --refine-mv <1..3> Enable mv refinement for load mode. Default %d\n", param->mvRefine); - H0(" --refine-ctu-distortion Store/normalize ctu distortion in analysis-save/load.\n" - " - 0 : Disabled.\n" - " - 1 : Store/Load ctu distortion to/from the file specified in analysis-save/load.\n" - " Default 0 - Disabled\n"); - H0(" --aq-mode <integer> Mode for Adaptive Quantization - 0:none 1:uniform AQ 2:auto variance 3:auto variance with bias to dark scenes 4:auto variance with edge information. 
Default %d\n", param->rc.aqMode); - H0(" --[no-]hevc-aq Mode for HEVC Adaptive Quantization. Default %s\n", OPT(param->rc.hevcAq)); - H0(" --aq-strength <float> Reduces blocking and blurring in flat and textured areas (0 to 3.0). Default %.2f\n", param->rc.aqStrength); - H0(" --qp-adaptation-range <float> Delta QP range by QP adaptation based on a psycho-visual model (1.0 to 6.0). Default %.2f\n", param->rc.qpAdaptationRange); - H0(" --[no-]aq-motion Block level QP adaptation based on the relative motion between the block and the frame. Default %s\n", OPT(param->bAQMotion)); - H0(" --qg-size <int> Specifies the size of the quantization group (64, 32, 16, 8). Default %d\n", param->rc.qgSize); - H0(" --[no-]cutree Enable cutree for Adaptive Quantization. Default %s\n", OPT(param->rc.cuTree)); - H0(" --[no-]rc-grain Enable ratecontrol mode to handle grains specifically. turned on with tune grain. Default %s\n", OPT(param->rc.bEnableGrain)); - H1(" --ipratio <float> QP factor between I and P. Default %.2f\n", param->rc.ipFactor); - H1(" --pbratio <float> QP factor between P and B. Default %.2f\n", param->rc.pbFactor); - H1(" --qcomp <float> Weight given to predicted complexity. Default %.2f\n", param->rc.qCompress); - H1(" --qpstep <integer> The maximum single adjustment in QP allowed to rate control. Default %d\n", param->rc.qpStep); - H1(" --qpmin <integer> sets a hard lower limit on QP allowed to ratecontrol. Default %d\n", param->rc.qpMin); - H1(" --qpmax <integer> sets a hard upper limit on QP allowed to ratecontrol. Default %d\n", param->rc.qpMax); - H0(" --[no-]const-vbv Enable consistent vbv. turned on with tune grain. Default %s\n", OPT(param->rc.bEnableConstVbv)); - H1(" --cbqpoffs <integer> Chroma Cb QP Offset [-12..12]. Default %d\n", param->cbQpOffset); - H1(" --crqpoffs <integer> Chroma Cr QP Offset [-12..12]. Default %d\n", param->crQpOffset); - H1(" --scaling-list <string> Specify a file containing HM style quant scaling lists or 'default' or 'off'. 
Default: off\n"); - H1(" --zones <zone0>/<zone1>/... Tweak the bitrate of regions of the video\n"); - H1(" Each zone is of the form\n"); - H1(" <start frame>,<end frame>,<option>\n"); - H1(" where <option> is either\n"); - H1(" q=<integer> (force QP)\n"); - H1(" or b=<float> (bitrate multiplier)\n"); - H0(" --zonefile <filename> Zone file containing the zone boundaries and the parameters to be reconfigured.\n"); - H1(" --lambda-file <string> Specify a file containing replacement values for the lambda tables\n"); - H1(" MAX_MAX_QP+1 floats for lambda table, then again for lambda2 table\n"); - H1(" Blank lines and lines starting with hash(#) are ignored\n"); - H1(" Comma is considered to be white-space\n"); - H0(" --max-ausize-factor <float> This value controls the maximum AU size defined in specification.\n"); - H0(" It represents the percentage of maximum AU size used. Default %.1f\n", param->maxAUSizeFactor); - H0("\nLoop filters (deblock and SAO):\n"); - H0(" --[no-]deblock Enable Deblocking Loop Filter, optionally specify tC:Beta offsets Default %s\n", OPT(param->bEnableLoopFilter)); - H0(" --[no-]sao Enable Sample Adaptive Offset. Default %s\n", OPT(param->bEnableSAO)); - H1(" --[no-]sao-non-deblock Use non-deblocked pixels, else right/bottom boundary areas skipped. Default %s\n", OPT(param->bSaoNonDeblocked)); - H0(" --[no-]limit-sao Limit Sample Adaptive Offset types. Default %s\n", OPT(param->bLimitSAO)); - H0(" --selective-sao <int> Enable slice-level SAO filter. Default %d\n", param->selectiveSAO); - H0("\nVUI options:\n"); - H0(" --sar <width:height|int> Sample Aspect Ratio, the ratio of width to height of an individual pixel.\n"); - H0(" Choose from 0=undef, 1=1:1(\"square\"), 2=12:11, 3=10:11, 4=16:11,\n"); - H0(" 5=40:33, 6=24:11, 7=20:11, 8=32:11, 9=80:33, 10=18:11, 11=15:11,\n"); - H0(" 12=64:33, 13=160:99, 14=4:3, 15=3:2, 16=2:1 or custom ratio of <int:int>. 
Default %d\n", param->vui.aspectRatioIdc); - H1(" --display-window <string> Describe overscan cropping region as 'left,top,right,bottom' in pixels\n"); - H1(" --overscan <string> Specify whether it is appropriate for decoder to show cropped region: undef, show or crop. Default undef\n"); - H0(" --videoformat <string> Specify video format from undef, component, pal, ntsc, secam, mac. Default undef\n"); - H0(" --range <string> Specify black level and range of luma and chroma signals as full or limited Default limited\n"); - H0(" --colorprim <string> Specify color primaries from bt709, unknown, reserved, bt470m, bt470bg, smpte170m,\n"); - H0(" smpte240m, film, bt2020, smpte428, smpte431, smpte432. Default undef\n"); - H0(" --transfer <string> Specify transfer characteristics from bt709, unknown, reserved, bt470m, bt470bg, smpte170m,\n"); - H0(" smpte240m, linear, log100, log316, iec61966-2-4, bt1361e, iec61966-2-1,\n"); - H0(" bt2020-10, bt2020-12, smpte2084, smpte428, arib-std-b67. Default undef\n"); - H1(" --colormatrix <string> Specify color matrix setting from undef, bt709, fcc, bt470bg, smpte170m,\n"); - H1(" smpte240m, GBR, YCgCo, bt2020nc, bt2020c, smpte2085, chroma-derived-nc, chroma-derived-c, ictcp. Default undef\n"); - H1(" --chromaloc <integer> Specify chroma sample location (0 to 5). Default of %d\n", param->vui.chromaSampleLocTypeTopField); - H0(" --master-display <string> SMPTE ST 2086 master display color volume info SEI (HDR)\n"); - H0(" format: G(x,y)B(x,y)R(x,y)WP(x,y)L(max,min)\n"); - H0(" --max-cll <string> Specify content light level info SEI as \"cll,fall\" (HDR).\n"); - H0(" --[no-]cll Emit content light level info SEI. Default %s\n", OPT(param->bEmitCLL)); - H0(" --[no-]hdr10 Control dumping of HDR10 SEI packet. If max-cll or master-display has non-zero values, this is enabled. Default %s\n", OPT(param->bEmitHDR10SEI)); - H0(" --[no-]hdr-opt Add luma and chroma offsets for HDR/WCG content. Default %s. 
Now deprecated.\n", OPT(param->bHDROpt)); - H0(" --[no-]hdr10-opt Block-level QP optimization for HDR10 content. Default %s.\n", OPT(param->bHDR10Opt)); - H0(" --min-luma <integer> Minimum luma plane value of input source picture\n"); - H0(" --max-luma <integer> Maximum luma plane value of input source picture\n"); - H0("\nBitstream options:\n"); - H0(" --[no-]repeat-headers Emit SPS and PPS headers at each keyframe. Default %s\n", OPT(param->bRepeatHeaders)); - H0(" --[no-]info Emit SEI identifying encoder and parameters. Default %s\n", OPT(param->bEmitInfoSEI)); - H0(" --[no-]hrd Enable HRD parameters signaling. Default %s\n", OPT(param->bEmitHRDSEI)); - H0(" --[no-]idr-recovery-sei Emit recovery point infor SEI at each IDR frame \n"); - H0(" --[no-]temporal-layers Enable a temporal sublayer for unreferenced B frames. Default %s\n", OPT(param->bEnableTemporalSubLayers)); - H0(" --[no-]aud Emit access unit delimiters at the start of each access unit. Default %s\n", OPT(param->bEnableAccessUnitDelimiters)); - H1(" --hash <integer> Decoded Picture Hash SEI 0: disabled, 1: MD5, 2: CRC, 3: Checksum. Default %d\n", param->decodedPictureHashSEI); - H0(" --atc-sei <integer> Emit the alternative transfer characteristics SEI message where the integer is the preferred transfer characteristics. Default disabled\n"); - H0(" --pic-struct <integer> Set the picture structure and emits it in the picture timing SEI message. Values in the range 0..12. See D.3.3 of the HEVC spec. for a detailed explanation.\n"); - H0(" --log2-max-poc-lsb <integer> Maximum of the picture order count\n"); - H0(" --[no-]vui-timing-info Emit VUI timing information in the bistream. Default %s\n", OPT(param->bEmitVUITimingInfo)); - H0(" --[no-]vui-hrd-info Emit VUI HRD information in the bistream. Default %s\n", OPT(param->bEmitVUIHRDInfo)); - H0(" --[no-]opt-qp-pps Dynamically optimize QP in PPS (instead of default 26) based on QPs in previous GOP. 
Default %s\n", OPT(param->bOptQpPPS)); - H0(" --[no-]opt-ref-list-length-pps Dynamically set L0 and L1 ref list length in PPS (instead of default 0) based on values in last GOP. Default %s\n", OPT(param->bOptRefListLengthPPS)); - H0(" --[no-]multi-pass-opt-rps Enable storing commonly used RPS in SPS in multi pass mode. Default %s\n", OPT(param->bMultiPassOptRPS)); - H0(" --[no-]opt-cu-delta-qp Optimize to signal consistent CU level delta QPs in frame. Default %s\n", OPT(param->bOptCUDeltaQP)); - H1("\nReconstructed video options (debugging):\n"); - H1("-r/--recon <filename> Reconstructed raw image YUV or Y4M output file name\n"); - H1(" --recon-depth <integer> Bit-depth of reconstructed raw image file. Defaults to input bit depth, or 8 if Y4M\n"); - H1(" --recon-y4m-exec <string> pipe reconstructed frames to Y4M viewer, ex:\"ffplay -i pipe:0 -autoexit\"\n"); - H0(" --lowpass-dct Use low-pass subband dct approximation. Default %s\n", OPT(param->bLowPassDct)); - H0(" --[no-]frame-dup Enable Frame duplication. Default %s\n", OPT(param->bEnableFrameDuplication)); - H0(" --dup-threshold <integer> PSNR threshold for Frame duplication. Default %d\n", param->dupThreshold); -#ifdef SVT_HEVC - H0(" --[no]svt Enable SVT HEVC encoder %s\n", OPT(param->bEnableSvtHevc)); - H0(" --[no-]svt-hme Enable Hierarchial motion estimation(HME) in SVT HEVC encoder \n"); - H0(" --svt-search-width Motion estimation search area width for SVT HEVC encoder \n"); - H0(" --svt-search-height Motion estimation search area height for SVT HEVC encoder \n"); - H0(" --[no-]svt-compressed-ten-bit-format Enable 8+2 encoding mode for 10bit input in SVT HEVC encoder \n"); - H0(" --[no-]svt-speed-control Enable speed control functionality to achieve real time encoding speed for SVT HEVC encoder \n"); - H0(" --svt-preset-tuner Enable additional faster presets of SVT; This only has to be used on top of x265's ultrafast preset. 
Accepts values in the range of 0-2 \n"); - H0(" --svt-hierarchical-level Hierarchical layer for SVT-HEVC encoder; Accepts inputs in the range 0-3 \n"); - H0(" --svt-base-layer-switch-mode Select whether B/P slice should be used in base layer for SVT-HEVC encoder. 0-Use B-frames; 1-Use P frames in the base layer \n"); - H0(" --svt-pred-struct Select pred structure for SVT HEVC encoder; Accepts inputs in the range 0-2 \n"); - H0(" --[no-]svt-fps-in-vps Enable VPS timing info for SVT HEVC encoder \n"); -#endif - H1("\nExecutable return codes:\n"); - H1(" 0 - encode successful\n"); - H1(" 1 - unable to parse command line\n"); - H1(" 2 - unable to open encoder\n"); - H1(" 3 - unable to generate stream headers\n"); - H1(" 4 - encoder abort\n"); -#undef OPT -#undef H0 -#undef H1 - if (level < X265_LOG_DEBUG) - printf("\nUse --fullhelp for a full listing (or --log-level full --help)\n"); - printf("\n\nComplete documentation may be found at http://x265.readthedocs.org/en/default/cli.html\n"); - exit(1); -} + /* in microseconds */ + static const int UPDATE_INTERVAL = 250000; + CLIOptions() + { + input = NULL; + recon = NULL; + output = NULL; + qpfile = NULL; + zoneFile = NULL; + dolbyVisionRpu = NULL; + reconPlayCmd = NULL; + api = NULL; + param = NULL; + vmafData = NULL; + framesToBeEncoded = seek = 0; + totalbytes = 0; + bProgress = true; + bForceY4m = false; + startTime = x265_mdate(); + prevUpdateTime = 0; + bDither = false; + isAbrLadderConfig = false; + enableScaler = false; + encName = NULL; + reuseName = NULL; + encId = 0; + refId = -1; + loadLevel = 0; + saveLevel = 0; + numRefs = 0; + argCnt = 0; + } + void destroy(); + void printStatus(uint32_t frameNum); + bool parse(int argc, char **argv); + bool parseZoneParam(int argc, char **argv, x265_param* globalParam, int zonefileCount); + bool parseQPFile(x265_picture &pic_org); + bool parseZoneFile(); + int rpuParser(x265_picture * pic); + }; #ifdef __cplusplus } #endif
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS) is an openSUSE project.